author    thegeorg <[email protected]> 2022-02-10 16:45:12 +0300
committer Daniil Cherednik <[email protected]> 2022-02-10 16:45:12 +0300
commit    49116032d905455a7b1c994e4a696afc885c1e71 (patch)
tree      be835aa92c6248212e705f25388ebafcf84bc7a1 /contrib/libs/apache/arrow/cpp
parent    4e839db24a3bbc9f1c610c43d6faaaa99824dcca (diff)
Restoring authorship annotation for <[email protected]>. Commit 2 of 2.
Diffstat (limited to 'contrib/libs/apache/arrow/cpp')
-rw-r--r--  contrib/libs/apache/arrow/cpp/src/arrow/adapters/orc/adapter.cc  1190
-rw-r--r--  contrib/libs/apache/arrow/cpp/src/arrow/adapters/orc/adapter.h  362
-rw-r--r--  contrib/libs/apache/arrow/cpp/src/arrow/adapters/orc/adapter_util.cc  2138
-rw-r--r--  contrib/libs/apache/arrow/cpp/src/arrow/adapters/orc/adapter_util.h  114
-rw-r--r--  contrib/libs/apache/arrow/cpp/src/arrow/array/array_base.cc  30
-rw-r--r--  contrib/libs/apache/arrow/cpp/src/arrow/array/array_base.h  32
-rw-r--r--  contrib/libs/apache/arrow/cpp/src/arrow/array/array_binary.cc  12
-rw-r--r--  contrib/libs/apache/arrow/cpp/src/arrow/array/array_binary.h  42
-rw-r--r--  contrib/libs/apache/arrow/cpp/src/arrow/array/array_decimal.cc  32
-rw-r--r--  contrib/libs/apache/arrow/cpp/src/arrow/array/array_decimal.h  32
-rw-r--r--  contrib/libs/apache/arrow/cpp/src/arrow/array/array_dict.cc  394
-rw-r--r--  contrib/libs/apache/arrow/cpp/src/arrow/array/array_dict.h  114
-rw-r--r--  contrib/libs/apache/arrow/cpp/src/arrow/array/array_nested.cc  4
-rw-r--r--  contrib/libs/apache/arrow/cpp/src/arrow/array/array_primitive.h  22
-rw-r--r--  contrib/libs/apache/arrow/cpp/src/arrow/array/builder_adaptive.h  40
-rw-r--r--  contrib/libs/apache/arrow/cpp/src/arrow/array/builder_base.cc  318
-rw-r--r--  contrib/libs/apache/arrow/cpp/src/arrow/array/builder_base.h  86
-rw-r--r--  contrib/libs/apache/arrow/cpp/src/arrow/array/builder_binary.cc  28
-rw-r--r--  contrib/libs/apache/arrow/cpp/src/arrow/array/builder_binary.h  132
-rw-r--r--  contrib/libs/apache/arrow/cpp/src/arrow/array/builder_decimal.cc  70
-rw-r--r--  contrib/libs/apache/arrow/cpp/src/arrow/array/builder_decimal.h  62
-rw-r--r--  contrib/libs/apache/arrow/cpp/src/arrow/array/builder_dict.cc  2
-rw-r--r--  contrib/libs/apache/arrow/cpp/src/arrow/array/builder_dict.h  68
-rw-r--r--  contrib/libs/apache/arrow/cpp/src/arrow/array/builder_nested.cc  60
-rw-r--r--  contrib/libs/apache/arrow/cpp/src/arrow/array/builder_nested.h  98
-rw-r--r--  contrib/libs/apache/arrow/cpp/src/arrow/array/builder_primitive.cc  4
-rw-r--r--  contrib/libs/apache/arrow/cpp/src/arrow/array/builder_primitive.h  76
-rw-r--r--  contrib/libs/apache/arrow/cpp/src/arrow/array/builder_union.cc  4
-rw-r--r--  contrib/libs/apache/arrow/cpp/src/arrow/array/builder_union.h  102
-rw-r--r--  contrib/libs/apache/arrow/cpp/src/arrow/array/concatenate.cc  166
-rw-r--r--  contrib/libs/apache/arrow/cpp/src/arrow/array/data.cc  48
-rw-r--r--  contrib/libs/apache/arrow/cpp/src/arrow/array/data.h  28
-rw-r--r--  contrib/libs/apache/arrow/cpp/src/arrow/array/util.cc  472
-rw-r--r--  contrib/libs/apache/arrow/cpp/src/arrow/array/util.h  22
-rw-r--r--  contrib/libs/apache/arrow/cpp/src/arrow/array/validate.cc  668
-rw-r--r--  contrib/libs/apache/arrow/cpp/src/arrow/array/validate.h  44
-rw-r--r--  contrib/libs/apache/arrow/cpp/src/arrow/buffer.h  12
-rw-r--r--  contrib/libs/apache/arrow/cpp/src/arrow/buffer_builder.h  132
-rw-r--r--  contrib/libs/apache/arrow/cpp/src/arrow/builder.cc  4
-rw-r--r--  contrib/libs/apache/arrow/cpp/src/arrow/c/bridge.cc  40
-rw-r--r--  contrib/libs/apache/arrow/cpp/src/arrow/chunked_array.cc  56
-rw-r--r--  contrib/libs/apache/arrow/cpp/src/arrow/chunked_array.h  14
-rw-r--r--  contrib/libs/apache/arrow/cpp/src/arrow/compare.cc  1098
-rw-r--r--  contrib/libs/apache/arrow/cpp/src/arrow/compare.h  52
-rw-r--r--  contrib/libs/apache/arrow/cpp/src/arrow/compute/api_aggregate.cc  292
-rw-r--r--  contrib/libs/apache/arrow/cpp/src/arrow/compute/api_aggregate.h  594
-rw-r--r--  contrib/libs/apache/arrow/cpp/src/arrow/compute/api_scalar.cc  754
-rw-r--r--  contrib/libs/apache/arrow/cpp/src/arrow/compute/api_scalar.h  1410
-rw-r--r--  contrib/libs/apache/arrow/cpp/src/arrow/compute/api_vector.cc  296
-rw-r--r--  contrib/libs/apache/arrow/cpp/src/arrow/compute/api_vector.h  342
-rw-r--r--  contrib/libs/apache/arrow/cpp/src/arrow/compute/cast.cc  178
-rw-r--r--  contrib/libs/apache/arrow/cpp/src/arrow/compute/cast.h  68
-rw-r--r--  contrib/libs/apache/arrow/cpp/src/arrow/compute/cast_internal.h  2
-rw-r--r--  contrib/libs/apache/arrow/cpp/src/arrow/compute/exec.cc  742
-rw-r--r--  contrib/libs/apache/arrow/cpp/src/arrow/compute/exec.h  62
-rw-r--r--  contrib/libs/apache/arrow/cpp/src/arrow/compute/exec/exec_plan.cc  1646
-rw-r--r--  contrib/libs/apache/arrow/cpp/src/arrow/compute/exec/exec_plan.h  574
-rw-r--r--  contrib/libs/apache/arrow/cpp/src/arrow/compute/exec/expression.cc  2372
-rw-r--r--  contrib/libs/apache/arrow/cpp/src/arrow/compute/exec/expression.h  538
-rw-r--r--  contrib/libs/apache/arrow/cpp/src/arrow/compute/exec/expression_internal.h  672
-rw-r--r--  contrib/libs/apache/arrow/cpp/src/arrow/compute/exec/key_compare.cc  536
-rw-r--r--  contrib/libs/apache/arrow/cpp/src/arrow/compute/exec/key_compare.h  202
-rw-r--r--  contrib/libs/apache/arrow/cpp/src/arrow/compute/exec/key_encode.cc  3298
-rw-r--r--  contrib/libs/apache/arrow/cpp/src/arrow/compute/exec/key_encode.h  1270
-rw-r--r--  contrib/libs/apache/arrow/cpp/src/arrow/compute/exec/key_hash.cc  476
-rw-r--r--  contrib/libs/apache/arrow/cpp/src/arrow/compute/exec/key_hash.h  188
-rw-r--r--  contrib/libs/apache/arrow/cpp/src/arrow/compute/exec/key_map.cc  1220
-rw-r--r--  contrib/libs/apache/arrow/cpp/src/arrow/compute/exec/key_map.h  344
-rw-r--r--  contrib/libs/apache/arrow/cpp/src/arrow/compute/exec/util.cc  556
-rw-r--r--  contrib/libs/apache/arrow/cpp/src/arrow/compute/exec/util.h  342
-rw-r--r--  contrib/libs/apache/arrow/cpp/src/arrow/compute/exec_internal.h  30
-rw-r--r--  contrib/libs/apache/arrow/cpp/src/arrow/compute/function.cc  382
-rw-r--r--  contrib/libs/apache/arrow/cpp/src/arrow/compute/function.h  272
-rw-r--r--  contrib/libs/apache/arrow/cpp/src/arrow/compute/function_internal.cc  226
-rw-r--r--  contrib/libs/apache/arrow/cpp/src/arrow/compute/function_internal.h  1252
-rw-r--r--  contrib/libs/apache/arrow/cpp/src/arrow/compute/kernel.cc  58
-rw-r--r--  contrib/libs/apache/arrow/cpp/src/arrow/compute/kernel.h  168
-rw-r--r--  contrib/libs/apache/arrow/cpp/src/arrow/compute/kernels/aggregate_basic.cc  902
-rw-r--r--  contrib/libs/apache/arrow/cpp/src/arrow/compute/kernels/aggregate_basic_internal.h  236
-rw-r--r--  contrib/libs/apache/arrow/cpp/src/arrow/compute/kernels/aggregate_internal.h  242
-rw-r--r--  contrib/libs/apache/arrow/cpp/src/arrow/compute/kernels/aggregate_mode.cc  644
-rw-r--r--  contrib/libs/apache/arrow/cpp/src/arrow/compute/kernels/aggregate_quantile.cc  986
-rw-r--r--  contrib/libs/apache/arrow/cpp/src/arrow/compute/kernels/aggregate_tdigest.cc  328
-rw-r--r--  contrib/libs/apache/arrow/cpp/src/arrow/compute/kernels/aggregate_var_std.cc  344
-rw-r--r--  contrib/libs/apache/arrow/cpp/src/arrow/compute/kernels/codegen_internal.cc  316
-rw-r--r--  contrib/libs/apache/arrow/cpp/src/arrow/compute/kernels/codegen_internal.h  806
-rw-r--r--  contrib/libs/apache/arrow/cpp/src/arrow/compute/kernels/hash_aggregate.cc  2758
-rw-r--r--  contrib/libs/apache/arrow/cpp/src/arrow/compute/kernels/scalar_arithmetic.cc  3100
-rw-r--r--  contrib/libs/apache/arrow/cpp/src/arrow/compute/kernels/scalar_boolean.cc  886
-rw-r--r--  contrib/libs/apache/arrow/cpp/src/arrow/compute/kernels/scalar_cast_boolean.cc  10
-rw-r--r--  contrib/libs/apache/arrow/cpp/src/arrow/compute/kernels/scalar_cast_dictionary.cc  252
-rw-r--r--  contrib/libs/apache/arrow/cpp/src/arrow/compute/kernels/scalar_cast_internal.cc  114
-rw-r--r--  contrib/libs/apache/arrow/cpp/src/arrow/compute/kernels/scalar_cast_internal.h  26
-rw-r--r--  contrib/libs/apache/arrow/cpp/src/arrow/compute/kernels/scalar_cast_nested.cc  126
-rw-r--r--  contrib/libs/apache/arrow/cpp/src/arrow/compute/kernels/scalar_cast_numeric.cc  364
-rw-r--r--  contrib/libs/apache/arrow/cpp/src/arrow/compute/kernels/scalar_cast_string.cc  254
-rw-r--r--  contrib/libs/apache/arrow/cpp/src/arrow/compute/kernels/scalar_cast_temporal.cc  156
-rw-r--r--  contrib/libs/apache/arrow/cpp/src/arrow/compute/kernels/scalar_compare.cc  754
-rw-r--r--  contrib/libs/apache/arrow/cpp/src/arrow/compute/kernels/scalar_fill_null.cc  162
-rw-r--r--  contrib/libs/apache/arrow/cpp/src/arrow/compute/kernels/scalar_if_else.cc  3460
-rw-r--r--  contrib/libs/apache/arrow/cpp/src/arrow/compute/kernels/scalar_nested.cc  234
-rw-r--r--  contrib/libs/apache/arrow/cpp/src/arrow/compute/kernels/scalar_set_lookup.cc  288
-rw-r--r--  contrib/libs/apache/arrow/cpp/src/arrow/compute/kernels/scalar_string.cc  6768
-rw-r--r--  contrib/libs/apache/arrow/cpp/src/arrow/compute/kernels/scalar_temporal.cc  1326
-rw-r--r--  contrib/libs/apache/arrow/cpp/src/arrow/compute/kernels/scalar_validity.cc  234
-rw-r--r--  contrib/libs/apache/arrow/cpp/src/arrow/compute/kernels/util_internal.cc  42
-rw-r--r--  contrib/libs/apache/arrow/cpp/src/arrow/compute/kernels/util_internal.h  222
-rw-r--r--  contrib/libs/apache/arrow/cpp/src/arrow/compute/kernels/vector_hash.cc  340
-rw-r--r--  contrib/libs/apache/arrow/cpp/src/arrow/compute/kernels/vector_nested.cc  52
-rw-r--r--  contrib/libs/apache/arrow/cpp/src/arrow/compute/kernels/vector_replace.cc  1080
-rw-r--r--  contrib/libs/apache/arrow/cpp/src/arrow/compute/kernels/vector_selection.cc  766
-rw-r--r--  contrib/libs/apache/arrow/cpp/src/arrow/compute/kernels/vector_sort.cc  3270
-rw-r--r--  contrib/libs/apache/arrow/cpp/src/arrow/compute/registry.cc  108
-rw-r--r--  contrib/libs/apache/arrow/cpp/src/arrow/compute/registry.h  20
-rw-r--r--  contrib/libs/apache/arrow/cpp/src/arrow/compute/registry_internal.h  30
-rw-r--r--  contrib/libs/apache/arrow/cpp/src/arrow/compute/type_fwd.h  26
-rw-r--r--  contrib/libs/apache/arrow/cpp/src/arrow/config.cc  72
-rw-r--r--  contrib/libs/apache/arrow/cpp/src/arrow/config.h  32
-rw-r--r--  contrib/libs/apache/arrow/cpp/src/arrow/datum.cc  120
-rw-r--r--  contrib/libs/apache/arrow/cpp/src/arrow/datum.h  58
-rw-r--r--  contrib/libs/apache/arrow/cpp/src/arrow/io/buffered.cc  18
-rw-r--r--  contrib/libs/apache/arrow/cpp/src/arrow/io/buffered.h  6
-rw-r--r--  contrib/libs/apache/arrow/cpp/src/arrow/io/caching.cc  308
-rw-r--r--  contrib/libs/apache/arrow/cpp/src/arrow/io/caching.h  76
-rw-r--r--  contrib/libs/apache/arrow/cpp/src/arrow/io/compressed.cc  20
-rw-r--r--  contrib/libs/apache/arrow/cpp/src/arrow/io/compressed.h  6
-rw-r--r--  contrib/libs/apache/arrow/cpp/src/arrow/io/file.cc  10
-rw-r--r--  contrib/libs/apache/arrow/cpp/src/arrow/io/file.h  2
-rw-r--r--  contrib/libs/apache/arrow/cpp/src/arrow/io/interfaces.cc  116
-rw-r--r--  contrib/libs/apache/arrow/cpp/src/arrow/io/interfaces.h  156
-rw-r--r--  contrib/libs/apache/arrow/cpp/src/arrow/io/memory.cc  14
-rw-r--r--  contrib/libs/apache/arrow/cpp/src/arrow/io/memory.h  6
-rw-r--r--  contrib/libs/apache/arrow/cpp/src/arrow/io/stdio.cc  190
-rw-r--r--  contrib/libs/apache/arrow/cpp/src/arrow/io/stdio.h  164
-rw-r--r--  contrib/libs/apache/arrow/cpp/src/arrow/io/transform.cc  26
-rw-r--r--  contrib/libs/apache/arrow/cpp/src/arrow/io/transform.h  8
-rw-r--r--  contrib/libs/apache/arrow/cpp/src/arrow/io/type_fwd.h  54
-rw-r--r--  contrib/libs/apache/arrow/cpp/src/arrow/io/util_internal.h  22
-rw-r--r--  contrib/libs/apache/arrow/cpp/src/arrow/ipc/dictionary.cc  82
-rw-r--r--  contrib/libs/apache/arrow/cpp/src/arrow/ipc/dictionary.h  8
-rw-r--r--  contrib/libs/apache/arrow/cpp/src/arrow/ipc/feather.cc  46
-rw-r--r--  contrib/libs/apache/arrow/cpp/src/arrow/ipc/feather.h  4
-rw-r--r--  contrib/libs/apache/arrow/cpp/src/arrow/ipc/json_simple.h  122
-rw-r--r--  contrib/libs/apache/arrow/cpp/src/arrow/ipc/message.cc  120
-rw-r--r--  contrib/libs/apache/arrow/cpp/src/arrow/ipc/message.h  12
-rw-r--r--  contrib/libs/apache/arrow/cpp/src/arrow/ipc/metadata_internal.cc  106
-rw-r--r--  contrib/libs/apache/arrow/cpp/src/arrow/ipc/metadata_internal.h  34
-rw-r--r--  contrib/libs/apache/arrow/cpp/src/arrow/ipc/options.h  132
-rw-r--r--  contrib/libs/apache/arrow/cpp/src/arrow/ipc/reader.cc  956
-rw-r--r--  contrib/libs/apache/arrow/cpp/src/arrow/ipc/reader.h  102
-rw-r--r--  contrib/libs/apache/arrow/cpp/src/arrow/ipc/type_fwd.h  6
-rw-r--r--  contrib/libs/apache/arrow/cpp/src/arrow/ipc/writer.cc  150
-rw-r--r--  contrib/libs/apache/arrow/cpp/src/arrow/ipc/writer.h  72
-rw-r--r--  contrib/libs/apache/arrow/cpp/src/arrow/memory_pool.cc  570
-rw-r--r--  contrib/libs/apache/arrow/cpp/src/arrow/memory_pool.h  30
-rw-r--r--  contrib/libs/apache/arrow/cpp/src/arrow/pretty_print.cc  140
-rw-r--r--  contrib/libs/apache/arrow/cpp/src/arrow/pretty_print.h  8
-rw-r--r--  contrib/libs/apache/arrow/cpp/src/arrow/record_batch.cc  122
-rw-r--r--  contrib/libs/apache/arrow/cpp/src/arrow/record_batch.h  28
-rw-r--r--  contrib/libs/apache/arrow/cpp/src/arrow/result.h  116
-rw-r--r--  contrib/libs/apache/arrow/cpp/src/arrow/scalar.cc  142
-rw-r--r--  contrib/libs/apache/arrow/cpp/src/arrow/scalar.h  48
-rw-r--r--  contrib/libs/apache/arrow/cpp/src/arrow/status.cc  8
-rw-r--r--  contrib/libs/apache/arrow/cpp/src/arrow/status.h  28
-rw-r--r--  contrib/libs/apache/arrow/cpp/src/arrow/stl_iterator.h  292
-rw-r--r--  contrib/libs/apache/arrow/cpp/src/arrow/table.cc  24
-rw-r--r--  contrib/libs/apache/arrow/cpp/src/arrow/table.h  4
-rw-r--r--  contrib/libs/apache/arrow/cpp/src/arrow/table_builder.cc  6
-rw-r--r--  contrib/libs/apache/arrow/cpp/src/arrow/tensor.cc  174
-rw-r--r--  contrib/libs/apache/arrow/cpp/src/arrow/tensor.h  24
-rw-r--r--  contrib/libs/apache/arrow/cpp/src/arrow/tensor/coo_converter.cc  6
-rw-r--r--  contrib/libs/apache/arrow/cpp/src/arrow/tensor/csf_converter.cc  2
-rw-r--r--  contrib/libs/apache/arrow/cpp/src/arrow/tensor/csx_converter.cc  2
-rw-r--r--  contrib/libs/apache/arrow/cpp/src/arrow/type.cc  372
-rw-r--r--  contrib/libs/apache/arrow/cpp/src/arrow/type.h  332
-rw-r--r--  contrib/libs/apache/arrow/cpp/src/arrow/type_fwd.h  126
-rw-r--r--  contrib/libs/apache/arrow/cpp/src/arrow/type_traits.h  202
-rw-r--r--  contrib/libs/apache/arrow/cpp/src/arrow/util/algorithm.h  66
-rw-r--r--  contrib/libs/apache/arrow/cpp/src/arrow/util/async_generator.h  3228
-rw-r--r--  contrib/libs/apache/arrow/cpp/src/arrow/util/basic_decimal.cc  1376
-rw-r--r--  contrib/libs/apache/arrow/cpp/src/arrow/util/basic_decimal.h  326
-rw-r--r--  contrib/libs/apache/arrow/cpp/src/arrow/util/bit_block_counter.cc  20
-rw-r--r--  contrib/libs/apache/arrow/cpp/src/arrow/util/bit_block_counter.h  360
-rw-r--r--  contrib/libs/apache/arrow/cpp/src/arrow/util/bit_run_reader.cc  2
-rw-r--r--  contrib/libs/apache/arrow/cpp/src/arrow/util/bit_run_reader.h  706
-rw-r--r--  contrib/libs/apache/arrow/cpp/src/arrow/util/bit_stream_utils.h  866
-rw-r--r--  contrib/libs/apache/arrow/cpp/src/arrow/util/bit_util.cc  112
-rw-r--r--  contrib/libs/apache/arrow/cpp/src/arrow/util/bit_util.h  74
-rw-r--r--  contrib/libs/apache/arrow/cpp/src/arrow/util/bitmap.cc  20
-rw-r--r--  contrib/libs/apache/arrow/cpp/src/arrow/util/bitmap.h  334
-rw-r--r--  contrib/libs/apache/arrow/cpp/src/arrow/util/bitmap_generate.h  24
-rw-r--r--  contrib/libs/apache/arrow/cpp/src/arrow/util/bitmap_ops.cc  116
-rw-r--r--  contrib/libs/apache/arrow/cpp/src/arrow/util/bitmap_ops.h  98
-rw-r--r--  contrib/libs/apache/arrow/cpp/src/arrow/util/bitmap_reader.h  372
-rw-r--r--  contrib/libs/apache/arrow/cpp/src/arrow/util/bitmap_visit.h  176
-rw-r--r--  contrib/libs/apache/arrow/cpp/src/arrow/util/bitmap_writer.h  204
-rw-r--r--  contrib/libs/apache/arrow/cpp/src/arrow/util/bpacking.cc  14
-rw-r--r--  contrib/libs/apache/arrow/cpp/src/arrow/util/bpacking.h  2
-rw-r--r--  contrib/libs/apache/arrow/cpp/src/arrow/util/byte_stream_split.h  1252
-rw-r--r--  contrib/libs/apache/arrow/cpp/src/arrow/util/cancel.cc  452
-rw-r--r--  contrib/libs/apache/arrow/cpp/src/arrow/util/cancel.h  204
-rw-r--r--  contrib/libs/apache/arrow/cpp/src/arrow/util/compression.cc  60
-rw-r--r--  contrib/libs/apache/arrow/cpp/src/arrow/util/compression.h  46
-rw-r--r--  contrib/libs/apache/arrow/cpp/src/arrow/util/compression_brotli.cc  10
-rw-r--r--  contrib/libs/apache/arrow/cpp/src/arrow/util/compression_lz4.cc  94
-rw-r--r--  contrib/libs/apache/arrow/cpp/src/arrow/util/compression_snappy.cc  6
-rw-r--r--  contrib/libs/apache/arrow/cpp/src/arrow/util/compression_zlib.cc  18
-rw-r--r--  contrib/libs/apache/arrow/cpp/src/arrow/util/compression_zstd.cc  6
-rw-r--r--  contrib/libs/apache/arrow/cpp/src/arrow/util/cpu_info.cc  204
-rw-r--r--  contrib/libs/apache/arrow/cpp/src/arrow/util/cpu_info.h  38
-rw-r--r--  contrib/libs/apache/arrow/cpp/src/arrow/util/decimal.cc  662
-rw-r--r--  contrib/libs/apache/arrow/cpp/src/arrow/util/decimal.h  234
-rw-r--r--  contrib/libs/apache/arrow/cpp/src/arrow/util/delimiting.cc  104
-rw-r--r--  contrib/libs/apache/arrow/cpp/src/arrow/util/delimiting.h  68
-rw-r--r--  contrib/libs/apache/arrow/cpp/src/arrow/util/endian.h  362
-rw-r--r--  contrib/libs/apache/arrow/cpp/src/arrow/util/formatting.cc  36
-rw-r--r--  contrib/libs/apache/arrow/cpp/src/arrow/util/formatting.h  32
-rw-r--r--  contrib/libs/apache/arrow/cpp/src/arrow/util/functional.h  156
-rw-r--r--  contrib/libs/apache/arrow/cpp/src/arrow/util/future.cc  252
-rw-r--r--  contrib/libs/apache/arrow/cpp/src/arrow/util/future.h  1418
-rw-r--r--  contrib/libs/apache/arrow/cpp/src/arrow/util/hashing.h  30
-rw-r--r--  contrib/libs/apache/arrow/cpp/src/arrow/util/int128_internal.h  4
-rw-r--r--  contrib/libs/apache/arrow/cpp/src/arrow/util/int_util.cc  192
-rw-r--r--  contrib/libs/apache/arrow/cpp/src/arrow/util/int_util.h  42
-rw-r--r--  contrib/libs/apache/arrow/cpp/src/arrow/util/int_util_internal.h  56
-rw-r--r--  contrib/libs/apache/arrow/cpp/src/arrow/util/io_util.cc  336
-rw-r--r--  contrib/libs/apache/arrow/cpp/src/arrow/util/io_util.h  86
-rw-r--r--  contrib/libs/apache/arrow/cpp/src/arrow/util/iterator.h  316
-rw-r--r--  contrib/libs/apache/arrow/cpp/src/arrow/util/key_value_metadata.cc  10
-rw-r--r--  contrib/libs/apache/arrow/cpp/src/arrow/util/key_value_metadata.h  6
-rw-r--r--  contrib/libs/apache/arrow/cpp/src/arrow/util/logging.cc  44
-rw-r--r--  contrib/libs/apache/arrow/cpp/src/arrow/util/logging.h  52
-rw-r--r--  contrib/libs/apache/arrow/cpp/src/arrow/util/mutex.h  2
-rw-r--r--  contrib/libs/apache/arrow/cpp/src/arrow/util/parallel.h  88
-rw-r--r--  contrib/libs/apache/arrow/cpp/src/arrow/util/queue.h  58
-rw-r--r--  contrib/libs/apache/arrow/cpp/src/arrow/util/reflection_internal.h  266
-rw-r--r--  contrib/libs/apache/arrow/cpp/src/arrow/util/rle_encoding.h  1652
-rw-r--r--  contrib/libs/apache/arrow/cpp/src/arrow/util/spaced.h  196
-rw-r--r--  contrib/libs/apache/arrow/cpp/src/arrow/util/string.cc  58
-rw-r--r--  contrib/libs/apache/arrow/cpp/src/arrow/util/string.h  18
-rw-r--r--  contrib/libs/apache/arrow/cpp/src/arrow/util/task_group.cc  168
-rw-r--r--  contrib/libs/apache/arrow/cpp/src/arrow/util/task_group.h  70
-rw-r--r--  contrib/libs/apache/arrow/cpp/src/arrow/util/tdigest.cc  834
-rw-r--r--  contrib/libs/apache/arrow/cpp/src/arrow/util/tdigest.h  206
-rw-r--r--  contrib/libs/apache/arrow/cpp/src/arrow/util/thread_pool.cc  272
-rw-r--r--  contrib/libs/apache/arrow/cpp/src/arrow/util/thread_pool.h  458
-rw-r--r--  contrib/libs/apache/arrow/cpp/src/arrow/util/trie.h  8
-rw-r--r--  contrib/libs/apache/arrow/cpp/src/arrow/util/type_fwd.h  66
-rw-r--r--  contrib/libs/apache/arrow/cpp/src/arrow/util/type_traits.h  82
-rw-r--r--  contrib/libs/apache/arrow/cpp/src/arrow/util/uri.cc  46
-rw-r--r--  contrib/libs/apache/arrow/cpp/src/arrow/util/uri.h  18
-rw-r--r--  contrib/libs/apache/arrow/cpp/src/arrow/util/utf8.cc  4
-rw-r--r--  contrib/libs/apache/arrow/cpp/src/arrow/util/utf8.h  452
-rw-r--r--  contrib/libs/apache/arrow/cpp/src/arrow/util/value_parsing.cc  10
-rw-r--r--  contrib/libs/apache/arrow/cpp/src/arrow/util/value_parsing.h  184
-rw-r--r--  contrib/libs/apache/arrow/cpp/src/arrow/util/variant.h  830
-rw-r--r--  contrib/libs/apache/arrow/cpp/src/arrow/util/vector.h  202
-rw-r--r--  contrib/libs/apache/arrow/cpp/src/arrow/util/windows_fixup.h  14
-rw-r--r--  contrib/libs/apache/arrow/cpp/src/arrow/vendored/ProducerConsumerQueue.h  434
-rw-r--r--  contrib/libs/apache/arrow/cpp/src/arrow/vendored/portable-snippets/safe-math.h  4
-rw-r--r--  contrib/libs/apache/arrow/cpp/src/arrow/visitor.cc  6
-rw-r--r--  contrib/libs/apache/arrow/cpp/src/arrow/visitor.h  6
-rw-r--r--  contrib/libs/apache/arrow/cpp/src/arrow/visitor_inline.h  14
-rw-r--r--  contrib/libs/apache/arrow/cpp/src/generated/parquet_constants.cpp  34
-rw-r--r--  contrib/libs/apache/arrow/cpp/src/generated/parquet_constants.h  48
-rw-r--r--  contrib/libs/apache/arrow/cpp/src/generated/parquet_types.cpp  14830
-rw-r--r--  contrib/libs/apache/arrow/cpp/src/generated/parquet_types.h  5834
-rw-r--r--  contrib/libs/apache/arrow/cpp/src/parquet/README  20
-rw-r--r--  contrib/libs/apache/arrow/cpp/src/parquet/arrow/path_internal.cc  1800
-rw-r--r--  contrib/libs/apache/arrow/cpp/src/parquet/arrow/path_internal.h  310
-rw-r--r--  contrib/libs/apache/arrow/cpp/src/parquet/arrow/reader.cc  2496
-rw-r--r--  contrib/libs/apache/arrow/cpp/src/parquet/arrow/reader.h  686
-rw-r--r--  contrib/libs/apache/arrow/cpp/src/parquet/arrow/reader_internal.cc  1582
-rw-r--r--  contrib/libs/apache/arrow/cpp/src/parquet/arrow/reader_internal.h  244
-rw-r--r--  contrib/libs/apache/arrow/cpp/src/parquet/arrow/schema.cc  2174
-rw-r--r--  contrib/libs/apache/arrow/cpp/src/parquet/arrow/schema.h  368
-rw-r--r--  contrib/libs/apache/arrow/cpp/src/parquet/arrow/schema_internal.cc  444
-rw-r--r--  contrib/libs/apache/arrow/cpp/src/parquet/arrow/schema_internal.h  102
-rw-r--r--  contrib/libs/apache/arrow/cpp/src/parquet/arrow/writer.cc  964
-rw-r--r--  contrib/libs/apache/arrow/cpp/src/parquet/arrow/writer.h  218
-rw-r--r--  contrib/libs/apache/arrow/cpp/src/parquet/bloom_filter.cc  324
-rw-r--r--  contrib/libs/apache/arrow/cpp/src/parquet/bloom_filter.h  494
-rw-r--r--  contrib/libs/apache/arrow/cpp/src/parquet/column_page.h  320
-rw-r--r--  contrib/libs/apache/arrow/cpp/src/parquet/column_reader.cc  3604
-rw-r--r--  contrib/libs/apache/arrow/cpp/src/parquet/column_reader.h  752
-rw-r--r--  contrib/libs/apache/arrow/cpp/src/parquet/column_scanner.cc  182
-rw-r--r--  contrib/libs/apache/arrow/cpp/src/parquet/column_scanner.h  524
-rw-r--r--  contrib/libs/apache/arrow/cpp/src/parquet/column_writer.cc  4134
-rw-r--r--  contrib/libs/apache/arrow/cpp/src/parquet/column_writer.h  540
-rw-r--r--  contrib/libs/apache/arrow/cpp/src/parquet/encoding.cc  5094
-rw-r--r--  contrib/libs/apache/arrow/cpp/src/parquet/encoding.h  920
-rw-r--r--  contrib/libs/apache/arrow/cpp/src/parquet/encryption/encryption.cc  824
-rw-r--r--  contrib/libs/apache/arrow/cpp/src/parquet/encryption/encryption.h  1020
-rw-r--r--  contrib/libs/apache/arrow/cpp/src/parquet/encryption/encryption_internal.h  232
-rw-r--r--  contrib/libs/apache/arrow/cpp/src/parquet/encryption/encryption_internal_nossl.cc  220
-rw-r--r--  contrib/libs/apache/arrow/cpp/src/parquet/encryption/internal_file_decryptor.cc  480
-rw-r--r--  contrib/libs/apache/arrow/cpp/src/parquet/encryption/internal_file_decryptor.h  242
-rw-r--r--  contrib/libs/apache/arrow/cpp/src/parquet/encryption/internal_file_encryptor.cc  340
-rw-r--r--  contrib/libs/apache/arrow/cpp/src/parquet/encryption/internal_file_encryptor.h  218
-rw-r--r--  contrib/libs/apache/arrow/cpp/src/parquet/exception.cc  54
-rw-r--r--  contrib/libs/apache/arrow/cpp/src/parquet/exception.h  316
-rw-r--r--  contrib/libs/apache/arrow/cpp/src/parquet/file_reader.cc  1736
-rw-r--r--  contrib/libs/apache/arrow/cpp/src/parquet/file_reader.h  376
-rw-r--r--  contrib/libs/apache/arrow/cpp/src/parquet/file_writer.cc  1094
-rw-r--r--  contrib/libs/apache/arrow/cpp/src/parquet/file_writer.h  468
-rw-r--r--  contrib/libs/apache/arrow/cpp/src/parquet/hasher.h  144
-rw-r--r--  contrib/libs/apache/arrow/cpp/src/parquet/level_comparison.cc  164
-rw-r--r--  contrib/libs/apache/arrow/cpp/src/parquet/level_comparison.h  80
-rw-r--r--  contrib/libs/apache/arrow/cpp/src/parquet/level_comparison_inc.h  130
-rw-r--r--  contrib/libs/apache/arrow/cpp/src/parquet/level_conversion.cc  366
-rw-r--r--  contrib/libs/apache/arrow/cpp/src/parquet/level_conversion.h  398
-rw-r--r--  contrib/libs/apache/arrow/cpp/src/parquet/level_conversion_inc.h  714
-rw-r--r--  contrib/libs/apache/arrow/cpp/src/parquet/metadata.cc  3566
-rw-r--r--  contrib/libs/apache/arrow/cpp/src/parquet/metadata.h  968
-rw-r--r--  contrib/libs/apache/arrow/cpp/src/parquet/murmur3.cc  444
-rw-r--r--  contrib/libs/apache/arrow/cpp/src/parquet/murmur3.h  108
-rw-r--r--  contrib/libs/apache/arrow/cpp/src/parquet/platform.cc  82
-rw-r--r--  contrib/libs/apache/arrow/cpp/src/parquet/platform.h  222
-rw-r--r--  contrib/libs/apache/arrow/cpp/src/parquet/printer.cc  594
-rw-r--r--  contrib/libs/apache/arrow/cpp/src/parquet/printer.h  92
-rw-r--r--  contrib/libs/apache/arrow/cpp/src/parquet/properties.cc  128
-rw-r--r--  contrib/libs/apache/arrow/cpp/src/parquet/properties.h  1626
-rw-r--r--  contrib/libs/apache/arrow/cpp/src/parquet/schema.cc  1890
-rw-r--r--  contrib/libs/apache/arrow/cpp/src/parquet/schema.h  988
-rw-r--r--  contrib/libs/apache/arrow/cpp/src/parquet/schema_internal.h  108
-rw-r--r--  contrib/libs/apache/arrow/cpp/src/parquet/statistics.cc  1770
-rw-r--r--  contrib/libs/apache/arrow/cpp/src/parquet/statistics.h  684
-rw-r--r--  contrib/libs/apache/arrow/cpp/src/parquet/stream_reader.cc  1042
-rw-r--r--  contrib/libs/apache/arrow/cpp/src/parquet/stream_reader.h  598
-rw-r--r--  contrib/libs/apache/arrow/cpp/src/parquet/stream_writer.cc  648
-rw-r--r--  contrib/libs/apache/arrow/cpp/src/parquet/stream_writer.h  486
-rw-r--r--  contrib/libs/apache/arrow/cpp/src/parquet/symbols.map  80
-rw-r--r--  contrib/libs/apache/arrow/cpp/src/parquet/thrift_internal.h  988
-rw-r--r--  contrib/libs/apache/arrow/cpp/src/parquet/type_fwd.h  86
-rw-r--r--  contrib/libs/apache/arrow/cpp/src/parquet/types.cc  3134
-rw-r--r--  contrib/libs/apache/arrow/cpp/src/parquet/types.h  1530
-rw-r--r--  contrib/libs/apache/arrow/cpp/src/parquet/windows_compatibility.h  60
337 files changed, 85425 insertions, 85425 deletions
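
Per the commit message and the balanced diffstat above (85425 insertions against 85425 deletions), this change deletes and re-adds every line verbatim; only the authorship annotation moves, not the code. Among the re-added lines of adapter.cc shown below are the ORC_THROW_NOT_OK and ORC_CATCH_NOT_OK macros, which bridge Arrow's Status-based error handling with liborc's exception-based API. The following is a minimal, self-contained sketch of that bridging technique; MiniStatus, MiniParseError, and the function names are hypothetical stand-ins, not Arrow or liborc identifiers.

    // Sketch of two-way Status <-> exception bridging, mirroring the
    // ORC_THROW_NOT_OK / ORC_CATCH_NOT_OK macros in the diff below.
    #include <iostream>
    #include <stdexcept>
    #include <string>

    struct MiniStatus {
      bool ok;
      std::string message;
    };

    struct MiniParseError : std::runtime_error {
      using std::runtime_error::runtime_error;
    };

    // Status -> exception: for code driven by the throwing library
    // (compare ORC_THROW_NOT_OK).
    #define MINI_THROW_NOT_OK(s)                      \
      do {                                            \
        MiniStatus _s = (s);                          \
        if (!_s.ok) throw MiniParseError(_s.message); \
      } while (0)

    // Exception -> Status: for the Status-returning API boundary
    // (compare ORC_CATCH_NOT_OK).
    #define MINI_CATCH_NOT_OK(expr)                   \
      try {                                           \
        (expr);                                       \
      } catch (const MiniParseError& e) {             \
        return MiniStatus{false, e.what()};           \
      }

    // A liborc-style function that reports failure by throwing.
    void ThrowingWork(bool fail) {
      if (fail) throw MiniParseError("short read");
    }

    // The Status-facing wrapper converts the exception back into a status.
    MiniStatus SafeWork(bool fail) {
      MINI_CATCH_NOT_OK(ThrowingWork(fail));
      return MiniStatus{true, ""};
    }

    // A callback the throwing library would drive: failed status becomes
    // the library's exception type.
    void LibraryDrivenCallback(const MiniStatus& s) { MINI_THROW_NOT_OK(s); }

    int main() {
      std::cout << SafeWork(true).message << "\n";  // prints "short read"
      try {
        LibraryDrivenCallback(MiniStatus{false, "bad status"});
      } catch (const MiniParseError& e) {
        std::cout << e.what() << "\n";              // prints "bad status"
      }
      return 0;
    }

Arrow's real macros are richer in one respect: ORC_END_CATCH_NOT_OK maps the distinct liborc exception types (ParseError, InvalidArgument, NotImplementedYet) to distinct Status codes, as the definitions in the diff show.
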
diff --git a/contrib/libs/apache/arrow/cpp/src/arrow/adapters/orc/adapter.cc b/contrib/libs/apache/arrow/cpp/src/arrow/adapters/orc/adapter.cc
index 33ac56ff816..2f74b40e40d 100644
--- a/contrib/libs/apache/arrow/cpp/src/arrow/adapters/orc/adapter.cc
+++ b/contrib/libs/apache/arrow/cpp/src/arrow/adapters/orc/adapter.cc
@@ -1,595 +1,595 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-#include "arrow/adapters/orc/adapter.h"
-
-#include <algorithm>
-#include <cstdint>
-#include <functional>
-#include <list>
-#include <memory>
-#include <sstream>
-#include <string>
-#include <utility>
-#include <vector>
-
-#include "arrow/adapters/orc/adapter_util.h"
-#include "arrow/buffer.h"
-#include "arrow/builder.h"
-#include "arrow/io/interfaces.h"
-#include "arrow/memory_pool.h"
-#include "arrow/record_batch.h"
-#include "arrow/status.h"
-#include "arrow/table.h"
-#include "arrow/table_builder.h"
-#include "arrow/type.h"
-#include "arrow/type_traits.h"
-#include "arrow/util/bit_util.h"
-#include "arrow/util/checked_cast.h"
-#include "arrow/util/decimal.h"
-#include "arrow/util/key_value_metadata.h"
-#include "arrow/util/macros.h"
-#include "arrow/util/range.h"
-#include "arrow/util/visibility.h"
-#include "orc/Exceptions.hh"
-
-// alias to not interfere with nested orc namespace
-namespace liborc = orc;
-
-#define ORC_THROW_NOT_OK(s) \
- do { \
- Status _s = (s); \
- if (!_s.ok()) { \
- std::stringstream ss; \
- ss << "Arrow error: " << _s.ToString(); \
- throw liborc::ParseError(ss.str()); \
- } \
- } while (0)
-
-#define ORC_ASSIGN_OR_THROW_IMPL(status_name, lhs, rexpr) \
- auto status_name = (rexpr); \
- ORC_THROW_NOT_OK(status_name.status()); \
- lhs = std::move(status_name).ValueOrDie();
-
-#define ORC_ASSIGN_OR_THROW(lhs, rexpr) \
- ORC_ASSIGN_OR_THROW_IMPL(ARROW_ASSIGN_OR_RAISE_NAME(_error_or_value, __COUNTER__), \
- lhs, rexpr);
-
-#define ORC_BEGIN_CATCH_NOT_OK try {
-#define ORC_END_CATCH_NOT_OK \
- } \
- catch (const liborc::ParseError& e) { \
- return Status::IOError(e.what()); \
- } \
- catch (const liborc::InvalidArgument& e) { \
- return Status::Invalid(e.what()); \
- } \
- catch (const liborc::NotImplementedYet& e) { \
- return Status::NotImplemented(e.what()); \
- }
-
-#define ORC_CATCH_NOT_OK(_s) \
- ORC_BEGIN_CATCH_NOT_OK(_s); \
- ORC_END_CATCH_NOT_OK
-
-namespace arrow {
-namespace adapters {
-namespace orc {
-
-namespace {
-
-// The following are required by ORC to be uint64_t
-constexpr uint64_t kOrcWriterBatchSize = 128 * 1024;
-constexpr uint64_t kOrcNaturalWriteSize = 128 * 1024;
-
-using internal::checked_cast;
-
-class ArrowInputFile : public liborc::InputStream {
- public:
- explicit ArrowInputFile(const std::shared_ptr<io::RandomAccessFile>& file)
- : file_(file) {}
-
- uint64_t getLength() const override {
- ORC_ASSIGN_OR_THROW(int64_t size, file_->GetSize());
- return static_cast<uint64_t>(size);
- }
-
- uint64_t getNaturalReadSize() const override { return 128 * 1024; }
-
- void read(void* buf, uint64_t length, uint64_t offset) override {
- ORC_ASSIGN_OR_THROW(int64_t bytes_read, file_->ReadAt(offset, length, buf));
-
- if (static_cast<uint64_t>(bytes_read) != length) {
- throw liborc::ParseError("Short read from arrow input file");
- }
- }
-
- const std::string& getName() const override {
- static const std::string filename("ArrowInputFile");
- return filename;
- }
-
- private:
- std::shared_ptr<io::RandomAccessFile> file_;
-};
-
-struct StripeInformation {
- uint64_t offset;
- uint64_t length;
- uint64_t num_rows;
- uint64_t first_row_of_stripe;
-};
-
-// The number of rows to read in a ColumnVectorBatch
-constexpr int64_t kReadRowsBatch = 1000;
-
-class OrcStripeReader : public RecordBatchReader {
- public:
- OrcStripeReader(std::unique_ptr<liborc::RowReader> row_reader,
- std::shared_ptr<Schema> schema, int64_t batch_size, MemoryPool* pool)
- : row_reader_(std::move(row_reader)),
- schema_(schema),
- pool_(pool),
- batch_size_{batch_size} {}
-
- std::shared_ptr<Schema> schema() const override { return schema_; }
-
- Status ReadNext(std::shared_ptr<RecordBatch>* out) override {
- std::unique_ptr<liborc::ColumnVectorBatch> batch;
- ORC_CATCH_NOT_OK(batch = row_reader_->createRowBatch(batch_size_));
-
- const liborc::Type& type = row_reader_->getSelectedType();
- if (!row_reader_->next(*batch)) {
- out->reset();
- return Status::OK();
- }
-
- std::unique_ptr<RecordBatchBuilder> builder;
- RETURN_NOT_OK(RecordBatchBuilder::Make(schema_, pool_, batch->numElements, &builder));
-
- // The top-level type must be a struct to read into an arrow table
- const auto& struct_batch = checked_cast<liborc::StructVectorBatch&>(*batch);
-
- for (int i = 0; i < builder->num_fields(); i++) {
- RETURN_NOT_OK(AppendBatch(type.getSubtype(i), struct_batch.fields[i], 0,
- batch->numElements, builder->GetField(i)));
- }
-
- RETURN_NOT_OK(builder->Flush(out));
- return Status::OK();
- }
-
- private:
- std::unique_ptr<liborc::RowReader> row_reader_;
- std::shared_ptr<Schema> schema_;
- MemoryPool* pool_;
- int64_t batch_size_;
-};
-
-} // namespace
-
-class ORCFileReader::Impl {
- public:
- Impl() {}
- ~Impl() {}
-
- Status Open(const std::shared_ptr<io::RandomAccessFile>& file, MemoryPool* pool) {
- std::unique_ptr<ArrowInputFile> io_wrapper(new ArrowInputFile(file));
- liborc::ReaderOptions options;
- std::unique_ptr<liborc::Reader> liborc_reader;
- ORC_CATCH_NOT_OK(liborc_reader = createReader(std::move(io_wrapper), options));
- pool_ = pool;
- reader_ = std::move(liborc_reader);
- current_row_ = 0;
-
- return Init();
- }
-
- Status Init() {
- int64_t nstripes = reader_->getNumberOfStripes();
- stripes_.resize(nstripes);
- std::unique_ptr<liborc::StripeInformation> stripe;
- uint64_t first_row_of_stripe = 0;
- for (int i = 0; i < nstripes; ++i) {
- stripe = reader_->getStripe(i);
- stripes_[i] = StripeInformation({stripe->getOffset(), stripe->getLength(),
- stripe->getNumberOfRows(), first_row_of_stripe});
- first_row_of_stripe += stripe->getNumberOfRows();
- }
- return Status::OK();
- }
-
- int64_t NumberOfStripes() { return stripes_.size(); }
-
- int64_t NumberOfRows() { return reader_->getNumberOfRows(); }
-
- Status ReadSchema(std::shared_ptr<Schema>* out) {
- const liborc::Type& type = reader_->getType();
- return GetArrowSchema(type, out);
- }
-
- Status ReadSchema(const liborc::RowReaderOptions& opts, std::shared_ptr<Schema>* out) {
- std::unique_ptr<liborc::RowReader> row_reader;
- ORC_CATCH_NOT_OK(row_reader = reader_->createRowReader(opts));
- const liborc::Type& type = row_reader->getSelectedType();
- return GetArrowSchema(type, out);
- }
-
- Result<std::shared_ptr<const KeyValueMetadata>> ReadMetadata() {
- const std::list<std::string> keys = reader_->getMetadataKeys();
- auto metadata = std::make_shared<KeyValueMetadata>();
- for (const auto& key : keys) {
- metadata->Append(key, reader_->getMetadataValue(key));
- }
- return std::const_pointer_cast<const KeyValueMetadata>(metadata);
- }
-
- Status GetArrowSchema(const liborc::Type& type, std::shared_ptr<Schema>* out) {
- if (type.getKind() != liborc::STRUCT) {
- return Status::NotImplemented(
- "Only ORC files with a top-level struct "
- "can be handled");
- }
- int size = static_cast<int>(type.getSubtypeCount());
- std::vector<std::shared_ptr<Field>> fields;
- for (int child = 0; child < size; ++child) {
- std::shared_ptr<DataType> elemtype;
- RETURN_NOT_OK(GetArrowType(type.getSubtype(child), &elemtype));
- std::string name = type.getFieldName(child);
- fields.push_back(field(name, elemtype));
- }
- ARROW_ASSIGN_OR_RAISE(auto metadata, ReadMetadata());
- *out = std::make_shared<Schema>(std::move(fields), std::move(metadata));
- return Status::OK();
- }
-
- Status Read(std::shared_ptr<Table>* out) {
- liborc::RowReaderOptions opts;
- std::shared_ptr<Schema> schema;
- RETURN_NOT_OK(ReadSchema(opts, &schema));
- return ReadTable(opts, schema, out);
- }
-
- Status Read(const std::shared_ptr<Schema>& schema, std::shared_ptr<Table>* out) {
- liborc::RowReaderOptions opts;
- return ReadTable(opts, schema, out);
- }
-
- Status Read(const std::vector<int>& include_indices, std::shared_ptr<Table>* out) {
- liborc::RowReaderOptions opts;
- RETURN_NOT_OK(SelectIndices(&opts, include_indices));
- std::shared_ptr<Schema> schema;
- RETURN_NOT_OK(ReadSchema(opts, &schema));
- return ReadTable(opts, schema, out);
- }
-
- Status Read(const std::shared_ptr<Schema>& schema,
- const std::vector<int>& include_indices, std::shared_ptr<Table>* out) {
- liborc::RowReaderOptions opts;
- RETURN_NOT_OK(SelectIndices(&opts, include_indices));
- return ReadTable(opts, schema, out);
- }
-
- Status ReadStripe(int64_t stripe, std::shared_ptr<RecordBatch>* out) {
- liborc::RowReaderOptions opts;
- RETURN_NOT_OK(SelectStripe(&opts, stripe));
- std::shared_ptr<Schema> schema;
- RETURN_NOT_OK(ReadSchema(opts, &schema));
- return ReadBatch(opts, schema, stripes_[stripe].num_rows, out);
- }
-
- Status ReadStripe(int64_t stripe, const std::vector<int>& include_indices,
- std::shared_ptr<RecordBatch>* out) {
- liborc::RowReaderOptions opts;
- RETURN_NOT_OK(SelectIndices(&opts, include_indices));
- RETURN_NOT_OK(SelectStripe(&opts, stripe));
- std::shared_ptr<Schema> schema;
- RETURN_NOT_OK(ReadSchema(opts, &schema));
- return ReadBatch(opts, schema, stripes_[stripe].num_rows, out);
- }
-
- Status SelectStripe(liborc::RowReaderOptions* opts, int64_t stripe) {
- ARROW_RETURN_IF(stripe < 0 || stripe >= NumberOfStripes(),
- Status::Invalid("Out of bounds stripe: ", stripe));
-
- opts->range(stripes_[stripe].offset, stripes_[stripe].length);
- return Status::OK();
- }
-
- Status SelectStripeWithRowNumber(liborc::RowReaderOptions* opts, int64_t row_number,
- StripeInformation* out) {
- ARROW_RETURN_IF(row_number >= NumberOfRows(),
- Status::Invalid("Out of bounds row number: ", row_number));
-
- for (auto it = stripes_.begin(); it != stripes_.end(); it++) {
- if (static_cast<uint64_t>(row_number) >= it->first_row_of_stripe &&
- static_cast<uint64_t>(row_number) < it->first_row_of_stripe + it->num_rows) {
- opts->range(it->offset, it->length);
- *out = *it;
- return Status::OK();
- }
- }
-
- return Status::Invalid("Invalid row number", row_number);
- }
-
- Status SelectIndices(liborc::RowReaderOptions* opts,
- const std::vector<int>& include_indices) {
- std::list<uint64_t> include_indices_list;
- for (auto it = include_indices.begin(); it != include_indices.end(); ++it) {
- ARROW_RETURN_IF(*it < 0, Status::Invalid("Negative field index"));
- include_indices_list.push_back(*it);
- }
- opts->includeTypes(include_indices_list);
- return Status::OK();
- }
-
- Status ReadTable(const liborc::RowReaderOptions& row_opts,
- const std::shared_ptr<Schema>& schema, std::shared_ptr<Table>* out) {
- liborc::RowReaderOptions opts(row_opts);
- std::vector<std::shared_ptr<RecordBatch>> batches(stripes_.size());
- for (size_t stripe = 0; stripe < stripes_.size(); stripe++) {
- opts.range(stripes_[stripe].offset, stripes_[stripe].length);
- RETURN_NOT_OK(ReadBatch(opts, schema, stripes_[stripe].num_rows, &batches[stripe]));
- }
- return Table::FromRecordBatches(schema, std::move(batches)).Value(out);
- }
-
- Status ReadBatch(const liborc::RowReaderOptions& opts,
- const std::shared_ptr<Schema>& schema, int64_t nrows,
- std::shared_ptr<RecordBatch>* out) {
- std::unique_ptr<liborc::RowReader> row_reader;
- std::unique_ptr<liborc::ColumnVectorBatch> batch;
-
- ORC_BEGIN_CATCH_NOT_OK
- row_reader = reader_->createRowReader(opts);
- batch = row_reader->createRowBatch(std::min(nrows, kReadRowsBatch));
- ORC_END_CATCH_NOT_OK
-
- std::unique_ptr<RecordBatchBuilder> builder;
- RETURN_NOT_OK(RecordBatchBuilder::Make(schema, pool_, nrows, &builder));
-
- // The top-level type must be a struct to read into an arrow table
- const auto& struct_batch = checked_cast<liborc::StructVectorBatch&>(*batch);
-
- const liborc::Type& type = row_reader->getSelectedType();
- while (row_reader->next(*batch)) {
- for (int i = 0; i < builder->num_fields(); i++) {
- RETURN_NOT_OK(AppendBatch(type.getSubtype(i), struct_batch.fields[i], 0,
- batch->numElements, builder->GetField(i)));
- }
- }
- RETURN_NOT_OK(builder->Flush(out));
- return Status::OK();
- }
-
- Status Seek(int64_t row_number) {
- ARROW_RETURN_IF(row_number >= NumberOfRows(),
- Status::Invalid("Out of bounds row number: ", row_number));
-
- current_row_ = row_number;
- return Status::OK();
- }
-
- Status NextStripeReader(int64_t batch_size, const std::vector<int>& include_indices,
- std::shared_ptr<RecordBatchReader>* out) {
- if (current_row_ >= NumberOfRows()) {
- out->reset();
- return Status::OK();
- }
-
- liborc::RowReaderOptions opts;
- if (!include_indices.empty()) {
- RETURN_NOT_OK(SelectIndices(&opts, include_indices));
- }
- StripeInformation stripe_info({0, 0, 0, 0});
- RETURN_NOT_OK(SelectStripeWithRowNumber(&opts, current_row_, &stripe_info));
- std::shared_ptr<Schema> schema;
- RETURN_NOT_OK(ReadSchema(opts, &schema));
- std::unique_ptr<liborc::RowReader> row_reader;
-
- ORC_BEGIN_CATCH_NOT_OK
- row_reader = reader_->createRowReader(opts);
- row_reader->seekToRow(current_row_);
- current_row_ = stripe_info.first_row_of_stripe + stripe_info.num_rows;
- ORC_END_CATCH_NOT_OK
-
- *out = std::shared_ptr<RecordBatchReader>(
- new OrcStripeReader(std::move(row_reader), schema, batch_size, pool_));
- return Status::OK();
- }
-
- Status NextStripeReader(int64_t batch_size, std::shared_ptr<RecordBatchReader>* out) {
- return NextStripeReader(batch_size, {}, out);
- }
-
- private:
- MemoryPool* pool_;
- std::unique_ptr<liborc::Reader> reader_;
- std::vector<StripeInformation> stripes_;
- int64_t current_row_;
-};
-
-ORCFileReader::ORCFileReader() { impl_.reset(new ORCFileReader::Impl()); }
-
-ORCFileReader::~ORCFileReader() {}
-
-Status ORCFileReader::Open(const std::shared_ptr<io::RandomAccessFile>& file,
- MemoryPool* pool, std::unique_ptr<ORCFileReader>* reader) {
- auto result = std::unique_ptr<ORCFileReader>(new ORCFileReader());
- RETURN_NOT_OK(result->impl_->Open(file, pool));
- *reader = std::move(result);
- return Status::OK();
-}
-
-Result<std::shared_ptr<const KeyValueMetadata>> ORCFileReader::ReadMetadata() {
- return impl_->ReadMetadata();
-}
-
-Status ORCFileReader::ReadSchema(std::shared_ptr<Schema>* out) {
- return impl_->ReadSchema(out);
-}
-
-Status ORCFileReader::Read(std::shared_ptr<Table>* out) { return impl_->Read(out); }
-
-Status ORCFileReader::Read(const std::shared_ptr<Schema>& schema,
- std::shared_ptr<Table>* out) {
- return impl_->Read(schema, out);
-}
-
-Status ORCFileReader::Read(const std::vector<int>& include_indices,
- std::shared_ptr<Table>* out) {
- return impl_->Read(include_indices, out);
-}
-
-Status ORCFileReader::Read(const std::shared_ptr<Schema>& schema,
- const std::vector<int>& include_indices,
- std::shared_ptr<Table>* out) {
- return impl_->Read(schema, include_indices, out);
-}
-
-Status ORCFileReader::ReadStripe(int64_t stripe, std::shared_ptr<RecordBatch>* out) {
- return impl_->ReadStripe(stripe, out);
-}
-
-Status ORCFileReader::ReadStripe(int64_t stripe, const std::vector<int>& include_indices,
- std::shared_ptr<RecordBatch>* out) {
- return impl_->ReadStripe(stripe, include_indices, out);
-}
-
-Status ORCFileReader::Seek(int64_t row_number) { return impl_->Seek(row_number); }
-
-Status ORCFileReader::NextStripeReader(int64_t batch_sizes,
- std::shared_ptr<RecordBatchReader>* out) {
- return impl_->NextStripeReader(batch_sizes, out);
-}
-
-Status ORCFileReader::NextStripeReader(int64_t batch_size,
- const std::vector<int>& include_indices,
- std::shared_ptr<RecordBatchReader>* out) {
- return impl_->NextStripeReader(batch_size, include_indices, out);
-}
-
-int64_t ORCFileReader::NumberOfStripes() { return impl_->NumberOfStripes(); }
-
-int64_t ORCFileReader::NumberOfRows() { return impl_->NumberOfRows(); }
-
-namespace {
-
-class ArrowOutputStream : public liborc::OutputStream {
- public:
- explicit ArrowOutputStream(arrow::io::OutputStream& output_stream)
- : output_stream_(output_stream), length_(0) {}
-
- uint64_t getLength() const override { return length_; }
-
- uint64_t getNaturalWriteSize() const override { return kOrcNaturalWriteSize; }
-
- void write(const void* buf, size_t length) override {
- ORC_THROW_NOT_OK(output_stream_.Write(buf, static_cast<int64_t>(length)));
- length_ += static_cast<int64_t>(length);
- }
-
- // Mandatory due to us implementing an ORC virtual class.
- // Used by ORC for error messages, not used by Arrow
- const std::string& getName() const override {
- static const std::string filename("ArrowOutputFile");
- return filename;
- }
-
- void close() override {
- if (!output_stream_.closed()) {
- ORC_THROW_NOT_OK(output_stream_.Close());
- }
- }
-
- void set_length(int64_t length) { length_ = length; }
-
- private:
- arrow::io::OutputStream& output_stream_;
- int64_t length_;
-};
-
-} // namespace
-
-class ORCFileWriter::Impl {
- public:
- Status Open(arrow::io::OutputStream* output_stream) {
- out_stream_ = std::unique_ptr<liborc::OutputStream>(
- checked_cast<liborc::OutputStream*>(new ArrowOutputStream(*output_stream)));
- return Status::OK();
- }
-
- Status Write(const Table& table) {
- std::unique_ptr<liborc::WriterOptions> orc_options =
- std::unique_ptr<liborc::WriterOptions>(new liborc::WriterOptions());
- ARROW_ASSIGN_OR_RAISE(auto orc_schema, GetOrcType(*(table.schema())));
- ORC_CATCH_NOT_OK(
- writer_ = liborc::createWriter(*orc_schema, out_stream_.get(), *orc_options))
-
- int64_t num_rows = table.num_rows();
- const int num_cols_ = table.num_columns();
- std::vector<int64_t> arrow_index_offset(num_cols_, 0);
- std::vector<int> arrow_chunk_offset(num_cols_, 0);
- std::unique_ptr<liborc::ColumnVectorBatch> batch =
- writer_->createRowBatch(kOrcWriterBatchSize);
- liborc::StructVectorBatch* root =
- internal::checked_cast<liborc::StructVectorBatch*>(batch.get());
- while (num_rows > 0) {
- for (int i = 0; i < num_cols_; i++) {
- RETURN_NOT_OK(adapters::orc::WriteBatch(
- *(table.column(i)), kOrcWriterBatchSize, &(arrow_chunk_offset[i]),
- &(arrow_index_offset[i]), (root->fields)[i]));
- }
- root->numElements = (root->fields)[0]->numElements;
- writer_->add(*batch);
- batch->clear();
- num_rows -= kOrcWriterBatchSize;
- }
- return Status::OK();
- }
-
- Status Close() {
- writer_->close();
- return Status::OK();
- }
-
- private:
- std::unique_ptr<liborc::Writer> writer_;
- std::unique_ptr<liborc::OutputStream> out_stream_;
-};
-
-ORCFileWriter::~ORCFileWriter() {}
-
-ORCFileWriter::ORCFileWriter() { impl_.reset(new ORCFileWriter::Impl()); }
-
-Result<std::unique_ptr<ORCFileWriter>> ORCFileWriter::Open(
- io::OutputStream* output_stream) {
- std::unique_ptr<ORCFileWriter> result =
- std::unique_ptr<ORCFileWriter>(new ORCFileWriter());
- Status status = result->impl_->Open(output_stream);
- RETURN_NOT_OK(status);
- return std::move(result);
-}
-
-Status ORCFileWriter::Write(const Table& table) { return impl_->Write(table); }
-
-Status ORCFileWriter::Close() { return impl_->Close(); }
-
-} // namespace orc
-} // namespace adapters
-} // namespace arrow
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "arrow/adapters/orc/adapter.h"
+
+#include <algorithm>
+#include <cstdint>
+#include <functional>
+#include <list>
+#include <memory>
+#include <sstream>
+#include <string>
+#include <utility>
+#include <vector>
+
+#include "arrow/adapters/orc/adapter_util.h"
+#include "arrow/buffer.h"
+#include "arrow/builder.h"
+#include "arrow/io/interfaces.h"
+#include "arrow/memory_pool.h"
+#include "arrow/record_batch.h"
+#include "arrow/status.h"
+#include "arrow/table.h"
+#include "arrow/table_builder.h"
+#include "arrow/type.h"
+#include "arrow/type_traits.h"
+#include "arrow/util/bit_util.h"
+#include "arrow/util/checked_cast.h"
+#include "arrow/util/decimal.h"
+#include "arrow/util/key_value_metadata.h"
+#include "arrow/util/macros.h"
+#include "arrow/util/range.h"
+#include "arrow/util/visibility.h"
+#include "orc/Exceptions.hh"
+
+// alias to not interfere with nested orc namespace
+namespace liborc = orc;
+
+#define ORC_THROW_NOT_OK(s) \
+ do { \
+ Status _s = (s); \
+ if (!_s.ok()) { \
+ std::stringstream ss; \
+ ss << "Arrow error: " << _s.ToString(); \
+ throw liborc::ParseError(ss.str()); \
+ } \
+ } while (0)
+
+#define ORC_ASSIGN_OR_THROW_IMPL(status_name, lhs, rexpr) \
+ auto status_name = (rexpr); \
+ ORC_THROW_NOT_OK(status_name.status()); \
+ lhs = std::move(status_name).ValueOrDie();
+
+#define ORC_ASSIGN_OR_THROW(lhs, rexpr) \
+ ORC_ASSIGN_OR_THROW_IMPL(ARROW_ASSIGN_OR_RAISE_NAME(_error_or_value, __COUNTER__), \
+ lhs, rexpr);
+
+#define ORC_BEGIN_CATCH_NOT_OK try {
+#define ORC_END_CATCH_NOT_OK \
+ } \
+ catch (const liborc::ParseError& e) { \
+ return Status::IOError(e.what()); \
+ } \
+ catch (const liborc::InvalidArgument& e) { \
+ return Status::Invalid(e.what()); \
+ } \
+ catch (const liborc::NotImplementedYet& e) { \
+ return Status::NotImplemented(e.what()); \
+ }
+
+#define ORC_CATCH_NOT_OK(_s) \
+ ORC_BEGIN_CATCH_NOT_OK(_s); \
+ ORC_END_CATCH_NOT_OK
+
+namespace arrow {
+namespace adapters {
+namespace orc {
+
+namespace {
+
+// The following are required by ORC to be uint64_t
+constexpr uint64_t kOrcWriterBatchSize = 128 * 1024;
+constexpr uint64_t kOrcNaturalWriteSize = 128 * 1024;
+
+using internal::checked_cast;
+
+class ArrowInputFile : public liborc::InputStream {
+ public:
+ explicit ArrowInputFile(const std::shared_ptr<io::RandomAccessFile>& file)
+ : file_(file) {}
+
+ uint64_t getLength() const override {
+ ORC_ASSIGN_OR_THROW(int64_t size, file_->GetSize());
+ return static_cast<uint64_t>(size);
+ }
+
+ uint64_t getNaturalReadSize() const override { return 128 * 1024; }
+
+ void read(void* buf, uint64_t length, uint64_t offset) override {
+ ORC_ASSIGN_OR_THROW(int64_t bytes_read, file_->ReadAt(offset, length, buf));
+
+ if (static_cast<uint64_t>(bytes_read) != length) {
+ throw liborc::ParseError("Short read from arrow input file");
+ }
+ }
+
+ const std::string& getName() const override {
+ static const std::string filename("ArrowInputFile");
+ return filename;
+ }
+
+ private:
+ std::shared_ptr<io::RandomAccessFile> file_;
+};
+
+struct StripeInformation {
+ uint64_t offset;
+ uint64_t length;
+ uint64_t num_rows;
+ uint64_t first_row_of_stripe;
+};
+
+// The number of rows to read in a ColumnVectorBatch
+constexpr int64_t kReadRowsBatch = 1000;
+
+class OrcStripeReader : public RecordBatchReader {
+ public:
+ OrcStripeReader(std::unique_ptr<liborc::RowReader> row_reader,
+ std::shared_ptr<Schema> schema, int64_t batch_size, MemoryPool* pool)
+ : row_reader_(std::move(row_reader)),
+ schema_(schema),
+ pool_(pool),
+ batch_size_{batch_size} {}
+
+ std::shared_ptr<Schema> schema() const override { return schema_; }
+
+ Status ReadNext(std::shared_ptr<RecordBatch>* out) override {
+ std::unique_ptr<liborc::ColumnVectorBatch> batch;
+ ORC_CATCH_NOT_OK(batch = row_reader_->createRowBatch(batch_size_));
+
+ const liborc::Type& type = row_reader_->getSelectedType();
+ if (!row_reader_->next(*batch)) {
+ out->reset();
+ return Status::OK();
+ }
+
+ std::unique_ptr<RecordBatchBuilder> builder;
+ RETURN_NOT_OK(RecordBatchBuilder::Make(schema_, pool_, batch->numElements, &builder));
+
+ // The top-level type must be a struct to read into an arrow table
+ const auto& struct_batch = checked_cast<liborc::StructVectorBatch&>(*batch);
+
+ for (int i = 0; i < builder->num_fields(); i++) {
+ RETURN_NOT_OK(AppendBatch(type.getSubtype(i), struct_batch.fields[i], 0,
+ batch->numElements, builder->GetField(i)));
+ }
+
+ RETURN_NOT_OK(builder->Flush(out));
+ return Status::OK();
+ }
+
+ private:
+ std::unique_ptr<liborc::RowReader> row_reader_;
+ std::shared_ptr<Schema> schema_;
+ MemoryPool* pool_;
+ int64_t batch_size_;
+};
+
+} // namespace
+
+class ORCFileReader::Impl {
+ public:
+ Impl() {}
+ ~Impl() {}
+
+ Status Open(const std::shared_ptr<io::RandomAccessFile>& file, MemoryPool* pool) {
+ std::unique_ptr<ArrowInputFile> io_wrapper(new ArrowInputFile(file));
+ liborc::ReaderOptions options;
+ std::unique_ptr<liborc::Reader> liborc_reader;
+ ORC_CATCH_NOT_OK(liborc_reader = createReader(std::move(io_wrapper), options));
+ pool_ = pool;
+ reader_ = std::move(liborc_reader);
+ current_row_ = 0;
+
+ return Init();
+ }
+
+ Status Init() {
+ int64_t nstripes = reader_->getNumberOfStripes();
+ stripes_.resize(nstripes);
+ std::unique_ptr<liborc::StripeInformation> stripe;
+ uint64_t first_row_of_stripe = 0;
+ for (int i = 0; i < nstripes; ++i) {
+ stripe = reader_->getStripe(i);
+ stripes_[i] = StripeInformation({stripe->getOffset(), stripe->getLength(),
+ stripe->getNumberOfRows(), first_row_of_stripe});
+ first_row_of_stripe += stripe->getNumberOfRows();
+ }
+ return Status::OK();
+ }
+
+ int64_t NumberOfStripes() { return stripes_.size(); }
+
+ int64_t NumberOfRows() { return reader_->getNumberOfRows(); }
+
+ Status ReadSchema(std::shared_ptr<Schema>* out) {
+ const liborc::Type& type = reader_->getType();
+ return GetArrowSchema(type, out);
+ }
+
+ Status ReadSchema(const liborc::RowReaderOptions& opts, std::shared_ptr<Schema>* out) {
+ std::unique_ptr<liborc::RowReader> row_reader;
+ ORC_CATCH_NOT_OK(row_reader = reader_->createRowReader(opts));
+ const liborc::Type& type = row_reader->getSelectedType();
+ return GetArrowSchema(type, out);
+ }
+
+ Result<std::shared_ptr<const KeyValueMetadata>> ReadMetadata() {
+ const std::list<std::string> keys = reader_->getMetadataKeys();
+ auto metadata = std::make_shared<KeyValueMetadata>();
+ for (const auto& key : keys) {
+ metadata->Append(key, reader_->getMetadataValue(key));
+ }
+ return std::const_pointer_cast<const KeyValueMetadata>(metadata);
+ }
+
+ Status GetArrowSchema(const liborc::Type& type, std::shared_ptr<Schema>* out) {
+ if (type.getKind() != liborc::STRUCT) {
+ return Status::NotImplemented(
+ "Only ORC files with a top-level struct "
+ "can be handled");
+ }
+ int size = static_cast<int>(type.getSubtypeCount());
+ std::vector<std::shared_ptr<Field>> fields;
+ for (int child = 0; child < size; ++child) {
+ std::shared_ptr<DataType> elemtype;
+ RETURN_NOT_OK(GetArrowType(type.getSubtype(child), &elemtype));
+ std::string name = type.getFieldName(child);
+ fields.push_back(field(name, elemtype));
+ }
+ ARROW_ASSIGN_OR_RAISE(auto metadata, ReadMetadata());
+ *out = std::make_shared<Schema>(std::move(fields), std::move(metadata));
+ return Status::OK();
+ }
+
+ Status Read(std::shared_ptr<Table>* out) {
+ liborc::RowReaderOptions opts;
+ std::shared_ptr<Schema> schema;
+ RETURN_NOT_OK(ReadSchema(opts, &schema));
+ return ReadTable(opts, schema, out);
+ }
+
+ Status Read(const std::shared_ptr<Schema>& schema, std::shared_ptr<Table>* out) {
+ liborc::RowReaderOptions opts;
+ return ReadTable(opts, schema, out);
+ }
+
+ Status Read(const std::vector<int>& include_indices, std::shared_ptr<Table>* out) {
+ liborc::RowReaderOptions opts;
+ RETURN_NOT_OK(SelectIndices(&opts, include_indices));
+ std::shared_ptr<Schema> schema;
+ RETURN_NOT_OK(ReadSchema(opts, &schema));
+ return ReadTable(opts, schema, out);
+ }
+
+ Status Read(const std::shared_ptr<Schema>& schema,
+ const std::vector<int>& include_indices, std::shared_ptr<Table>* out) {
+ liborc::RowReaderOptions opts;
+ RETURN_NOT_OK(SelectIndices(&opts, include_indices));
+ return ReadTable(opts, schema, out);
+ }
+
+ Status ReadStripe(int64_t stripe, std::shared_ptr<RecordBatch>* out) {
+ liborc::RowReaderOptions opts;
+ RETURN_NOT_OK(SelectStripe(&opts, stripe));
+ std::shared_ptr<Schema> schema;
+ RETURN_NOT_OK(ReadSchema(opts, &schema));
+ return ReadBatch(opts, schema, stripes_[stripe].num_rows, out);
+ }
+
+ Status ReadStripe(int64_t stripe, const std::vector<int>& include_indices,
+ std::shared_ptr<RecordBatch>* out) {
+ liborc::RowReaderOptions opts;
+ RETURN_NOT_OK(SelectIndices(&opts, include_indices));
+ RETURN_NOT_OK(SelectStripe(&opts, stripe));
+ std::shared_ptr<Schema> schema;
+ RETURN_NOT_OK(ReadSchema(opts, &schema));
+ return ReadBatch(opts, schema, stripes_[stripe].num_rows, out);
+ }
+
+ Status SelectStripe(liborc::RowReaderOptions* opts, int64_t stripe) {
+ ARROW_RETURN_IF(stripe < 0 || stripe >= NumberOfStripes(),
+ Status::Invalid("Out of bounds stripe: ", stripe));
+
+ opts->range(stripes_[stripe].offset, stripes_[stripe].length);
+ return Status::OK();
+ }
+
+ Status SelectStripeWithRowNumber(liborc::RowReaderOptions* opts, int64_t row_number,
+ StripeInformation* out) {
+ ARROW_RETURN_IF(row_number >= NumberOfRows(),
+ Status::Invalid("Out of bounds row number: ", row_number));
+
+ for (auto it = stripes_.begin(); it != stripes_.end(); it++) {
+ if (static_cast<uint64_t>(row_number) >= it->first_row_of_stripe &&
+ static_cast<uint64_t>(row_number) < it->first_row_of_stripe + it->num_rows) {
+ opts->range(it->offset, it->length);
+ *out = *it;
+ return Status::OK();
+ }
+ }
+
+ return Status::Invalid("Invalid row number", row_number);
+ }
+
+ Status SelectIndices(liborc::RowReaderOptions* opts,
+ const std::vector<int>& include_indices) {
+ std::list<uint64_t> include_indices_list;
+ for (auto it = include_indices.begin(); it != include_indices.end(); ++it) {
+ ARROW_RETURN_IF(*it < 0, Status::Invalid("Negative field index"));
+ include_indices_list.push_back(*it);
+ }
+ opts->includeTypes(include_indices_list);
+ return Status::OK();
+ }
+
+ Status ReadTable(const liborc::RowReaderOptions& row_opts,
+ const std::shared_ptr<Schema>& schema, std::shared_ptr<Table>* out) {
+ liborc::RowReaderOptions opts(row_opts);
+ std::vector<std::shared_ptr<RecordBatch>> batches(stripes_.size());
+ for (size_t stripe = 0; stripe < stripes_.size(); stripe++) {
+ opts.range(stripes_[stripe].offset, stripes_[stripe].length);
+ RETURN_NOT_OK(ReadBatch(opts, schema, stripes_[stripe].num_rows, &batches[stripe]));
+ }
+ return Table::FromRecordBatches(schema, std::move(batches)).Value(out);
+ }
+
+ Status ReadBatch(const liborc::RowReaderOptions& opts,
+ const std::shared_ptr<Schema>& schema, int64_t nrows,
+ std::shared_ptr<RecordBatch>* out) {
+ std::unique_ptr<liborc::RowReader> row_reader;
+ std::unique_ptr<liborc::ColumnVectorBatch> batch;
+
+ ORC_BEGIN_CATCH_NOT_OK
+ row_reader = reader_->createRowReader(opts);
+ batch = row_reader->createRowBatch(std::min(nrows, kReadRowsBatch));
+ ORC_END_CATCH_NOT_OK
+
+ std::unique_ptr<RecordBatchBuilder> builder;
+ RETURN_NOT_OK(RecordBatchBuilder::Make(schema, pool_, nrows, &builder));
+
+ // The top-level ORC type must be a struct to read into an Arrow Table.
+ const auto& struct_batch = checked_cast<liborc::StructVectorBatch&>(*batch);
+
+ const liborc::Type& type = row_reader->getSelectedType();
+ while (row_reader->next(*batch)) {
+ for (int i = 0; i < builder->num_fields(); i++) {
+ RETURN_NOT_OK(AppendBatch(type.getSubtype(i), struct_batch.fields[i], 0,
+ batch->numElements, builder->GetField(i)));
+ }
+ }
+ RETURN_NOT_OK(builder->Flush(out));
+ return Status::OK();
+ }
+
+ Status Seek(int64_t row_number) {
+ ARROW_RETURN_IF(row_number < 0 || row_number >= NumberOfRows(),
+ Status::Invalid("Out of bounds row number: ", row_number));
+
+ current_row_ = row_number;
+ return Status::OK();
+ }
+
+ Status NextStripeReader(int64_t batch_size, const std::vector<int>& include_indices,
+ std::shared_ptr<RecordBatchReader>* out) {
+ if (current_row_ >= NumberOfRows()) {
+ out->reset();
+ return Status::OK();
+ }
+
+ liborc::RowReaderOptions opts;
+ if (!include_indices.empty()) {
+ RETURN_NOT_OK(SelectIndices(&opts, include_indices));
+ }
+ StripeInformation stripe_info({0, 0, 0, 0});
+ RETURN_NOT_OK(SelectStripeWithRowNumber(&opts, current_row_, &stripe_info));
+ std::shared_ptr<Schema> schema;
+ RETURN_NOT_OK(ReadSchema(opts, &schema));
+ std::unique_ptr<liborc::RowReader> row_reader;
+
+ ORC_BEGIN_CATCH_NOT_OK
+ row_reader = reader_->createRowReader(opts);
+ row_reader->seekToRow(current_row_);
+ current_row_ = stripe_info.first_row_of_stripe + stripe_info.num_rows;
+ ORC_END_CATCH_NOT_OK
+
+ *out = std::shared_ptr<RecordBatchReader>(
+ new OrcStripeReader(std::move(row_reader), schema, batch_size, pool_));
+ return Status::OK();
+ }
+
+ Status NextStripeReader(int64_t batch_size, std::shared_ptr<RecordBatchReader>* out) {
+ return NextStripeReader(batch_size, {}, out);
+ }
+
+ private:
+ MemoryPool* pool_;
+ std::unique_ptr<liborc::Reader> reader_;
+ std::vector<StripeInformation> stripes_;
+ int64_t current_row_;
+};
+
+ORCFileReader::ORCFileReader() { impl_.reset(new ORCFileReader::Impl()); }
+
+ORCFileReader::~ORCFileReader() {}
+
+Status ORCFileReader::Open(const std::shared_ptr<io::RandomAccessFile>& file,
+ MemoryPool* pool, std::unique_ptr<ORCFileReader>* reader) {
+ auto result = std::unique_ptr<ORCFileReader>(new ORCFileReader());
+ RETURN_NOT_OK(result->impl_->Open(file, pool));
+ *reader = std::move(result);
+ return Status::OK();
+}
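+
+// A minimal usage sketch (hypothetical names; `input` is an already-opened
+// io::RandomAccessFile):
+//
+// std::unique_ptr<ORCFileReader> reader;
+// ARROW_RETURN_NOT_OK(ORCFileReader::Open(input, default_memory_pool(), &reader));
+// std::shared_ptr<Table> table;
+// ARROW_RETURN_NOT_OK(reader->Read(&table));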
+
+Result<std::shared_ptr<const KeyValueMetadata>> ORCFileReader::ReadMetadata() {
+ return impl_->ReadMetadata();
+}
+
+Status ORCFileReader::ReadSchema(std::shared_ptr<Schema>* out) {
+ return impl_->ReadSchema(out);
+}
+
+Status ORCFileReader::Read(std::shared_ptr<Table>* out) { return impl_->Read(out); }
+
+Status ORCFileReader::Read(const std::shared_ptr<Schema>& schema,
+ std::shared_ptr<Table>* out) {
+ return impl_->Read(schema, out);
+}
+
+Status ORCFileReader::Read(const std::vector<int>& include_indices,
+ std::shared_ptr<Table>* out) {
+ return impl_->Read(include_indices, out);
+}
+
+Status ORCFileReader::Read(const std::shared_ptr<Schema>& schema,
+ const std::vector<int>& include_indices,
+ std::shared_ptr<Table>* out) {
+ return impl_->Read(schema, include_indices, out);
+}
+
+Status ORCFileReader::ReadStripe(int64_t stripe, std::shared_ptr<RecordBatch>* out) {
+ return impl_->ReadStripe(stripe, out);
+}
+
+Status ORCFileReader::ReadStripe(int64_t stripe, const std::vector<int>& include_indices,
+ std::shared_ptr<RecordBatch>* out) {
+ return impl_->ReadStripe(stripe, include_indices, out);
+}
+
+Status ORCFileReader::Seek(int64_t row_number) { return impl_->Seek(row_number); }
+
+Status ORCFileReader::NextStripeReader(int64_t batch_size,
+ std::shared_ptr<RecordBatchReader>* out) {
+ return impl_->NextStripeReader(batch_size, out);
+}
+
+Status ORCFileReader::NextStripeReader(int64_t batch_size,
+ const std::vector<int>& include_indices,
+ std::shared_ptr<RecordBatchReader>* out) {
+ return impl_->NextStripeReader(batch_size, include_indices, out);
+}
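+
+// A hedged sketch of stripe-at-a-time iteration (names illustrative): each
+// call returns a RecordBatchReader over one stripe, and `out` is reset to
+// null once all rows in the file have been consumed.
+//
+// std::shared_ptr<RecordBatchReader> stripe_reader;
+// ARROW_RETURN_NOT_OK(reader->NextStripeReader(1024, &stripe_reader));
+// while (stripe_reader) {
+// std::shared_ptr<RecordBatch> batch;
+// ARROW_RETURN_NOT_OK(stripe_reader->ReadNext(&batch));
+// if (batch) { /* consume batch */ continue; }
+// ARROW_RETURN_NOT_OK(reader->NextStripeReader(1024, &stripe_reader));
+// }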
+
+int64_t ORCFileReader::NumberOfStripes() { return impl_->NumberOfStripes(); }
+
+int64_t ORCFileReader::NumberOfRows() { return impl_->NumberOfRows(); }
+
+namespace {
+
+class ArrowOutputStream : public liborc::OutputStream {
+ public:
+ explicit ArrowOutputStream(arrow::io::OutputStream& output_stream)
+ : output_stream_(output_stream), length_(0) {}
+
+ uint64_t getLength() const override { return length_; }
+
+ uint64_t getNaturalWriteSize() const override { return kOrcNaturalWriteSize; }
+
+ void write(const void* buf, size_t length) override {
+ ORC_THROW_NOT_OK(output_stream_.Write(buf, static_cast<int64_t>(length)));
+ length_ += static_cast<int64_t>(length);
+ }
+
+ // Mandatory because we implement the liborc::OutputStream virtual interface.
+ // ORC uses this name in error messages; Arrow does not use it.
+ const std::string& getName() const override {
+ static const std::string filename("ArrowOutputFile");
+ return filename;
+ }
+
+ void close() override {
+ if (!output_stream_.closed()) {
+ ORC_THROW_NOT_OK(output_stream_.Close());
+ }
+ }
+
+ void set_length(int64_t length) { length_ = length; }
+
+ private:
+ arrow::io::OutputStream& output_stream_;
+ int64_t length_;
+};
+
+} // namespace
+
+class ORCFileWriter::Impl {
+ public:
+ Status Open(arrow::io::OutputStream* output_stream) {
+ out_stream_ = std::unique_ptr<liborc::OutputStream>(
+ checked_cast<liborc::OutputStream*>(new ArrowOutputStream(*output_stream)));
+ return Status::OK();
+ }
+
+ Status Write(const Table& table) {
+ std::unique_ptr<liborc::WriterOptions> orc_options =
+ std::unique_ptr<liborc::WriterOptions>(new liborc::WriterOptions());
+ ARROW_ASSIGN_OR_RAISE(auto orc_schema, GetOrcType(*(table.schema())));
+ ORC_CATCH_NOT_OK(
+ writer_ = liborc::createWriter(*orc_schema, out_stream_.get(), *orc_options))
+
+ int64_t num_rows = table.num_rows();
+ const int num_cols = table.num_columns();
+ std::vector<int64_t> arrow_index_offset(num_cols, 0);
+ std::vector<int> arrow_chunk_offset(num_cols, 0);
+ std::unique_ptr<liborc::ColumnVectorBatch> batch =
+ writer_->createRowBatch(kOrcWriterBatchSize);
+ liborc::StructVectorBatch* root =
+ internal::checked_cast<liborc::StructVectorBatch*>(batch.get());
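+ // Emit kOrcWriterBatchSize rows per ORC batch; the per-column chunk and
+ // intra-chunk offsets carry progress across iterations so chunked columns
+ // split correctly at batch boundaries.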
+ while (num_rows > 0) {
+ for (int i = 0; i < num_cols; i++) {
+ RETURN_NOT_OK(adapters::orc::WriteBatch(
+ *(table.column(i)), kOrcWriterBatchSize, &(arrow_chunk_offset[i]),
+ &(arrow_index_offset[i]), (root->fields)[i]));
+ }
+ root->numElements = (root->fields)[0]->numElements;
+ writer_->add(*batch);
+ batch->clear();
+ num_rows -= kOrcWriterBatchSize;
+ }
+ return Status::OK();
+ }
+
+ Status Close() {
+ writer_->close();
+ return Status::OK();
+ }
+
+ private:
+ std::unique_ptr<liborc::Writer> writer_;
+ std::unique_ptr<liborc::OutputStream> out_stream_;
+};
+
+ORCFileWriter::~ORCFileWriter() {}
+
+ORCFileWriter::ORCFileWriter() { impl_.reset(new ORCFileWriter::Impl()); }
+
+Result<std::unique_ptr<ORCFileWriter>> ORCFileWriter::Open(
+ io::OutputStream* output_stream) {
+ auto result = std::unique_ptr<ORCFileWriter>(new ORCFileWriter());
+ RETURN_NOT_OK(result->impl_->Open(output_stream));
+ return std::move(result);
+}
+
+Status ORCFileWriter::Write(const Table& table) { return impl_->Write(table); }
+
+Status ORCFileWriter::Close() { return impl_->Close(); }
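+
+// Writer usage, as a minimal sketch (hypothetical names; `sink` is an
+// io::OutputStream* and `table` a populated Table):
+//
+// ARROW_ASSIGN_OR_RAISE(auto writer, ORCFileWriter::Open(sink));
+// ARROW_RETURN_NOT_OK(writer->Write(table));
+// ARROW_RETURN_NOT_OK(writer->Close());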
+
+} // namespace orc
+} // namespace adapters
+} // namespace arrow
diff --git a/contrib/libs/apache/arrow/cpp/src/arrow/adapters/orc/adapter.h b/contrib/libs/apache/arrow/cpp/src/arrow/adapters/orc/adapter.h
index e6e406068a9..012c1701980 100644
--- a/contrib/libs/apache/arrow/cpp/src/arrow/adapters/orc/adapter.h
+++ b/contrib/libs/apache/arrow/cpp/src/arrow/adapters/orc/adapter.h
@@ -1,181 +1,181 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-#pragma once
-
-#include <cstdint>
-#include <memory>
-#include <vector>
-
-#include "arrow/io/interfaces.h"
-#include "arrow/memory_pool.h"
-#include "arrow/record_batch.h"
-#include "arrow/status.h"
-#include "arrow/type.h"
-#include "arrow/type_fwd.h"
-#include "arrow/util/visibility.h"
-
-namespace arrow {
-namespace adapters {
-namespace orc {
-
-/// \class ORCFileReader
-/// \brief Read an Arrow Table or RecordBatch from an ORC file.
-class ARROW_EXPORT ORCFileReader {
- public:
- ~ORCFileReader();
-
- /// \brief Creates a new ORC reader.
- ///
- /// \param[in] file the data source
- /// \param[in] pool a MemoryPool to use for buffer allocations
- /// \param[out] reader the returned reader object
- /// \return Status
- static Status Open(const std::shared_ptr<io::RandomAccessFile>& file, MemoryPool* pool,
- std::unique_ptr<ORCFileReader>* reader);
-
- /// \brief Return the metadata read from the ORC file
- ///
- /// \return A KeyValueMetadata object containing the ORC metadata
- Result<std::shared_ptr<const KeyValueMetadata>> ReadMetadata();
-
- /// \brief Return the schema read from the ORC file
- ///
- /// \param[out] out the returned Schema object
- Status ReadSchema(std::shared_ptr<Schema>* out);
-
- /// \brief Read the file as a Table
- ///
- /// The table will be composed of one record batch per stripe.
- ///
- /// \param[out] out the returned Table
- Status Read(std::shared_ptr<Table>* out);
-
- /// \brief Read the file as a Table
- ///
- /// The table will be composed of one record batch per stripe.
- ///
- /// \param[in] schema the Table schema
- /// \param[out] out the returned Table
- Status Read(const std::shared_ptr<Schema>& schema, std::shared_ptr<Table>* out);
-
- /// \brief Read the file as a Table
- ///
- /// The table will be composed of one record batch per stripe.
- ///
- /// \param[in] include_indices the selected field indices to read
- /// \param[out] out the returned Table
- Status Read(const std::vector<int>& include_indices, std::shared_ptr<Table>* out);
-
- /// \brief Read the file as a Table
- ///
- /// The table will be composed of one record batch per stripe.
- ///
- /// \param[in] schema the Table schema
- /// \param[in] include_indices the selected field indices to read
- /// \param[out] out the returned Table
- Status Read(const std::shared_ptr<Schema>& schema,
- const std::vector<int>& include_indices, std::shared_ptr<Table>* out);
-
- /// \brief Read a single stripe as a RecordBatch
- ///
- /// \param[in] stripe the stripe index
- /// \param[out] out the returned RecordBatch
- Status ReadStripe(int64_t stripe, std::shared_ptr<RecordBatch>* out);
-
- /// \brief Read a single stripe as a RecordBatch
- ///
- /// \param[in] stripe the stripe index
- /// \param[in] include_indices the selected field indices to read
- /// \param[out] out the returned RecordBatch
- Status ReadStripe(int64_t stripe, const std::vector<int>& include_indices,
- std::shared_ptr<RecordBatch>* out);
-
- /// \brief Seek to the designated row. Invoking NextStripeReader() after a
- /// seek will return a stripe reader starting from the designated row.
- ///
- /// \param[in] row_number the row number to seek to
- Status Seek(int64_t row_number);
-
- /// \brief Get a stripe-level record batch iterator with the specified row
- /// count in each record batch. NextStripeReader serves as a fine-grained
- /// alternative to ReadStripe, which may cause OOM issues by loading
- /// whole stripes into memory.
- ///
- /// \param[in] batch_size the number of rows each record batch contains in
- /// record batch iteration.
- /// \param[out] out the returned stripe reader
- Status NextStripeReader(int64_t batch_size, std::shared_ptr<RecordBatchReader>* out);
-
- /// \brief Get a stripe-level record batch iterator with the specified row
- /// count in each record batch. NextStripeReader serves as a fine-grained
- /// alternative to ReadStripe, which may cause OOM issues by loading
- /// whole stripes into memory.
- ///
- /// \param[in] batch_size the number of rows each record batch contains in
- /// record batch iteration.
- /// \param[in] include_indices the selected field indices to read
- /// \param[out] out the returned stripe reader
- Status NextStripeReader(int64_t batch_size, const std::vector<int>& include_indices,
- std::shared_ptr<RecordBatchReader>* out);
-
- /// \brief The number of stripes in the file
- int64_t NumberOfStripes();
-
- /// \brief The number of rows in the file
- int64_t NumberOfRows();
-
- private:
- class Impl;
- std::unique_ptr<Impl> impl_;
- ORCFileReader();
-};
-
-/// \class ORCFileWriter
-/// \brief Write an Arrow Table or RecordBatch to an ORC file.
-class ARROW_EXPORT ORCFileWriter {
- public:
- ~ORCFileWriter();
- /// \brief Creates a new ORC writer.
- ///
- /// \param[in] output_stream a pointer to the io::OutputStream to write into
- /// \return the returned writer object
- static Result<std::unique_ptr<ORCFileWriter>> Open(io::OutputStream* output_stream);
-
- /// \brief Write a table
- ///
- /// \param[in] table the Arrow table from which data is extracted
- /// \return Status
- Status Write(const Table& table);
-
- /// \brief Close an ORC writer (orc::Writer)
- ///
- /// \return Status
- Status Close();
-
- private:
- class Impl;
- std::unique_ptr<Impl> impl_;
-
- private:
- ORCFileWriter();
-};
-
-} // namespace orc
-} // namespace adapters
-} // namespace arrow
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+
+#include <cstdint>
+#include <memory>
+#include <vector>
+
+#include "arrow/io/interfaces.h"
+#include "arrow/memory_pool.h"
+#include "arrow/record_batch.h"
+#include "arrow/status.h"
+#include "arrow/type.h"
+#include "arrow/type_fwd.h"
+#include "arrow/util/visibility.h"
+
+namespace arrow {
+namespace adapters {
+namespace orc {
+
+/// \class ORCFileReader
+/// \brief Read an Arrow Table or RecordBatch from an ORC file.
+class ARROW_EXPORT ORCFileReader {
+ public:
+ ~ORCFileReader();
+
+ /// \brief Creates a new ORC reader.
+ ///
+ /// \param[in] file the data source
+ /// \param[in] pool a MemoryPool to use for buffer allocations
+ /// \param[out] reader the returned reader object
+ /// \return Status
+ static Status Open(const std::shared_ptr<io::RandomAccessFile>& file, MemoryPool* pool,
+ std::unique_ptr<ORCFileReader>* reader);
+
+ /// \brief Return the metadata read from the ORC file
+ ///
+ /// \return A KeyValueMetadata object containing the ORC metadata
+ Result<std::shared_ptr<const KeyValueMetadata>> ReadMetadata();
+
+ /// \brief Return the schema read from the ORC file
+ ///
+ /// \param[out] out the returned Schema object
+ Status ReadSchema(std::shared_ptr<Schema>* out);
+
+ /// \brief Read the file as a Table
+ ///
+ /// The table will be composed of one record batch per stripe.
+ ///
+ /// \param[out] out the returned Table
+ Status Read(std::shared_ptr<Table>* out);
+
+ /// \brief Read the file as a Table
+ ///
+ /// The table will be composed of one record batch per stripe.
+ ///
+ /// \param[in] schema the Table schema
+ /// \param[out] out the returned Table
+ Status Read(const std::shared_ptr<Schema>& schema, std::shared_ptr<Table>* out);
+
+ /// \brief Read the file as a Table
+ ///
+ /// The table will be composed of one record batch per stripe.
+ ///
+ /// \param[in] include_indices the selected field indices to read
+ /// \param[out] out the returned Table
+ Status Read(const std::vector<int>& include_indices, std::shared_ptr<Table>* out);
+
+ /// \brief Read the file as a Table
+ ///
+ /// The table will be composed of one record batch per stripe.
+ ///
+ /// \param[in] schema the Table schema
+ /// \param[in] include_indices the selected field indices to read
+ /// \param[out] out the returned Table
+ Status Read(const std::shared_ptr<Schema>& schema,
+ const std::vector<int>& include_indices, std::shared_ptr<Table>* out);
+
+ /// \brief Read a single stripe as a RecordBatch
+ ///
+ /// \param[in] stripe the stripe index
+ /// \param[out] out the returned RecordBatch
+ Status ReadStripe(int64_t stripe, std::shared_ptr<RecordBatch>* out);
+
+ /// \brief Read a single stripe as a RecordBatch
+ ///
+ /// \param[in] stripe the stripe index
+ /// \param[in] include_indices the selected field indices to read
+ /// \param[out] out the returned RecordBatch
+ Status ReadStripe(int64_t stripe, const std::vector<int>& include_indices,
+ std::shared_ptr<RecordBatch>* out);
+
+ /// \brief Seek to the designated row. Invoking NextStripeReader() after a
+ /// seek will return a stripe reader starting from the designated row.
+ ///
+ /// \param[in] row_number the row number to seek to
+ Status Seek(int64_t row_number);
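+
+ // Hedged example of combining Seek with NextStripeReader (names illustrative):
+ //
+ // ARROW_RETURN_NOT_OK(reader->Seek(42));
+ // std::shared_ptr<RecordBatchReader> stripe_reader;
+ // ARROW_RETURN_NOT_OK(reader->NextStripeReader(1024, &stripe_reader));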
+
+ /// \brief Get a stripe-level record batch iterator with the specified row
+ /// count in each record batch. NextStripeReader serves as a fine-grained
+ /// alternative to ReadStripe, which may cause OOM issues by loading
+ /// whole stripes into memory.
+ ///
+ /// \param[in] batch_size the number of rows each record batch contains in
+ /// record batch iteration.
+ /// \param[out] out the returned stripe reader
+ Status NextStripeReader(int64_t batch_size, std::shared_ptr<RecordBatchReader>* out);
+
+ /// \brief Get a stripe-level record batch iterator with the specified row
+ /// count in each record batch. NextStripeReader serves as a fine-grained
+ /// alternative to ReadStripe, which may cause OOM issues by loading
+ /// whole stripes into memory.
+ ///
+ /// \param[in] batch_size the number of rows each record batch contains in
+ /// record batch iteration.
+ /// \param[in] include_indices the selected field indices to read
+ /// \param[out] out the returned stripe reader
+ Status NextStripeReader(int64_t batch_size, const std::vector<int>& include_indices,
+ std::shared_ptr<RecordBatchReader>* out);
+
+ /// \brief The number of stripes in the file
+ int64_t NumberOfStripes();
+
+ /// \brief The number of rows in the file
+ int64_t NumberOfRows();
+
+ private:
+ class Impl;
+ std::unique_ptr<Impl> impl_;
+ ORCFileReader();
+};
+
+/// \class ORCFileWriter
+/// \brief Write an Arrow Table or RecordBatch to an ORC file.
+class ARROW_EXPORT ORCFileWriter {
+ public:
+ ~ORCFileWriter();
+ /// \brief Creates a new ORC writer.
+ ///
+ /// \param[in] output_stream a pointer to the io::OutputStream to write into
+ /// \return the returned writer object
+ static Result<std::unique_ptr<ORCFileWriter>> Open(io::OutputStream* output_stream);
+
+ /// \brief Write a table
+ ///
+ /// \param[in] table the Arrow table from which data is extracted
+ /// \return Status
+ Status Write(const Table& table);
+
+ /// \brief Close an ORC writer (orc::Writer)
+ ///
+ /// \return Status
+ Status Close();
+
+ private:
+ class Impl;
+ std::unique_ptr<Impl> impl_;
+
+ private:
+ ORCFileWriter();
+};
+
+} // namespace orc
+} // namespace adapters
+} // namespace arrow
diff --git a/contrib/libs/apache/arrow/cpp/src/arrow/adapters/orc/adapter_util.cc b/contrib/libs/apache/arrow/cpp/src/arrow/adapters/orc/adapter_util.cc
index cbd29b3741b..f956a6f6217 100644
--- a/contrib/libs/apache/arrow/cpp/src/arrow/adapters/orc/adapter_util.cc
+++ b/contrib/libs/apache/arrow/cpp/src/arrow/adapters/orc/adapter_util.cc
@@ -1,1069 +1,1069 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-#include "arrow/adapters/orc/adapter_util.h"
-
-#include <cmath>
-#include <string>
-#include <vector>
-
-#include "arrow/array/builder_base.h"
-#include "arrow/builder.h"
-#include "arrow/chunked_array.h"
-#include "arrow/scalar.h"
-#include "arrow/status.h"
-#include "arrow/util/bitmap_ops.h"
-#include "arrow/util/checked_cast.h"
-#include "arrow/util/decimal.h"
-#include "arrow/util/range.h"
-#include "arrow/util/string_view.h"
-#include "arrow/visitor_inline.h"
-#include "orc/Exceptions.hh"
-#include "orc/MemoryPool.hh"
-#include "orc/OrcFile.hh"
-
-// alias to not interfere with nested orc namespace
-namespace liborc = orc;
-
-namespace arrow {
-
-using internal::checked_cast;
-
-namespace adapters {
-namespace orc {
-
-namespace {
-
-// The number of milliseconds, microseconds and nanoseconds in a second
-constexpr int64_t kOneSecondMillis = 1000LL;
-constexpr int64_t kOneMicroNanos = 1000LL;
-constexpr int64_t kOneSecondMicros = 1000000LL;
-constexpr int64_t kOneMilliNanos = 1000000LL;
-constexpr int64_t kOneSecondNanos = 1000000000LL;
-
-Status AppendStructBatch(const liborc::Type* type,
- liborc::ColumnVectorBatch* column_vector_batch, int64_t offset,
- int64_t length, ArrayBuilder* abuilder) {
- auto builder = checked_cast<StructBuilder*>(abuilder);
- auto batch = checked_cast<liborc::StructVectorBatch*>(column_vector_batch);
-
- const uint8_t* valid_bytes = nullptr;
- if (batch->hasNulls) {
- valid_bytes = reinterpret_cast<const uint8_t*>(batch->notNull.data()) + offset;
- }
- RETURN_NOT_OK(builder->AppendValues(length, valid_bytes));
-
- for (int i = 0; i < builder->num_fields(); i++) {
- RETURN_NOT_OK(AppendBatch(type->getSubtype(i), batch->fields[i], offset, length,
- builder->field_builder(i)));
- }
- return Status::OK();
-}
-
-Status AppendListBatch(const liborc::Type* type,
- liborc::ColumnVectorBatch* column_vector_batch, int64_t offset,
- int64_t length, ArrayBuilder* abuilder) {
- auto builder = checked_cast<ListBuilder*>(abuilder);
- auto batch = checked_cast<liborc::ListVectorBatch*>(column_vector_batch);
- liborc::ColumnVectorBatch* elements = batch->elements.get();
- const liborc::Type* elemtype = type->getSubtype(0);
-
- const bool has_nulls = batch->hasNulls;
- for (int64_t i = offset; i < length + offset; i++) {
- if (!has_nulls || batch->notNull[i]) {
- int64_t start = batch->offsets[i];
- int64_t end = batch->offsets[i + 1];
- RETURN_NOT_OK(builder->Append());
- RETURN_NOT_OK(
- AppendBatch(elemtype, elements, start, end - start, builder->value_builder()));
- } else {
- RETURN_NOT_OK(builder->AppendNull());
- }
- }
- return Status::OK();
-}
-
-Status AppendMapBatch(const liborc::Type* type,
- liborc::ColumnVectorBatch* column_vector_batch, int64_t offset,
- int64_t length, ArrayBuilder* abuilder) {
- auto builder = checked_cast<MapBuilder*>(abuilder);
- auto batch = checked_cast<liborc::MapVectorBatch*>(column_vector_batch);
- liborc::ColumnVectorBatch* keys = batch->keys.get();
- liborc::ColumnVectorBatch* items = batch->elements.get();
- const liborc::Type* key_type = type->getSubtype(0);
- const liborc::Type* item_type = type->getSubtype(1);
-
- const bool has_nulls = batch->hasNulls;
- for (int64_t i = offset; i < length + offset; i++) {
- if (!has_nulls || batch->notNull[i]) {
- int64_t start = batch->offsets[i];
- int64_t end = batch->offsets[i + 1];
- RETURN_NOT_OK(builder->Append());
- RETURN_NOT_OK(
- AppendBatch(key_type, keys, start, end - start, builder->key_builder()));
- RETURN_NOT_OK(
- AppendBatch(item_type, items, start, end - start, builder->item_builder()));
- } else {
- RETURN_NOT_OK(builder->AppendNull());
- }
- }
- return Status::OK();
-}
-
-template <class BuilderType, class BatchType, class ElemType>
-Status AppendNumericBatch(liborc::ColumnVectorBatch* column_vector_batch, int64_t offset,
- int64_t length, ArrayBuilder* abuilder) {
- auto builder = checked_cast<BuilderType*>(abuilder);
- auto batch = checked_cast<BatchType*>(column_vector_batch);
-
- if (length == 0) {
- return Status::OK();
- }
- const uint8_t* valid_bytes = nullptr;
- if (batch->hasNulls) {
- valid_bytes = reinterpret_cast<const uint8_t*>(batch->notNull.data()) + offset;
- }
- const ElemType* source = batch->data.data() + offset;
- RETURN_NOT_OK(builder->AppendValues(source, length, valid_bytes));
- return Status::OK();
-}
-
-template <class BuilderType, class TargetType, class BatchType, class SourceType>
-Status AppendNumericBatchCast(liborc::ColumnVectorBatch* column_vector_batch,
- int64_t offset, int64_t length, ArrayBuilder* abuilder) {
- auto builder = checked_cast<BuilderType*>(abuilder);
- auto batch = checked_cast<BatchType*>(column_vector_batch);
-
- if (length == 0) {
- return Status::OK();
- }
-
- const uint8_t* valid_bytes = nullptr;
- if (batch->hasNulls) {
- valid_bytes = reinterpret_cast<const uint8_t*>(batch->notNull.data()) + offset;
- }
- const SourceType* source = batch->data.data() + offset;
- auto cast_iter = internal::MakeLazyRange(
- [&source](int64_t index) { return static_cast<TargetType>(source[index]); },
- length);
-
- RETURN_NOT_OK(builder->AppendValues(cast_iter.begin(), cast_iter.end(), valid_bytes));
-
- return Status::OK();
-}
-
-Status AppendBoolBatch(liborc::ColumnVectorBatch* column_vector_batch, int64_t offset,
- int64_t length, ArrayBuilder* abuilder) {
- auto builder = checked_cast<BooleanBuilder*>(abuilder);
- auto batch = checked_cast<liborc::LongVectorBatch*>(column_vector_batch);
-
- if (length == 0) {
- return Status::OK();
- }
-
- const uint8_t* valid_bytes = nullptr;
- if (batch->hasNulls) {
- valid_bytes = reinterpret_cast<const uint8_t*>(batch->notNull.data()) + offset;
- }
- const int64_t* source = batch->data.data() + offset;
-
- auto cast_iter = internal::MakeLazyRange(
- [&source](int64_t index) { return static_cast<bool>(source[index]); }, length);
-
- RETURN_NOT_OK(builder->AppendValues(cast_iter.begin(), cast_iter.end(), valid_bytes));
-
- return Status::OK();
-}
-
-Status AppendTimestampBatch(liborc::ColumnVectorBatch* column_vector_batch,
- int64_t offset, int64_t length, ArrayBuilder* abuilder) {
- auto builder = checked_cast<TimestampBuilder*>(abuilder);
- auto batch = checked_cast<liborc::TimestampVectorBatch*>(column_vector_batch);
-
- if (length == 0) {
- return Status::OK();
- }
-
- const uint8_t* valid_bytes = nullptr;
- if (batch->hasNulls) {
- valid_bytes = reinterpret_cast<const uint8_t*>(batch->notNull.data()) + offset;
- }
-
- const int64_t* seconds = batch->data.data() + offset;
- const int64_t* nanos = batch->nanoseconds.data() + offset;
-
- auto transform_timestamp = [seconds, nanos](int64_t index) {
- return seconds[index] * kOneSecondNanos + nanos[index];
- };
-
- auto transform_range = internal::MakeLazyRange(transform_timestamp, length);
-
- RETURN_NOT_OK(
- builder->AppendValues(transform_range.begin(), transform_range.end(), valid_bytes));
- return Status::OK();
-}
-
-template <class BuilderType>
-Status AppendBinaryBatch(liborc::ColumnVectorBatch* column_vector_batch, int64_t offset,
- int64_t length, ArrayBuilder* abuilder) {
- auto builder = checked_cast<BuilderType*>(abuilder);
- auto batch = checked_cast<liborc::StringVectorBatch*>(column_vector_batch);
-
- const bool has_nulls = batch->hasNulls;
- for (int64_t i = offset; i < length + offset; i++) {
- if (!has_nulls || batch->notNull[i]) {
- RETURN_NOT_OK(
- builder->Append(batch->data[i], static_cast<int32_t>(batch->length[i])));
- } else {
- RETURN_NOT_OK(builder->AppendNull());
- }
- }
- return Status::OK();
-}
-
-Status AppendFixedBinaryBatch(liborc::ColumnVectorBatch* column_vector_batch,
- int64_t offset, int64_t length, ArrayBuilder* abuilder) {
- auto builder = checked_cast<FixedSizeBinaryBuilder*>(abuilder);
- auto batch = checked_cast<liborc::StringVectorBatch*>(column_vector_batch);
-
- const bool has_nulls = batch->hasNulls;
- for (int64_t i = offset; i < length + offset; i++) {
- if (!has_nulls || batch->notNull[i]) {
- RETURN_NOT_OK(builder->Append(batch->data[i]));
- } else {
- RETURN_NOT_OK(builder->AppendNull());
- }
- }
- return Status::OK();
-}
-
-Status AppendDecimalBatch(const liborc::Type* type,
- liborc::ColumnVectorBatch* column_vector_batch, int64_t offset,
- int64_t length, ArrayBuilder* abuilder) {
- auto builder = checked_cast<Decimal128Builder*>(abuilder);
-
- const bool has_nulls = column_vector_batch->hasNulls;
- if (type->getPrecision() == 0 || type->getPrecision() > 18) {
- auto batch = checked_cast<liborc::Decimal128VectorBatch*>(column_vector_batch);
- for (int64_t i = offset; i < length + offset; i++) {
- if (!has_nulls || batch->notNull[i]) {
- RETURN_NOT_OK(builder->Append(
- Decimal128(batch->values[i].getHighBits(), batch->values[i].getLowBits())));
- } else {
- RETURN_NOT_OK(builder->AppendNull());
- }
- }
- } else {
- auto batch = checked_cast<liborc::Decimal64VectorBatch*>(column_vector_batch);
- for (int64_t i = offset; i < length + offset; i++) {
- if (!has_nulls || batch->notNull[i]) {
- RETURN_NOT_OK(builder->Append(Decimal128(batch->values[i])));
- } else {
- RETURN_NOT_OK(builder->AppendNull());
- }
- }
- }
- return Status::OK();
-}
-
-} // namespace
-
-Status AppendBatch(const liborc::Type* type, liborc::ColumnVectorBatch* batch,
- int64_t offset, int64_t length, ArrayBuilder* builder) {
- if (type == nullptr) {
- return Status::OK();
- }
- liborc::TypeKind kind = type->getKind();
- switch (kind) {
- case liborc::STRUCT:
- return AppendStructBatch(type, batch, offset, length, builder);
- case liborc::LIST:
- return AppendListBatch(type, batch, offset, length, builder);
- case liborc::MAP:
- return AppendMapBatch(type, batch, offset, length, builder);
- case liborc::LONG:
- return AppendNumericBatch<Int64Builder, liborc::LongVectorBatch, int64_t>(
- batch, offset, length, builder);
- case liborc::INT:
- return AppendNumericBatchCast<Int32Builder, int32_t, liborc::LongVectorBatch,
- int64_t>(batch, offset, length, builder);
- case liborc::SHORT:
- return AppendNumericBatchCast<Int16Builder, int16_t, liborc::LongVectorBatch,
- int64_t>(batch, offset, length, builder);
- case liborc::BYTE:
- return AppendNumericBatchCast<Int8Builder, int8_t, liborc::LongVectorBatch,
- int64_t>(batch, offset, length, builder);
- case liborc::DOUBLE:
- return AppendNumericBatch<DoubleBuilder, liborc::DoubleVectorBatch, double>(
- batch, offset, length, builder);
- case liborc::FLOAT:
- return AppendNumericBatchCast<FloatBuilder, float, liborc::DoubleVectorBatch,
- double>(batch, offset, length, builder);
- case liborc::BOOLEAN:
- return AppendBoolBatch(batch, offset, length, builder);
- case liborc::VARCHAR:
- case liborc::STRING:
- return AppendBinaryBatch<StringBuilder>(batch, offset, length, builder);
- case liborc::BINARY:
- return AppendBinaryBatch<BinaryBuilder>(batch, offset, length, builder);
- case liborc::CHAR:
- return AppendFixedBinaryBatch(batch, offset, length, builder);
- case liborc::DATE:
- return AppendNumericBatchCast<Date32Builder, int32_t, liborc::LongVectorBatch,
- int64_t>(batch, offset, length, builder);
- case liborc::TIMESTAMP:
- return AppendTimestampBatch(batch, offset, length, builder);
- case liborc::DECIMAL:
- return AppendDecimalBatch(type, batch, offset, length, builder);
- default:
- return Status::NotImplemented("Not implemented type kind: ", kind);
- }
-}
-
-namespace {
-
-using internal::checked_cast;
-using internal::checked_pointer_cast;
-
-Status WriteBatch(const Array& parray, int64_t orc_offset,
- liborc::ColumnVectorBatch* column_vector_batch);
-
-// Make sure children of a StructArray carry appropriate null bitmaps
-// (the parent's nulls are propagated into each child).
-Result<std::shared_ptr<Array>> NormalizeArray(const std::shared_ptr<Array>& array) {
- Type::type kind = array->type_id();
- switch (kind) {
- case Type::type::STRUCT: {
- if (array->null_count() == 0) {
- return array;
- } else {
- auto struct_array = checked_pointer_cast<StructArray>(array);
- const std::shared_ptr<Buffer> bitmap = struct_array->null_bitmap();
- std::shared_ptr<DataType> struct_type = struct_array->type();
- std::size_t size = struct_type->fields().size();
- std::vector<std::shared_ptr<Array>> new_children(size, nullptr);
- for (std::size_t i = 0; i < size; i++) {
- std::shared_ptr<Array> child = struct_array->field(i);
- const std::shared_ptr<Buffer> child_bitmap = child->null_bitmap();
- std::shared_ptr<Buffer> final_child_bitmap;
- if (child_bitmap == nullptr) {
- final_child_bitmap = bitmap;
- } else {
- ARROW_ASSIGN_OR_RAISE(
- final_child_bitmap,
- internal::BitmapAnd(default_memory_pool(), bitmap->data(), 0,
- child_bitmap->data(), 0, struct_array->length(), 0));
- }
- std::shared_ptr<ArrayData> child_array_data = child->data();
- std::vector<std::shared_ptr<Buffer>> child_buffers = child_array_data->buffers;
- child_buffers[0] = final_child_bitmap;
- std::shared_ptr<ArrayData> new_child_array_data =
- ArrayData::Make(child->type(), child->length(), child_buffers,
- child_array_data->child_data, child_array_data->dictionary);
- ARROW_ASSIGN_OR_RAISE(new_children[i],
- NormalizeArray(MakeArray(new_child_array_data)));
- }
- return std::make_shared<StructArray>(struct_type, struct_array->length(),
- new_children, bitmap);
- }
- }
- case Type::type::LIST: {
- auto list_array = checked_pointer_cast<ListArray>(array);
- ARROW_ASSIGN_OR_RAISE(auto value_array, NormalizeArray(list_array->values()));
- return std::make_shared<ListArray>(list_array->type(), list_array->length(),
- list_array->value_offsets(), value_array,
- list_array->null_bitmap());
- }
- case Type::type::LARGE_LIST: {
- auto list_array = checked_pointer_cast<LargeListArray>(array);
- ARROW_ASSIGN_OR_RAISE(auto value_array, NormalizeArray(list_array->values()));
- return std::make_shared<LargeListArray>(list_array->type(), list_array->length(),
- list_array->value_offsets(), value_array,
- list_array->null_bitmap());
- }
- case Type::type::FIXED_SIZE_LIST: {
- auto list_array = checked_pointer_cast<FixedSizeListArray>(array);
- ARROW_ASSIGN_OR_RAISE(auto value_array, NormalizeArray(list_array->values()));
- return std::make_shared<FixedSizeListArray>(list_array->type(),
- list_array->length(), value_array,
- list_array->null_bitmap());
- }
- case Type::type::MAP: {
- auto map_array = checked_pointer_cast<MapArray>(array);
- ARROW_ASSIGN_OR_RAISE(auto key_array, NormalizeArray(map_array->keys()));
- ARROW_ASSIGN_OR_RAISE(auto item_array, NormalizeArray(map_array->items()));
- return std::make_shared<MapArray>(map_array->type(), map_array->length(),
- map_array->value_offsets(), key_array, item_array,
- map_array->null_bitmap());
- }
- default: {
- return array;
- }
- }
-}
-
-template <class DataType, class BatchType, typename Enable = void>
-struct Appender {};
-
-// Types for long/double-like Appender, that is, numeric, boolean or date32
-template <typename T>
-using is_generic_type =
- std::integral_constant<bool, is_number_type<T>::value ||
- std::is_same<Date32Type, T>::value ||
- is_boolean_type<T>::value>;
-template <typename T, typename R = void>
-using enable_if_generic = enable_if_t<is_generic_type<T>::value, R>;
-
-// Number-like
-template <class DataType, class BatchType>
-struct Appender<DataType, BatchType, enable_if_generic<DataType>> {
- using ArrayType = typename TypeTraits<DataType>::ArrayType;
- using ValueType = typename TypeTraits<DataType>::CType;
- Status VisitNull() {
- batch->notNull[running_orc_offset] = false;
- running_orc_offset++;
- running_arrow_offset++;
- return Status::OK();
- }
- Status VisitValue(ValueType v) {
- batch->data[running_orc_offset] = array.Value(running_arrow_offset);
- batch->notNull[running_orc_offset] = true;
- running_orc_offset++;
- running_arrow_offset++;
- return Status::OK();
- }
- const ArrayType& array;
- BatchType* batch;
- int64_t running_orc_offset, running_arrow_offset;
-};
-
-// Binary
-template <class DataType>
-struct Appender<DataType, liborc::StringVectorBatch> {
- using ArrayType = typename TypeTraits<DataType>::ArrayType;
- using COffsetType = typename TypeTraits<DataType>::OffsetType::c_type;
- Status VisitNull() {
- batch->notNull[running_orc_offset] = false;
- running_orc_offset++;
- running_arrow_offset++;
- return Status::OK();
- }
- Status VisitValue(util::string_view v) {
- batch->notNull[running_orc_offset] = true;
- COffsetType data_length = 0;
- batch->data[running_orc_offset] = reinterpret_cast<char*>(
- const_cast<uint8_t*>(array.GetValue(running_arrow_offset, &data_length)));
- batch->length[running_orc_offset] = data_length;
- running_orc_offset++;
- running_arrow_offset++;
- return Status::OK();
- }
- const ArrayType& array;
- liborc::StringVectorBatch* batch;
- int64_t running_orc_offset, running_arrow_offset;
-};
-
-// Decimal
-template <>
-struct Appender<Decimal128Type, liborc::Decimal64VectorBatch> {
- Status VisitNull() {
- batch->notNull[running_orc_offset] = false;
- running_orc_offset++;
- running_arrow_offset++;
- return Status::OK();
- }
- Status VisitValue(util::string_view v) {
- batch->notNull[running_orc_offset] = true;
- const Decimal128 dec_value(array.GetValue(running_arrow_offset));
- batch->values[running_orc_offset] = static_cast<int64_t>(dec_value.low_bits());
- running_orc_offset++;
- running_arrow_offset++;
- return Status::OK();
- }
- const Decimal128Array& array;
- liborc::Decimal64VectorBatch* batch;
- int64_t running_orc_offset, running_arrow_offset;
-};
-
-template <>
-struct Appender<Decimal128Type, liborc::Decimal128VectorBatch> {
- Status VisitNull() {
- batch->notNull[running_orc_offset] = false;
- running_orc_offset++;
- running_arrow_offset++;
- return Status::OK();
- }
- Status VisitValue(util::string_view v) {
- batch->notNull[running_orc_offset] = true;
- const Decimal128 dec_value(array.GetValue(running_arrow_offset));
- batch->values[running_orc_offset] =
- liborc::Int128(dec_value.high_bits(), dec_value.low_bits());
- running_orc_offset++;
- running_arrow_offset++;
- return Status::OK();
- }
- const Decimal128Array& array;
- liborc::Decimal128VectorBatch* batch;
- int64_t running_orc_offset, running_arrow_offset;
-};
-
-// Date64 and Timestamp
-template <class DataType>
-struct TimestampAppender {
- using ArrayType = typename TypeTraits<DataType>::ArrayType;
- Status VisitNull() {
- batch->notNull[running_orc_offset] = false;
- running_orc_offset++;
- running_arrow_offset++;
- return Status::OK();
- }
- Status VisitValue(int64_t v) {
- int64_t data = array.Value(running_arrow_offset);
- batch->notNull[running_orc_offset] = true;
- // Floor (not truncate) the seconds so pre-epoch values keep a
- // non-negative nanosecond remainder.
- batch->data[running_orc_offset] = static_cast<int64_t>(
- std::floor(static_cast<double>(data) / conversion_factor_from_second));
- batch->nanoseconds[running_orc_offset] =
- (data - conversion_factor_from_second * batch->data[running_orc_offset]) *
- conversion_factor_to_nano;
- running_orc_offset++;
- running_arrow_offset++;
- return Status::OK();
- }
- const ArrayType& array;
- liborc::TimestampVectorBatch* batch;
- int64_t running_orc_offset, running_arrow_offset;
- int64_t conversion_factor_from_second, conversion_factor_to_nano;
-};
-
-// FSB
-struct FixedSizeBinaryAppender {
- Status VisitNull() {
- batch->notNull[running_orc_offset] = false;
- running_orc_offset++;
- running_arrow_offset++;
- return Status::OK();
- }
- Status VisitValue(util::string_view v) {
- batch->notNull[running_orc_offset] = true;
- batch->data[running_orc_offset] = reinterpret_cast<char*>(
- const_cast<uint8_t*>(array.GetValue(running_arrow_offset)));
- batch->length[running_orc_offset] = data_length;
- running_orc_offset++;
- running_arrow_offset++;
- return Status::OK();
- }
- const FixedSizeBinaryArray& array;
- liborc::StringVectorBatch* batch;
- int64_t running_orc_offset, running_arrow_offset;
- const int32_t data_length;
-};
-
-// static_cast from int64_t or double to itself shouldn't introduce overhead
-// Please see
-// https://stackoverflow.com/questions/19106826/
-// can-static-cast-to-same-type-introduce-runtime-overhead
-template <class DataType, class BatchType>
-Status WriteGenericBatch(const Array& array, int64_t orc_offset,
- liborc::ColumnVectorBatch* column_vector_batch) {
- using ArrayType = typename TypeTraits<DataType>::ArrayType;
- const ArrayType& array_(checked_cast<const ArrayType&>(array));
- auto batch = checked_cast<BatchType*>(column_vector_batch);
- if (array.null_count()) {
- batch->hasNulls = true;
- }
- Appender<DataType, BatchType> appender{array_, batch, orc_offset, 0};
- ArrayDataVisitor<DataType> visitor;
- RETURN_NOT_OK(visitor.Visit(*(array_.data()), &appender));
- return Status::OK();
-}
-
-template <class DataType>
-Status WriteTimestampBatch(const Array& array, int64_t orc_offset,
- liborc::ColumnVectorBatch* column_vector_batch,
- const int64_t& conversion_factor_from_second,
- const int64_t& conversion_factor_to_nano) {
- using ArrayType = typename TypeTraits<DataType>::ArrayType;
- const ArrayType& array_(checked_cast<const ArrayType&>(array));
- auto batch = checked_cast<liborc::TimestampVectorBatch*>(column_vector_batch);
- if (array.null_count()) {
- batch->hasNulls = true;
- }
- TimestampAppender<DataType> appender{array_,
- batch,
- orc_offset,
- 0,
- conversion_factor_from_second,
- conversion_factor_to_nano};
- ArrayDataVisitor<DataType> visitor;
- RETURN_NOT_OK(visitor.Visit(*(array_.data()), &appender));
- return Status::OK();
-}
-
-Status WriteFixedSizeBinaryBatch(const Array& array, int64_t orc_offset,
- liborc::ColumnVectorBatch* column_vector_batch) {
- const FixedSizeBinaryArray& array_(checked_cast<const FixedSizeBinaryArray&>(array));
- auto batch = checked_cast<liborc::StringVectorBatch*>(column_vector_batch);
- if (array.null_count()) {
- batch->hasNulls = true;
- }
- FixedSizeBinaryAppender appender{array_, batch, orc_offset, 0, array_.byte_width()};
- ArrayDataVisitor<FixedSizeBinaryType> visitor;
- RETURN_NOT_OK(visitor.Visit(*(array_.data()), &appender));
- return Status::OK();
-}
-
-Status WriteStructBatch(const Array& array, int64_t orc_offset,
- liborc::ColumnVectorBatch* column_vector_batch) {
- std::shared_ptr<Array> array_ = MakeArray(array.data());
- std::shared_ptr<StructArray> struct_array(checked_pointer_cast<StructArray>(array_));
- auto batch = checked_cast<liborc::StructVectorBatch*>(column_vector_batch);
- std::size_t size = array.type()->fields().size();
- int64_t arrow_length = array.length();
- int64_t running_arrow_offset = 0, running_orc_offset = orc_offset;
- // First fill fields of ColumnVectorBatch
- if (array.null_count()) {
- batch->hasNulls = true;
- }
- for (; running_arrow_offset < arrow_length;
- running_orc_offset++, running_arrow_offset++) {
- batch->notNull[running_orc_offset] = !array.IsNull(running_arrow_offset);
- }
- // Fill the fields
- for (std::size_t i = 0; i < size; i++) {
- batch->fields[i]->resize(orc_offset + arrow_length);
- RETURN_NOT_OK(WriteBatch(*(struct_array->field(i)), orc_offset, batch->fields[i]));
- }
- return Status::OK();
-}
-
-template <class ArrayType>
-Status WriteListBatch(const Array& array, int64_t orc_offset,
- liborc::ColumnVectorBatch* column_vector_batch) {
- const ArrayType& list_array(checked_cast<const ArrayType&>(array));
- auto batch = checked_cast<liborc::ListVectorBatch*>(column_vector_batch);
- liborc::ColumnVectorBatch* element_batch = (batch->elements).get();
- int64_t arrow_length = array.length();
- int64_t running_arrow_offset = 0, running_orc_offset = orc_offset;
- if (orc_offset == 0) {
- batch->offsets[0] = 0;
- }
- if (array.null_count()) {
- batch->hasNulls = true;
- }
- for (; running_arrow_offset < arrow_length;
- running_orc_offset++, running_arrow_offset++) {
- if (array.IsNull(running_arrow_offset)) {
- batch->notNull[running_orc_offset] = false;
- batch->offsets[running_orc_offset + 1] = batch->offsets[running_orc_offset];
- } else {
- batch->notNull[running_orc_offset] = true;
- batch->offsets[running_orc_offset + 1] =
- batch->offsets[running_orc_offset] +
- list_array.value_offset(running_arrow_offset + 1) -
- list_array.value_offset(running_arrow_offset);
- element_batch->resize(batch->offsets[running_orc_offset + 1]);
- int64_t subarray_arrow_offset = list_array.value_offset(running_arrow_offset),
- subarray_orc_offset = batch->offsets[running_orc_offset],
- subarray_orc_length =
- batch->offsets[running_orc_offset + 1] - subarray_orc_offset;
- RETURN_NOT_OK(WriteBatch(
- *(list_array.values()->Slice(subarray_arrow_offset, subarray_orc_length)),
- subarray_orc_offset, element_batch));
- }
- }
- return Status::OK();
-}
-
-Status WriteMapBatch(const Array& array, int64_t orc_offset,
- liborc::ColumnVectorBatch* column_vector_batch) {
- const MapArray& map_array(checked_cast<const MapArray&>(array));
- auto batch = checked_cast<liborc::MapVectorBatch*>(column_vector_batch);
- liborc::ColumnVectorBatch* key_batch = (batch->keys).get();
- liborc::ColumnVectorBatch* element_batch = (batch->elements).get();
- std::shared_ptr<Array> key_array = map_array.keys();
- std::shared_ptr<Array> element_array = map_array.items();
- int64_t arrow_length = array.length();
- int64_t running_arrow_offset = 0, running_orc_offset = orc_offset;
- if (orc_offset == 0) {
- batch->offsets[0] = 0;
- }
- if (array.null_count()) {
- batch->hasNulls = true;
- }
- for (; running_arrow_offset < arrow_length;
- running_orc_offset++, running_arrow_offset++) {
- if (array.IsNull(running_arrow_offset)) {
- batch->notNull[running_orc_offset] = false;
- batch->offsets[running_orc_offset + 1] = batch->offsets[running_orc_offset];
- } else {
- batch->notNull[running_orc_offset] = true;
- batch->offsets[running_orc_offset + 1] =
- batch->offsets[running_orc_offset] +
- map_array.value_offset(running_arrow_offset + 1) -
- map_array.value_offset(running_arrow_offset);
- int64_t subarray_arrow_offset = map_array.value_offset(running_arrow_offset),
- subarray_orc_offset = batch->offsets[running_orc_offset],
- new_subarray_orc_offset = batch->offsets[running_orc_offset + 1],
- subarray_orc_length = new_subarray_orc_offset - subarray_orc_offset;
- key_batch->resize(new_subarray_orc_offset);
- element_batch->resize(new_subarray_orc_offset);
- RETURN_NOT_OK(
- WriteBatch(*(key_array->Slice(subarray_arrow_offset, subarray_orc_length)),
- subarray_orc_offset, key_batch));
- RETURN_NOT_OK(
- WriteBatch(*(element_array->Slice(subarray_arrow_offset, subarray_orc_length)),
- subarray_orc_offset, element_batch));
- }
- }
- return Status::OK();
-}
-
-Status WriteBatch(const Array& array, int64_t orc_offset,
- liborc::ColumnVectorBatch* column_vector_batch) {
- Type::type kind = array.type_id();
- column_vector_batch->numElements = orc_offset;
- switch (kind) {
- case Type::type::BOOL:
- return WriteGenericBatch<BooleanType, liborc::LongVectorBatch>(array, orc_offset,
- column_vector_batch);
- case Type::type::INT8:
- return WriteGenericBatch<Int8Type, liborc::LongVectorBatch>(array, orc_offset,
- column_vector_batch);
- case Type::type::INT16:
- return WriteGenericBatch<Int16Type, liborc::LongVectorBatch>(array, orc_offset,
- column_vector_batch);
- case Type::type::INT32:
- return WriteGenericBatch<Int32Type, liborc::LongVectorBatch>(array, orc_offset,
- column_vector_batch);
- case Type::type::INT64:
- return WriteGenericBatch<Int64Type, liborc::LongVectorBatch>(array, orc_offset,
- column_vector_batch);
- case Type::type::FLOAT:
- return WriteGenericBatch<FloatType, liborc::DoubleVectorBatch>(array, orc_offset,
- column_vector_batch);
- case Type::type::DOUBLE:
- return WriteGenericBatch<DoubleType, liborc::DoubleVectorBatch>(
- array, orc_offset, column_vector_batch);
- case Type::type::BINARY:
- return WriteGenericBatch<BinaryType, liborc::StringVectorBatch>(
- array, orc_offset, column_vector_batch);
- case Type::type::LARGE_BINARY:
- return WriteGenericBatch<LargeBinaryType, liborc::StringVectorBatch>(
- array, orc_offset, column_vector_batch);
- case Type::type::STRING:
- return WriteGenericBatch<StringType, liborc::StringVectorBatch>(
- array, orc_offset, column_vector_batch);
- case Type::type::LARGE_STRING:
- return WriteGenericBatch<LargeStringType, liborc::StringVectorBatch>(
- array, orc_offset, column_vector_batch);
- case Type::type::FIXED_SIZE_BINARY:
- return WriteFixedSizeBinaryBatch(array, orc_offset, column_vector_batch);
- case Type::type::DATE32:
- return WriteGenericBatch<Date32Type, liborc::LongVectorBatch>(array, orc_offset,
- column_vector_batch);
- case Type::type::DATE64:
- return WriteTimestampBatch<Date64Type>(array, orc_offset, column_vector_batch,
- kOneSecondMillis, kOneMilliNanos);
- case Type::type::TIMESTAMP: {
- switch (internal::checked_pointer_cast<TimestampType>(array.type())->unit()) {
- case TimeUnit::type::SECOND:
- return WriteTimestampBatch<TimestampType>(
- array, orc_offset, column_vector_batch, 1, kOneSecondNanos);
- case TimeUnit::type::MILLI:
- return WriteTimestampBatch<TimestampType>(
- array, orc_offset, column_vector_batch, kOneSecondMillis, kOneMilliNanos);
- case TimeUnit::type::MICRO:
- return WriteTimestampBatch<TimestampType>(
- array, orc_offset, column_vector_batch, kOneSecondMicros, kOneMicroNanos);
- case TimeUnit::type::NANO:
- return WriteTimestampBatch<TimestampType>(
- array, orc_offset, column_vector_batch, kOneSecondNanos, 1);
- default:
- return Status::TypeError("Unknown or unsupported Arrow type: ",
- array.type()->ToString());
- }
- }
- case Type::type::DECIMAL128: {
- int32_t precision = checked_pointer_cast<Decimal128Type>(array.type())->precision();
- if (precision > 18) {
- return WriteGenericBatch<Decimal128Type, liborc::Decimal128VectorBatch>(
- array, orc_offset, column_vector_batch);
- } else {
- return WriteGenericBatch<Decimal128Type, liborc::Decimal64VectorBatch>(
- array, orc_offset, column_vector_batch);
- }
- }
- case Type::type::STRUCT:
- return WriteStructBatch(array, orc_offset, column_vector_batch);
- case Type::type::LIST:
- return WriteListBatch<ListArray>(array, orc_offset, column_vector_batch);
- case Type::type::LARGE_LIST:
- return WriteListBatch<LargeListArray>(array, orc_offset, column_vector_batch);
- case Type::type::FIXED_SIZE_LIST:
- return WriteListBatch<FixedSizeListArray>(array, orc_offset, column_vector_batch);
- case Type::type::MAP:
- return WriteMapBatch(array, orc_offset, column_vector_batch);
- default: {
- return Status::NotImplemented("Unknown or unsupported Arrow type: ",
- array.type()->ToString());
- }
- }
- return Status::OK();
-}
-
-Result<ORC_UNIQUE_PTR<liborc::Type>> GetOrcType(const DataType& type) {
- Type::type kind = type.id();
- switch (kind) {
- case Type::type::BOOL:
- return liborc::createPrimitiveType(liborc::TypeKind::BOOLEAN);
- case Type::type::INT8:
- return liborc::createPrimitiveType(liborc::TypeKind::BYTE);
- case Type::type::INT16:
- return liborc::createPrimitiveType(liborc::TypeKind::SHORT);
- case Type::type::INT32:
- return liborc::createPrimitiveType(liborc::TypeKind::INT);
- case Type::type::INT64:
- return liborc::createPrimitiveType(liborc::TypeKind::LONG);
- case Type::type::FLOAT:
- return liborc::createPrimitiveType(liborc::TypeKind::FLOAT);
- case Type::type::DOUBLE:
- return liborc::createPrimitiveType(liborc::TypeKind::DOUBLE);
- // Use STRING instead of VARCHAR for now, both use UTF-8
- case Type::type::STRING:
- case Type::type::LARGE_STRING:
- return liborc::createPrimitiveType(liborc::TypeKind::STRING);
- case Type::type::BINARY:
- case Type::type::LARGE_BINARY:
- case Type::type::FIXED_SIZE_BINARY:
- return liborc::createPrimitiveType(liborc::TypeKind::BINARY);
- case Type::type::DATE32:
- return liborc::createPrimitiveType(liborc::TypeKind::DATE);
- case Type::type::DATE64:
- case Type::type::TIMESTAMP:
- return liborc::createPrimitiveType(liborc::TypeKind::TIMESTAMP);
- case Type::type::DECIMAL128: {
- const uint64_t precision =
- static_cast<uint64_t>(checked_cast<const Decimal128Type&>(type).precision());
- const uint64_t scale =
- static_cast<uint64_t>(checked_cast<const Decimal128Type&>(type).scale());
- return liborc::createDecimalType(precision, scale);
- }
- case Type::type::LIST:
- case Type::type::FIXED_SIZE_LIST:
- case Type::type::LARGE_LIST: {
- std::shared_ptr<DataType> arrow_child_type =
- checked_cast<const BaseListType&>(type).value_type();
- ARROW_ASSIGN_OR_RAISE(auto orc_subtype, GetOrcType(*arrow_child_type));
- return liborc::createListType(std::move(orc_subtype));
- }
- case Type::type::STRUCT: {
- ORC_UNIQUE_PTR<liborc::Type> out_type = liborc::createStructType();
- std::vector<std::shared_ptr<Field>> arrow_fields =
- checked_cast<const StructType&>(type).fields();
- for (const auto& arrow_field : arrow_fields) {
- std::string field_name = arrow_field->name();
- std::shared_ptr<DataType> arrow_child_type = arrow_field->type();
- ARROW_ASSIGN_OR_RAISE(auto orc_subtype, GetOrcType(*arrow_child_type));
- out_type->addStructField(field_name, std::move(orc_subtype));
- }
- return std::move(out_type);
- }
- case Type::type::MAP: {
- std::shared_ptr<DataType> key_arrow_type =
- checked_cast<const MapType&>(type).key_type();
- std::shared_ptr<DataType> item_arrow_type =
- checked_cast<const MapType&>(type).item_type();
- ARROW_ASSIGN_OR_RAISE(auto key_orc_type, GetOrcType(*key_arrow_type));
- ARROW_ASSIGN_OR_RAISE(auto item_orc_type, GetOrcType(*item_arrow_type));
- return liborc::createMapType(std::move(key_orc_type), std::move(item_orc_type));
- }
- case Type::type::DENSE_UNION:
- case Type::type::SPARSE_UNION: {
- ORC_UNIQUE_PTR<liborc::Type> out_type = liborc::createUnionType();
- std::vector<std::shared_ptr<Field>> arrow_fields =
- checked_cast<const UnionType&>(type).fields();
- for (const auto& arrow_field : arrow_fields) {
- std::shared_ptr<DataType> arrow_child_type = arrow_field->type();
- ARROW_ASSIGN_OR_RAISE(auto orc_subtype, GetOrcType(*arrow_child_type));
- out_type->addUnionChild(std::move(orc_subtype));
- }
- return std::move(out_type);
- }
- default: {
- return Status::NotImplemented("Unknown or unsupported Arrow type: ",
- type.ToString());
- }
- }
-}
-
-} // namespace
-
-Status WriteBatch(const ChunkedArray& chunked_array, int64_t length,
- int* arrow_chunk_offset, int64_t* arrow_index_offset,
- liborc::ColumnVectorBatch* column_vector_batch) {
- int num_batch = chunked_array.num_chunks();
- int64_t orc_offset = 0;
- while (*arrow_chunk_offset < num_batch && orc_offset < length) {
- ARROW_ASSIGN_OR_RAISE(auto array,
- NormalizeArray(chunked_array.chunk(*arrow_chunk_offset)));
- int64_t num_written_elements =
- std::min(length - orc_offset, array->length() - *arrow_index_offset);
- if (num_written_elements > 0) {
- RETURN_NOT_OK(WriteBatch(*(array->Slice(*arrow_index_offset, num_written_elements)),
- orc_offset, column_vector_batch));
- orc_offset += num_written_elements;
- *arrow_index_offset += num_written_elements;
- }
- if (orc_offset < length) { // Another Arrow Array done
- *arrow_index_offset = 0;
- (*arrow_chunk_offset)++;
- }
- }
- column_vector_batch->numElements = orc_offset;
- return Status::OK();
-}
-
-Status GetArrowType(const liborc::Type* type, std::shared_ptr<DataType>* out) {
- // When subselecting fields on read, liborc will set some nodes to nullptr,
- // so we need to check for nullptr before progressing
- if (type == nullptr) {
- *out = null();
- return Status::OK();
- }
- liborc::TypeKind kind = type->getKind();
- const int subtype_count = static_cast<int>(type->getSubtypeCount());
-
- switch (kind) {
- case liborc::BOOLEAN:
- *out = boolean();
- break;
- case liborc::BYTE:
- *out = int8();
- break;
- case liborc::SHORT:
- *out = int16();
- break;
- case liborc::INT:
- *out = int32();
- break;
- case liborc::LONG:
- *out = int64();
- break;
- case liborc::FLOAT:
- *out = float32();
- break;
- case liborc::DOUBLE:
- *out = float64();
- break;
- case liborc::VARCHAR:
- case liborc::STRING:
- *out = utf8();
- break;
- case liborc::BINARY:
- *out = binary();
- break;
- case liborc::CHAR:
- *out = fixed_size_binary(static_cast<int>(type->getMaximumLength()));
- break;
- case liborc::TIMESTAMP:
- *out = timestamp(TimeUnit::NANO);
- break;
- case liborc::DATE:
- *out = date32();
- break;
- case liborc::DECIMAL: {
- const int precision = static_cast<int>(type->getPrecision());
- const int scale = static_cast<int>(type->getScale());
- if (precision == 0) {
- // In HIVE 0.11/0.12 precision is set as 0, but means max precision
- *out = decimal128(38, 6);
- } else {
- *out = decimal128(precision, scale);
- }
- break;
- }
- case liborc::LIST: {
- if (subtype_count != 1) {
- return Status::TypeError("Invalid Orc List type");
- }
- std::shared_ptr<DataType> elemtype;
- RETURN_NOT_OK(GetArrowType(type->getSubtype(0), &elemtype));
- *out = list(elemtype);
- break;
- }
- case liborc::MAP: {
- if (subtype_count != 2) {
- return Status::TypeError("Invalid Orc Map type");
- }
- std::shared_ptr<DataType> key_type, item_type;
- RETURN_NOT_OK(GetArrowType(type->getSubtype(0), &key_type));
- RETURN_NOT_OK(GetArrowType(type->getSubtype(1), &item_type));
- *out = map(key_type, item_type);
- break;
- }
- case liborc::STRUCT: {
- std::vector<std::shared_ptr<Field>> fields;
- for (int child = 0; child < subtype_count; ++child) {
- std::shared_ptr<DataType> elem_type;
- RETURN_NOT_OK(GetArrowType(type->getSubtype(child), &elem_type));
- std::string name = type->getFieldName(child);
- fields.push_back(field(name, elem_type));
- }
- *out = struct_(fields);
- break;
- }
- case liborc::UNION: {
- std::vector<std::shared_ptr<Field>> fields;
- std::vector<int8_t> type_codes;
- for (int child = 0; child < subtype_count; ++child) {
- std::shared_ptr<DataType> elem_type;
- RETURN_NOT_OK(GetArrowType(type->getSubtype(child), &elem_type));
- fields.push_back(field("_union_" + std::to_string(child), elem_type));
- type_codes.push_back(static_cast<int8_t>(child));
- }
- *out = sparse_union(fields, type_codes);
- break;
- }
- default: {
- return Status::TypeError("Unknown Orc type kind: ", type->toString());
- }
- }
- return Status::OK();
-}
-
-Result<ORC_UNIQUE_PTR<liborc::Type>> GetOrcType(const Schema& schema) {
- int numFields = schema.num_fields();
- ORC_UNIQUE_PTR<liborc::Type> out_type = liborc::createStructType();
- for (int i = 0; i < numFields; i++) {
- std::shared_ptr<Field> field = schema.field(i);
- std::string field_name = field->name();
- std::shared_ptr<DataType> arrow_child_type = field->type();
- ARROW_ASSIGN_OR_RAISE(auto orc_subtype, GetOrcType(*arrow_child_type));
- out_type->addStructField(field_name, std::move(orc_subtype));
- }
- return std::move(out_type);
-}
-
-} // namespace orc
-} // namespace adapters
-} // namespace arrow
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "arrow/adapters/orc/adapter_util.h"
+
+#include <cmath>
+#include <string>
+#include <vector>
+
+#include "arrow/array/builder_base.h"
+#include "arrow/builder.h"
+#include "arrow/chunked_array.h"
+#include "arrow/scalar.h"
+#include "arrow/status.h"
+#include "arrow/util/bitmap_ops.h"
+#include "arrow/util/checked_cast.h"
+#include "arrow/util/decimal.h"
+#include "arrow/util/range.h"
+#include "arrow/util/string_view.h"
+#include "arrow/visitor_inline.h"
+#include "orc/Exceptions.hh"
+#include "orc/MemoryPool.hh"
+#include "orc/OrcFile.hh"
+
+// Alias so the ORC library namespace does not clash with arrow::adapters::orc
+namespace liborc = orc;
+
+namespace arrow {
+
+using internal::checked_cast;
+
+namespace adapters {
+namespace orc {
+
+namespace {
+
+// Conversion factors between seconds, milliseconds, microseconds and nanoseconds
+constexpr int64_t kOneSecondMillis = 1000LL;
+constexpr int64_t kOneMicroNanos = 1000LL;
+constexpr int64_t kOneSecondMicros = 1000000LL;
+constexpr int64_t kOneMilliNanos = 1000000LL;
+constexpr int64_t kOneSecondNanos = 1000000000LL;
+
+Status AppendStructBatch(const liborc::Type* type,
+ liborc::ColumnVectorBatch* column_vector_batch, int64_t offset,
+ int64_t length, ArrayBuilder* abuilder) {
+ auto builder = checked_cast<StructBuilder*>(abuilder);
+ auto batch = checked_cast<liborc::StructVectorBatch*>(column_vector_batch);
+
+ const uint8_t* valid_bytes = nullptr;
+ if (batch->hasNulls) {
+ valid_bytes = reinterpret_cast<const uint8_t*>(batch->notNull.data()) + offset;
+ }
+ RETURN_NOT_OK(builder->AppendValues(length, valid_bytes));
+
+ for (int i = 0; i < builder->num_fields(); i++) {
+ RETURN_NOT_OK(AppendBatch(type->getSubtype(i), batch->fields[i], offset, length,
+ builder->field_builder(i)));
+ }
+ return Status::OK();
+}
+
+Status AppendListBatch(const liborc::Type* type,
+ liborc::ColumnVectorBatch* column_vector_batch, int64_t offset,
+ int64_t length, ArrayBuilder* abuilder) {
+ auto builder = checked_cast<ListBuilder*>(abuilder);
+ auto batch = checked_cast<liborc::ListVectorBatch*>(column_vector_batch);
+ liborc::ColumnVectorBatch* elements = batch->elements.get();
+ const liborc::Type* elemtype = type->getSubtype(0);
+
+ const bool has_nulls = batch->hasNulls;
+ for (int64_t i = offset; i < length + offset; i++) {
+ if (!has_nulls || batch->notNull[i]) {
+ int64_t start = batch->offsets[i];
+ int64_t end = batch->offsets[i + 1];
+ RETURN_NOT_OK(builder->Append());
+ RETURN_NOT_OK(
+ AppendBatch(elemtype, elements, start, end - start, builder->value_builder()));
+ } else {
+ RETURN_NOT_OK(builder->AppendNull());
+ }
+ }
+ return Status::OK();
+}
+
+Status AppendMapBatch(const liborc::Type* type,
+ liborc::ColumnVectorBatch* column_vector_batch, int64_t offset,
+ int64_t length, ArrayBuilder* abuilder) {
+ auto builder = checked_cast<MapBuilder*>(abuilder);
+ auto batch = checked_cast<liborc::MapVectorBatch*>(column_vector_batch);
+ liborc::ColumnVectorBatch* keys = batch->keys.get();
+ liborc::ColumnVectorBatch* items = batch->elements.get();
+ const liborc::Type* key_type = type->getSubtype(0);
+ const liborc::Type* item_type = type->getSubtype(1);
+
+ const bool has_nulls = batch->hasNulls;
+ for (int64_t i = offset; i < length + offset; i++) {
+ if (!has_nulls || batch->notNull[i]) {
+ int64_t start = batch->offsets[i];
+ int64_t end = batch->offsets[i + 1];
+ RETURN_NOT_OK(builder->Append());
+ RETURN_NOT_OK(
+ AppendBatch(key_type, keys, start, end - start, builder->key_builder()));
+ RETURN_NOT_OK(
+ AppendBatch(item_type, items, start, end - start, builder->item_builder()));
+ } else {
+ RETURN_NOT_OK(builder->AppendNull());
+ }
+ }
+ return Status::OK();
+}
+
+template <class BuilderType, class BatchType, class ElemType>
+Status AppendNumericBatch(liborc::ColumnVectorBatch* column_vector_batch, int64_t offset,
+ int64_t length, ArrayBuilder* abuilder) {
+ auto builder = checked_cast<BuilderType*>(abuilder);
+ auto batch = checked_cast<BatchType*>(column_vector_batch);
+
+ if (length == 0) {
+ return Status::OK();
+ }
+ const uint8_t* valid_bytes = nullptr;
+ if (batch->hasNulls) {
+ valid_bytes = reinterpret_cast<const uint8_t*>(batch->notNull.data()) + offset;
+ }
+ const ElemType* source = batch->data.data() + offset;
+ RETURN_NOT_OK(builder->AppendValues(source, length, valid_bytes));
+ return Status::OK();
+}
+
+template <class BuilderType, class TargetType, class BatchType, class SourceType>
+Status AppendNumericBatchCast(liborc::ColumnVectorBatch* column_vector_batch,
+ int64_t offset, int64_t length, ArrayBuilder* abuilder) {
+ auto builder = checked_cast<BuilderType*>(abuilder);
+ auto batch = checked_cast<BatchType*>(column_vector_batch);
+
+ if (length == 0) {
+ return Status::OK();
+ }
+
+ const uint8_t* valid_bytes = nullptr;
+ if (batch->hasNulls) {
+ valid_bytes = reinterpret_cast<const uint8_t*>(batch->notNull.data()) + offset;
+ }
+ const SourceType* source = batch->data.data() + offset;
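+  // MakeLazyRange applies the cast element by element as the builder
+  // consumes the iterators, so no temporary buffer of TargetType values is
+  // materialized.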
+ auto cast_iter = internal::MakeLazyRange(
+ [&source](int64_t index) { return static_cast<TargetType>(source[index]); },
+ length);
+
+ RETURN_NOT_OK(builder->AppendValues(cast_iter.begin(), cast_iter.end(), valid_bytes));
+
+ return Status::OK();
+}
+
+Status AppendBoolBatch(liborc::ColumnVectorBatch* column_vector_batch, int64_t offset,
+ int64_t length, ArrayBuilder* abuilder) {
+ auto builder = checked_cast<BooleanBuilder*>(abuilder);
+ auto batch = checked_cast<liborc::LongVectorBatch*>(column_vector_batch);
+
+ if (length == 0) {
+ return Status::OK();
+ }
+
+ const uint8_t* valid_bytes = nullptr;
+ if (batch->hasNulls) {
+ valid_bytes = reinterpret_cast<const uint8_t*>(batch->notNull.data()) + offset;
+ }
+ const int64_t* source = batch->data.data() + offset;
+
+ auto cast_iter = internal::MakeLazyRange(
+ [&source](int64_t index) { return static_cast<bool>(source[index]); }, length);
+
+ RETURN_NOT_OK(builder->AppendValues(cast_iter.begin(), cast_iter.end(), valid_bytes));
+
+ return Status::OK();
+}
+
+Status AppendTimestampBatch(liborc::ColumnVectorBatch* column_vector_batch,
+ int64_t offset, int64_t length, ArrayBuilder* abuilder) {
+ auto builder = checked_cast<TimestampBuilder*>(abuilder);
+ auto batch = checked_cast<liborc::TimestampVectorBatch*>(column_vector_batch);
+
+ if (length == 0) {
+ return Status::OK();
+ }
+
+ const uint8_t* valid_bytes = nullptr;
+ if (batch->hasNulls) {
+ valid_bytes = reinterpret_cast<const uint8_t*>(batch->notNull.data()) + offset;
+ }
+
+ const int64_t* seconds = batch->data.data() + offset;
+ const int64_t* nanos = batch->nanoseconds.data() + offset;
+
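+  // ORC keeps a timestamp as separate seconds and nanoseconds, while the
+  // Arrow builder expects a single NANO value; combine them here. For
+  // example, (seconds = 1, nanos = 500) becomes 1 * 1000000000 + 500.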
+ auto transform_timestamp = [seconds, nanos](int64_t index) {
+ return seconds[index] * kOneSecondNanos + nanos[index];
+ };
+
+ auto transform_range = internal::MakeLazyRange(transform_timestamp, length);
+
+ RETURN_NOT_OK(
+ builder->AppendValues(transform_range.begin(), transform_range.end(), valid_bytes));
+ return Status::OK();
+}
+
+template <class BuilderType>
+Status AppendBinaryBatch(liborc::ColumnVectorBatch* column_vector_batch, int64_t offset,
+ int64_t length, ArrayBuilder* abuilder) {
+ auto builder = checked_cast<BuilderType*>(abuilder);
+ auto batch = checked_cast<liborc::StringVectorBatch*>(column_vector_batch);
+
+ const bool has_nulls = batch->hasNulls;
+ for (int64_t i = offset; i < length + offset; i++) {
+ if (!has_nulls || batch->notNull[i]) {
+ RETURN_NOT_OK(
+ builder->Append(batch->data[i], static_cast<int32_t>(batch->length[i])));
+ } else {
+ RETURN_NOT_OK(builder->AppendNull());
+ }
+ }
+ return Status::OK();
+}
+
+Status AppendFixedBinaryBatch(liborc::ColumnVectorBatch* column_vector_batch,
+ int64_t offset, int64_t length, ArrayBuilder* abuilder) {
+ auto builder = checked_cast<FixedSizeBinaryBuilder*>(abuilder);
+ auto batch = checked_cast<liborc::StringVectorBatch*>(column_vector_batch);
+
+ const bool has_nulls = batch->hasNulls;
+ for (int64_t i = offset; i < length + offset; i++) {
+ if (!has_nulls || batch->notNull[i]) {
+ RETURN_NOT_OK(builder->Append(batch->data[i]));
+ } else {
+ RETURN_NOT_OK(builder->AppendNull());
+ }
+ }
+ return Status::OK();
+}
+
+Status AppendDecimalBatch(const liborc::Type* type,
+ liborc::ColumnVectorBatch* column_vector_batch, int64_t offset,
+ int64_t length, ArrayBuilder* abuilder) {
+ auto builder = checked_cast<Decimal128Builder*>(abuilder);
+
+ const bool has_nulls = column_vector_batch->hasNulls;
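+  // liborc stores decimals with precision <= 18 in Decimal64VectorBatch and
+  // wider ones in Decimal128VectorBatch; precision 0 is the legacy HIVE
+  // "maximum precision" case and is also read through the 128-bit batch.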
+ if (type->getPrecision() == 0 || type->getPrecision() > 18) {
+ auto batch = checked_cast<liborc::Decimal128VectorBatch*>(column_vector_batch);
+ for (int64_t i = offset; i < length + offset; i++) {
+ if (!has_nulls || batch->notNull[i]) {
+ RETURN_NOT_OK(builder->Append(
+ Decimal128(batch->values[i].getHighBits(), batch->values[i].getLowBits())));
+ } else {
+ RETURN_NOT_OK(builder->AppendNull());
+ }
+ }
+ } else {
+ auto batch = checked_cast<liborc::Decimal64VectorBatch*>(column_vector_batch);
+ for (int64_t i = offset; i < length + offset; i++) {
+ if (!has_nulls || batch->notNull[i]) {
+ RETURN_NOT_OK(builder->Append(Decimal128(batch->values[i])));
+ } else {
+ RETURN_NOT_OK(builder->AppendNull());
+ }
+ }
+ }
+ return Status::OK();
+}
+
+} // namespace
+
+Status AppendBatch(const liborc::Type* type, liborc::ColumnVectorBatch* batch,
+ int64_t offset, int64_t length, ArrayBuilder* builder) {
+ if (type == nullptr) {
+ return Status::OK();
+ }
+ liborc::TypeKind kind = type->getKind();
+ switch (kind) {
+ case liborc::STRUCT:
+ return AppendStructBatch(type, batch, offset, length, builder);
+ case liborc::LIST:
+ return AppendListBatch(type, batch, offset, length, builder);
+ case liborc::MAP:
+ return AppendMapBatch(type, batch, offset, length, builder);
+ case liborc::LONG:
+ return AppendNumericBatch<Int64Builder, liborc::LongVectorBatch, int64_t>(
+ batch, offset, length, builder);
+ case liborc::INT:
+ return AppendNumericBatchCast<Int32Builder, int32_t, liborc::LongVectorBatch,
+ int64_t>(batch, offset, length, builder);
+ case liborc::SHORT:
+ return AppendNumericBatchCast<Int16Builder, int16_t, liborc::LongVectorBatch,
+ int64_t>(batch, offset, length, builder);
+ case liborc::BYTE:
+ return AppendNumericBatchCast<Int8Builder, int8_t, liborc::LongVectorBatch,
+ int64_t>(batch, offset, length, builder);
+ case liborc::DOUBLE:
+ return AppendNumericBatch<DoubleBuilder, liborc::DoubleVectorBatch, double>(
+ batch, offset, length, builder);
+ case liborc::FLOAT:
+ return AppendNumericBatchCast<FloatBuilder, float, liborc::DoubleVectorBatch,
+ double>(batch, offset, length, builder);
+ case liborc::BOOLEAN:
+ return AppendBoolBatch(batch, offset, length, builder);
+ case liborc::VARCHAR:
+ case liborc::STRING:
+ return AppendBinaryBatch<StringBuilder>(batch, offset, length, builder);
+ case liborc::BINARY:
+ return AppendBinaryBatch<BinaryBuilder>(batch, offset, length, builder);
+ case liborc::CHAR:
+ return AppendFixedBinaryBatch(batch, offset, length, builder);
+ case liborc::DATE:
+ return AppendNumericBatchCast<Date32Builder, int32_t, liborc::LongVectorBatch,
+ int64_t>(batch, offset, length, builder);
+ case liborc::TIMESTAMP:
+ return AppendTimestampBatch(batch, offset, length, builder);
+ case liborc::DECIMAL:
+ return AppendDecimalBatch(type, batch, offset, length, builder);
+ default:
+ return Status::NotImplemented("Not implemented type kind: ", kind);
+ }
+}
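+
+// A minimal read-side sketch (hypothetical names, assuming a LONG ORC column
+// whose liborc::Type* and ColumnVectorBatch* were obtained from a reader):
+//
+//   std::unique_ptr<ArrayBuilder> builder;
+//   RETURN_NOT_OK(MakeBuilder(default_memory_pool(), int64(), &builder));
+//   RETURN_NOT_OK(AppendBatch(orc_type, batch, /*offset=*/0,
+//                             batch->numElements, builder.get()));
+//   ARROW_ASSIGN_OR_RAISE(auto array, builder->Finish());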
+
+namespace {
+
+using internal::checked_cast;
+using internal::checked_pointer_cast;
+
+Status WriteBatch(const Array& parray, int64_t orc_offset,
+ liborc::ColumnVectorBatch* column_vector_batch);
+
+// Make sure the children of a StructArray carry null bitmaps that also
+// reflect the parent's nulls.
+Result<std::shared_ptr<Array>> NormalizeArray(const std::shared_ptr<Array>& array) {
+ Type::type kind = array->type_id();
+ switch (kind) {
+ case Type::type::STRUCT: {
+ if (array->null_count() == 0) {
+ return array;
+ } else {
+ auto struct_array = checked_pointer_cast<StructArray>(array);
+ const std::shared_ptr<Buffer> bitmap = struct_array->null_bitmap();
+ std::shared_ptr<DataType> struct_type = struct_array->type();
+ std::size_t size = struct_type->fields().size();
+ std::vector<std::shared_ptr<Array>> new_children(size, nullptr);
+ for (std::size_t i = 0; i < size; i++) {
+ std::shared_ptr<Array> child = struct_array->field(i);
+ const std::shared_ptr<Buffer> child_bitmap = child->null_bitmap();
+ std::shared_ptr<Buffer> final_child_bitmap;
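+          // A child value is valid only if both the parent struct slot and
+          // the child slot are valid, so AND the two bitmaps together when
+          // the child has its own bitmap.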
+ if (child_bitmap == nullptr) {
+ final_child_bitmap = bitmap;
+ } else {
+ ARROW_ASSIGN_OR_RAISE(
+ final_child_bitmap,
+ internal::BitmapAnd(default_memory_pool(), bitmap->data(), 0,
+ child_bitmap->data(), 0, struct_array->length(), 0));
+ }
+ std::shared_ptr<ArrayData> child_array_data = child->data();
+ std::vector<std::shared_ptr<Buffer>> child_buffers = child_array_data->buffers;
+ child_buffers[0] = final_child_bitmap;
+ std::shared_ptr<ArrayData> new_child_array_data =
+ ArrayData::Make(child->type(), child->length(), child_buffers,
+ child_array_data->child_data, child_array_data->dictionary);
+ ARROW_ASSIGN_OR_RAISE(new_children[i],
+ NormalizeArray(MakeArray(new_child_array_data)));
+ }
+ return std::make_shared<StructArray>(struct_type, struct_array->length(),
+ new_children, bitmap);
+ }
+ }
+ case Type::type::LIST: {
+ auto list_array = checked_pointer_cast<ListArray>(array);
+ ARROW_ASSIGN_OR_RAISE(auto value_array, NormalizeArray(list_array->values()));
+ return std::make_shared<ListArray>(list_array->type(), list_array->length(),
+ list_array->value_offsets(), value_array,
+ list_array->null_bitmap());
+ }
+ case Type::type::LARGE_LIST: {
+ auto list_array = checked_pointer_cast<LargeListArray>(array);
+ ARROW_ASSIGN_OR_RAISE(auto value_array, NormalizeArray(list_array->values()));
+ return std::make_shared<LargeListArray>(list_array->type(), list_array->length(),
+ list_array->value_offsets(), value_array,
+ list_array->null_bitmap());
+ }
+ case Type::type::FIXED_SIZE_LIST: {
+ auto list_array = checked_pointer_cast<FixedSizeListArray>(array);
+ ARROW_ASSIGN_OR_RAISE(auto value_array, NormalizeArray(list_array->values()));
+ return std::make_shared<FixedSizeListArray>(list_array->type(),
+ list_array->length(), value_array,
+ list_array->null_bitmap());
+ }
+ case Type::type::MAP: {
+ auto map_array = checked_pointer_cast<MapArray>(array);
+ ARROW_ASSIGN_OR_RAISE(auto key_array, NormalizeArray(map_array->keys()));
+ ARROW_ASSIGN_OR_RAISE(auto item_array, NormalizeArray(map_array->items()));
+ return std::make_shared<MapArray>(map_array->type(), map_array->length(),
+ map_array->value_offsets(), key_array, item_array,
+ map_array->null_bitmap());
+ }
+ default: {
+ return array;
+ }
+ }
+}
+
+template <class DataType, class BatchType, typename Enable = void>
+struct Appender {};
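+// Each Appender is driven by ArrayDataVisitor: Visit() invokes VisitNull() or
+// VisitValue() once per slot, and the Appender copies that slot into the ORC
+// batch while advancing the running Arrow and ORC offsets.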
+
+// Types for long/double-like Appender, that is, numeric, boolean or date32
+template <typename T>
+using is_generic_type =
+ std::integral_constant<bool, is_number_type<T>::value ||
+ std::is_same<Date32Type, T>::value ||
+ is_boolean_type<T>::value>;
+template <typename T, typename R = void>
+using enable_if_generic = enable_if_t<is_generic_type<T>::value, R>;
+
+// Number-like
+template <class DataType, class BatchType>
+struct Appender<DataType, BatchType, enable_if_generic<DataType>> {
+ using ArrayType = typename TypeTraits<DataType>::ArrayType;
+ using ValueType = typename TypeTraits<DataType>::CType;
+ Status VisitNull() {
+ batch->notNull[running_orc_offset] = false;
+ running_orc_offset++;
+ running_arrow_offset++;
+ return Status::OK();
+ }
+ Status VisitValue(ValueType v) {
+ batch->data[running_orc_offset] = array.Value(running_arrow_offset);
+ batch->notNull[running_orc_offset] = true;
+ running_orc_offset++;
+ running_arrow_offset++;
+ return Status::OK();
+ }
+ const ArrayType& array;
+ BatchType* batch;
+ int64_t running_orc_offset, running_arrow_offset;
+};
+
+// Binary
+template <class DataType>
+struct Appender<DataType, liborc::StringVectorBatch> {
+ using ArrayType = typename TypeTraits<DataType>::ArrayType;
+ using COffsetType = typename TypeTraits<DataType>::OffsetType::c_type;
+ Status VisitNull() {
+ batch->notNull[running_orc_offset] = false;
+ running_orc_offset++;
+ running_arrow_offset++;
+ return Status::OK();
+ }
+ Status VisitValue(util::string_view v) {
+ batch->notNull[running_orc_offset] = true;
+ COffsetType data_length = 0;
+ batch->data[running_orc_offset] = reinterpret_cast<char*>(
+ const_cast<uint8_t*>(array.GetValue(running_arrow_offset, &data_length)));
+ batch->length[running_orc_offset] = data_length;
+ running_orc_offset++;
+ running_arrow_offset++;
+ return Status::OK();
+ }
+ const ArrayType& array;
+ liborc::StringVectorBatch* batch;
+ int64_t running_orc_offset, running_arrow_offset;
+};
+
+// Decimal
+template <>
+struct Appender<Decimal128Type, liborc::Decimal64VectorBatch> {
+ Status VisitNull() {
+ batch->notNull[running_orc_offset] = false;
+ running_orc_offset++;
+ running_arrow_offset++;
+ return Status::OK();
+ }
+ Status VisitValue(util::string_view v) {
+ batch->notNull[running_orc_offset] = true;
+ const Decimal128 dec_value(array.GetValue(running_arrow_offset));
+ batch->values[running_orc_offset] = static_cast<int64_t>(dec_value.low_bits());
+ running_orc_offset++;
+ running_arrow_offset++;
+ return Status::OK();
+ }
+ const Decimal128Array& array;
+ liborc::Decimal64VectorBatch* batch;
+ int64_t running_orc_offset, running_arrow_offset;
+};
+
+template <>
+struct Appender<Decimal128Type, liborc::Decimal128VectorBatch> {
+ Status VisitNull() {
+ batch->notNull[running_orc_offset] = false;
+ running_orc_offset++;
+ running_arrow_offset++;
+ return Status::OK();
+ }
+ Status VisitValue(util::string_view v) {
+ batch->notNull[running_orc_offset] = true;
+ const Decimal128 dec_value(array.GetValue(running_arrow_offset));
+ batch->values[running_orc_offset] =
+ liborc::Int128(dec_value.high_bits(), dec_value.low_bits());
+ running_orc_offset++;
+ running_arrow_offset++;
+ return Status::OK();
+ }
+ const Decimal128Array& array;
+ liborc::Decimal128VectorBatch* batch;
+ int64_t running_orc_offset, running_arrow_offset;
+};
+
+// Date64 and Timestamp
+template <class DataType>
+struct TimestampAppender {
+ using ArrayType = typename TypeTraits<DataType>::ArrayType;
+ Status VisitNull() {
+ batch->notNull[running_orc_offset] = false;
+ running_orc_offset++;
+ running_arrow_offset++;
+ return Status::OK();
+ }
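+  // Split the raw value into ORC's (seconds, nanoseconds) pair. For example,
+  // with millisecond input (conversion_factor_from_second = 1000,
+  // conversion_factor_to_nano = 1000000), the value 1500 becomes
+  // seconds = 1 and nanoseconds = 500 * 1000000.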
+ Status VisitValue(int64_t v) {
+ int64_t data = array.Value(running_arrow_offset);
+ batch->notNull[running_orc_offset] = true;
+ batch->data[running_orc_offset] =
+ static_cast<int64_t>(std::floor(data / conversion_factor_from_second));
+ batch->nanoseconds[running_orc_offset] =
+ (data - conversion_factor_from_second * batch->data[running_orc_offset]) *
+ conversion_factor_to_nano;
+ running_orc_offset++;
+ running_arrow_offset++;
+ return Status::OK();
+ }
+ const ArrayType& array;
+ liborc::TimestampVectorBatch* batch;
+ int64_t running_orc_offset, running_arrow_offset;
+ int64_t conversion_factor_from_second, conversion_factor_to_nano;
+};
+
+// Fixed-size binary
+struct FixedSizeBinaryAppender {
+ Status VisitNull() {
+ batch->notNull[running_orc_offset] = false;
+ running_orc_offset++;
+ running_arrow_offset++;
+ return Status::OK();
+ }
+ Status VisitValue(util::string_view v) {
+ batch->notNull[running_orc_offset] = true;
+ batch->data[running_orc_offset] = reinterpret_cast<char*>(
+ const_cast<uint8_t*>(array.GetValue(running_arrow_offset)));
+ batch->length[running_orc_offset] = data_length;
+ running_orc_offset++;
+ running_arrow_offset++;
+ return Status::OK();
+ }
+ const FixedSizeBinaryArray& array;
+ liborc::StringVectorBatch* batch;
+ int64_t running_orc_offset, running_arrow_offset;
+ const int32_t data_length;
+};
+
+// A static_cast from int64_t or double to the same type shouldn't introduce
+// any runtime overhead. Please see
+// https://stackoverflow.com/questions/19106826/
+// can-static-cast-to-same-type-introduce-runtime-overhead
+template <class DataType, class BatchType>
+Status WriteGenericBatch(const Array& array, int64_t orc_offset,
+ liborc::ColumnVectorBatch* column_vector_batch) {
+ using ArrayType = typename TypeTraits<DataType>::ArrayType;
+ const ArrayType& array_(checked_cast<const ArrayType&>(array));
+ auto batch = checked_cast<BatchType*>(column_vector_batch);
+ if (array.null_count()) {
+ batch->hasNulls = true;
+ }
+ Appender<DataType, BatchType> appender{array_, batch, orc_offset, 0};
+ ArrayDataVisitor<DataType> visitor;
+ RETURN_NOT_OK(visitor.Visit(*(array_.data()), &appender));
+ return Status::OK();
+}
+
+template <class DataType>
+Status WriteTimestampBatch(const Array& array, int64_t orc_offset,
+ liborc::ColumnVectorBatch* column_vector_batch,
+ const int64_t& conversion_factor_from_second,
+ const int64_t& conversion_factor_to_nano) {
+ using ArrayType = typename TypeTraits<DataType>::ArrayType;
+ const ArrayType& array_(checked_cast<const ArrayType&>(array));
+ auto batch = checked_cast<liborc::TimestampVectorBatch*>(column_vector_batch);
+ if (array.null_count()) {
+ batch->hasNulls = true;
+ }
+ TimestampAppender<DataType> appender{array_,
+ batch,
+ orc_offset,
+ 0,
+ conversion_factor_from_second,
+ conversion_factor_to_nano};
+ ArrayDataVisitor<DataType> visitor;
+ RETURN_NOT_OK(visitor.Visit(*(array_.data()), &appender));
+ return Status::OK();
+}
+
+Status WriteFixedSizeBinaryBatch(const Array& array, int64_t orc_offset,
+ liborc::ColumnVectorBatch* column_vector_batch) {
+ const FixedSizeBinaryArray& array_(checked_cast<const FixedSizeBinaryArray&>(array));
+ auto batch = checked_cast<liborc::StringVectorBatch*>(column_vector_batch);
+ if (array.null_count()) {
+ batch->hasNulls = true;
+ }
+ FixedSizeBinaryAppender appender{array_, batch, orc_offset, 0, array_.byte_width()};
+ ArrayDataVisitor<FixedSizeBinaryType> visitor;
+ RETURN_NOT_OK(visitor.Visit(*(array_.data()), &appender));
+ return Status::OK();
+}
+
+Status WriteStructBatch(const Array& array, int64_t orc_offset,
+ liborc::ColumnVectorBatch* column_vector_batch) {
+ std::shared_ptr<Array> array_ = MakeArray(array.data());
+ std::shared_ptr<StructArray> struct_array(checked_pointer_cast<StructArray>(array_));
+ auto batch = checked_cast<liborc::StructVectorBatch*>(column_vector_batch);
+ std::size_t size = array.type()->fields().size();
+ int64_t arrow_length = array.length();
+ int64_t running_arrow_offset = 0, running_orc_offset = orc_offset;
+  // First fill the notNull flags of the ColumnVectorBatch
+  if (array.null_count()) {
+    batch->hasNulls = true;
+  }
+  for (; running_arrow_offset < arrow_length;
+       running_orc_offset++, running_arrow_offset++) {
+    batch->notNull[running_orc_offset] = !array.IsNull(running_arrow_offset);
+  }
+  // Then fill the child fields
+ for (std::size_t i = 0; i < size; i++) {
+ batch->fields[i]->resize(orc_offset + arrow_length);
+ RETURN_NOT_OK(WriteBatch(*(struct_array->field(i)), orc_offset, batch->fields[i]));
+ }
+ return Status::OK();
+}
+
+template <class ArrayType>
+Status WriteListBatch(const Array& array, int64_t orc_offset,
+ liborc::ColumnVectorBatch* column_vector_batch) {
+ const ArrayType& list_array(checked_cast<const ArrayType&>(array));
+ auto batch = checked_cast<liborc::ListVectorBatch*>(column_vector_batch);
+ liborc::ColumnVectorBatch* element_batch = (batch->elements).get();
+ int64_t arrow_length = array.length();
+ int64_t running_arrow_offset = 0, running_orc_offset = orc_offset;
+ if (orc_offset == 0) {
+ batch->offsets[0] = 0;
+ }
+ if (array.null_count()) {
+ batch->hasNulls = true;
+ }
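+  // ORC list batches use cumulative offsets: offsets[i + 1] - offsets[i] is
+  // the length of list i. Null entries get a zero-length slot so the running
+  // offsets stay consistent.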
+ for (; running_arrow_offset < arrow_length;
+ running_orc_offset++, running_arrow_offset++) {
+ if (array.IsNull(running_arrow_offset)) {
+ batch->notNull[running_orc_offset] = false;
+ batch->offsets[running_orc_offset + 1] = batch->offsets[running_orc_offset];
+ } else {
+ batch->notNull[running_orc_offset] = true;
+ batch->offsets[running_orc_offset + 1] =
+ batch->offsets[running_orc_offset] +
+ list_array.value_offset(running_arrow_offset + 1) -
+ list_array.value_offset(running_arrow_offset);
+ element_batch->resize(batch->offsets[running_orc_offset + 1]);
+ int64_t subarray_arrow_offset = list_array.value_offset(running_arrow_offset),
+ subarray_orc_offset = batch->offsets[running_orc_offset],
+ subarray_orc_length =
+ batch->offsets[running_orc_offset + 1] - subarray_orc_offset;
+ RETURN_NOT_OK(WriteBatch(
+ *(list_array.values()->Slice(subarray_arrow_offset, subarray_orc_length)),
+ subarray_orc_offset, element_batch));
+ }
+ }
+ return Status::OK();
+}
+
+Status WriteMapBatch(const Array& array, int64_t orc_offset,
+ liborc::ColumnVectorBatch* column_vector_batch) {
+ const MapArray& map_array(checked_cast<const MapArray&>(array));
+ auto batch = checked_cast<liborc::MapVectorBatch*>(column_vector_batch);
+ liborc::ColumnVectorBatch* key_batch = (batch->keys).get();
+ liborc::ColumnVectorBatch* element_batch = (batch->elements).get();
+ std::shared_ptr<Array> key_array = map_array.keys();
+ std::shared_ptr<Array> element_array = map_array.items();
+ int64_t arrow_length = array.length();
+ int64_t running_arrow_offset = 0, running_orc_offset = orc_offset;
+ if (orc_offset == 0) {
+ batch->offsets[0] = 0;
+ }
+ if (array.null_count()) {
+ batch->hasNulls = true;
+ }
+ for (; running_arrow_offset < arrow_length;
+ running_orc_offset++, running_arrow_offset++) {
+ if (array.IsNull(running_arrow_offset)) {
+ batch->notNull[running_orc_offset] = false;
+ batch->offsets[running_orc_offset + 1] = batch->offsets[running_orc_offset];
+ } else {
+ batch->notNull[running_orc_offset] = true;
+ batch->offsets[running_orc_offset + 1] =
+ batch->offsets[running_orc_offset] +
+ map_array.value_offset(running_arrow_offset + 1) -
+ map_array.value_offset(running_arrow_offset);
+ int64_t subarray_arrow_offset = map_array.value_offset(running_arrow_offset),
+ subarray_orc_offset = batch->offsets[running_orc_offset],
+ new_subarray_orc_offset = batch->offsets[running_orc_offset + 1],
+ subarray_orc_length = new_subarray_orc_offset - subarray_orc_offset;
+ key_batch->resize(new_subarray_orc_offset);
+ element_batch->resize(new_subarray_orc_offset);
+ RETURN_NOT_OK(
+ WriteBatch(*(key_array->Slice(subarray_arrow_offset, subarray_orc_length)),
+ subarray_orc_offset, key_batch));
+ RETURN_NOT_OK(
+ WriteBatch(*(element_array->Slice(subarray_arrow_offset, subarray_orc_length)),
+ subarray_orc_offset, element_batch));
+ }
+ }
+ return Status::OK();
+}
+
+Status WriteBatch(const Array& array, int64_t orc_offset,
+ liborc::ColumnVectorBatch* column_vector_batch) {
+ Type::type kind = array.type_id();
+ column_vector_batch->numElements = orc_offset;
+ switch (kind) {
+ case Type::type::BOOL:
+ return WriteGenericBatch<BooleanType, liborc::LongVectorBatch>(array, orc_offset,
+ column_vector_batch);
+ case Type::type::INT8:
+ return WriteGenericBatch<Int8Type, liborc::LongVectorBatch>(array, orc_offset,
+ column_vector_batch);
+ case Type::type::INT16:
+ return WriteGenericBatch<Int16Type, liborc::LongVectorBatch>(array, orc_offset,
+ column_vector_batch);
+ case Type::type::INT32:
+ return WriteGenericBatch<Int32Type, liborc::LongVectorBatch>(array, orc_offset,
+ column_vector_batch);
+ case Type::type::INT64:
+ return WriteGenericBatch<Int64Type, liborc::LongVectorBatch>(array, orc_offset,
+ column_vector_batch);
+ case Type::type::FLOAT:
+ return WriteGenericBatch<FloatType, liborc::DoubleVectorBatch>(array, orc_offset,
+ column_vector_batch);
+ case Type::type::DOUBLE:
+ return WriteGenericBatch<DoubleType, liborc::DoubleVectorBatch>(
+ array, orc_offset, column_vector_batch);
+ case Type::type::BINARY:
+ return WriteGenericBatch<BinaryType, liborc::StringVectorBatch>(
+ array, orc_offset, column_vector_batch);
+ case Type::type::LARGE_BINARY:
+ return WriteGenericBatch<LargeBinaryType, liborc::StringVectorBatch>(
+ array, orc_offset, column_vector_batch);
+ case Type::type::STRING:
+ return WriteGenericBatch<StringType, liborc::StringVectorBatch>(
+ array, orc_offset, column_vector_batch);
+ case Type::type::LARGE_STRING:
+ return WriteGenericBatch<LargeStringType, liborc::StringVectorBatch>(
+ array, orc_offset, column_vector_batch);
+ case Type::type::FIXED_SIZE_BINARY:
+ return WriteFixedSizeBinaryBatch(array, orc_offset, column_vector_batch);
+ case Type::type::DATE32:
+ return WriteGenericBatch<Date32Type, liborc::LongVectorBatch>(array, orc_offset,
+ column_vector_batch);
+ case Type::type::DATE64:
+ return WriteTimestampBatch<Date64Type>(array, orc_offset, column_vector_batch,
+ kOneSecondMillis, kOneMilliNanos);
+ case Type::type::TIMESTAMP: {
+ switch (internal::checked_pointer_cast<TimestampType>(array.type())->unit()) {
+ case TimeUnit::type::SECOND:
+ return WriteTimestampBatch<TimestampType>(
+ array, orc_offset, column_vector_batch, 1, kOneSecondNanos);
+ case TimeUnit::type::MILLI:
+ return WriteTimestampBatch<TimestampType>(
+ array, orc_offset, column_vector_batch, kOneSecondMillis, kOneMilliNanos);
+ case TimeUnit::type::MICRO:
+ return WriteTimestampBatch<TimestampType>(
+ array, orc_offset, column_vector_batch, kOneSecondMicros, kOneMicroNanos);
+ case TimeUnit::type::NANO:
+ return WriteTimestampBatch<TimestampType>(
+ array, orc_offset, column_vector_batch, kOneSecondNanos, 1);
+ default:
+ return Status::TypeError("Unknown or unsupported Arrow type: ",
+ array.type()->ToString());
+ }
+ }
+ case Type::type::DECIMAL128: {
+ int32_t precision = checked_pointer_cast<Decimal128Type>(array.type())->precision();
+ if (precision > 18) {
+ return WriteGenericBatch<Decimal128Type, liborc::Decimal128VectorBatch>(
+ array, orc_offset, column_vector_batch);
+ } else {
+ return WriteGenericBatch<Decimal128Type, liborc::Decimal64VectorBatch>(
+ array, orc_offset, column_vector_batch);
+ }
+ }
+ case Type::type::STRUCT:
+ return WriteStructBatch(array, orc_offset, column_vector_batch);
+ case Type::type::LIST:
+ return WriteListBatch<ListArray>(array, orc_offset, column_vector_batch);
+ case Type::type::LARGE_LIST:
+ return WriteListBatch<LargeListArray>(array, orc_offset, column_vector_batch);
+ case Type::type::FIXED_SIZE_LIST:
+ return WriteListBatch<FixedSizeListArray>(array, orc_offset, column_vector_batch);
+ case Type::type::MAP:
+ return WriteMapBatch(array, orc_offset, column_vector_batch);
+ default: {
+ return Status::NotImplemented("Unknown or unsupported Arrow type: ",
+ array.type()->ToString());
+ }
+ }
+ return Status::OK();
+}
+
+Result<ORC_UNIQUE_PTR<liborc::Type>> GetOrcType(const DataType& type) {
+ Type::type kind = type.id();
+ switch (kind) {
+ case Type::type::BOOL:
+ return liborc::createPrimitiveType(liborc::TypeKind::BOOLEAN);
+ case Type::type::INT8:
+ return liborc::createPrimitiveType(liborc::TypeKind::BYTE);
+ case Type::type::INT16:
+ return liborc::createPrimitiveType(liborc::TypeKind::SHORT);
+ case Type::type::INT32:
+ return liborc::createPrimitiveType(liborc::TypeKind::INT);
+ case Type::type::INT64:
+ return liborc::createPrimitiveType(liborc::TypeKind::LONG);
+ case Type::type::FLOAT:
+ return liborc::createPrimitiveType(liborc::TypeKind::FLOAT);
+ case Type::type::DOUBLE:
+ return liborc::createPrimitiveType(liborc::TypeKind::DOUBLE);
+    // Use STRING instead of VARCHAR for now; both use UTF-8
+ case Type::type::STRING:
+ case Type::type::LARGE_STRING:
+ return liborc::createPrimitiveType(liborc::TypeKind::STRING);
+ case Type::type::BINARY:
+ case Type::type::LARGE_BINARY:
+ case Type::type::FIXED_SIZE_BINARY:
+ return liborc::createPrimitiveType(liborc::TypeKind::BINARY);
+ case Type::type::DATE32:
+ return liborc::createPrimitiveType(liborc::TypeKind::DATE);
+ case Type::type::DATE64:
+ case Type::type::TIMESTAMP:
+ return liborc::createPrimitiveType(liborc::TypeKind::TIMESTAMP);
+ case Type::type::DECIMAL128: {
+ const uint64_t precision =
+ static_cast<uint64_t>(checked_cast<const Decimal128Type&>(type).precision());
+ const uint64_t scale =
+ static_cast<uint64_t>(checked_cast<const Decimal128Type&>(type).scale());
+ return liborc::createDecimalType(precision, scale);
+ }
+ case Type::type::LIST:
+ case Type::type::FIXED_SIZE_LIST:
+ case Type::type::LARGE_LIST: {
+ std::shared_ptr<DataType> arrow_child_type =
+ checked_cast<const BaseListType&>(type).value_type();
+ ARROW_ASSIGN_OR_RAISE(auto orc_subtype, GetOrcType(*arrow_child_type));
+ return liborc::createListType(std::move(orc_subtype));
+ }
+ case Type::type::STRUCT: {
+ ORC_UNIQUE_PTR<liborc::Type> out_type = liborc::createStructType();
+ std::vector<std::shared_ptr<Field>> arrow_fields =
+ checked_cast<const StructType&>(type).fields();
+      for (const auto& field : arrow_fields) {
+        ARROW_ASSIGN_OR_RAISE(auto orc_subtype, GetOrcType(*field->type()));
+        out_type->addStructField(field->name(), std::move(orc_subtype));
+      }
+ return std::move(out_type);
+ }
+ case Type::type::MAP: {
+ std::shared_ptr<DataType> key_arrow_type =
+ checked_cast<const MapType&>(type).key_type();
+ std::shared_ptr<DataType> item_arrow_type =
+ checked_cast<const MapType&>(type).item_type();
+ ARROW_ASSIGN_OR_RAISE(auto key_orc_type, GetOrcType(*key_arrow_type));
+ ARROW_ASSIGN_OR_RAISE(auto item_orc_type, GetOrcType(*item_arrow_type));
+ return liborc::createMapType(std::move(key_orc_type), std::move(item_orc_type));
+ }
+ case Type::type::DENSE_UNION:
+ case Type::type::SPARSE_UNION: {
+ ORC_UNIQUE_PTR<liborc::Type> out_type = liborc::createUnionType();
+ std::vector<std::shared_ptr<Field>> arrow_fields =
+ checked_cast<const UnionType&>(type).fields();
+      for (const auto& field : arrow_fields) {
+        ARROW_ASSIGN_OR_RAISE(auto orc_subtype, GetOrcType(*field->type()));
+        out_type->addUnionChild(std::move(orc_subtype));
+      }
+ return std::move(out_type);
+ }
+ default: {
+ return Status::NotImplemented("Unknown or unsupported Arrow type: ",
+ type.ToString());
+ }
+ }
+}
+
+} // namespace
+
+Status WriteBatch(const ChunkedArray& chunked_array, int64_t length,
+ int* arrow_chunk_offset, int64_t* arrow_index_offset,
+ liborc::ColumnVectorBatch* column_vector_batch) {
+ int num_batch = chunked_array.num_chunks();
+ int64_t orc_offset = 0;
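+  // Resume from (*arrow_chunk_offset, *arrow_index_offset) and write until
+  // either `length` ORC slots are filled or the chunked array is exhausted;
+  // both offsets are updated so that the next call picks up where this one
+  // stopped.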
+ while (*arrow_chunk_offset < num_batch && orc_offset < length) {
+ ARROW_ASSIGN_OR_RAISE(auto array,
+ NormalizeArray(chunked_array.chunk(*arrow_chunk_offset)));
+ int64_t num_written_elements =
+ std::min(length - orc_offset, array->length() - *arrow_index_offset);
+ if (num_written_elements > 0) {
+ RETURN_NOT_OK(WriteBatch(*(array->Slice(*arrow_index_offset, num_written_elements)),
+ orc_offset, column_vector_batch));
+ orc_offset += num_written_elements;
+ *arrow_index_offset += num_written_elements;
+ }
+    if (orc_offset < length) {  // Finished the current chunk; advance to the next
+ *arrow_index_offset = 0;
+ (*arrow_chunk_offset)++;
+ }
+ }
+ column_vector_batch->numElements = orc_offset;
+ return Status::OK();
+}
+
+Status GetArrowType(const liborc::Type* type, std::shared_ptr<DataType>* out) {
+  // When subselecting fields on read, liborc will set some nodes to nullptr,
+  // so we need to check for nullptr before proceeding
+ if (type == nullptr) {
+ *out = null();
+ return Status::OK();
+ }
+ liborc::TypeKind kind = type->getKind();
+ const int subtype_count = static_cast<int>(type->getSubtypeCount());
+
+ switch (kind) {
+ case liborc::BOOLEAN:
+ *out = boolean();
+ break;
+ case liborc::BYTE:
+ *out = int8();
+ break;
+ case liborc::SHORT:
+ *out = int16();
+ break;
+ case liborc::INT:
+ *out = int32();
+ break;
+ case liborc::LONG:
+ *out = int64();
+ break;
+ case liborc::FLOAT:
+ *out = float32();
+ break;
+ case liborc::DOUBLE:
+ *out = float64();
+ break;
+ case liborc::VARCHAR:
+ case liborc::STRING:
+ *out = utf8();
+ break;
+ case liborc::BINARY:
+ *out = binary();
+ break;
+ case liborc::CHAR:
+ *out = fixed_size_binary(static_cast<int>(type->getMaximumLength()));
+ break;
+ case liborc::TIMESTAMP:
+ *out = timestamp(TimeUnit::NANO);
+ break;
+ case liborc::DATE:
+ *out = date32();
+ break;
+ case liborc::DECIMAL: {
+ const int precision = static_cast<int>(type->getPrecision());
+ const int scale = static_cast<int>(type->getScale());
+ if (precision == 0) {
+        // In HIVE 0.11/0.12 the precision is set to 0, which means the
+        // maximum precision
+ *out = decimal128(38, 6);
+ } else {
+ *out = decimal128(precision, scale);
+ }
+ break;
+ }
+ case liborc::LIST: {
+ if (subtype_count != 1) {
+ return Status::TypeError("Invalid Orc List type");
+ }
+ std::shared_ptr<DataType> elemtype;
+ RETURN_NOT_OK(GetArrowType(type->getSubtype(0), &elemtype));
+ *out = list(elemtype);
+ break;
+ }
+ case liborc::MAP: {
+ if (subtype_count != 2) {
+ return Status::TypeError("Invalid Orc Map type");
+ }
+ std::shared_ptr<DataType> key_type, item_type;
+ RETURN_NOT_OK(GetArrowType(type->getSubtype(0), &key_type));
+ RETURN_NOT_OK(GetArrowType(type->getSubtype(1), &item_type));
+ *out = map(key_type, item_type);
+ break;
+ }
+ case liborc::STRUCT: {
+ std::vector<std::shared_ptr<Field>> fields;
+ for (int child = 0; child < subtype_count; ++child) {
+ std::shared_ptr<DataType> elem_type;
+ RETURN_NOT_OK(GetArrowType(type->getSubtype(child), &elem_type));
+ std::string name = type->getFieldName(child);
+ fields.push_back(field(name, elem_type));
+ }
+ *out = struct_(fields);
+ break;
+ }
+ case liborc::UNION: {
+ std::vector<std::shared_ptr<Field>> fields;
+ std::vector<int8_t> type_codes;
+ for (int child = 0; child < subtype_count; ++child) {
+ std::shared_ptr<DataType> elem_type;
+ RETURN_NOT_OK(GetArrowType(type->getSubtype(child), &elem_type));
+ fields.push_back(field("_union_" + std::to_string(child), elem_type));
+ type_codes.push_back(static_cast<int8_t>(child));
+ }
+ *out = sparse_union(fields, type_codes);
+ break;
+ }
+ default: {
+ return Status::TypeError("Unknown Orc type kind: ", type->toString());
+ }
+ }
+ return Status::OK();
+}
+
+Result<ORC_UNIQUE_PTR<liborc::Type>> GetOrcType(const Schema& schema) {
+  const int num_fields = schema.num_fields();
+  ORC_UNIQUE_PTR<liborc::Type> out_type = liborc::createStructType();
+  for (int i = 0; i < num_fields; i++) {
+ std::shared_ptr<Field> field = schema.field(i);
+ std::string field_name = field->name();
+ std::shared_ptr<DataType> arrow_child_type = field->type();
+ ARROW_ASSIGN_OR_RAISE(auto orc_subtype, GetOrcType(*arrow_child_type));
+ out_type->addStructField(field_name, std::move(orc_subtype));
+ }
+ return std::move(out_type);
+}
+
+} // namespace orc
+} // namespace adapters
+} // namespace arrow
diff --git a/contrib/libs/apache/arrow/cpp/src/arrow/adapters/orc/adapter_util.h b/contrib/libs/apache/arrow/cpp/src/arrow/adapters/orc/adapter_util.h
index 8176715aa51..3e6d0fcc660 100644
--- a/contrib/libs/apache/arrow/cpp/src/arrow/adapters/orc/adapter_util.h
+++ b/contrib/libs/apache/arrow/cpp/src/arrow/adapters/orc/adapter_util.h
@@ -1,57 +1,57 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-#pragma once
-
-#include <cstdint>
-#include <memory>
-
-#include "arrow/array/builder_base.h"
-#include "arrow/status.h"
-#include "orc/OrcFile.hh"
-
-namespace liborc = orc;
-
-namespace arrow {
-
-namespace adapters {
-
-namespace orc {
-
-Status GetArrowType(const liborc::Type* type, std::shared_ptr<DataType>* out);
-
-Result<ORC_UNIQUE_PTR<liborc::Type>> GetOrcType(const Schema& schema);
-
-Status AppendBatch(const liborc::Type* type, liborc::ColumnVectorBatch* batch,
- int64_t offset, int64_t length, arrow::ArrayBuilder* builder);
-
-/// \brief Write a chunked array to an orc::ColumnVectorBatch
-///
-/// \param[in] chunked_array the chunked array
-/// \param[in] length the orc::ColumnVectorBatch size limit
-/// \param[in,out] arrow_chunk_offset The current chunk being processed
-/// \param[in,out] arrow_index_offset The index of the arrow_chunk_offset array
-/// before or after a process
-/// \param[in,out] column_vector_batch the orc::ColumnVectorBatch to be filled
-/// \return Status
-Status WriteBatch(const ChunkedArray& chunked_array, int64_t length,
- int* arrow_chunk_offset, int64_t* arrow_index_offset,
- liborc::ColumnVectorBatch* column_vector_batch);
-
-} // namespace orc
-} // namespace adapters
-} // namespace arrow
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+
+#include <cstdint>
+#include <memory>
+
+#include "arrow/array/builder_base.h"
+#include "arrow/status.h"
+#include "orc/OrcFile.hh"
+
+namespace liborc = orc;
+
+namespace arrow {
+
+namespace adapters {
+
+namespace orc {
+
+Status GetArrowType(const liborc::Type* type, std::shared_ptr<DataType>* out);
+
+Result<ORC_UNIQUE_PTR<liborc::Type>> GetOrcType(const Schema& schema);
+
+Status AppendBatch(const liborc::Type* type, liborc::ColumnVectorBatch* batch,
+ int64_t offset, int64_t length, arrow::ArrayBuilder* builder);
+
+/// \brief Write a chunked array to an orc::ColumnVectorBatch
+///
+/// \param[in] chunked_array the chunked array
+/// \param[in] length the orc::ColumnVectorBatch size limit
+/// \param[in,out] arrow_chunk_offset The current chunk being processed
+/// \param[in,out] arrow_index_offset The element index within the current
+/// chunk, updated as values are written
+/// \param[in,out] column_vector_batch the orc::ColumnVectorBatch to be filled
+/// \return Status
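+///
+/// May be called repeatedly with the same offset variables to write a long
+/// chunked array as a sequence of fixed-size batches; each call resumes
+/// where the previous one stopped.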
+Status WriteBatch(const ChunkedArray& chunked_array, int64_t length,
+ int* arrow_chunk_offset, int64_t* arrow_index_offset,
+ liborc::ColumnVectorBatch* column_vector_batch);
+
+} // namespace orc
+} // namespace adapters
+} // namespace arrow
diff --git a/contrib/libs/apache/arrow/cpp/src/arrow/array/array_base.cc b/contrib/libs/apache/arrow/cpp/src/arrow/array/array_base.cc
index 5d731baa777..67c5ca84e1f 100644
--- a/contrib/libs/apache/arrow/cpp/src/arrow/array/array_base.cc
+++ b/contrib/libs/apache/arrow/cpp/src/arrow/array/array_base.cc
@@ -73,10 +73,10 @@ struct ScalarFromArraySlotImpl {
return Finish(Decimal128(a.GetValue(index_)));
}
- Status Visit(const Decimal256Array& a) {
- return Finish(Decimal256(a.GetValue(index_)));
- }
-
+ Status Visit(const Decimal256Array& a) {
+ return Finish(Decimal256(a.GetValue(index_)));
+ }
+
template <typename T>
Status Visit(const BaseBinaryArray<T>& a) {
return Finish(a.GetString(index_));
@@ -222,31 +222,31 @@ bool Array::ApproxEquals(const std::shared_ptr<Array>& arr,
}
bool Array::RangeEquals(const Array& other, int64_t start_idx, int64_t end_idx,
- int64_t other_start_idx, const EqualOptions& opts) const {
- return ArrayRangeEquals(*this, other, start_idx, end_idx, other_start_idx, opts);
+ int64_t other_start_idx, const EqualOptions& opts) const {
+ return ArrayRangeEquals(*this, other, start_idx, end_idx, other_start_idx, opts);
}
bool Array::RangeEquals(const std::shared_ptr<Array>& other, int64_t start_idx,
- int64_t end_idx, int64_t other_start_idx,
- const EqualOptions& opts) const {
+ int64_t end_idx, int64_t other_start_idx,
+ const EqualOptions& opts) const {
if (!other) {
return false;
}
- return ArrayRangeEquals(*this, *other, start_idx, end_idx, other_start_idx, opts);
+ return ArrayRangeEquals(*this, *other, start_idx, end_idx, other_start_idx, opts);
}
bool Array::RangeEquals(int64_t start_idx, int64_t end_idx, int64_t other_start_idx,
- const Array& other, const EqualOptions& opts) const {
- return ArrayRangeEquals(*this, other, start_idx, end_idx, other_start_idx, opts);
+ const Array& other, const EqualOptions& opts) const {
+ return ArrayRangeEquals(*this, other, start_idx, end_idx, other_start_idx, opts);
}
bool Array::RangeEquals(int64_t start_idx, int64_t end_idx, int64_t other_start_idx,
- const std::shared_ptr<Array>& other,
- const EqualOptions& opts) const {
+ const std::shared_ptr<Array>& other,
+ const EqualOptions& opts) const {
if (!other) {
return false;
}
- return ArrayRangeEquals(*this, *other, start_idx, end_idx, other_start_idx, opts);
+ return ArrayRangeEquals(*this, *other, start_idx, end_idx, other_start_idx, opts);
}
std::shared_ptr<Array> Array::Slice(int64_t offset, int64_t length) const {
@@ -302,7 +302,7 @@ Status Array::Validate() const { return internal::ValidateArray(*this); }
Status Array::ValidateFull() const {
RETURN_NOT_OK(internal::ValidateArray(*this));
- return internal::ValidateArrayFull(*this);
+ return internal::ValidateArrayFull(*this);
}
} // namespace arrow
diff --git a/contrib/libs/apache/arrow/cpp/src/arrow/array/array_base.h b/contrib/libs/apache/arrow/cpp/src/arrow/array/array_base.h
index 469ae94d2eb..2add572e7a4 100644
--- a/contrib/libs/apache/arrow/cpp/src/arrow/array/array_base.h
+++ b/contrib/libs/apache/arrow/cpp/src/arrow/array/array_base.h
@@ -56,17 +56,17 @@ class ARROW_EXPORT Array {
/// \brief Return true if value at index is null. Does not boundscheck
bool IsNull(int64_t i) const {
- return null_bitmap_data_ != NULLPTR
- ? !BitUtil::GetBit(null_bitmap_data_, i + data_->offset)
- : data_->null_count == data_->length;
+ return null_bitmap_data_ != NULLPTR
+ ? !BitUtil::GetBit(null_bitmap_data_, i + data_->offset)
+ : data_->null_count == data_->length;
}
/// \brief Return true if value at index is valid (not null). Does not
/// boundscheck
bool IsValid(int64_t i) const {
- return null_bitmap_data_ != NULLPTR
- ? BitUtil::GetBit(null_bitmap_data_, i + data_->offset)
- : data_->null_count != data_->length;
+ return null_bitmap_data_ != NULLPTR
+ ? BitUtil::GetBit(null_bitmap_data_, i + data_->offset)
+ : data_->null_count != data_->length;
}
/// \brief Return a Scalar containing the value of this array at i
@@ -93,7 +93,7 @@ class ARROW_EXPORT Array {
///
/// Note that for `null_count == 0` or for null type, this will be null.
/// This buffer does not account for any slice offset
- const std::shared_ptr<Buffer>& null_bitmap() const { return data_->buffers[0]; }
+ const std::shared_ptr<Buffer>& null_bitmap() const { return data_->buffers[0]; }
/// Raw pointer to the null bitmap.
///
@@ -121,17 +121,17 @@ class ARROW_EXPORT Array {
/// Compare if the range of slots specified are equal for the given array and
/// this array. end_idx exclusive. This methods does not bounds check.
bool RangeEquals(int64_t start_idx, int64_t end_idx, int64_t other_start_idx,
- const Array& other,
- const EqualOptions& = EqualOptions::Defaults()) const;
+ const Array& other,
+ const EqualOptions& = EqualOptions::Defaults()) const;
bool RangeEquals(int64_t start_idx, int64_t end_idx, int64_t other_start_idx,
- const std::shared_ptr<Array>& other,
- const EqualOptions& = EqualOptions::Defaults()) const;
+ const std::shared_ptr<Array>& other,
+ const EqualOptions& = EqualOptions::Defaults()) const;
bool RangeEquals(const Array& other, int64_t start_idx, int64_t end_idx,
- int64_t other_start_idx,
- const EqualOptions& = EqualOptions::Defaults()) const;
+ int64_t other_start_idx,
+ const EqualOptions& = EqualOptions::Defaults()) const;
bool RangeEquals(const std::shared_ptr<Array>& other, int64_t start_idx,
- int64_t end_idx, int64_t other_start_idx,
- const EqualOptions& = EqualOptions::Defaults()) const;
+ int64_t end_idx, int64_t other_start_idx,
+ const EqualOptions& = EqualOptions::Defaults()) const;
Status Accept(ArrayVisitor* visitor) const;
@@ -162,7 +162,7 @@ class ARROW_EXPORT Array {
/// Input-checking variant of Array::Slice
Result<std::shared_ptr<Array>> SliceSafe(int64_t offset) const;
- const std::shared_ptr<ArrayData>& data() const { return data_; }
+ const std::shared_ptr<ArrayData>& data() const { return data_; }
int num_fields() const { return static_cast<int>(data_->child_data.size()); }
diff --git a/contrib/libs/apache/arrow/cpp/src/arrow/array/array_binary.cc b/contrib/libs/apache/arrow/cpp/src/arrow/array/array_binary.cc
index 14a3a8ef961..9466b5a48f9 100644
--- a/contrib/libs/apache/arrow/cpp/src/arrow/array/array_binary.cc
+++ b/contrib/libs/apache/arrow/cpp/src/arrow/array/array_binary.cc
@@ -21,9 +21,9 @@
#include <memory>
#include "arrow/array/array_base.h"
-#include "arrow/array/validate.h"
+#include "arrow/array/validate.h"
#include "arrow/type.h"
-#include "arrow/type_traits.h"
+#include "arrow/type_traits.h"
#include "arrow/util/checked_cast.h"
#include "arrow/util/logging.h"
@@ -32,7 +32,7 @@ namespace arrow {
using internal::checked_cast;
BinaryArray::BinaryArray(const std::shared_ptr<ArrayData>& data) {
- ARROW_CHECK(is_binary_like(data->type->id()));
+ ARROW_CHECK(is_binary_like(data->type->id()));
SetData(data);
}
@@ -45,7 +45,7 @@ BinaryArray::BinaryArray(int64_t length, const std::shared_ptr<Buffer>& value_of
}
LargeBinaryArray::LargeBinaryArray(const std::shared_ptr<ArrayData>& data) {
- ARROW_CHECK(is_large_binary_like(data->type->id()));
+ ARROW_CHECK(is_large_binary_like(data->type->id()));
SetData(data);
}
@@ -71,7 +71,7 @@ StringArray::StringArray(int64_t length, const std::shared_ptr<Buffer>& value_of
offset));
}
-Status StringArray::ValidateUTF8() const { return internal::ValidateUTF8(*data_); }
+Status StringArray::ValidateUTF8() const { return internal::ValidateUTF8(*data_); }
LargeStringArray::LargeStringArray(const std::shared_ptr<ArrayData>& data) {
ARROW_CHECK_EQ(data->type->id(), Type::LARGE_STRING);
@@ -87,7 +87,7 @@ LargeStringArray::LargeStringArray(int64_t length,
null_count, offset));
}
-Status LargeStringArray::ValidateUTF8() const { return internal::ValidateUTF8(*data_); }
+Status LargeStringArray::ValidateUTF8() const { return internal::ValidateUTF8(*data_); }
FixedSizeBinaryArray::FixedSizeBinaryArray(const std::shared_ptr<ArrayData>& data) {
SetData(data);
diff --git a/contrib/libs/apache/arrow/cpp/src/arrow/array/array_binary.h b/contrib/libs/apache/arrow/cpp/src/arrow/array/array_binary.h
index 735042f4a09..f8e8c4f8a44 100644
--- a/contrib/libs/apache/arrow/cpp/src/arrow/array/array_binary.h
+++ b/contrib/libs/apache/arrow/cpp/src/arrow/array/array_binary.h
@@ -28,7 +28,7 @@
#include "arrow/array/array_base.h"
#include "arrow/array/data.h"
#include "arrow/buffer.h"
-#include "arrow/stl_iterator.h"
+#include "arrow/stl_iterator.h"
#include "arrow/type.h"
#include "arrow/util/checked_cast.h"
#include "arrow/util/macros.h"
@@ -47,7 +47,7 @@ class BaseBinaryArray : public FlatArray {
public:
using TypeClass = TYPE;
using offset_type = typename TypeClass::offset_type;
- using IteratorType = stl::ArrayIterator<BaseBinaryArray<TYPE>>;
+ using IteratorType = stl::ArrayIterator<BaseBinaryArray<TYPE>>;
/// Return the pointer to the given elements bytes
// XXX should GetValue(int64_t i) return a string_view?
@@ -71,13 +71,13 @@ class BaseBinaryArray : public FlatArray {
raw_value_offsets_[i + 1] - pos);
}
- /// \brief Get binary value as a string_view
- /// Provided for consistency with other arrays.
- ///
- /// \param i the value index
- /// \return the view over the selected value
- util::string_view Value(int64_t i) const { return GetView(i); }
-
+ /// \brief Get binary value as a string_view
+ /// Provided for consistency with other arrays.
+ ///
+ /// \param i the value index
+ /// \return the view over the selected value
+ util::string_view Value(int64_t i) const { return GetView(i); }
+
/// \brief Get binary value as a std::string
///
/// \param i the value index
@@ -124,13 +124,13 @@ class BaseBinaryArray : public FlatArray {
}
}
- IteratorType begin() const { return IteratorType(*this); }
-
- IteratorType end() const { return IteratorType(*this, length()); }
-
+ IteratorType begin() const { return IteratorType(*this); }
+
+ IteratorType end() const { return IteratorType(*this, length()); }
+
protected:
// For subclasses
- BaseBinaryArray() = default;
+ BaseBinaryArray() = default;
// Protected method for constructors
void SetData(const std::shared_ptr<ArrayData>& data) {
@@ -139,8 +139,8 @@ class BaseBinaryArray : public FlatArray {
raw_data_ = data->GetValuesSafe<uint8_t>(2, /*offset=*/0);
}
- const offset_type* raw_value_offsets_ = NULLPTR;
- const uint8_t* raw_data_ = NULLPTR;
+ const offset_type* raw_value_offsets_ = NULLPTR;
+ const uint8_t* raw_data_ = NULLPTR;
};
/// Concrete Array class for variable-size binary data
@@ -216,7 +216,7 @@ class ARROW_EXPORT LargeStringArray : public LargeBinaryArray {
class ARROW_EXPORT FixedSizeBinaryArray : public PrimitiveArray {
public:
using TypeClass = FixedSizeBinaryType;
- using IteratorType = stl::ArrayIterator<FixedSizeBinaryArray>;
+ using IteratorType = stl::ArrayIterator<FixedSizeBinaryArray>;
explicit FixedSizeBinaryArray(const std::shared_ptr<ArrayData>& data);
@@ -238,10 +238,10 @@ class ARROW_EXPORT FixedSizeBinaryArray : public PrimitiveArray {
const uint8_t* raw_values() const { return raw_values_ + data_->offset * byte_width_; }
- IteratorType begin() const { return IteratorType(*this); }
-
- IteratorType end() const { return IteratorType(*this, length()); }
-
+ IteratorType begin() const { return IteratorType(*this); }
+
+ IteratorType end() const { return IteratorType(*this, length()); }
+
protected:
void SetData(const std::shared_ptr<ArrayData>& data) {
this->PrimitiveArray::SetData(data);
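
The IteratorType lines restored above give the binary arrays STL-style begin()/end() via stl::ArrayIterator. A sketch of range-for iteration, assuming that in this vintage of the library dereferencing yields a util::optional wrapping the GetView() result (empty for null slots):

#include <iostream>
#include <string>

#include "arrow/array/array_binary.h"

void PrintStrings(const arrow::StringArray& strings) {
  // Each slot is a util::optional<util::string_view>; an empty optional is a null.
  for (auto slot : strings) {
    if (slot.has_value()) {
      std::cout << std::string(slot->data(), slot->size()) << "\n";
    } else {
      std::cout << "(null)\n";
    }
  }
}
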
diff --git a/contrib/libs/apache/arrow/cpp/src/arrow/array/array_decimal.cc b/contrib/libs/apache/arrow/cpp/src/arrow/array/array_decimal.cc
index 58852a7b6c5..d65f6ee5356 100644
--- a/contrib/libs/apache/arrow/cpp/src/arrow/array/array_decimal.cc
+++ b/contrib/libs/apache/arrow/cpp/src/arrow/array/array_decimal.cc
@@ -33,11 +33,11 @@ namespace arrow {
using internal::checked_cast;
// ----------------------------------------------------------------------
-// Decimal128
+// Decimal128
Decimal128Array::Decimal128Array(const std::shared_ptr<ArrayData>& data)
: FixedSizeBinaryArray(data) {
- ARROW_CHECK_EQ(data->type->id(), Type::DECIMAL128);
+ ARROW_CHECK_EQ(data->type->id(), Type::DECIMAL128);
}
std::string Decimal128Array::FormatValue(int64_t i) const {
@@ -46,18 +46,18 @@ std::string Decimal128Array::FormatValue(int64_t i) const {
return value.ToString(type_.scale());
}
-// ----------------------------------------------------------------------
-// Decimal256
-
-Decimal256Array::Decimal256Array(const std::shared_ptr<ArrayData>& data)
- : FixedSizeBinaryArray(data) {
- ARROW_CHECK_EQ(data->type->id(), Type::DECIMAL256);
-}
-
-std::string Decimal256Array::FormatValue(int64_t i) const {
- const auto& type_ = checked_cast<const Decimal256Type&>(*type());
- const Decimal256 value(GetValue(i));
- return value.ToString(type_.scale());
-}
-
+// ----------------------------------------------------------------------
+// Decimal256
+
+Decimal256Array::Decimal256Array(const std::shared_ptr<ArrayData>& data)
+ : FixedSizeBinaryArray(data) {
+ ARROW_CHECK_EQ(data->type->id(), Type::DECIMAL256);
+}
+
+std::string Decimal256Array::FormatValue(int64_t i) const {
+ const auto& type_ = checked_cast<const Decimal256Type&>(*type());
+ const Decimal256 value(GetValue(i));
+ return value.ToString(type_.scale());
+}
+
} // namespace arrow
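
Decimal256Array::FormatValue(), restored above, renders the raw 32-byte value through the type's scale. A sketch of the round trip, assuming the decimal256() factory and Decimal256's integral (unscaled-value) constructor behave as in stock Arrow; the precision, scale, and value are illustrative:

#include <memory>
#include <string>

#include "arrow/array/array_decimal.h"
#include "arrow/array/builder_decimal.h"
#include "arrow/status.h"
#include "arrow/type.h"
#include "arrow/util/decimal.h"

arrow::Status FormatOne(std::string* out) {
  auto type = arrow::decimal256(/*precision=*/40, /*scale=*/2);
  arrow::Decimal256Builder builder(type);
  // 12345 unscaled at scale 2 represents 123.45.
  ARROW_RETURN_NOT_OK(builder.Append(arrow::Decimal256(12345)));
  std::shared_ptr<arrow::Array> array;
  ARROW_RETURN_NOT_OK(builder.Finish(&array));
  const auto& decimals = static_cast<const arrow::Decimal256Array&>(*array);
  *out = decimals.FormatValue(0);  // "123.45"
  return arrow::Status::OK();
}
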
diff --git a/contrib/libs/apache/arrow/cpp/src/arrow/array/array_decimal.h b/contrib/libs/apache/arrow/cpp/src/arrow/array/array_decimal.h
index e32b9d26a35..8d7d1c59cd0 100644
--- a/contrib/libs/apache/arrow/cpp/src/arrow/array/array_decimal.h
+++ b/contrib/libs/apache/arrow/cpp/src/arrow/array/array_decimal.h
@@ -47,20 +47,20 @@ class ARROW_EXPORT Decimal128Array : public FixedSizeBinaryArray {
// Backward compatibility
using DecimalArray = Decimal128Array;
-// ----------------------------------------------------------------------
-// Decimal256Array
-
-/// Concrete Array class for 256-bit decimal data
-class ARROW_EXPORT Decimal256Array : public FixedSizeBinaryArray {
- public:
- using TypeClass = Decimal256Type;
-
- using FixedSizeBinaryArray::FixedSizeBinaryArray;
-
- /// \brief Construct Decimal256Array from ArrayData instance
- explicit Decimal256Array(const std::shared_ptr<ArrayData>& data);
-
- std::string FormatValue(int64_t i) const;
-};
-
+// ----------------------------------------------------------------------
+// Decimal256Array
+
+/// Concrete Array class for 256-bit decimal data
+class ARROW_EXPORT Decimal256Array : public FixedSizeBinaryArray {
+ public:
+ using TypeClass = Decimal256Type;
+
+ using FixedSizeBinaryArray::FixedSizeBinaryArray;
+
+ /// \brief Construct Decimal256Array from ArrayData instance
+ explicit Decimal256Array(const std::shared_ptr<ArrayData>& data);
+
+ std::string FormatValue(int64_t i) const;
+};
+
} // namespace arrow
diff --git a/contrib/libs/apache/arrow/cpp/src/arrow/array/array_dict.cc b/contrib/libs/apache/arrow/cpp/src/arrow/array/array_dict.cc
index ddb44b470f6..2fa95e9a176 100644
--- a/contrib/libs/apache/arrow/cpp/src/arrow/array/array_dict.cc
+++ b/contrib/libs/apache/arrow/cpp/src/arrow/array/array_dict.cc
@@ -29,10 +29,10 @@
#include "arrow/array/dict_internal.h"
#include "arrow/array/util.h"
#include "arrow/buffer.h"
-#include "arrow/chunked_array.h"
-#include "arrow/datum.h"
+#include "arrow/chunked_array.h"
+#include "arrow/datum.h"
#include "arrow/status.h"
-#include "arrow/table.h"
+#include "arrow/table.h"
#include "arrow/type.h"
#include "arrow/type_traits.h"
#include "arrow/util/bit_util.h"
@@ -144,88 +144,88 @@ bool DictionaryArray::CanCompareIndices(const DictionaryArray& other) const {
}
// ----------------------------------------------------------------------
-// Dictionary transposition
-
-namespace {
-
-inline bool IsTrivialTransposition(const int32_t* transpose_map,
- int64_t input_dict_size) {
- for (int64_t i = 0; i < input_dict_size; ++i) {
- if (transpose_map[i] != i) {
- return false;
- }
- }
- return true;
-}
-
-Result<std::shared_ptr<ArrayData>> TransposeDictIndices(
- const std::shared_ptr<ArrayData>& data, const std::shared_ptr<DataType>& in_type,
- const std::shared_ptr<DataType>& out_type,
- const std::shared_ptr<ArrayData>& dictionary, const int32_t* transpose_map,
- MemoryPool* pool) {
- // Note that in_type may be different from data->type if data is of type ExtensionType
- if (in_type->id() != Type::DICTIONARY || out_type->id() != Type::DICTIONARY) {
- return Status::TypeError("Expected dictionary type");
- }
- const int64_t in_dict_len = data->dictionary->length;
- const auto& in_dict_type = checked_cast<const DictionaryType&>(*in_type);
- const auto& out_dict_type = checked_cast<const DictionaryType&>(*out_type);
-
- const auto& in_index_type = *in_dict_type.index_type();
- const auto& out_index_type =
- checked_cast<const FixedWidthType&>(*out_dict_type.index_type());
-
- if (in_index_type.id() == out_index_type.id() &&
- IsTrivialTransposition(transpose_map, in_dict_len)) {
- // Index type and values will be identical => we can simply reuse
- // the existing buffers.
- auto out_data =
- ArrayData::Make(out_type, data->length, {data->buffers[0], data->buffers[1]},
- data->null_count, data->offset);
- out_data->dictionary = dictionary;
- return out_data;
- }
-
- // Default path: compute a buffer of transposed indices.
- ARROW_ASSIGN_OR_RAISE(
- auto out_buffer,
- AllocateBuffer(data->length * (out_index_type.bit_width() / CHAR_BIT), pool));
-
- // Shift null buffer if the original offset is non-zero
- std::shared_ptr<Buffer> null_bitmap;
- if (data->offset != 0 && data->null_count != 0) {
- ARROW_ASSIGN_OR_RAISE(null_bitmap, CopyBitmap(pool, data->buffers[0]->data(),
- data->offset, data->length));
- } else {
- null_bitmap = data->buffers[0];
- }
-
- auto out_data = ArrayData::Make(out_type, data->length,
- {null_bitmap, std::move(out_buffer)}, data->null_count);
- out_data->dictionary = dictionary;
- RETURN_NOT_OK(internal::TransposeInts(
- in_index_type, out_index_type, data->GetValues<uint8_t>(1, 0),
- out_data->GetMutableValues<uint8_t>(1, 0), data->offset, out_data->offset,
- data->length, transpose_map));
- return out_data;
-}
-
-} // namespace
-
-Result<std::shared_ptr<Array>> DictionaryArray::Transpose(
- const std::shared_ptr<DataType>& type, const std::shared_ptr<Array>& dictionary,
- const int32_t* transpose_map, MemoryPool* pool) const {
- ARROW_ASSIGN_OR_RAISE(auto transposed,
- TransposeDictIndices(data_, data_->type, type, dictionary->data(),
- transpose_map, pool));
- return MakeArray(std::move(transposed));
-}
-
-// ----------------------------------------------------------------------
-// Dictionary unification
-
-namespace {
-
+// Dictionary transposition
+
+namespace {
+
+inline bool IsTrivialTransposition(const int32_t* transpose_map,
+ int64_t input_dict_size) {
+ for (int64_t i = 0; i < input_dict_size; ++i) {
+ if (transpose_map[i] != i) {
+ return false;
+ }
+ }
+ return true;
+}
+
+Result<std::shared_ptr<ArrayData>> TransposeDictIndices(
+ const std::shared_ptr<ArrayData>& data, const std::shared_ptr<DataType>& in_type,
+ const std::shared_ptr<DataType>& out_type,
+ const std::shared_ptr<ArrayData>& dictionary, const int32_t* transpose_map,
+ MemoryPool* pool) {
+ // Note that in_type may be different from data->type if data is of type ExtensionType
+ if (in_type->id() != Type::DICTIONARY || out_type->id() != Type::DICTIONARY) {
+ return Status::TypeError("Expected dictionary type");
+ }
+ const int64_t in_dict_len = data->dictionary->length;
+ const auto& in_dict_type = checked_cast<const DictionaryType&>(*in_type);
+ const auto& out_dict_type = checked_cast<const DictionaryType&>(*out_type);
+
+ const auto& in_index_type = *in_dict_type.index_type();
+ const auto& out_index_type =
+ checked_cast<const FixedWidthType&>(*out_dict_type.index_type());
+
+ if (in_index_type.id() == out_index_type.id() &&
+ IsTrivialTransposition(transpose_map, in_dict_len)) {
+ // Index type and values will be identical => we can simply reuse
+ // the existing buffers.
+ auto out_data =
+ ArrayData::Make(out_type, data->length, {data->buffers[0], data->buffers[1]},
+ data->null_count, data->offset);
+ out_data->dictionary = dictionary;
+ return out_data;
+ }
+
+ // Default path: compute a buffer of transposed indices.
+ ARROW_ASSIGN_OR_RAISE(
+ auto out_buffer,
+ AllocateBuffer(data->length * (out_index_type.bit_width() / CHAR_BIT), pool));
+
+ // Shift null buffer if the original offset is non-zero
+ std::shared_ptr<Buffer> null_bitmap;
+ if (data->offset != 0 && data->null_count != 0) {
+ ARROW_ASSIGN_OR_RAISE(null_bitmap, CopyBitmap(pool, data->buffers[0]->data(),
+ data->offset, data->length));
+ } else {
+ null_bitmap = data->buffers[0];
+ }
+
+ auto out_data = ArrayData::Make(out_type, data->length,
+ {null_bitmap, std::move(out_buffer)}, data->null_count);
+ out_data->dictionary = dictionary;
+ RETURN_NOT_OK(internal::TransposeInts(
+ in_index_type, out_index_type, data->GetValues<uint8_t>(1, 0),
+ out_data->GetMutableValues<uint8_t>(1, 0), data->offset, out_data->offset,
+ data->length, transpose_map));
+ return out_data;
+}
+
+} // namespace
+
+Result<std::shared_ptr<Array>> DictionaryArray::Transpose(
+ const std::shared_ptr<DataType>& type, const std::shared_ptr<Array>& dictionary,
+ const int32_t* transpose_map, MemoryPool* pool) const {
+ ARROW_ASSIGN_OR_RAISE(auto transposed,
+ TransposeDictIndices(data_, data_->type, type, dictionary->data(),
+ transpose_map, pool));
+ return MakeArray(std::move(transposed));
+}
+
+// ----------------------------------------------------------------------
+// Dictionary unification
+
+namespace {
+
template <typename T>
class DictionaryUnifierImpl : public DictionaryUnifier {
public:
@@ -288,23 +288,23 @@ class DictionaryUnifierImpl : public DictionaryUnifier {
return Status::OK();
}
- Status GetResultWithIndexType(const std::shared_ptr<DataType>& index_type,
- std::shared_ptr<Array>* out_dict) override {
- int64_t dict_length = memo_table_.size();
- if (!internal::IntegersCanFit(Datum(dict_length), *index_type).ok()) {
- return Status::Invalid(
- "These dictionaries cannot be combined. The unified dictionary requires a "
- "larger index type.");
- }
-
- // Build unified dictionary array
- std::shared_ptr<ArrayData> data;
- RETURN_NOT_OK(DictTraits::GetDictionaryArrayData(pool_, value_type_, memo_table_,
- 0 /* start_offset */, &data));
- *out_dict = MakeArray(data);
- return Status::OK();
- }
-
+ Status GetResultWithIndexType(const std::shared_ptr<DataType>& index_type,
+ std::shared_ptr<Array>* out_dict) override {
+ int64_t dict_length = memo_table_.size();
+ if (!internal::IntegersCanFit(Datum(dict_length), *index_type).ok()) {
+ return Status::Invalid(
+ "These dictionaries cannot be combined. The unified dictionary requires a "
+ "larger index type.");
+ }
+
+ // Build unified dictionary array
+ std::shared_ptr<ArrayData> data;
+ RETURN_NOT_OK(DictTraits::GetDictionaryArrayData(pool_, value_type_, memo_table_,
+ 0 /* start_offset */, &data));
+ *out_dict = MakeArray(data);
+ return Status::OK();
+ }
+
private:
MemoryPool* pool_;
std::shared_ptr<DataType> value_type_;
@@ -322,7 +322,7 @@ struct MakeUnifier {
template <typename T>
enable_if_no_memoize<T, Status> Visit(const T&) {
// Default implementation for non-dictionary-supported datatypes
- return Status::NotImplemented("Unification of ", *value_type,
+ return Status::NotImplemented("Unification of ", *value_type,
" dictionaries is not implemented");
}
@@ -333,110 +333,110 @@ struct MakeUnifier {
}
};
-struct RecursiveUnifier {
- MemoryPool* pool;
-
- // Return true if any of the arrays was changed (including descendants)
- Result<bool> Unify(std::shared_ptr<DataType> type, ArrayDataVector* chunks) {
- DCHECK(!chunks->empty());
- bool changed = false;
- std::shared_ptr<DataType> ext_type = nullptr;
-
- if (type->id() == Type::EXTENSION) {
- ext_type = std::move(type);
- type = checked_cast<const ExtensionType&>(*ext_type).storage_type();
- }
-
- // Unify all child dictionaries (if any)
- if (type->num_fields() > 0) {
- ArrayDataVector children(chunks->size());
- for (int i = 0; i < type->num_fields(); ++i) {
- std::transform(chunks->begin(), chunks->end(), children.begin(),
- [i](const std::shared_ptr<ArrayData>& array) {
- return array->child_data[i];
- });
- ARROW_ASSIGN_OR_RAISE(bool child_changed,
- Unify(type->field(i)->type(), &children));
- if (child_changed) {
- // Only do this when unification actually occurred
- for (size_t j = 0; j < chunks->size(); ++j) {
- (*chunks)[j]->child_data[i] = std::move(children[j]);
- }
- changed = true;
- }
- }
+struct RecursiveUnifier {
+ MemoryPool* pool;
+
+ // Return true if any of the arrays was changed (including descendants)
+ Result<bool> Unify(std::shared_ptr<DataType> type, ArrayDataVector* chunks) {
+ DCHECK(!chunks->empty());
+ bool changed = false;
+ std::shared_ptr<DataType> ext_type = nullptr;
+
+ if (type->id() == Type::EXTENSION) {
+ ext_type = std::move(type);
+ type = checked_cast<const ExtensionType&>(*ext_type).storage_type();
}
- // Unify this dictionary
- if (type->id() == Type::DICTIONARY) {
- const auto& dict_type = checked_cast<const DictionaryType&>(*type);
- // XXX Ideally, we should unify dictionaries nested in value_type first,
- // but DictionaryUnifier doesn't supported nested dictionaries anyway,
- // so this will fail.
- ARROW_ASSIGN_OR_RAISE(auto unifier,
- DictionaryUnifier::Make(dict_type.value_type(), this->pool));
- // Unify all dictionary array chunks
- BufferVector transpose_maps(chunks->size());
- for (size_t j = 0; j < chunks->size(); ++j) {
- DCHECK_NE((*chunks)[j]->dictionary, nullptr);
- RETURN_NOT_OK(
- unifier->Unify(*MakeArray((*chunks)[j]->dictionary), &transpose_maps[j]));
- }
- std::shared_ptr<Array> dictionary;
- RETURN_NOT_OK(unifier->GetResultWithIndexType(dict_type.index_type(), &dictionary));
- for (size_t j = 0; j < chunks->size(); ++j) {
- ARROW_ASSIGN_OR_RAISE(
- (*chunks)[j],
- TransposeDictIndices(
- (*chunks)[j], type, type, dictionary->data(),
- reinterpret_cast<const int32_t*>(transpose_maps[j]->data()), this->pool));
- if (ext_type) {
- (*chunks)[j]->type = ext_type;
- }
- }
- changed = true;
- }
-
- return changed;
+ // Unify all child dictionaries (if any)
+ if (type->num_fields() > 0) {
+ ArrayDataVector children(chunks->size());
+ for (int i = 0; i < type->num_fields(); ++i) {
+ std::transform(chunks->begin(), chunks->end(), children.begin(),
+ [i](const std::shared_ptr<ArrayData>& array) {
+ return array->child_data[i];
+ });
+ ARROW_ASSIGN_OR_RAISE(bool child_changed,
+ Unify(type->field(i)->type(), &children));
+ if (child_changed) {
+ // Only do this when unification actually occurred
+ for (size_t j = 0; j < chunks->size(); ++j) {
+ (*chunks)[j]->child_data[i] = std::move(children[j]);
+ }
+ changed = true;
+ }
+ }
+ }
+
+ // Unify this dictionary
+ if (type->id() == Type::DICTIONARY) {
+ const auto& dict_type = checked_cast<const DictionaryType&>(*type);
+ // XXX Ideally, we should unify dictionaries nested in value_type first,
+ // but DictionaryUnifier doesn't support nested dictionaries anyway,
+ // so this will fail.
+ ARROW_ASSIGN_OR_RAISE(auto unifier,
+ DictionaryUnifier::Make(dict_type.value_type(), this->pool));
+ // Unify all dictionary array chunks
+ BufferVector transpose_maps(chunks->size());
+ for (size_t j = 0; j < chunks->size(); ++j) {
+ DCHECK_NE((*chunks)[j]->dictionary, nullptr);
+ RETURN_NOT_OK(
+ unifier->Unify(*MakeArray((*chunks)[j]->dictionary), &transpose_maps[j]));
+ }
+ std::shared_ptr<Array> dictionary;
+ RETURN_NOT_OK(unifier->GetResultWithIndexType(dict_type.index_type(), &dictionary));
+ for (size_t j = 0; j < chunks->size(); ++j) {
+ ARROW_ASSIGN_OR_RAISE(
+ (*chunks)[j],
+ TransposeDictIndices(
+ (*chunks)[j], type, type, dictionary->data(),
+ reinterpret_cast<const int32_t*>(transpose_maps[j]->data()), this->pool));
+ if (ext_type) {
+ (*chunks)[j]->type = ext_type;
+ }
+ }
+ changed = true;
+ }
+
+ return changed;
}
-};
+};
-} // namespace
+} // namespace
-Result<std::unique_ptr<DictionaryUnifier>> DictionaryUnifier::Make(
- std::shared_ptr<DataType> value_type, MemoryPool* pool) {
- MakeUnifier maker(pool, value_type);
- RETURN_NOT_OK(VisitTypeInline(*value_type, &maker));
- return std::move(maker.result);
-}
+Result<std::unique_ptr<DictionaryUnifier>> DictionaryUnifier::Make(
+ std::shared_ptr<DataType> value_type, MemoryPool* pool) {
+ MakeUnifier maker(pool, value_type);
+ RETURN_NOT_OK(VisitTypeInline(*value_type, &maker));
+ return std::move(maker.result);
+}
-Result<std::shared_ptr<ChunkedArray>> DictionaryUnifier::UnifyChunkedArray(
- const std::shared_ptr<ChunkedArray>& array, MemoryPool* pool) {
- if (array->num_chunks() <= 1) {
- return array;
+Result<std::shared_ptr<ChunkedArray>> DictionaryUnifier::UnifyChunkedArray(
+ const std::shared_ptr<ChunkedArray>& array, MemoryPool* pool) {
+ if (array->num_chunks() <= 1) {
+ return array;
}
- ArrayDataVector data_chunks(array->num_chunks());
- std::transform(array->chunks().begin(), array->chunks().end(), data_chunks.begin(),
- [](const std::shared_ptr<Array>& array) { return array->data(); });
- ARROW_ASSIGN_OR_RAISE(bool changed,
- RecursiveUnifier{pool}.Unify(array->type(), &data_chunks));
- if (!changed) {
- return array;
+ ArrayDataVector data_chunks(array->num_chunks());
+ std::transform(array->chunks().begin(), array->chunks().end(), data_chunks.begin(),
+ [](const std::shared_ptr<Array>& array) { return array->data(); });
+ ARROW_ASSIGN_OR_RAISE(bool changed,
+ RecursiveUnifier{pool}.Unify(array->type(), &data_chunks));
+ if (!changed) {
+ return array;
}
- ArrayVector chunks(array->num_chunks());
- std::transform(data_chunks.begin(), data_chunks.end(), chunks.begin(),
- [](const std::shared_ptr<ArrayData>& data) { return MakeArray(data); });
- return std::make_shared<ChunkedArray>(std::move(chunks), array->type());
-}
-
-Result<std::shared_ptr<Table>> DictionaryUnifier::UnifyTable(const Table& table,
- MemoryPool* pool) {
- ChunkedArrayVector columns = table.columns();
- for (auto& col : columns) {
- ARROW_ASSIGN_OR_RAISE(col, DictionaryUnifier::UnifyChunkedArray(col, pool));
+ ArrayVector chunks(array->num_chunks());
+ std::transform(data_chunks.begin(), data_chunks.end(), chunks.begin(),
+ [](const std::shared_ptr<ArrayData>& data) { return MakeArray(data); });
+ return std::make_shared<ChunkedArray>(std::move(chunks), array->type());
+}
+
+Result<std::shared_ptr<Table>> DictionaryUnifier::UnifyTable(const Table& table,
+ MemoryPool* pool) {
+ ChunkedArrayVector columns = table.columns();
+ for (auto& col : columns) {
+ ARROW_ASSIGN_OR_RAISE(col, DictionaryUnifier::UnifyChunkedArray(col, pool));
}
- return Table::Make(table.schema(), std::move(columns), table.num_rows());
+ return Table::Make(table.schema(), std::move(columns), table.num_rows());
}
} // namespace arrow
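
TransposeDictIndices above remaps each stored index i to transpose_map[i], or reuses the existing buffers when the map is the identity and the index types match. The public entry point is DictionaryArray::Transpose(); a sketch, assuming the new dictionary and transpose map were computed elsewhere (e.g. by a DictionaryUnifier):

#include <memory>
#include <vector>

#include "arrow/array/array_dict.h"
#include "arrow/memory_pool.h"
#include "arrow/result.h"

// transpose_map[i] is the index in new_dict of the value old index i referred to.
arrow::Result<std::shared_ptr<arrow::Array>> Remap(
    const arrow::DictionaryArray& array,
    const std::shared_ptr<arrow::Array>& new_dict,
    const std::vector<int32_t>& transpose_map) {
  return array.Transpose(array.type(), new_dict, transpose_map.data(),
                         arrow::default_memory_pool());
}
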
diff --git a/contrib/libs/apache/arrow/cpp/src/arrow/array/array_dict.h b/contrib/libs/apache/arrow/cpp/src/arrow/array/array_dict.h
index eb039331b51..8791eaa07db 100644
--- a/contrib/libs/apache/arrow/cpp/src/arrow/array/array_dict.h
+++ b/contrib/libs/apache/arrow/cpp/src/arrow/array/array_dict.h
@@ -120,61 +120,61 @@ class ARROW_EXPORT DictionaryArray : public Array {
mutable std::shared_ptr<Array> dictionary_;
};
-/// \brief Helper class for incremental dictionary unification
-class ARROW_EXPORT DictionaryUnifier {
- public:
- virtual ~DictionaryUnifier() = default;
-
- /// \brief Construct a DictionaryUnifier
- /// \param[in] value_type the data type of the dictionaries
- /// \param[in] pool MemoryPool to use for memory allocations
- static Result<std::unique_ptr<DictionaryUnifier>> Make(
- std::shared_ptr<DataType> value_type, MemoryPool* pool = default_memory_pool());
-
- /// \brief Unify dictionaries across array chunks
- ///
- /// The dictionaries in the array chunks will be unified, their indices
- /// accordingly transposed.
- ///
- /// Only dictionaries with a primitive value type are currently supported.
- /// However, dictionaries nested inside a more complex type are correctly unified.
- static Result<std::shared_ptr<ChunkedArray>> UnifyChunkedArray(
- const std::shared_ptr<ChunkedArray>& array,
- MemoryPool* pool = default_memory_pool());
-
- /// \brief Unify dictionaries across the chunks of each table column
- ///
- /// The dictionaries in each table column will be unified, their indices
- /// accordingly transposed.
- ///
- /// Only dictionaries with a primitive value type are currently supported.
- /// However, dictionaries nested inside a more complex type are correctly unified.
- static Result<std::shared_ptr<Table>> UnifyTable(
- const Table& table, MemoryPool* pool = default_memory_pool());
-
- /// \brief Append dictionary to the internal memo
- virtual Status Unify(const Array& dictionary) = 0;
-
- /// \brief Append dictionary and compute transpose indices
- /// \param[in] dictionary the dictionary values to unify
- /// \param[out] out_transpose a Buffer containing computed transpose indices
- /// as int32_t values equal in length to the passed dictionary. The value in
- /// each slot corresponds to the new index value for each original index
- /// for a DictionaryArray with the old dictionary
- virtual Status Unify(const Array& dictionary,
- std::shared_ptr<Buffer>* out_transpose) = 0;
-
- /// \brief Return a result DictionaryType with the smallest possible index
- /// type to accommodate the unified dictionary. The unifier cannot be used
- /// after this is called
- virtual Status GetResult(std::shared_ptr<DataType>* out_type,
- std::shared_ptr<Array>* out_dict) = 0;
-
- /// \brief Return a unified dictionary with the given index type. If
- /// the index type is not large enough then an invalid status will be returned.
- /// The unifier cannot be used after this is called
- virtual Status GetResultWithIndexType(const std::shared_ptr<DataType>& index_type,
- std::shared_ptr<Array>* out_dict) = 0;
-};
-
+/// \brief Helper class for incremental dictionary unification
+class ARROW_EXPORT DictionaryUnifier {
+ public:
+ virtual ~DictionaryUnifier() = default;
+
+ /// \brief Construct a DictionaryUnifier
+ /// \param[in] value_type the data type of the dictionaries
+ /// \param[in] pool MemoryPool to use for memory allocations
+ static Result<std::unique_ptr<DictionaryUnifier>> Make(
+ std::shared_ptr<DataType> value_type, MemoryPool* pool = default_memory_pool());
+
+ /// \brief Unify dictionaries across array chunks
+ ///
+ /// The dictionaries in the array chunks will be unified, their indices
+ /// accordingly transposed.
+ ///
+ /// Only dictionaries with a primitive value type are currently supported.
+ /// However, dictionaries nested inside a more complex type are correctly unified.
+ static Result<std::shared_ptr<ChunkedArray>> UnifyChunkedArray(
+ const std::shared_ptr<ChunkedArray>& array,
+ MemoryPool* pool = default_memory_pool());
+
+ /// \brief Unify dictionaries across the chunks of each table column
+ ///
+ /// The dictionaries in each table column will be unified, their indices
+ /// accordingly transposed.
+ ///
+ /// Only dictionaries with a primitive value type are currently supported.
+ /// However, dictionaries nested inside a more complex type are correctly unified.
+ static Result<std::shared_ptr<Table>> UnifyTable(
+ const Table& table, MemoryPool* pool = default_memory_pool());
+
+ /// \brief Append dictionary to the internal memo
+ virtual Status Unify(const Array& dictionary) = 0;
+
+ /// \brief Append dictionary and compute transpose indices
+ /// \param[in] dictionary the dictionary values to unify
+ /// \param[out] out_transpose a Buffer containing computed transpose indices
+ /// as int32_t values equal in length to the passed dictionary. The value in
+ /// each slot corresponds to the new index value for each original index
+ /// for a DictionaryArray with the old dictionary
+ virtual Status Unify(const Array& dictionary,
+ std::shared_ptr<Buffer>* out_transpose) = 0;
+
+ /// \brief Return a result DictionaryType with the smallest possible index
+ /// type to accommodate the unified dictionary. The unifier cannot be used
+ /// after this is called
+ virtual Status GetResult(std::shared_ptr<DataType>* out_type,
+ std::shared_ptr<Array>* out_dict) = 0;
+
+ /// \brief Return a unified dictionary with the given index type. If
+ /// the index type is not large enough then an invalid status will be returned.
+ /// The unifier cannot be used after this is called
+ virtual Status GetResultWithIndexType(const std::shared_ptr<DataType>& index_type,
+ std::shared_ptr<Array>* out_dict) = 0;
+};
+
} // namespace arrow
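
The restored DictionaryUnifier interface is incremental: feed each dictionary through Unify(), then fetch the combined result once. A minimal sketch using only the methods declared above:

#include <memory>

#include "arrow/array/array_dict.h"
#include "arrow/result.h"
#include "arrow/status.h"
#include "arrow/type.h"

arrow::Status UnifyTwo(const arrow::Array& dict_a, const arrow::Array& dict_b) {
  ARROW_ASSIGN_OR_RAISE(auto unifier, arrow::DictionaryUnifier::Make(dict_a.type()));
  ARROW_RETURN_NOT_OK(unifier->Unify(dict_a));
  ARROW_RETURN_NOT_OK(unifier->Unify(dict_b));
  std::shared_ptr<arrow::DataType> out_type;  // smallest index type that fits
  std::shared_ptr<arrow::Array> out_dict;     // union of both dictionaries
  return unifier->GetResult(&out_type, &out_dict);
}
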
diff --git a/contrib/libs/apache/arrow/cpp/src/arrow/array/array_nested.cc b/contrib/libs/apache/arrow/cpp/src/arrow/array/array_nested.cc
index fdbc0eb8f3d..f967127c5f1 100644
--- a/contrib/libs/apache/arrow/cpp/src/arrow/array/array_nested.cc
+++ b/contrib/libs/apache/arrow/cpp/src/arrow/array/array_nested.cc
@@ -70,8 +70,8 @@ Status CleanListOffsets(const Array& offsets, MemoryPool* pool,
ARROW_ASSIGN_OR_RAISE(auto clean_offsets,
AllocateBuffer(num_offsets * sizeof(offset_type), pool));
- // Copy valid bits, ignoring the final offset (since for a length N list array,
- // we have N + 1 offsets)
+ // Copy valid bits, ignoring the final offset (since for a length N list array,
+ // we have N + 1 offsets)
ARROW_ASSIGN_OR_RAISE(
auto clean_valid_bits,
offsets.null_bitmap()->CopySlice(0, BitUtil::BytesForBits(num_offsets - 1)));
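
The comment re-annotated above documents the offsets invariant: a length-N list array carries N + 1 offsets, so slot i spans [offsets[i], offsets[i+1]). A one-liner that relies on it (value_length(i) is the stock shorthand):

#include <cstdint>

#include "arrow/array/array_nested.h"

int64_t SlotLength(const arrow::ListArray& lists, int64_t i) {
  // Difference of adjacent offsets; equivalent to lists.value_length(i).
  return lists.value_offset(i + 1) - lists.value_offset(i);
}
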
diff --git a/contrib/libs/apache/arrow/cpp/src/arrow/array/array_primitive.h b/contrib/libs/apache/arrow/cpp/src/arrow/array/array_primitive.h
index 3b8f769b7dc..b601eb770c3 100644
--- a/contrib/libs/apache/arrow/cpp/src/arrow/array/array_primitive.h
+++ b/contrib/libs/apache/arrow/cpp/src/arrow/array/array_primitive.h
@@ -25,7 +25,7 @@
#include "arrow/array/array_base.h"
#include "arrow/array/data.h"
-#include "arrow/stl_iterator.h"
+#include "arrow/stl_iterator.h"
#include "arrow/type.h"
#include "arrow/type_fwd.h" // IWYU pragma: export
#include "arrow/type_traits.h"
@@ -41,7 +41,7 @@ class NumericArray : public PrimitiveArray {
public:
using TypeClass = TYPE;
using value_type = typename TypeClass::c_type;
- using IteratorType = stl::ArrayIterator<NumericArray<TYPE>>;
+ using IteratorType = stl::ArrayIterator<NumericArray<TYPE>>;
explicit NumericArray(const std::shared_ptr<ArrayData>& data) : PrimitiveArray(data) {}
@@ -64,10 +64,10 @@ class NumericArray : public PrimitiveArray {
// For API compatibility with BinaryArray etc.
value_type GetView(int64_t i) const { return Value(i); }
- IteratorType begin() const { return IteratorType(*this); }
-
- IteratorType end() const { return IteratorType(*this, length()); }
-
+ IteratorType begin() const { return IteratorType(*this); }
+
+ IteratorType end() const { return IteratorType(*this, length()); }
+
protected:
using PrimitiveArray::PrimitiveArray;
};
@@ -76,7 +76,7 @@ class NumericArray : public PrimitiveArray {
class ARROW_EXPORT BooleanArray : public PrimitiveArray {
public:
using TypeClass = BooleanType;
- using IteratorType = stl::ArrayIterator<BooleanArray>;
+ using IteratorType = stl::ArrayIterator<BooleanArray>;
explicit BooleanArray(const std::shared_ptr<ArrayData>& data);
@@ -99,10 +99,10 @@ class ARROW_EXPORT BooleanArray : public PrimitiveArray {
/// values. Result is not cached.
int64_t true_count() const;
- IteratorType begin() const { return IteratorType(*this); }
-
- IteratorType end() const { return IteratorType(*this, length()); }
-
+ IteratorType begin() const { return IteratorType(*this); }
+
+ IteratorType end() const { return IteratorType(*this, length()); }
+
protected:
using PrimitiveArray::PrimitiveArray;
};
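
NumericArray and BooleanArray get the same stl::ArrayIterator treatment as the binary arrays. A sketch summing the valid slots of an Int64Array, under the same assumption that iteration yields one optional per slot:

#include <cstdint>

#include "arrow/array/array_primitive.h"

int64_t SumValid(const arrow::Int64Array& values) {
  int64_t total = 0;
  for (auto slot : values) {  // util::optional<int64_t>; empty means null
    if (slot.has_value()) {
      total += *slot;
    }
  }
  return total;
}
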
diff --git a/contrib/libs/apache/arrow/cpp/src/arrow/array/builder_adaptive.h b/contrib/libs/apache/arrow/cpp/src/arrow/array/builder_adaptive.h
index 0c2782e7466..c0df797256d 100644
--- a/contrib/libs/apache/arrow/cpp/src/arrow/array/builder_adaptive.h
+++ b/contrib/libs/apache/arrow/cpp/src/arrow/array/builder_adaptive.h
@@ -64,26 +64,26 @@ class ARROW_EXPORT AdaptiveIntBuilderBase : public ArrayBuilder {
return Status::OK();
}
- Status AppendEmptyValues(int64_t length) final {
- ARROW_RETURN_NOT_OK(CommitPendingData());
- ARROW_RETURN_NOT_OK(Reserve(length));
- memset(data_->mutable_data() + length_ * int_size_, 0, int_size_ * length);
- UnsafeSetNotNull(length);
- return Status::OK();
- }
-
- Status AppendEmptyValue() final {
- pending_data_[pending_pos_] = 0;
- pending_valid_[pending_pos_] = 1;
- ++pending_pos_;
- ++length_;
-
- if (ARROW_PREDICT_FALSE(pending_pos_ >= pending_size_)) {
- return CommitPendingData();
- }
- return Status::OK();
- }
-
+ Status AppendEmptyValues(int64_t length) final {
+ ARROW_RETURN_NOT_OK(CommitPendingData());
+ ARROW_RETURN_NOT_OK(Reserve(length));
+ memset(data_->mutable_data() + length_ * int_size_, 0, int_size_ * length);
+ UnsafeSetNotNull(length);
+ return Status::OK();
+ }
+
+ Status AppendEmptyValue() final {
+ pending_data_[pending_pos_] = 0;
+ pending_valid_[pending_pos_] = 1;
+ ++pending_pos_;
+ ++length_;
+
+ if (ARROW_PREDICT_FALSE(pending_pos_ >= pending_size_)) {
+ return CommitPendingData();
+ }
+ return Status::OK();
+ }
+
void Reset() override;
Status Resize(int64_t capacity) override;
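
AdaptiveIntBuilderBase::AppendEmptyValues(), restored above, zero-fills length slots at the current integer width and marks them valid, while AppendEmptyValue() goes through the pending-value batch. A usage sketch with the concrete AdaptiveIntBuilder (the appended values are illustrative):

#include <memory>

#include "arrow/array/builder_adaptive.h"
#include "arrow/status.h"

arrow::Status BuildAdaptive(std::shared_ptr<arrow::Array>* out) {
  arrow::AdaptiveIntBuilder builder;  // starts narrow, widens as values demand
  ARROW_RETURN_NOT_OK(builder.Append(1));
  ARROW_RETURN_NOT_OK(builder.AppendEmptyValues(3));  // three valid zero slots
  return builder.Finish(out);
}
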
diff --git a/contrib/libs/apache/arrow/cpp/src/arrow/array/builder_base.cc b/contrib/libs/apache/arrow/cpp/src/arrow/array/builder_base.cc
index ff11984790c..c892e3d664b 100644
--- a/contrib/libs/apache/arrow/cpp/src/arrow/array/builder_base.cc
+++ b/contrib/libs/apache/arrow/cpp/src/arrow/array/builder_base.cc
@@ -24,11 +24,11 @@
#include "arrow/array/data.h"
#include "arrow/array/util.h"
#include "arrow/buffer.h"
-#include "arrow/builder.h"
-#include "arrow/scalar.h"
+#include "arrow/builder.h"
+#include "arrow/scalar.h"
#include "arrow/status.h"
#include "arrow/util/logging.h"
-#include "arrow/visitor_inline.h"
+#include "arrow/visitor_inline.h"
namespace arrow {
@@ -95,162 +95,162 @@ Status ArrayBuilder::Advance(int64_t elements) {
return null_bitmap_builder_.Advance(elements);
}
-namespace {
-struct AppendScalarImpl {
- template <typename T>
- enable_if_t<has_c_type<T>::value || is_decimal_type<T>::value ||
- is_fixed_size_binary_type<T>::value,
- Status>
- Visit(const T&) {
- auto builder = internal::checked_cast<typename TypeTraits<T>::BuilderType*>(builder_);
- RETURN_NOT_OK(builder->Reserve(n_repeats_ * (scalars_end_ - scalars_begin_)));
-
- for (int64_t i = 0; i < n_repeats_; i++) {
- for (const std::shared_ptr<Scalar>* raw = scalars_begin_; raw != scalars_end_;
- raw++) {
- auto scalar =
- internal::checked_cast<const typename TypeTraits<T>::ScalarType*>(raw->get());
- if (scalar->is_valid) {
- builder->UnsafeAppend(scalar->value);
- } else {
- builder->UnsafeAppendNull();
- }
- }
- }
- return Status::OK();
- }
-
- template <typename T>
- enable_if_base_binary<T, Status> Visit(const T&) {
- int64_t data_size = 0;
- for (const std::shared_ptr<Scalar>* raw = scalars_begin_; raw != scalars_end_;
- raw++) {
- auto scalar =
- internal::checked_cast<const typename TypeTraits<T>::ScalarType*>(raw->get());
- if (scalar->is_valid) {
- data_size += scalar->value->size();
- }
- }
-
- auto builder = internal::checked_cast<typename TypeTraits<T>::BuilderType*>(builder_);
- RETURN_NOT_OK(builder->Reserve(n_repeats_ * (scalars_end_ - scalars_begin_)));
- RETURN_NOT_OK(builder->ReserveData(n_repeats_ * data_size));
-
- for (int64_t i = 0; i < n_repeats_; i++) {
- for (const std::shared_ptr<Scalar>* raw = scalars_begin_; raw != scalars_end_;
- raw++) {
- auto scalar =
- internal::checked_cast<const typename TypeTraits<T>::ScalarType*>(raw->get());
- if (scalar->is_valid) {
- builder->UnsafeAppend(util::string_view{*scalar->value});
- } else {
- builder->UnsafeAppendNull();
- }
- }
- }
- return Status::OK();
- }
-
- template <typename T>
- enable_if_list_like<T, Status> Visit(const T&) {
- auto builder = internal::checked_cast<typename TypeTraits<T>::BuilderType*>(builder_);
- int64_t num_children = 0;
- for (const std::shared_ptr<Scalar>* scalar = scalars_begin_; scalar != scalars_end_;
- scalar++) {
- if (!(*scalar)->is_valid) continue;
- num_children +=
- internal::checked_cast<const BaseListScalar&>(**scalar).value->length();
- }
- RETURN_NOT_OK(builder->value_builder()->Reserve(num_children * n_repeats_));
-
- for (int64_t i = 0; i < n_repeats_; i++) {
- for (const std::shared_ptr<Scalar>* scalar = scalars_begin_; scalar != scalars_end_;
- scalar++) {
- if ((*scalar)->is_valid) {
- RETURN_NOT_OK(builder->Append());
- const Array& list =
- *internal::checked_cast<const BaseListScalar&>(**scalar).value;
- for (int64_t i = 0; i < list.length(); i++) {
- ARROW_ASSIGN_OR_RAISE(auto scalar, list.GetScalar(i));
- RETURN_NOT_OK(builder->value_builder()->AppendScalar(*scalar));
- }
- } else {
- RETURN_NOT_OK(builder_->AppendNull());
- }
- }
- }
- return Status::OK();
- }
-
- Status Visit(const StructType& type) {
- auto* builder = internal::checked_cast<StructBuilder*>(builder_);
- auto count = n_repeats_ * (scalars_end_ - scalars_begin_);
- RETURN_NOT_OK(builder->Reserve(count));
- for (int field_index = 0; field_index < type.num_fields(); ++field_index) {
- RETURN_NOT_OK(builder->field_builder(field_index)->Reserve(count));
- }
- for (int64_t i = 0; i < n_repeats_; i++) {
- for (const std::shared_ptr<Scalar>* s = scalars_begin_; s != scalars_end_; s++) {
- const auto& scalar = internal::checked_cast<const StructScalar&>(**s);
- for (int field_index = 0; field_index < type.num_fields(); ++field_index) {
- if (!scalar.is_valid || !scalar.value[field_index]) {
- RETURN_NOT_OK(builder->field_builder(field_index)->AppendNull());
- } else {
- RETURN_NOT_OK(builder->field_builder(field_index)
- ->AppendScalar(*scalar.value[field_index]));
- }
- }
- RETURN_NOT_OK(builder->Append(scalar.is_valid));
- }
- }
- return Status::OK();
- }
-
- Status Visit(const DataType& type) {
- return Status::NotImplemented("AppendScalar for type ", type);
- }
-
- Status Convert() { return VisitTypeInline(*(*scalars_begin_)->type, this); }
-
- const std::shared_ptr<Scalar>* scalars_begin_;
- const std::shared_ptr<Scalar>* scalars_end_;
- int64_t n_repeats_;
- ArrayBuilder* builder_;
-};
-} // namespace
-
-Status ArrayBuilder::AppendScalar(const Scalar& scalar) {
- if (!scalar.type->Equals(type())) {
- return Status::Invalid("Cannot append scalar of type ", scalar.type->ToString(),
- " to builder for type ", type()->ToString());
- }
- std::shared_ptr<Scalar> shared{const_cast<Scalar*>(&scalar), [](Scalar*) {}};
- return AppendScalarImpl{&shared, &shared + 1, /*n_repeats=*/1, this}.Convert();
-}
-
-Status ArrayBuilder::AppendScalar(const Scalar& scalar, int64_t n_repeats) {
- if (!scalar.type->Equals(type())) {
- return Status::Invalid("Cannot append scalar of type ", scalar.type->ToString(),
- " to builder for type ", type()->ToString());
- }
- std::shared_ptr<Scalar> shared{const_cast<Scalar*>(&scalar), [](Scalar*) {}};
- return AppendScalarImpl{&shared, &shared + 1, n_repeats, this}.Convert();
-}
-
-Status ArrayBuilder::AppendScalars(const ScalarVector& scalars) {
- if (scalars.empty()) return Status::OK();
- const auto ty = type();
- for (const auto& scalar : scalars) {
- if (!scalar->type->Equals(ty)) {
- return Status::Invalid("Cannot append scalar of type ", scalar->type->ToString(),
- " to builder for type ", type()->ToString());
- }
- }
- return AppendScalarImpl{scalars.data(), scalars.data() + scalars.size(),
- /*n_repeats=*/1, this}
- .Convert();
-}
-
+namespace {
+struct AppendScalarImpl {
+ template <typename T>
+ enable_if_t<has_c_type<T>::value || is_decimal_type<T>::value ||
+ is_fixed_size_binary_type<T>::value,
+ Status>
+ Visit(const T&) {
+ auto builder = internal::checked_cast<typename TypeTraits<T>::BuilderType*>(builder_);
+ RETURN_NOT_OK(builder->Reserve(n_repeats_ * (scalars_end_ - scalars_begin_)));
+
+ for (int64_t i = 0; i < n_repeats_; i++) {
+ for (const std::shared_ptr<Scalar>* raw = scalars_begin_; raw != scalars_end_;
+ raw++) {
+ auto scalar =
+ internal::checked_cast<const typename TypeTraits<T>::ScalarType*>(raw->get());
+ if (scalar->is_valid) {
+ builder->UnsafeAppend(scalar->value);
+ } else {
+ builder->UnsafeAppendNull();
+ }
+ }
+ }
+ return Status::OK();
+ }
+
+ template <typename T>
+ enable_if_base_binary<T, Status> Visit(const T&) {
+ int64_t data_size = 0;
+ for (const std::shared_ptr<Scalar>* raw = scalars_begin_; raw != scalars_end_;
+ raw++) {
+ auto scalar =
+ internal::checked_cast<const typename TypeTraits<T>::ScalarType*>(raw->get());
+ if (scalar->is_valid) {
+ data_size += scalar->value->size();
+ }
+ }
+
+ auto builder = internal::checked_cast<typename TypeTraits<T>::BuilderType*>(builder_);
+ RETURN_NOT_OK(builder->Reserve(n_repeats_ * (scalars_end_ - scalars_begin_)));
+ RETURN_NOT_OK(builder->ReserveData(n_repeats_ * data_size));
+
+ for (int64_t i = 0; i < n_repeats_; i++) {
+ for (const std::shared_ptr<Scalar>* raw = scalars_begin_; raw != scalars_end_;
+ raw++) {
+ auto scalar =
+ internal::checked_cast<const typename TypeTraits<T>::ScalarType*>(raw->get());
+ if (scalar->is_valid) {
+ builder->UnsafeAppend(util::string_view{*scalar->value});
+ } else {
+ builder->UnsafeAppendNull();
+ }
+ }
+ }
+ return Status::OK();
+ }
+
+ template <typename T>
+ enable_if_list_like<T, Status> Visit(const T&) {
+ auto builder = internal::checked_cast<typename TypeTraits<T>::BuilderType*>(builder_);
+ int64_t num_children = 0;
+ for (const std::shared_ptr<Scalar>* scalar = scalars_begin_; scalar != scalars_end_;
+ scalar++) {
+ if (!(*scalar)->is_valid) continue;
+ num_children +=
+ internal::checked_cast<const BaseListScalar&>(**scalar).value->length();
+ }
+ RETURN_NOT_OK(builder->value_builder()->Reserve(num_children * n_repeats_));
+
+ for (int64_t i = 0; i < n_repeats_; i++) {
+ for (const std::shared_ptr<Scalar>* scalar = scalars_begin_; scalar != scalars_end_;
+ scalar++) {
+ if ((*scalar)->is_valid) {
+ RETURN_NOT_OK(builder->Append());
+ const Array& list =
+ *internal::checked_cast<const BaseListScalar&>(**scalar).value;
+ for (int64_t i = 0; i < list.length(); i++) {
+ ARROW_ASSIGN_OR_RAISE(auto scalar, list.GetScalar(i));
+ RETURN_NOT_OK(builder->value_builder()->AppendScalar(*scalar));
+ }
+ } else {
+ RETURN_NOT_OK(builder_->AppendNull());
+ }
+ }
+ }
+ return Status::OK();
+ }
+
+ Status Visit(const StructType& type) {
+ auto* builder = internal::checked_cast<StructBuilder*>(builder_);
+ auto count = n_repeats_ * (scalars_end_ - scalars_begin_);
+ RETURN_NOT_OK(builder->Reserve(count));
+ for (int field_index = 0; field_index < type.num_fields(); ++field_index) {
+ RETURN_NOT_OK(builder->field_builder(field_index)->Reserve(count));
+ }
+ for (int64_t i = 0; i < n_repeats_; i++) {
+ for (const std::shared_ptr<Scalar>* s = scalars_begin_; s != scalars_end_; s++) {
+ const auto& scalar = internal::checked_cast<const StructScalar&>(**s);
+ for (int field_index = 0; field_index < type.num_fields(); ++field_index) {
+ if (!scalar.is_valid || !scalar.value[field_index]) {
+ RETURN_NOT_OK(builder->field_builder(field_index)->AppendNull());
+ } else {
+ RETURN_NOT_OK(builder->field_builder(field_index)
+ ->AppendScalar(*scalar.value[field_index]));
+ }
+ }
+ RETURN_NOT_OK(builder->Append(scalar.is_valid));
+ }
+ }
+ return Status::OK();
+ }
+
+ Status Visit(const DataType& type) {
+ return Status::NotImplemented("AppendScalar for type ", type);
+ }
+
+ Status Convert() { return VisitTypeInline(*(*scalars_begin_)->type, this); }
+
+ const std::shared_ptr<Scalar>* scalars_begin_;
+ const std::shared_ptr<Scalar>* scalars_end_;
+ int64_t n_repeats_;
+ ArrayBuilder* builder_;
+};
+} // namespace
+
+Status ArrayBuilder::AppendScalar(const Scalar& scalar) {
+ if (!scalar.type->Equals(type())) {
+ return Status::Invalid("Cannot append scalar of type ", scalar.type->ToString(),
+ " to builder for type ", type()->ToString());
+ }
+ std::shared_ptr<Scalar> shared{const_cast<Scalar*>(&scalar), [](Scalar*) {}};
+ return AppendScalarImpl{&shared, &shared + 1, /*n_repeats=*/1, this}.Convert();
+}
+
+Status ArrayBuilder::AppendScalar(const Scalar& scalar, int64_t n_repeats) {
+ if (!scalar.type->Equals(type())) {
+ return Status::Invalid("Cannot append scalar of type ", scalar.type->ToString(),
+ " to builder for type ", type()->ToString());
+ }
+ std::shared_ptr<Scalar> shared{const_cast<Scalar*>(&scalar), [](Scalar*) {}};
+ return AppendScalarImpl{&shared, &shared + 1, n_repeats, this}.Convert();
+}
+
+Status ArrayBuilder::AppendScalars(const ScalarVector& scalars) {
+ if (scalars.empty()) return Status::OK();
+ const auto ty = type();
+ for (const auto& scalar : scalars) {
+ if (!scalar->type->Equals(ty)) {
+ return Status::Invalid("Cannot append scalar of type ", scalar->type->ToString(),
+ " to builder for type ", type()->ToString());
+ }
+ }
+ return AppendScalarImpl{scalars.data(), scalars.data() + scalars.size(),
+ /*n_repeats=*/1, this}
+ .Convert();
+}
+
Status ArrayBuilder::Finish(std::shared_ptr<Array>* out) {
std::shared_ptr<ArrayData> internal_data;
RETURN_NOT_OK(FinishInternal(&internal_data));
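
AppendScalarImpl above does no type checking itself; the AppendScalar wrappers reject scalars whose type differs from the builder's, then visit by type to bulk-reserve and append. A caller sketch with a primitive builder (the value 7 is illustrative):

#include <memory>

#include "arrow/array/builder_primitive.h"
#include "arrow/scalar.h"
#include "arrow/status.h"

arrow::Status AppendRepeated(std::shared_ptr<arrow::Array>* out) {
  arrow::Int32Builder builder;
  arrow::Int32Scalar value(7);
  // Appends four copies of 7; fails with Status::Invalid on a type mismatch.
  ARROW_RETURN_NOT_OK(builder.AppendScalar(value, /*n_repeats=*/4));
  return builder.Finish(out);
}
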
diff --git a/contrib/libs/apache/arrow/cpp/src/arrow/array/builder_base.h b/contrib/libs/apache/arrow/cpp/src/arrow/array/builder_base.h
index 7a1ad81998a..905b3c1b491 100644
--- a/contrib/libs/apache/arrow/cpp/src/arrow/array/builder_base.h
+++ b/contrib/libs/apache/arrow/cpp/src/arrow/array/builder_base.h
@@ -29,7 +29,7 @@
#include "arrow/buffer.h"
#include "arrow/buffer_builder.h"
#include "arrow/status.h"
-#include "arrow/type_fwd.h"
+#include "arrow/type_fwd.h"
#include "arrow/util/macros.h"
#include "arrow/util/visibility.h"
@@ -51,7 +51,7 @@ class ARROW_EXPORT ArrayBuilder {
explicit ArrayBuilder(MemoryPool* pool) : pool_(pool), null_bitmap_builder_(pool) {}
virtual ~ArrayBuilder() = default;
- ARROW_DEFAULT_MOVE_AND_ASSIGN(ArrayBuilder);
+ ARROW_DEFAULT_MOVE_AND_ASSIGN(ArrayBuilder);
/// For nested types. Since the objects are owned by this class instance, we
/// skip shared pointers and just return a raw pointer
@@ -98,30 +98,30 @@ class ARROW_EXPORT ArrayBuilder {
/// Reset the builder.
virtual void Reset();
- /// \brief Append a null value to builder
+ /// \brief Append a null value to builder
virtual Status AppendNull() = 0;
- /// \brief Append a number of null values to builder
+ /// \brief Append a number of null values to builder
virtual Status AppendNulls(int64_t length) = 0;
- /// \brief Append a non-null value to builder
- ///
- /// The appended value is an implementation detail, but the corresponding
- /// memory slot is guaranteed to be initialized.
- /// This method is useful when appending a null value to a parent nested type.
- virtual Status AppendEmptyValue() = 0;
-
- /// \brief Append a number of non-null values to builder
- ///
- /// The appended values are an implementation detail, but the corresponding
- /// memory slot is guaranteed to be initialized.
- /// This method is useful when appending null values to a parent nested type.
- virtual Status AppendEmptyValues(int64_t length) = 0;
-
- /// \brief Append a value from a scalar
- Status AppendScalar(const Scalar& scalar);
- Status AppendScalar(const Scalar& scalar, int64_t n_repeats);
- Status AppendScalars(const ScalarVector& scalars);
-
+ /// \brief Append a non-null value to builder
+ ///
+ /// The appended value is an implementation detail, but the corresponding
+ /// memory slot is guaranteed to be initialized.
+ /// This method is useful when appending a null value to a parent nested type.
+ virtual Status AppendEmptyValue() = 0;
+
+ /// \brief Append a number of non-null values to builder
+ ///
+ /// The appended values are an implementation detail, but the corresponding
+ /// memory slot is guaranteed to be initialized.
+ /// This method is useful when appending null values to a parent nested type.
+ virtual Status AppendEmptyValues(int64_t length) = 0;
+
+ /// \brief Append a value from a scalar
+ Status AppendScalar(const Scalar& scalar);
+ Status AppendScalar(const Scalar& scalar, int64_t n_repeats);
+ Status AppendScalars(const ScalarVector& scalars);
+
/// For cases where raw data was memcpy'd into the internal buffers, allows us
/// to advance the length of the builder. It is your responsibility to use
/// this function responsibly.
@@ -253,24 +253,24 @@ class ARROW_EXPORT ArrayBuilder {
ARROW_DISALLOW_COPY_AND_ASSIGN(ArrayBuilder);
};
-/// \brief Construct an empty ArrayBuilder corresponding to the data
-/// type
-/// \param[in] pool the MemoryPool to use for allocations
-/// \param[in] type the data type to create the builder for
-/// \param[out] out the created ArrayBuilder
-ARROW_EXPORT
-Status MakeBuilder(MemoryPool* pool, const std::shared_ptr<DataType>& type,
- std::unique_ptr<ArrayBuilder>* out);
-
-/// \brief Construct an empty DictionaryBuilder initialized optionally
-/// with a pre-existing dictionary
-/// \param[in] pool the MemoryPool to use for allocations
-/// \param[in] type the dictionary type to create the builder for
-/// \param[in] dictionary the initial dictionary, if any. May be nullptr
-/// \param[out] out the created ArrayBuilder
-ARROW_EXPORT
-Status MakeDictionaryBuilder(MemoryPool* pool, const std::shared_ptr<DataType>& type,
- const std::shared_ptr<Array>& dictionary,
- std::unique_ptr<ArrayBuilder>* out);
-
+/// \brief Construct an empty ArrayBuilder corresponding to the data
+/// type
+/// \param[in] pool the MemoryPool to use for allocations
+/// \param[in] type the data type to create the builder for
+/// \param[out] out the created ArrayBuilder
+ARROW_EXPORT
+Status MakeBuilder(MemoryPool* pool, const std::shared_ptr<DataType>& type,
+ std::unique_ptr<ArrayBuilder>* out);
+
+/// \brief Construct an empty DictionaryBuilder initialized optionally
+/// with a pre-existing dictionary
+/// \param[in] pool the MemoryPool to use for allocations
+/// \param[in] type the dictionary type to create the builder for
+/// \param[in] dictionary the initial dictionary, if any. May be nullptr
+/// \param[out] out the created ArrayBuilder
+ARROW_EXPORT
+Status MakeDictionaryBuilder(MemoryPool* pool, const std::shared_ptr<DataType>& type,
+ const std::shared_ptr<Array>& dictionary,
+ std::unique_ptr<ArrayBuilder>* out);
+
} // namespace arrow
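
MakeBuilder, declared above, is the factory to reach for when the data type is only known at runtime. A sketch that builds a single-null array of an arbitrary type:

#include <memory>

#include "arrow/array/builder_base.h"
#include "arrow/memory_pool.h"
#include "arrow/status.h"
#include "arrow/type.h"

arrow::Status BuildForType(const std::shared_ptr<arrow::DataType>& type,
                           std::shared_ptr<arrow::Array>* out) {
  std::unique_ptr<arrow::ArrayBuilder> builder;
  ARROW_RETURN_NOT_OK(arrow::MakeBuilder(arrow::default_memory_pool(), type, &builder));
  ARROW_RETURN_NOT_OK(builder->AppendNull());
  return builder->Finish(out);
}
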
diff --git a/contrib/libs/apache/arrow/cpp/src/arrow/array/builder_binary.cc b/contrib/libs/apache/arrow/cpp/src/arrow/array/builder_binary.cc
index 26d6a7129f6..6822dc89903 100644
--- a/contrib/libs/apache/arrow/cpp/src/arrow/array/builder_binary.cc
+++ b/contrib/libs/apache/arrow/cpp/src/arrow/array/builder_binary.cc
@@ -73,20 +73,20 @@ Status FixedSizeBinaryBuilder::AppendNulls(int64_t length) {
return Status::OK();
}
-Status FixedSizeBinaryBuilder::AppendEmptyValue() {
- RETURN_NOT_OK(Reserve(1));
- UnsafeAppendToBitmap(true);
- byte_builder_.UnsafeAppend(/*num_copies=*/byte_width_, 0);
- return Status::OK();
-}
-
-Status FixedSizeBinaryBuilder::AppendEmptyValues(int64_t length) {
- RETURN_NOT_OK(Reserve(length));
- UnsafeAppendToBitmap(length, true);
- byte_builder_.UnsafeAppend(/*num_copies=*/length * byte_width_, 0);
- return Status::OK();
-}
-
+Status FixedSizeBinaryBuilder::AppendEmptyValue() {
+ RETURN_NOT_OK(Reserve(1));
+ UnsafeAppendToBitmap(true);
+ byte_builder_.UnsafeAppend(/*num_copies=*/byte_width_, 0);
+ return Status::OK();
+}
+
+Status FixedSizeBinaryBuilder::AppendEmptyValues(int64_t length) {
+ RETURN_NOT_OK(Reserve(length));
+ UnsafeAppendToBitmap(length, true);
+ byte_builder_.UnsafeAppend(/*num_copies=*/length * byte_width_, 0);
+ return Status::OK();
+}
+
void FixedSizeBinaryBuilder::Reset() {
ArrayBuilder::Reset();
byte_builder_.Reset();
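
The restored FixedSizeBinaryBuilder::AppendEmptyValue() writes byte_width_ zero bytes and marks the slot valid; AppendNull() writes the same zeros but marks it null. A sketch contrasting the two (the 4-byte width and contents are illustrative):

#include <memory>

#include "arrow/array/builder_binary.h"
#include "arrow/status.h"
#include "arrow/type.h"

arrow::Status BuildFixed(std::shared_ptr<arrow::Array>* out) {
  arrow::FixedSizeBinaryBuilder builder(arrow::fixed_size_binary(4));
  ARROW_RETURN_NOT_OK(builder.Append("abcd"));      // valid, user bytes
  ARROW_RETURN_NOT_OK(builder.AppendEmptyValue());  // valid, four zero bytes
  ARROW_RETURN_NOT_OK(builder.AppendNull());        // null, four zero bytes
  return builder.Finish(out);
}
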
diff --git a/contrib/libs/apache/arrow/cpp/src/arrow/array/builder_binary.h b/contrib/libs/apache/arrow/cpp/src/arrow/array/builder_binary.h
index 346e90d25a0..62edc69fb8e 100644
--- a/contrib/libs/apache/arrow/cpp/src/arrow/array/builder_binary.h
+++ b/contrib/libs/apache/arrow/cpp/src/arrow/array/builder_binary.h
@@ -61,7 +61,7 @@ class BaseBinaryBuilder : public ArrayBuilder {
ARROW_RETURN_NOT_OK(AppendNextOffset());
// Safety check for UBSAN.
if (ARROW_PREDICT_TRUE(length > 0)) {
- ARROW_RETURN_NOT_OK(ValidateOverflow(length));
+ ARROW_RETURN_NOT_OK(ValidateOverflow(length));
ARROW_RETURN_NOT_OK(value_data_builder_.Append(value, length));
}
@@ -77,23 +77,23 @@ class BaseBinaryBuilder : public ArrayBuilder {
return Append(value.data(), static_cast<offset_type>(value.size()));
}
- /// Extend the last appended value by appending more data at the end
- ///
- /// Unlike Append, this does not create a new offset.
- Status ExtendCurrent(const uint8_t* value, offset_type length) {
- // Safety check for UBSAN.
- if (ARROW_PREDICT_TRUE(length > 0)) {
- ARROW_RETURN_NOT_OK(ValidateOverflow(length));
- ARROW_RETURN_NOT_OK(value_data_builder_.Append(value, length));
- }
- return Status::OK();
- }
-
- Status ExtendCurrent(util::string_view value) {
- return ExtendCurrent(reinterpret_cast<const uint8_t*>(value.data()),
- static_cast<offset_type>(value.size()));
- }
-
+ /// Extend the last appended value by appending more data at the end
+ ///
+ /// Unlike Append, this does not create a new offset.
+ Status ExtendCurrent(const uint8_t* value, offset_type length) {
+ // Safety check for UBSAN.
+ if (ARROW_PREDICT_TRUE(length > 0)) {
+ ARROW_RETURN_NOT_OK(ValidateOverflow(length));
+ ARROW_RETURN_NOT_OK(value_data_builder_.Append(value, length));
+ }
+ return Status::OK();
+ }
+
+ Status ExtendCurrent(util::string_view value) {
+ return ExtendCurrent(reinterpret_cast<const uint8_t*>(value.data()),
+ static_cast<offset_type>(value.size()));
+ }
+
Status AppendNulls(int64_t length) final {
const int64_t num_bytes = value_data_builder_.length();
ARROW_RETURN_NOT_OK(Reserve(length));
@@ -111,23 +111,23 @@ class BaseBinaryBuilder : public ArrayBuilder {
return Status::OK();
}
- Status AppendEmptyValue() final {
- ARROW_RETURN_NOT_OK(AppendNextOffset());
- ARROW_RETURN_NOT_OK(Reserve(1));
- UnsafeAppendToBitmap(true);
- return Status::OK();
- }
-
- Status AppendEmptyValues(int64_t length) final {
- const int64_t num_bytes = value_data_builder_.length();
- ARROW_RETURN_NOT_OK(Reserve(length));
- for (int64_t i = 0; i < length; ++i) {
- offsets_builder_.UnsafeAppend(static_cast<offset_type>(num_bytes));
- }
- UnsafeAppendToBitmap(length, true);
- return Status::OK();
- }
-
+ Status AppendEmptyValue() final {
+ ARROW_RETURN_NOT_OK(AppendNextOffset());
+ ARROW_RETURN_NOT_OK(Reserve(1));
+ UnsafeAppendToBitmap(true);
+ return Status::OK();
+ }
+
+ Status AppendEmptyValues(int64_t length) final {
+ const int64_t num_bytes = value_data_builder_.length();
+ ARROW_RETURN_NOT_OK(Reserve(length));
+ for (int64_t i = 0; i < length; ++i) {
+ offsets_builder_.UnsafeAppend(static_cast<offset_type>(num_bytes));
+ }
+ UnsafeAppendToBitmap(length, true);
+ return Status::OK();
+ }
+
/// \brief Append without checking capacity
///
/// Offsets and data should have been presized using Reserve() and
@@ -150,28 +150,28 @@ class BaseBinaryBuilder : public ArrayBuilder {
UnsafeAppend(value.data(), static_cast<offset_type>(value.size()));
}
- /// Like ExtendCurrent, but do not check capacity
- void UnsafeExtendCurrent(const uint8_t* value, offset_type length) {
- value_data_builder_.UnsafeAppend(value, length);
- }
-
- void UnsafeExtendCurrent(util::string_view value) {
- UnsafeExtendCurrent(reinterpret_cast<const uint8_t*>(value.data()),
- static_cast<offset_type>(value.size()));
- }
-
+ /// Like ExtendCurrent, but do not check capacity
+ void UnsafeExtendCurrent(const uint8_t* value, offset_type length) {
+ value_data_builder_.UnsafeAppend(value, length);
+ }
+
+ void UnsafeExtendCurrent(util::string_view value) {
+ UnsafeExtendCurrent(reinterpret_cast<const uint8_t*>(value.data()),
+ static_cast<offset_type>(value.size()));
+ }
+
void UnsafeAppendNull() {
const int64_t num_bytes = value_data_builder_.length();
offsets_builder_.UnsafeAppend(static_cast<offset_type>(num_bytes));
UnsafeAppendToBitmap(false);
}
- void UnsafeAppendEmptyValue() {
- const int64_t num_bytes = value_data_builder_.length();
- offsets_builder_.UnsafeAppend(static_cast<offset_type>(num_bytes));
- UnsafeAppendToBitmap(true);
- }
-
+ void UnsafeAppendEmptyValue() {
+ const int64_t num_bytes = value_data_builder_.length();
+ offsets_builder_.UnsafeAppend(static_cast<offset_type>(num_bytes));
+ UnsafeAppendToBitmap(true);
+ }
+
/// \brief Append a sequence of strings in one shot.
///
/// \param[in] values a vector of strings
@@ -467,14 +467,14 @@ class ARROW_EXPORT FixedSizeBinaryBuilder : public ArrayBuilder {
return Status::OK();
}
- Status Append(const Buffer& s) {
- ARROW_RETURN_NOT_OK(Reserve(1));
- UnsafeAppend(util::string_view(s));
- return Status::OK();
- }
-
- Status Append(const std::shared_ptr<Buffer>& s) { return Append(*s); }
-
+ Status Append(const Buffer& s) {
+ ARROW_RETURN_NOT_OK(Reserve(1));
+ UnsafeAppend(util::string_view(s));
+ return Status::OK();
+ }
+
+ Status Append(const std::shared_ptr<Buffer>& s) { return Append(*s); }
+
template <size_t NBYTES>
Status Append(const std::array<uint8_t, NBYTES>& value) {
ARROW_RETURN_NOT_OK(Reserve(1));
@@ -489,9 +489,9 @@ class ARROW_EXPORT FixedSizeBinaryBuilder : public ArrayBuilder {
Status AppendNull() final;
Status AppendNulls(int64_t length) final;
- Status AppendEmptyValue() final;
- Status AppendEmptyValues(int64_t length) final;
-
+ Status AppendEmptyValue() final;
+ Status AppendEmptyValues(int64_t length) final;
+
void UnsafeAppend(const uint8_t* value) {
UnsafeAppendToBitmap(true);
if (ARROW_PREDICT_TRUE(byte_width_ > 0)) {
@@ -510,10 +510,10 @@ class ARROW_EXPORT FixedSizeBinaryBuilder : public ArrayBuilder {
UnsafeAppend(reinterpret_cast<const uint8_t*>(value.data()));
}
- void UnsafeAppend(const Buffer& s) { UnsafeAppend(util::string_view(s)); }
-
- void UnsafeAppend(const std::shared_ptr<Buffer>& s) { UnsafeAppend(*s); }
-
+ void UnsafeAppend(const Buffer& s) { UnsafeAppend(util::string_view(s)); }
+
+ void UnsafeAppend(const std::shared_ptr<Buffer>& s) { UnsafeAppend(*s); }
+
void UnsafeAppendNull() {
UnsafeAppendToBitmap(false);
byte_builder_.UnsafeAppend(/*num_copies=*/byte_width_, 0);
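
ExtendCurrent(), restored above, appends bytes to the most recent value without opening a new offset, which suits decoders that emit one logical value in several pieces. A sketch on StringBuilder:

#include <memory>

#include "arrow/array/builder_binary.h"
#include "arrow/status.h"

arrow::Status BuildInPieces(std::shared_ptr<arrow::Array>* out) {
  arrow::StringBuilder builder;
  ARROW_RETURN_NOT_OK(builder.Append("hello "));
  ARROW_RETURN_NOT_OK(builder.ExtendCurrent("world"));  // still one slot: "hello world"
  return builder.Finish(out);
}
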
diff --git a/contrib/libs/apache/arrow/cpp/src/arrow/array/builder_decimal.cc b/contrib/libs/apache/arrow/cpp/src/arrow/array/builder_decimal.cc
index 34c81f76c6f..bd7615a7309 100644
--- a/contrib/libs/apache/arrow/cpp/src/arrow/array/builder_decimal.cc
+++ b/contrib/libs/apache/arrow/cpp/src/arrow/array/builder_decimal.cc
@@ -67,39 +67,39 @@ Status Decimal128Builder::FinishInternal(std::shared_ptr<ArrayData>* out) {
return Status::OK();
}
-// ----------------------------------------------------------------------
-// Decimal256Builder
-
-Decimal256Builder::Decimal256Builder(const std::shared_ptr<DataType>& type,
- MemoryPool* pool)
- : FixedSizeBinaryBuilder(type, pool),
- decimal_type_(internal::checked_pointer_cast<Decimal256Type>(type)) {}
-
-Status Decimal256Builder::Append(const Decimal256& value) {
- RETURN_NOT_OK(FixedSizeBinaryBuilder::Reserve(1));
- UnsafeAppend(value);
- return Status::OK();
-}
-
-void Decimal256Builder::UnsafeAppend(const Decimal256& value) {
- value.ToBytes(GetMutableValue(length()));
- byte_builder_.UnsafeAdvance(32);
- UnsafeAppendToBitmap(true);
-}
-
-void Decimal256Builder::UnsafeAppend(util::string_view value) {
- FixedSizeBinaryBuilder::UnsafeAppend(value);
-}
-
-Status Decimal256Builder::FinishInternal(std::shared_ptr<ArrayData>* out) {
- std::shared_ptr<Buffer> data;
- RETURN_NOT_OK(byte_builder_.Finish(&data));
- std::shared_ptr<Buffer> null_bitmap;
- RETURN_NOT_OK(null_bitmap_builder_.Finish(&null_bitmap));
-
- *out = ArrayData::Make(type(), length_, {null_bitmap, data}, null_count_);
- capacity_ = length_ = null_count_ = 0;
- return Status::OK();
-}
-
+// ----------------------------------------------------------------------
+// Decimal256Builder
+
+Decimal256Builder::Decimal256Builder(const std::shared_ptr<DataType>& type,
+ MemoryPool* pool)
+ : FixedSizeBinaryBuilder(type, pool),
+ decimal_type_(internal::checked_pointer_cast<Decimal256Type>(type)) {}
+
+Status Decimal256Builder::Append(const Decimal256& value) {
+ RETURN_NOT_OK(FixedSizeBinaryBuilder::Reserve(1));
+ UnsafeAppend(value);
+ return Status::OK();
+}
+
+void Decimal256Builder::UnsafeAppend(const Decimal256& value) {
+ value.ToBytes(GetMutableValue(length()));
+ byte_builder_.UnsafeAdvance(32);
+ UnsafeAppendToBitmap(true);
+}
+
+void Decimal256Builder::UnsafeAppend(util::string_view value) {
+ FixedSizeBinaryBuilder::UnsafeAppend(value);
+}
+
+Status Decimal256Builder::FinishInternal(std::shared_ptr<ArrayData>* out) {
+ std::shared_ptr<Buffer> data;
+ RETURN_NOT_OK(byte_builder_.Finish(&data));
+ std::shared_ptr<Buffer> null_bitmap;
+ RETURN_NOT_OK(null_bitmap_builder_.Finish(&null_bitmap));
+
+ *out = ArrayData::Make(type(), length_, {null_bitmap, data}, null_count_);
+ capacity_ = length_ = null_count_ = 0;
+ return Status::OK();
+}
+
} // namespace arrow
diff --git a/contrib/libs/apache/arrow/cpp/src/arrow/array/builder_decimal.h b/contrib/libs/apache/arrow/cpp/src/arrow/array/builder_decimal.h
index 7fee4ab4c73..f48392ed001 100644
--- a/contrib/libs/apache/arrow/cpp/src/arrow/array/builder_decimal.h
+++ b/contrib/libs/apache/arrow/cpp/src/arrow/array/builder_decimal.h
@@ -32,7 +32,7 @@ namespace arrow {
class ARROW_EXPORT Decimal128Builder : public FixedSizeBinaryBuilder {
public:
using TypeClass = Decimal128Type;
- using ValueType = Decimal128;
+ using ValueType = Decimal128;
explicit Decimal128Builder(const std::shared_ptr<DataType>& type,
MemoryPool* pool = default_memory_pool());
@@ -59,36 +59,36 @@ class ARROW_EXPORT Decimal128Builder : public FixedSizeBinaryBuilder {
std::shared_ptr<Decimal128Type> decimal_type_;
};
-class ARROW_EXPORT Decimal256Builder : public FixedSizeBinaryBuilder {
- public:
- using TypeClass = Decimal256Type;
- using ValueType = Decimal256;
-
- explicit Decimal256Builder(const std::shared_ptr<DataType>& type,
- MemoryPool* pool = default_memory_pool());
-
- using FixedSizeBinaryBuilder::Append;
- using FixedSizeBinaryBuilder::AppendValues;
- using FixedSizeBinaryBuilder::Reset;
-
- Status Append(const Decimal256& val);
- void UnsafeAppend(const Decimal256& val);
- void UnsafeAppend(util::string_view val);
-
- Status FinishInternal(std::shared_ptr<ArrayData>* out) override;
-
- /// \cond FALSE
- using ArrayBuilder::Finish;
- /// \endcond
-
- Status Finish(std::shared_ptr<Decimal256Array>* out) { return FinishTyped(out); }
-
- std::shared_ptr<DataType> type() const override { return decimal_type_; }
-
- protected:
- std::shared_ptr<Decimal256Type> decimal_type_;
-};
-
+class ARROW_EXPORT Decimal256Builder : public FixedSizeBinaryBuilder {
+ public:
+ using TypeClass = Decimal256Type;
+ using ValueType = Decimal256;
+
+ explicit Decimal256Builder(const std::shared_ptr<DataType>& type,
+ MemoryPool* pool = default_memory_pool());
+
+ using FixedSizeBinaryBuilder::Append;
+ using FixedSizeBinaryBuilder::AppendValues;
+ using FixedSizeBinaryBuilder::Reset;
+
+ Status Append(const Decimal256& val);
+ void UnsafeAppend(const Decimal256& val);
+ void UnsafeAppend(util::string_view val);
+
+ Status FinishInternal(std::shared_ptr<ArrayData>* out) override;
+
+ /// \cond FALSE
+ using ArrayBuilder::Finish;
+ /// \endcond
+
+ Status Finish(std::shared_ptr<Decimal256Array>* out) { return FinishTyped(out); }
+
+ std::shared_ptr<DataType> type() const override { return decimal_type_; }
+
+ protected:
+ std::shared_ptr<Decimal256Type> decimal_type_;
+};
+
using DecimalBuilder = Decimal128Builder;
} // namespace arrow
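
A hedged usage sketch of the Decimal256Builder declared above; the precision/scale and the literal value are arbitrary, and Decimal256::FromString is assumed available from the matching util/decimal.h.

    #include <arrow/array/builder_decimal.h>
    #include <arrow/result.h>
    #include <arrow/type.h>
    #include <arrow/util/decimal.h>

    arrow::Status BuildDecimal256(std::shared_ptr<arrow::Decimal256Array>* out) {
      arrow::Decimal256Builder builder(arrow::decimal256(/*precision=*/40, /*scale=*/2));
      ARROW_ASSIGN_OR_RAISE(auto value, arrow::Decimal256::FromString("123456.78"));
      ARROW_RETURN_NOT_OK(builder.Append(value));  // checked: reserves, then UnsafeAppend
      ARROW_RETURN_NOT_OK(builder.AppendNull());
      return builder.Finish(out);                  // typed Finish declared above
    }
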
diff --git a/contrib/libs/apache/arrow/cpp/src/arrow/array/builder_dict.cc b/contrib/libs/apache/arrow/cpp/src/arrow/array/builder_dict.cc
index 7bbb6b25499..b13f6a2db34 100644
--- a/contrib/libs/apache/arrow/cpp/src/arrow/array/builder_dict.cc
+++ b/contrib/libs/apache/arrow/cpp/src/arrow/array/builder_dict.cc
@@ -45,7 +45,7 @@ class DictionaryMemoTable::DictionaryMemoTableImpl {
template <typename T>
enable_if_no_memoize<T, Status> Visit(const T&) {
- return Status::NotImplemented("Initialization of ", value_type_->ToString(),
+ return Status::NotImplemented("Initialization of ", value_type_->ToString(),
" memo table is not implemented");
}
diff --git a/contrib/libs/apache/arrow/cpp/src/arrow/array/builder_dict.h b/contrib/libs/apache/arrow/cpp/src/arrow/array/builder_dict.h
index d5541db2e7c..eb96482dbf7 100644
--- a/contrib/libs/apache/arrow/cpp/src/arrow/array/builder_dict.h
+++ b/contrib/libs/apache/arrow/cpp/src/arrow/array/builder_dict.h
@@ -29,7 +29,7 @@
#include "arrow/array/builder_primitive.h" // IWYU pragma: export
#include "arrow/array/data.h"
#include "arrow/array/util.h"
-#include "arrow/scalar.h"
+#include "arrow/scalar.h"
#include "arrow/status.h"
#include "arrow/type.h"
#include "arrow/type_traits.h"
@@ -241,20 +241,20 @@ class DictionaryBuilderBase : public ArrayBuilder {
/// \brief Append a decimal (only for Decimal128Type)
template <typename T1 = T>
- enable_if_decimal128<T1, Status> Append(const Decimal128& value) {
+ enable_if_decimal128<T1, Status> Append(const Decimal128& value) {
uint8_t data[16];
value.ToBytes(data);
return Append(data, 16);
}
- /// \brief Append a decimal (only for Decimal256Type)
- template <typename T1 = T>
- enable_if_decimal256<T1, Status> Append(const Decimal256& value) {
- uint8_t data[32];
- value.ToBytes(data);
- return Append(data, 32);
- }
-
+ /// \brief Append a decimal (only for Decimal256Type)
+ template <typename T1 = T>
+ enable_if_decimal256<T1, Status> Append(const Decimal256& value) {
+ uint8_t data[32];
+ value.ToBytes(data);
+ return Append(data, 32);
+ }
+
/// \brief Append a scalar null value
Status AppendNull() final {
length_ += 1;
@@ -270,18 +270,18 @@ class DictionaryBuilderBase : public ArrayBuilder {
return indices_builder_.AppendNulls(length);
}
- Status AppendEmptyValue() final {
- length_ += 1;
-
- return indices_builder_.AppendEmptyValue();
- }
-
- Status AppendEmptyValues(int64_t length) final {
- length_ += length;
-
- return indices_builder_.AppendEmptyValues(length);
- }
-
+ Status AppendEmptyValue() final {
+ length_ += 1;
+
+ return indices_builder_.AppendEmptyValue();
+ }
+
+ Status AppendEmptyValues(int64_t length) final {
+ length_ += length;
+
+ return indices_builder_.AppendEmptyValues(length);
+ }
+
/// \brief Insert values into the dictionary's memo, but do not append any
/// indices. Can be used to initialize a new builder with known dictionary
/// values
@@ -458,18 +458,18 @@ class DictionaryBuilderBase<BuilderType, NullType> : public ArrayBuilder {
return indices_builder_.AppendNulls(length);
}
- Status AppendEmptyValue() final {
- length_ += 1;
-
- return indices_builder_.AppendEmptyValue();
- }
-
- Status AppendEmptyValues(int64_t length) final {
- length_ += length;
-
- return indices_builder_.AppendEmptyValues(length);
- }
-
+ Status AppendEmptyValue() final {
+ length_ += 1;
+
+ return indices_builder_.AppendEmptyValue();
+ }
+
+ Status AppendEmptyValues(int64_t length) final {
+ length_ += length;
+
+ return indices_builder_.AppendEmptyValues(length);
+ }
+
/// \brief Append a whole dense array to the builder
Status AppendArray(const Array& array) {
#ifndef NDEBUG
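
The AppendNull/AppendEmptyValue overrides above only advance the indices builder; no dictionary entry is memoized for them. A sketch using the StringDictionaryBuilder alias, assumed default-constructible with the default memory pool:

    #include <arrow/array/builder_dict.h>

    arrow::Status BuildDictionary(std::shared_ptr<arrow::Array>* out) {
      arrow::StringDictionaryBuilder builder;
      ARROW_RETURN_NOT_OK(builder.Append("red"));
      ARROW_RETURN_NOT_OK(builder.Append("blue"));
      ARROW_RETURN_NOT_OK(builder.Append("red"));       // memoized: reuses the first index
      ARROW_RETURN_NOT_OK(builder.AppendNull());        // index slot only, no dictionary entry
      ARROW_RETURN_NOT_OK(builder.AppendEmptyValue());  // valid zero index, nothing memoized
      return builder.Finish(out);
    }
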
diff --git a/contrib/libs/apache/arrow/cpp/src/arrow/array/builder_nested.cc b/contrib/libs/apache/arrow/cpp/src/arrow/array/builder_nested.cc
index b49741d365f..a3bcde0381a 100644
--- a/contrib/libs/apache/arrow/cpp/src/arrow/array/builder_nested.cc
+++ b/contrib/libs/apache/arrow/cpp/src/arrow/array/builder_nested.cc
@@ -123,24 +123,24 @@ Status MapBuilder::AppendNulls(int64_t length) {
return Status::OK();
}
-Status MapBuilder::AppendEmptyValue() {
- DCHECK_EQ(item_builder_->length(), key_builder_->length());
- RETURN_NOT_OK(AdjustStructBuilderLength());
- RETURN_NOT_OK(list_builder_->AppendEmptyValue());
- length_ = list_builder_->length();
- null_count_ = list_builder_->null_count();
- return Status::OK();
-}
-
-Status MapBuilder::AppendEmptyValues(int64_t length) {
- DCHECK_EQ(item_builder_->length(), key_builder_->length());
- RETURN_NOT_OK(AdjustStructBuilderLength());
- RETURN_NOT_OK(list_builder_->AppendEmptyValues(length));
- length_ = list_builder_->length();
- null_count_ = list_builder_->null_count();
- return Status::OK();
-}
-
+Status MapBuilder::AppendEmptyValue() {
+ DCHECK_EQ(item_builder_->length(), key_builder_->length());
+ RETURN_NOT_OK(AdjustStructBuilderLength());
+ RETURN_NOT_OK(list_builder_->AppendEmptyValue());
+ length_ = list_builder_->length();
+ null_count_ = list_builder_->null_count();
+ return Status::OK();
+}
+
+Status MapBuilder::AppendEmptyValues(int64_t length) {
+ DCHECK_EQ(item_builder_->length(), key_builder_->length());
+ RETURN_NOT_OK(AdjustStructBuilderLength());
+ RETURN_NOT_OK(list_builder_->AppendEmptyValues(length));
+ length_ = list_builder_->length();
+ null_count_ = list_builder_->null_count();
+ return Status::OK();
+}
+
Status MapBuilder::AdjustStructBuilderLength() {
// If key/item builders have been appended, adjust struct builder length
// to match. Struct and key are non-nullable, append all valid values.
@@ -213,18 +213,18 @@ Status FixedSizeListBuilder::ValidateOverflow(int64_t new_elements) {
return Status::OK();
}
-Status FixedSizeListBuilder::AppendEmptyValue() {
- RETURN_NOT_OK(Reserve(1));
- UnsafeAppendToBitmap(true);
- return value_builder_->AppendEmptyValues(list_size_);
-}
-
-Status FixedSizeListBuilder::AppendEmptyValues(int64_t length) {
- RETURN_NOT_OK(Reserve(length));
- UnsafeAppendToBitmap(length, true);
- return value_builder_->AppendEmptyValues(list_size_ * length);
-}
-
+Status FixedSizeListBuilder::AppendEmptyValue() {
+ RETURN_NOT_OK(Reserve(1));
+ UnsafeAppendToBitmap(true);
+ return value_builder_->AppendEmptyValues(list_size_);
+}
+
+Status FixedSizeListBuilder::AppendEmptyValues(int64_t length) {
+ RETURN_NOT_OK(Reserve(length));
+ UnsafeAppendToBitmap(length, true);
+ return value_builder_->AppendEmptyValues(list_size_ * length);
+}
+
Status FixedSizeListBuilder::Resize(int64_t capacity) {
RETURN_NOT_OK(CheckCapacity(capacity));
return ArrayBuilder::Resize(capacity);
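
As restored above, FixedSizeListBuilder::AppendEmptyValues(length) marks length valid list slots and pushes list_size * length empty child values, keeping parent and child lengths consistent. A sketch, assuming the (pool, value_builder, list_size) constructor and Append() behave as declared in builder_nested.h:

    #include <arrow/array/builder_nested.h>
    #include <arrow/array/builder_primitive.h>

    arrow::Status BuildFixedSizeLists(std::shared_ptr<arrow::Array>* out) {
      auto pool = arrow::default_memory_pool();
      auto values = std::make_shared<arrow::Int32Builder>(pool);
      arrow::FixedSizeListBuilder builder(pool, values, /*list_size=*/3);
      ARROW_RETURN_NOT_OK(builder.Append());              // one explicit list slot
      ARROW_RETURN_NOT_OK(values->AppendValues({1, 2, 3}));
      ARROW_RETURN_NOT_OK(builder.AppendEmptyValues(2));  // two valid lists of three zeros
      return builder.Finish(out);
    }
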
diff --git a/contrib/libs/apache/arrow/cpp/src/arrow/array/builder_nested.h b/contrib/libs/apache/arrow/cpp/src/arrow/array/builder_nested.h
index 3acf421ef3e..12b999b786e 100644
--- a/contrib/libs/apache/arrow/cpp/src/arrow/array/builder_nested.h
+++ b/contrib/libs/apache/arrow/cpp/src/arrow/array/builder_nested.h
@@ -109,19 +109,19 @@ class BaseListBuilder : public ArrayBuilder {
return Status::OK();
}
- Status AppendEmptyValue() final { return Append(true); }
-
- Status AppendEmptyValues(int64_t length) final {
- ARROW_RETURN_NOT_OK(Reserve(length));
- ARROW_RETURN_NOT_OK(ValidateOverflow(0));
- UnsafeAppendToBitmap(length, true);
- const int64_t num_values = value_builder_->length();
- for (int64_t i = 0; i < length; ++i) {
- offsets_builder_.UnsafeAppend(static_cast<offset_type>(num_values));
- }
- return Status::OK();
- }
-
+ Status AppendEmptyValue() final { return Append(true); }
+
+ Status AppendEmptyValues(int64_t length) final {
+ ARROW_RETURN_NOT_OK(Reserve(length));
+ ARROW_RETURN_NOT_OK(ValidateOverflow(0));
+ UnsafeAppendToBitmap(length, true);
+ const int64_t num_values = value_builder_->length();
+ for (int64_t i = 0; i < length; ++i) {
+ offsets_builder_.UnsafeAppend(static_cast<offset_type>(num_values));
+ }
+ return Status::OK();
+ }
+
Status FinishInternal(std::shared_ptr<ArrayData>* out) override {
ARROW_RETURN_NOT_OK(AppendNextOffset());
@@ -271,10 +271,10 @@ class ARROW_EXPORT MapBuilder : public ArrayBuilder {
Status AppendNulls(int64_t length) final;
- Status AppendEmptyValue() final;
-
- Status AppendEmptyValues(int64_t length) final;
-
+ Status AppendEmptyValue() final;
+
+ Status AppendEmptyValues(int64_t length) final;
+
/// \brief Get builder to append keys.
///
/// Append a key with this builder should be followed by appending
@@ -370,10 +370,10 @@ class ARROW_EXPORT FixedSizeListBuilder : public ArrayBuilder {
Status ValidateOverflow(int64_t new_elements);
- Status AppendEmptyValue() final;
-
- Status AppendEmptyValues(int64_t length) final;
-
+ Status AppendEmptyValue() final;
+
+ Status AppendEmptyValues(int64_t length) final;
+
ArrayBuilder* value_builder() const { return value_builder_.get(); }
std::shared_ptr<DataType> type() const override {
@@ -431,42 +431,42 @@ class ARROW_EXPORT StructBuilder : public ArrayBuilder {
return Status::OK();
}
- /// \brief Append a null value. Automatically appends an empty value to each child
+ /// \brief Append a null value. Automatically appends an empty value to each child
/// builder.
Status AppendNull() final {
for (const auto& field : children_) {
- ARROW_RETURN_NOT_OK(field->AppendEmptyValue());
+ ARROW_RETURN_NOT_OK(field->AppendEmptyValue());
}
return Append(false);
}
- /// \brief Append multiple null values. Automatically appends empty values to each
+ /// \brief Append multiple null values. Automatically appends empty values to each
/// child builder.
- Status AppendNulls(int64_t length) final {
- for (const auto& field : children_) {
- ARROW_RETURN_NOT_OK(field->AppendEmptyValues(length));
- }
- ARROW_RETURN_NOT_OK(Reserve(length));
- UnsafeAppendToBitmap(length, false);
- return Status::OK();
- }
-
- Status AppendEmptyValue() final {
- for (const auto& field : children_) {
- ARROW_RETURN_NOT_OK(field->AppendEmptyValue());
- }
- return Append(true);
- }
-
- Status AppendEmptyValues(int64_t length) final {
- for (const auto& field : children_) {
- ARROW_RETURN_NOT_OK(field->AppendEmptyValues(length));
- }
- ARROW_RETURN_NOT_OK(Reserve(length));
- UnsafeAppendToBitmap(length, true);
- return Status::OK();
- }
-
+ Status AppendNulls(int64_t length) final {
+ for (const auto& field : children_) {
+ ARROW_RETURN_NOT_OK(field->AppendEmptyValues(length));
+ }
+ ARROW_RETURN_NOT_OK(Reserve(length));
+ UnsafeAppendToBitmap(length, false);
+ return Status::OK();
+ }
+
+ Status AppendEmptyValue() final {
+ for (const auto& field : children_) {
+ ARROW_RETURN_NOT_OK(field->AppendEmptyValue());
+ }
+ return Append(true);
+ }
+
+ Status AppendEmptyValues(int64_t length) final {
+ for (const auto& field : children_) {
+ ARROW_RETURN_NOT_OK(field->AppendEmptyValues(length));
+ }
+ ARROW_RETURN_NOT_OK(Reserve(length));
+ UnsafeAppendToBitmap(length, true);
+ return Status::OK();
+ }
+
void Reset() override;
ArrayBuilder* field_builder(int i) const { return children_[i].get(); }
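
The invariant behind the restored StructBuilder comments: AppendNull and AppendEmptyValue(s) pad every child builder so all children stay the same length as the parent. A sketch; the two-field layout is illustrative and the (type, pool, field_builders) constructor is assumed:

    #include <arrow/array/builder_nested.h>
    #include <arrow/array/builder_primitive.h>
    #include <arrow/type.h>

    arrow::Status BuildStructs(std::shared_ptr<arrow::Array>* out) {
      auto pool = arrow::default_memory_pool();
      auto x = std::make_shared<arrow::Int32Builder>(pool);
      auto y = std::make_shared<arrow::DoubleBuilder>(pool);
      auto type = arrow::struct_({arrow::field("x", arrow::int32()),
                                  arrow::field("y", arrow::float64())});
      arrow::StructBuilder builder(type, pool, {x, y});
      ARROW_RETURN_NOT_OK(x->Append(1));
      ARROW_RETURN_NOT_OK(y->Append(2.5));
      ARROW_RETURN_NOT_OK(builder.Append());      // one valid struct slot
      ARROW_RETURN_NOT_OK(builder.AppendNull());  // children padded with empty values
      return builder.Finish(out);
    }
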
diff --git a/contrib/libs/apache/arrow/cpp/src/arrow/array/builder_primitive.cc b/contrib/libs/apache/arrow/cpp/src/arrow/array/builder_primitive.cc
index ef5c4d14f7f..e403c42411d 100644
--- a/contrib/libs/apache/arrow/cpp/src/arrow/array/builder_primitive.cc
+++ b/contrib/libs/apache/arrow/cpp/src/arrow/array/builder_primitive.cc
@@ -65,8 +65,8 @@ Status BooleanBuilder::Resize(int64_t capacity) {
}
Status BooleanBuilder::FinishInternal(std::shared_ptr<ArrayData>* out) {
- ARROW_ASSIGN_OR_RAISE(auto null_bitmap, null_bitmap_builder_.FinishWithLength(length_));
- ARROW_ASSIGN_OR_RAISE(auto data, data_builder_.FinishWithLength(length_));
+ ARROW_ASSIGN_OR_RAISE(auto null_bitmap, null_bitmap_builder_.FinishWithLength(length_));
+ ARROW_ASSIGN_OR_RAISE(auto data, data_builder_.FinishWithLength(length_));
*out = ArrayData::Make(boolean(), length_, {null_bitmap, data}, null_count_);
diff --git a/contrib/libs/apache/arrow/cpp/src/arrow/array/builder_primitive.h b/contrib/libs/apache/arrow/cpp/src/arrow/array/builder_primitive.h
index 3dd2370cddb..80cfc4061bb 100644
--- a/contrib/libs/apache/arrow/cpp/src/arrow/array/builder_primitive.h
+++ b/contrib/libs/apache/arrow/cpp/src/arrow/array/builder_primitive.h
@@ -23,7 +23,7 @@
#include "arrow/array/builder_base.h"
#include "arrow/array/data.h"
-#include "arrow/result.h"
+#include "arrow/result.h"
#include "arrow/type.h"
#include "arrow/type_traits.h"
@@ -47,10 +47,10 @@ class ARROW_EXPORT NullBuilder : public ArrayBuilder {
/// \brief Append a single null element
Status AppendNull() final { return AppendNulls(1); }
- Status AppendEmptyValues(int64_t length) final { return AppendNulls(length); }
-
- Status AppendEmptyValue() final { return AppendEmptyValues(1); }
-
+ Status AppendEmptyValues(int64_t length) final { return AppendNulls(length); }
+
+ Status AppendEmptyValue() final { return AppendEmptyValues(1); }
+
Status Append(std::nullptr_t) { return AppendNull(); }
Status FinishInternal(std::shared_ptr<ArrayData>* out) override;
@@ -105,22 +105,22 @@ class NumericBuilder : public ArrayBuilder {
return Status::OK();
}
- /// \brief Append an empty element
- Status AppendEmptyValue() final {
- ARROW_RETURN_NOT_OK(Reserve(1));
- data_builder_.UnsafeAppend(value_type{}); // zero
- UnsafeAppendToBitmap(true);
- return Status::OK();
- }
-
- /// \brief Append several empty elements
- Status AppendEmptyValues(int64_t length) final {
- ARROW_RETURN_NOT_OK(Reserve(length));
- data_builder_.UnsafeAppend(length, value_type{}); // zero
- UnsafeSetNotNull(length);
- return Status::OK();
- }
-
+ /// \brief Append an empty element
+ Status AppendEmptyValue() final {
+ ARROW_RETURN_NOT_OK(Reserve(1));
+ data_builder_.UnsafeAppend(value_type{}); // zero
+ UnsafeAppendToBitmap(true);
+ return Status::OK();
+ }
+
+ /// \brief Append several empty elements
+ Status AppendEmptyValues(int64_t length) final {
+ ARROW_RETURN_NOT_OK(Reserve(length));
+ data_builder_.UnsafeAppend(length, value_type{}); // zero
+ UnsafeSetNotNull(length);
+ return Status::OK();
+ }
+
value_type GetValue(int64_t index) const { return data_builder_.data()[index]; }
void Reset() override { data_builder_.Reset(); }
@@ -186,9 +186,9 @@ class NumericBuilder : public ArrayBuilder {
}
Status FinishInternal(std::shared_ptr<ArrayData>* out) override {
- ARROW_ASSIGN_OR_RAISE(auto null_bitmap,
- null_bitmap_builder_.FinishWithLength(length_));
- ARROW_ASSIGN_OR_RAISE(auto data, data_builder_.FinishWithLength(length_));
+ ARROW_ASSIGN_OR_RAISE(auto null_bitmap,
+ null_bitmap_builder_.FinishWithLength(length_));
+ ARROW_ASSIGN_OR_RAISE(auto data, data_builder_.FinishWithLength(length_));
*out = ArrayData::Make(type(), length_, {null_bitmap, data}, null_count_);
capacity_ = length_ = null_count_ = 0;
return Status::OK();
@@ -318,20 +318,20 @@ class ARROW_EXPORT BooleanBuilder : public ArrayBuilder {
return Status::OK();
}
- Status AppendEmptyValue() final {
- ARROW_RETURN_NOT_OK(Reserve(1));
- data_builder_.UnsafeAppend(false);
- UnsafeSetNotNull(1);
- return Status::OK();
- }
-
- Status AppendEmptyValues(int64_t length) final {
- ARROW_RETURN_NOT_OK(Reserve(length));
- data_builder_.UnsafeAppend(length, false);
- UnsafeSetNotNull(length);
- return Status::OK();
- }
-
+ Status AppendEmptyValue() final {
+ ARROW_RETURN_NOT_OK(Reserve(1));
+ data_builder_.UnsafeAppend(false);
+ UnsafeSetNotNull(1);
+ return Status::OK();
+ }
+
+ Status AppendEmptyValues(int64_t length) final {
+ ARROW_RETURN_NOT_OK(Reserve(length));
+ data_builder_.UnsafeAppend(length, false);
+ UnsafeSetNotNull(length);
+ return Status::OK();
+ }
+
/// Scalar append
Status Append(const bool val) {
ARROW_RETURN_NOT_OK(Reserve(1));
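
The distinction restored in builder_primitive.h above: AppendEmptyValue writes a zero-initialized value with the validity bit set, while AppendNull leaves the slot null. A minimal sketch:

    #include <arrow/array/builder_primitive.h>

    arrow::Status BuildInts(std::shared_ptr<arrow::Array>* out) {
      arrow::Int64Builder builder;
      ARROW_RETURN_NOT_OK(builder.Append(42));
      ARROW_RETURN_NOT_OK(builder.AppendEmptyValue());  // stores 0, marked valid
      ARROW_RETURN_NOT_OK(builder.AppendNull());        // no value, marked null
      return builder.Finish(out);                       // yields [42, 0, null]
    }
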
diff --git a/contrib/libs/apache/arrow/cpp/src/arrow/array/builder_union.cc b/contrib/libs/apache/arrow/cpp/src/arrow/array/builder_union.cc
index 0168646cf48..8617cb73fce 100644
--- a/contrib/libs/apache/arrow/cpp/src/arrow/array/builder_union.cc
+++ b/contrib/libs/apache/arrow/cpp/src/arrow/array/builder_union.cc
@@ -65,8 +65,8 @@ BasicUnionBuilder::BasicUnionBuilder(
children_ = children;
type_id_to_children_.resize(union_type.max_type_code() + 1, nullptr);
- DCHECK_LE(
- type_id_to_children_.size() - 1,
+ DCHECK_LE(
+ type_id_to_children_.size() - 1,
static_cast<decltype(type_id_to_children_)::size_type>(UnionType::kMaxTypeCode));
for (size_t i = 0; i < children.size(); ++i) {
diff --git a/contrib/libs/apache/arrow/cpp/src/arrow/array/builder_union.h b/contrib/libs/apache/arrow/cpp/src/arrow/array/builder_union.h
index 979b3f1effc..060be474fb8 100644
--- a/contrib/libs/apache/arrow/cpp/src/arrow/array/builder_union.h
+++ b/contrib/libs/apache/arrow/cpp/src/arrow/array/builder_union.h
@@ -117,26 +117,26 @@ class ARROW_EXPORT DenseUnionBuilder : public BasicUnionBuilder {
return child_builder->AppendNull();
}
- Status AppendEmptyValue() final {
- const int8_t first_child_code = type_codes_[0];
- ArrayBuilder* child_builder = type_id_to_children_[first_child_code];
- ARROW_RETURN_NOT_OK(types_builder_.Append(first_child_code));
- ARROW_RETURN_NOT_OK(
- offsets_builder_.Append(static_cast<int32_t>(child_builder->length())));
- // Append an empty value arbitrarily to the first child
- return child_builder->AppendEmptyValue();
- }
-
- Status AppendEmptyValues(int64_t length) final {
- const int8_t first_child_code = type_codes_[0];
- ArrayBuilder* child_builder = type_id_to_children_[first_child_code];
- ARROW_RETURN_NOT_OK(types_builder_.Append(length, first_child_code));
- ARROW_RETURN_NOT_OK(
- offsets_builder_.Append(length, static_cast<int32_t>(child_builder->length())));
- // Append just a single empty value to the first child
- return child_builder->AppendEmptyValue();
- }
-
+ Status AppendEmptyValue() final {
+ const int8_t first_child_code = type_codes_[0];
+ ArrayBuilder* child_builder = type_id_to_children_[first_child_code];
+ ARROW_RETURN_NOT_OK(types_builder_.Append(first_child_code));
+ ARROW_RETURN_NOT_OK(
+ offsets_builder_.Append(static_cast<int32_t>(child_builder->length())));
+ // Append an empty value arbitrarily to the first child
+ return child_builder->AppendEmptyValue();
+ }
+
+ Status AppendEmptyValues(int64_t length) final {
+ const int8_t first_child_code = type_codes_[0];
+ ArrayBuilder* child_builder = type_id_to_children_[first_child_code];
+ ARROW_RETURN_NOT_OK(types_builder_.Append(length, first_child_code));
+ ARROW_RETURN_NOT_OK(
+ offsets_builder_.Append(length, static_cast<int32_t>(child_builder->length())));
+ // Append just a single empty value to the first child
+ return child_builder->AppendEmptyValue();
+ }
+
/// \brief Append an element to the UnionArray. This must be followed
/// by an append to the appropriate child builder.
///
@@ -179,45 +179,45 @@ class ARROW_EXPORT SparseUnionBuilder : public BasicUnionBuilder {
const std::shared_ptr<DataType>& type)
: BasicUnionBuilder(pool, children, type) {}
- /// \brief Append a null value.
- ///
- /// A null is appended to the first child, empty values to the other children.
+ /// \brief Append a null value.
+ ///
+ /// A null is appended to the first child, empty values to the other children.
Status AppendNull() final {
- const auto first_child_code = type_codes_[0];
- ARROW_RETURN_NOT_OK(types_builder_.Append(first_child_code));
- ARROW_RETURN_NOT_OK(type_id_to_children_[first_child_code]->AppendNull());
- for (int i = 1; i < static_cast<int>(type_codes_.size()); ++i) {
- ARROW_RETURN_NOT_OK(type_id_to_children_[type_codes_[i]]->AppendEmptyValue());
- }
- return Status::OK();
- }
-
- /// \brief Append multiple null values.
- ///
- /// Nulls are appended to the first child, empty values to the other children.
- Status AppendNulls(int64_t length) final {
- const auto first_child_code = type_codes_[0];
- ARROW_RETURN_NOT_OK(types_builder_.Append(length, first_child_code));
- ARROW_RETURN_NOT_OK(type_id_to_children_[first_child_code]->AppendNulls(length));
- for (int i = 1; i < static_cast<int>(type_codes_.size()); ++i) {
- ARROW_RETURN_NOT_OK(
- type_id_to_children_[type_codes_[i]]->AppendEmptyValues(length));
- }
- return Status::OK();
- }
-
- Status AppendEmptyValue() final {
+ const auto first_child_code = type_codes_[0];
+ ARROW_RETURN_NOT_OK(types_builder_.Append(first_child_code));
+ ARROW_RETURN_NOT_OK(type_id_to_children_[first_child_code]->AppendNull());
+ for (int i = 1; i < static_cast<int>(type_codes_.size()); ++i) {
+ ARROW_RETURN_NOT_OK(type_id_to_children_[type_codes_[i]]->AppendEmptyValue());
+ }
+ return Status::OK();
+ }
+
+ /// \brief Append multiple null values.
+ ///
+ /// Nulls are appended to the first child, empty values to the other children.
+ Status AppendNulls(int64_t length) final {
+ const auto first_child_code = type_codes_[0];
+ ARROW_RETURN_NOT_OK(types_builder_.Append(length, first_child_code));
+ ARROW_RETURN_NOT_OK(type_id_to_children_[first_child_code]->AppendNulls(length));
+ for (int i = 1; i < static_cast<int>(type_codes_.size()); ++i) {
+ ARROW_RETURN_NOT_OK(
+ type_id_to_children_[type_codes_[i]]->AppendEmptyValues(length));
+ }
+ return Status::OK();
+ }
+
+ Status AppendEmptyValue() final {
ARROW_RETURN_NOT_OK(types_builder_.Append(type_codes_[0]));
for (int8_t code : type_codes_) {
- ARROW_RETURN_NOT_OK(type_id_to_children_[code]->AppendEmptyValue());
+ ARROW_RETURN_NOT_OK(type_id_to_children_[code]->AppendEmptyValue());
}
return Status::OK();
}
- Status AppendEmptyValues(int64_t length) final {
+ Status AppendEmptyValues(int64_t length) final {
ARROW_RETURN_NOT_OK(types_builder_.Append(length, type_codes_[0]));
for (int8_t code : type_codes_) {
- ARROW_RETURN_NOT_OK(type_id_to_children_[code]->AppendEmptyValues(length));
+ ARROW_RETURN_NOT_OK(type_id_to_children_[code]->AppendEmptyValues(length));
}
return Status::OK();
}
@@ -228,7 +228,7 @@ class ARROW_EXPORT SparseUnionBuilder : public BasicUnionBuilder {
/// \param[in] next_type type_id of the child to which the next value will be appended.
///
/// The corresponding child builder must be appended to independently after this method
- /// is called, and all other child builders must have a null or empty value appended.
+ /// is called, and all other child builders must have a null or empty value appended.
Status Append(int8_t next_type) { return types_builder_.Append(next_type); }
};
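
Putting the sparse-union contract above together: after Append(type_code), exactly one child receives a value and every other child needs a null or empty value so all children stay as long as the union. A sketch; AppendChild returning the new type code is assumed from BasicUnionBuilder:

    #include <arrow/array/builder_primitive.h>
    #include <arrow/array/builder_union.h>

    arrow::Status BuildSparseUnion(std::shared_ptr<arrow::Array>* out) {
      auto pool = arrow::default_memory_pool();
      arrow::SparseUnionBuilder builder(pool);
      auto ints = std::make_shared<arrow::Int32Builder>(pool);
      auto doubles = std::make_shared<arrow::DoubleBuilder>(pool);
      const int8_t int_code = builder.AppendChild(ints, "i");
      builder.AppendChild(doubles, "d");
      ARROW_RETURN_NOT_OK(builder.Append(int_code));     // next value goes to "i"
      ARROW_RETURN_NOT_OK(ints->Append(7));
      ARROW_RETURN_NOT_OK(doubles->AppendEmptyValue());  // keep the other child aligned
      ARROW_RETURN_NOT_OK(builder.AppendNull());         // null in first child, empty elsewhere
      return builder.Finish(out);
    }
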
diff --git a/contrib/libs/apache/arrow/cpp/src/arrow/array/concatenate.cc b/contrib/libs/apache/arrow/cpp/src/arrow/array/concatenate.cc
index be9b5c3258c..32478783394 100644
--- a/contrib/libs/apache/arrow/cpp/src/arrow/array/concatenate.cc
+++ b/contrib/libs/apache/arrow/cpp/src/arrow/array/concatenate.cc
@@ -36,7 +36,7 @@
#include "arrow/util/bit_util.h"
#include "arrow/util/bitmap_ops.h"
#include "arrow/util/checked_cast.h"
-#include "arrow/util/int_util.h"
+#include "arrow/util/int_util.h"
#include "arrow/util/int_util_internal.h"
#include "arrow/util/logging.h"
#include "arrow/visitor_inline.h"
@@ -45,7 +45,7 @@ namespace arrow {
using internal::SafeSignedAdd;
-namespace {
+namespace {
/// offset, length pair for representing a Range of a buffer or array
struct Range {
int64_t offset = -1, length = 0;
@@ -68,8 +68,8 @@ struct Bitmap {
};
// Allocate a buffer and concatenate bitmaps into it.
-Status ConcatenateBitmaps(const std::vector<Bitmap>& bitmaps, MemoryPool* pool,
- std::shared_ptr<Buffer>* out) {
+Status ConcatenateBitmaps(const std::vector<Bitmap>& bitmaps, MemoryPool* pool,
+ std::shared_ptr<Buffer>* out) {
int64_t out_length = 0;
for (const auto& bitmap : bitmaps) {
if (internal::AddWithOverflow(out_length, bitmap.range.length, &out_length)) {
@@ -96,15 +96,15 @@ Status ConcatenateBitmaps(const std::vector<Bitmap>& bitmaps, MemoryPool* pool,
// Write offsets in src into dst, adjusting them such that first_offset
// will be the first offset written.
template <typename Offset>
-Status PutOffsets(const std::shared_ptr<Buffer>& src, Offset first_offset, Offset* dst,
- Range* values_range);
+Status PutOffsets(const std::shared_ptr<Buffer>& src, Offset first_offset, Offset* dst,
+ Range* values_range);
// Concatenate buffers holding offsets into a single buffer of offsets,
// also computing the ranges of values spanned by each buffer of offsets.
template <typename Offset>
-Status ConcatenateOffsets(const BufferVector& buffers, MemoryPool* pool,
- std::shared_ptr<Buffer>* out,
- std::vector<Range>* values_ranges) {
+Status ConcatenateOffsets(const BufferVector& buffers, MemoryPool* pool,
+ std::shared_ptr<Buffer>* out,
+ std::vector<Range>* values_ranges) {
values_ranges->resize(buffers.size());
// allocate output buffer
@@ -132,8 +132,8 @@ Status ConcatenateOffsets(const BufferVector& buffers, MemoryPool* pool,
}
template <typename Offset>
-Status PutOffsets(const std::shared_ptr<Buffer>& src, Offset first_offset, Offset* dst,
- Range* values_range) {
+Status PutOffsets(const std::shared_ptr<Buffer>& src, Offset first_offset, Offset* dst,
+ Range* values_range) {
if (src->size() == 0) {
// It's allowed to have an empty offsets buffer for a 0-length array
// (see Array::Validate)
@@ -167,7 +167,7 @@ Status PutOffsets(const std::shared_ptr<Buffer>& src, Offset first_offset, Offse
class ConcatenateImpl {
public:
- ConcatenateImpl(const ArrayDataVector& in, MemoryPool* pool)
+ ConcatenateImpl(const ArrayDataVector& in, MemoryPool* pool)
: in_(std::move(in)), pool_(pool), out_(std::make_shared<ArrayData>()) {
out_->type = in[0]->type;
for (size_t i = 0; i < in_.size(); ++i) {
@@ -202,7 +202,7 @@ class ConcatenateImpl {
}
Status Visit(const FixedWidthType& fixed) {
- // Handles numbers, decimal128, decimal256, fixed_size_binary
+ // Handles numbers, decimal128, decimal256, fixed_size_binary
ARROW_ASSIGN_OR_RAISE(auto buffers, Buffers(1, fixed));
return ConcatenateBuffers(buffers, pool_).Value(&out_->buffers[1]);
}
@@ -243,8 +243,8 @@ class ConcatenateImpl {
return ConcatenateImpl(child_data, pool_).Concatenate(&out_->child_data[0]);
}
- Status Visit(const FixedSizeListType& fixed_size_list) {
- ARROW_ASSIGN_OR_RAISE(auto child_data, ChildData(0, fixed_size_list.list_size()));
+ Status Visit(const FixedSizeListType& fixed_size_list) {
+ ARROW_ASSIGN_OR_RAISE(auto child_data, ChildData(0, fixed_size_list.list_size()));
return ConcatenateImpl(child_data, pool_).Concatenate(&out_->child_data[0]);
}
@@ -256,47 +256,47 @@ class ConcatenateImpl {
return Status::OK();
}
- Result<BufferVector> UnifyDictionaries(const DictionaryType& d) {
- BufferVector new_index_lookup;
- ARROW_ASSIGN_OR_RAISE(auto unifier, DictionaryUnifier::Make(d.value_type()));
- new_index_lookup.resize(in_.size());
- for (size_t i = 0; i < in_.size(); i++) {
- auto item = in_[i];
- auto dictionary_array = MakeArray(item->dictionary);
- RETURN_NOT_OK(unifier->Unify(*dictionary_array, &new_index_lookup[i]));
- }
- std::shared_ptr<Array> out_dictionary;
- RETURN_NOT_OK(unifier->GetResultWithIndexType(d.index_type(), &out_dictionary));
- out_->dictionary = out_dictionary->data();
- return new_index_lookup;
- }
-
- // Transpose and concatenate dictionary indices
- Result<std::shared_ptr<Buffer>> ConcatenateDictionaryIndices(
- const DataType& index_type, const BufferVector& index_transpositions) {
- const auto index_width =
- internal::checked_cast<const FixedWidthType&>(index_type).bit_width() / 8;
- int64_t out_length = 0;
- for (const auto& data : in_) {
- out_length += data->length;
- }
- ARROW_ASSIGN_OR_RAISE(auto out, AllocateBuffer(out_length * index_width, pool_));
- uint8_t* out_data = out->mutable_data();
- for (size_t i = 0; i < in_.size(); i++) {
- const auto& data = in_[i];
- auto transpose_map =
- reinterpret_cast<const int32_t*>(index_transpositions[i]->data());
- RETURN_NOT_OK(internal::TransposeInts(index_type, index_type,
- /*src=*/data->GetValues<uint8_t>(1, 0),
- /*dest=*/out_data,
- /*src_offset=*/data->offset,
- /*dest_offset=*/0, /*length=*/data->length,
- transpose_map));
- out_data += data->length * index_width;
- }
- return std::move(out);
- }
-
+ Result<BufferVector> UnifyDictionaries(const DictionaryType& d) {
+ BufferVector new_index_lookup;
+ ARROW_ASSIGN_OR_RAISE(auto unifier, DictionaryUnifier::Make(d.value_type()));
+ new_index_lookup.resize(in_.size());
+ for (size_t i = 0; i < in_.size(); i++) {
+ auto item = in_[i];
+ auto dictionary_array = MakeArray(item->dictionary);
+ RETURN_NOT_OK(unifier->Unify(*dictionary_array, &new_index_lookup[i]));
+ }
+ std::shared_ptr<Array> out_dictionary;
+ RETURN_NOT_OK(unifier->GetResultWithIndexType(d.index_type(), &out_dictionary));
+ out_->dictionary = out_dictionary->data();
+ return new_index_lookup;
+ }
+
+ // Transpose and concatenate dictionary indices
+ Result<std::shared_ptr<Buffer>> ConcatenateDictionaryIndices(
+ const DataType& index_type, const BufferVector& index_transpositions) {
+ const auto index_width =
+ internal::checked_cast<const FixedWidthType&>(index_type).bit_width() / 8;
+ int64_t out_length = 0;
+ for (const auto& data : in_) {
+ out_length += data->length;
+ }
+ ARROW_ASSIGN_OR_RAISE(auto out, AllocateBuffer(out_length * index_width, pool_));
+ uint8_t* out_data = out->mutable_data();
+ for (size_t i = 0; i < in_.size(); i++) {
+ const auto& data = in_[i];
+ auto transpose_map =
+ reinterpret_cast<const int32_t*>(index_transpositions[i]->data());
+ RETURN_NOT_OK(internal::TransposeInts(index_type, index_type,
+ /*src=*/data->GetValues<uint8_t>(1, 0),
+ /*dest=*/out_data,
+ /*src_offset=*/data->offset,
+ /*dest_offset=*/0, /*length=*/data->length,
+ transpose_map));
+ out_data += data->length * index_width;
+ }
+ return std::move(out);
+ }
+
Status Visit(const DictionaryType& d) {
auto fixed = internal::checked_cast<const FixedWidthType*>(d.index_type().get());
@@ -311,15 +311,15 @@ class ConcatenateImpl {
}
}
- ARROW_ASSIGN_OR_RAISE(auto index_buffers, Buffers(1, *fixed));
+ ARROW_ASSIGN_OR_RAISE(auto index_buffers, Buffers(1, *fixed));
if (dictionaries_same) {
out_->dictionary = in_[0]->dictionary;
return ConcatenateBuffers(index_buffers, pool_).Value(&out_->buffers[1]);
} else {
- ARROW_ASSIGN_OR_RAISE(auto index_lookup, UnifyDictionaries(d));
- ARROW_ASSIGN_OR_RAISE(out_->buffers[1],
- ConcatenateDictionaryIndices(*fixed, index_lookup));
- return Status::OK();
+ ARROW_ASSIGN_OR_RAISE(auto index_lookup, UnifyDictionaries(d));
+ ARROW_ASSIGN_OR_RAISE(out_->buffers[1],
+ ConcatenateDictionaryIndices(*fixed, index_lookup));
+ return Status::OK();
}
}
@@ -344,7 +344,7 @@ class ConcatenateImpl {
Result<BufferVector> Buffers(size_t index) {
BufferVector buffers;
buffers.reserve(in_.size());
- for (const auto& array_data : in_) {
+ for (const auto& array_data : in_) {
const auto& buffer = array_data->buffers[index];
if (buffer != nullptr) {
ARROW_ASSIGN_OR_RAISE(
@@ -386,7 +386,7 @@ class ConcatenateImpl {
Result<BufferVector> Buffers(size_t index, int byte_width) {
BufferVector buffers;
buffers.reserve(in_.size());
- for (const auto& array_data : in_) {
+ for (const auto& array_data : in_) {
const auto& buffer = array_data->buffers[index];
if (buffer != nullptr) {
ARROW_ASSIGN_OR_RAISE(auto sliced_buffer,
@@ -421,8 +421,8 @@ class ConcatenateImpl {
// Gather the index-th child_data of each input into a vector.
// Elements are sliced with that input's offset and length.
- Result<ArrayDataVector> ChildData(size_t index) {
- ArrayDataVector child_data(in_.size());
+ Result<ArrayDataVector> ChildData(size_t index) {
+ ArrayDataVector child_data(in_.size());
for (size_t i = 0; i < in_.size(); ++i) {
ARROW_ASSIGN_OR_RAISE(child_data[i], in_[i]->child_data[index]->SliceSafe(
in_[i]->offset, in_[i]->length));
@@ -431,22 +431,22 @@ class ConcatenateImpl {
}
// Gather the index-th child_data of each input into a vector.
- // Elements are sliced with that input's offset and length multiplied by multiplier.
- Result<ArrayDataVector> ChildData(size_t index, size_t multiplier) {
- ArrayDataVector child_data(in_.size());
- for (size_t i = 0; i < in_.size(); ++i) {
- ARROW_ASSIGN_OR_RAISE(
- child_data[i], in_[i]->child_data[index]->SliceSafe(
- in_[i]->offset * multiplier, in_[i]->length * multiplier));
- }
- return child_data;
- }
-
- // Gather the index-th child_data of each input into a vector.
+ // Elements are sliced with that input's offset and length multiplied by multiplier.
+ Result<ArrayDataVector> ChildData(size_t index, size_t multiplier) {
+ ArrayDataVector child_data(in_.size());
+ for (size_t i = 0; i < in_.size(); ++i) {
+ ARROW_ASSIGN_OR_RAISE(
+ child_data[i], in_[i]->child_data[index]->SliceSafe(
+ in_[i]->offset * multiplier, in_[i]->length * multiplier));
+ }
+ return child_data;
+ }
+
+ // Gather the index-th child_data of each input into a vector.
// Elements are sliced with the explicitly passed ranges.
- Result<ArrayDataVector> ChildData(size_t index, const std::vector<Range>& ranges) {
+ Result<ArrayDataVector> ChildData(size_t index, const std::vector<Range>& ranges) {
DCHECK_EQ(in_.size(), ranges.size());
- ArrayDataVector child_data(in_.size());
+ ArrayDataVector child_data(in_.size());
for (size_t i = 0; i < in_.size(); ++i) {
ARROW_ASSIGN_OR_RAISE(child_data[i], in_[i]->child_data[index]->SliceSafe(
ranges[i].offset, ranges[i].length));
@@ -454,20 +454,20 @@ class ConcatenateImpl {
return child_data;
}
- const ArrayDataVector& in_;
+ const ArrayDataVector& in_;
MemoryPool* pool_;
std::shared_ptr<ArrayData> out_;
};
-} // namespace
-
+} // namespace
+
Result<std::shared_ptr<Array>> Concatenate(const ArrayVector& arrays, MemoryPool* pool) {
if (arrays.size() == 0) {
return Status::Invalid("Must pass at least one array");
}
// gather ArrayData of input arrays
- ArrayDataVector data(arrays.size());
+ ArrayDataVector data(arrays.size());
for (size_t i = 0; i < arrays.size(); ++i) {
if (!arrays[i]->type()->Equals(*arrays[0]->type())) {
return Status::Invalid("arrays to be concatenated must be identically typed, but ",
diff --git a/contrib/libs/apache/arrow/cpp/src/arrow/array/data.cc b/contrib/libs/apache/arrow/cpp/src/arrow/array/data.cc
index be30ff7d685..5a214473972 100644
--- a/contrib/libs/apache/arrow/cpp/src/arrow/array/data.cc
+++ b/contrib/libs/apache/arrow/cpp/src/arrow/array/data.cc
@@ -37,13 +37,13 @@ namespace arrow {
using internal::CountSetBits;
-static inline void AdjustNonNullable(Type::type type_id, int64_t length,
+static inline void AdjustNonNullable(Type::type type_id, int64_t length,
std::vector<std::shared_ptr<Buffer>>* buffers,
int64_t* null_count) {
- if (type_id == Type::NA) {
- *null_count = length;
- (*buffers)[0] = nullptr;
- } else if (internal::HasValidityBitmap(type_id)) {
+ if (type_id == Type::NA) {
+ *null_count = length;
+ (*buffers)[0] = nullptr;
+ } else if (internal::HasValidityBitmap(type_id)) {
if (*null_count == 0) {
// In case there are no nulls, don't keep an allocated null bitmap around
(*buffers)[0] = nullptr;
@@ -56,39 +56,39 @@ static inline void AdjustNonNullable(Type::type type_id, int64_t length,
}
}
-std::shared_ptr<ArrayData> ArrayData::Make(std::shared_ptr<DataType> type, int64_t length,
+std::shared_ptr<ArrayData> ArrayData::Make(std::shared_ptr<DataType> type, int64_t length,
std::vector<std::shared_ptr<Buffer>> buffers,
int64_t null_count, int64_t offset) {
- AdjustNonNullable(type->id(), length, &buffers, &null_count);
- return std::make_shared<ArrayData>(std::move(type), length, std::move(buffers),
- null_count, offset);
+ AdjustNonNullable(type->id(), length, &buffers, &null_count);
+ return std::make_shared<ArrayData>(std::move(type), length, std::move(buffers),
+ null_count, offset);
}
std::shared_ptr<ArrayData> ArrayData::Make(
- std::shared_ptr<DataType> type, int64_t length,
+ std::shared_ptr<DataType> type, int64_t length,
std::vector<std::shared_ptr<Buffer>> buffers,
std::vector<std::shared_ptr<ArrayData>> child_data, int64_t null_count,
int64_t offset) {
- AdjustNonNullable(type->id(), length, &buffers, &null_count);
- return std::make_shared<ArrayData>(std::move(type), length, std::move(buffers),
+ AdjustNonNullable(type->id(), length, &buffers, &null_count);
+ return std::make_shared<ArrayData>(std::move(type), length, std::move(buffers),
std::move(child_data), null_count, offset);
}
std::shared_ptr<ArrayData> ArrayData::Make(
- std::shared_ptr<DataType> type, int64_t length,
+ std::shared_ptr<DataType> type, int64_t length,
std::vector<std::shared_ptr<Buffer>> buffers,
std::vector<std::shared_ptr<ArrayData>> child_data,
std::shared_ptr<ArrayData> dictionary, int64_t null_count, int64_t offset) {
- AdjustNonNullable(type->id(), length, &buffers, &null_count);
- auto data = std::make_shared<ArrayData>(std::move(type), length, std::move(buffers),
+ AdjustNonNullable(type->id(), length, &buffers, &null_count);
+ auto data = std::make_shared<ArrayData>(std::move(type), length, std::move(buffers),
std::move(child_data), null_count, offset);
data->dictionary = std::move(dictionary);
return data;
}
-std::shared_ptr<ArrayData> ArrayData::Make(std::shared_ptr<DataType> type, int64_t length,
- int64_t null_count, int64_t offset) {
- return std::make_shared<ArrayData>(std::move(type), length, null_count, offset);
+std::shared_ptr<ArrayData> ArrayData::Make(std::shared_ptr<DataType> type, int64_t length,
+ int64_t null_count, int64_t offset) {
+ return std::make_shared<ArrayData>(std::move(type), length, null_count, offset);
}
std::shared_ptr<ArrayData> ArrayData::Slice(int64_t off, int64_t len) const {
@@ -213,7 +213,7 @@ struct ViewDataImpl {
Status MakeDataView(const std::shared_ptr<Field>& out_field,
std::shared_ptr<ArrayData>* out) {
- const auto& out_type = out_field->type();
+ const auto& out_type = out_field->type();
const auto out_layout = out_type->layout();
AdjustInputPointer();
@@ -249,11 +249,11 @@ struct ViewDataImpl {
} else {
// No null bitmap in input, append no-nulls bitmap
out_buffers.push_back(nullptr);
- if (out_type->id() == Type::NA) {
- out_null_count = out_length;
- } else {
- out_null_count = 0;
- }
+ if (out_type->id() == Type::NA) {
+ out_null_count = out_length;
+ } else {
+ out_null_count = 0;
+ }
}
// Process other buffers in output layout
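
The effect of AdjustNonNullable above, in sketch form: when null_count is 0, Make() drops the validity buffer entirely (and for NullType it forces null_count == length).

    #include <arrow/array/data.h>
    #include <arrow/buffer.h>
    #include <arrow/type.h>

    std::shared_ptr<arrow::ArrayData> MakeInt32Data(std::shared_ptr<arrow::Buffer> values,
                                                    int64_t length) {
      // null_count == 0, so the validity slot ends up nullptr after Make().
      return arrow::ArrayData::Make(arrow::int32(), length,
                                    {/*validity=*/nullptr, std::move(values)},
                                    /*null_count=*/0);
    }
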
diff --git a/contrib/libs/apache/arrow/cpp/src/arrow/array/data.h b/contrib/libs/apache/arrow/cpp/src/arrow/array/data.h
index db166ffaa27..418d09def6b 100644
--- a/contrib/libs/apache/arrow/cpp/src/arrow/array/data.h
+++ b/contrib/libs/apache/arrow/cpp/src/arrow/array/data.h
@@ -71,47 +71,47 @@ constexpr int64_t kUnknownNullCount = -1;
/// input array and replace them with newly-allocated data, changing the output
/// data type as well.
struct ARROW_EXPORT ArrayData {
- ArrayData() = default;
+ ArrayData() = default;
- ArrayData(std::shared_ptr<DataType> type, int64_t length,
+ ArrayData(std::shared_ptr<DataType> type, int64_t length,
int64_t null_count = kUnknownNullCount, int64_t offset = 0)
- : type(std::move(type)), length(length), null_count(null_count), offset(offset) {}
+ : type(std::move(type)), length(length), null_count(null_count), offset(offset) {}
- ArrayData(std::shared_ptr<DataType> type, int64_t length,
+ ArrayData(std::shared_ptr<DataType> type, int64_t length,
std::vector<std::shared_ptr<Buffer>> buffers,
int64_t null_count = kUnknownNullCount, int64_t offset = 0)
- : ArrayData(std::move(type), length, null_count, offset) {
+ : ArrayData(std::move(type), length, null_count, offset) {
this->buffers = std::move(buffers);
}
- ArrayData(std::shared_ptr<DataType> type, int64_t length,
+ ArrayData(std::shared_ptr<DataType> type, int64_t length,
std::vector<std::shared_ptr<Buffer>> buffers,
std::vector<std::shared_ptr<ArrayData>> child_data,
int64_t null_count = kUnknownNullCount, int64_t offset = 0)
- : ArrayData(std::move(type), length, null_count, offset) {
+ : ArrayData(std::move(type), length, null_count, offset) {
this->buffers = std::move(buffers);
this->child_data = std::move(child_data);
}
- static std::shared_ptr<ArrayData> Make(std::shared_ptr<DataType> type, int64_t length,
+ static std::shared_ptr<ArrayData> Make(std::shared_ptr<DataType> type, int64_t length,
std::vector<std::shared_ptr<Buffer>> buffers,
int64_t null_count = kUnknownNullCount,
int64_t offset = 0);
static std::shared_ptr<ArrayData> Make(
- std::shared_ptr<DataType> type, int64_t length,
+ std::shared_ptr<DataType> type, int64_t length,
std::vector<std::shared_ptr<Buffer>> buffers,
std::vector<std::shared_ptr<ArrayData>> child_data,
int64_t null_count = kUnknownNullCount, int64_t offset = 0);
static std::shared_ptr<ArrayData> Make(
- std::shared_ptr<DataType> type, int64_t length,
+ std::shared_ptr<DataType> type, int64_t length,
std::vector<std::shared_ptr<Buffer>> buffers,
std::vector<std::shared_ptr<ArrayData>> child_data,
std::shared_ptr<ArrayData> dictionary, int64_t null_count = kUnknownNullCount,
int64_t offset = 0);
- static std::shared_ptr<ArrayData> Make(std::shared_ptr<DataType> type, int64_t length,
+ static std::shared_ptr<ArrayData> Make(std::shared_ptr<DataType> type, int64_t length,
int64_t null_count = kUnknownNullCount,
int64_t offset = 0);
@@ -230,11 +230,11 @@ struct ARROW_EXPORT ArrayData {
}
std::shared_ptr<DataType> type;
- int64_t length = 0;
- mutable std::atomic<int64_t> null_count{0};
+ int64_t length = 0;
+ mutable std::atomic<int64_t> null_count{0};
// The logical start point into the physical buffers (in values, not bytes).
// Note that, for child data, this must be *added* to the child data's own offset.
- int64_t offset = 0;
+ int64_t offset = 0;
std::vector<std::shared_ptr<Buffer>> buffers;
std::vector<std::shared_ptr<ArrayData>> child_data;
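
A small sketch of the offset semantics restored above: Slice() only adjusts offset/length and shares the underlying buffers, so no data is copied.

    #include <arrow/array/data.h>

    std::shared_ptr<arrow::ArrayData> DropFirst(const std::shared_ptr<arrow::ArrayData>& d) {
      return d->Slice(/*off=*/1, /*len=*/d->length - 1);
    }
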
diff --git a/contrib/libs/apache/arrow/cpp/src/arrow/array/util.cc b/contrib/libs/apache/arrow/cpp/src/arrow/array/util.cc
index 41d7242a44f..ed26ecff4e0 100644
--- a/contrib/libs/apache/arrow/cpp/src/arrow/array/util.cc
+++ b/contrib/libs/apache/arrow/cpp/src/arrow/array/util.cc
@@ -41,7 +41,7 @@
#include "arrow/util/bit_util.h"
#include "arrow/util/checked_cast.h"
#include "arrow/util/decimal.h"
-#include "arrow/util/endian.h"
+#include "arrow/util/endian.h"
#include "arrow/util/logging.h"
#include "arrow/visitor_inline.h"
@@ -52,7 +52,7 @@ using internal::checked_cast;
// ----------------------------------------------------------------------
// Loading from ArrayData
-namespace {
+namespace {
class ArrayDataWrapper {
public:
@@ -75,209 +75,209 @@ class ArrayDataWrapper {
std::shared_ptr<Array>* out_;
};
-class ArrayDataEndianSwapper {
- public:
- ArrayDataEndianSwapper(const std::shared_ptr<ArrayData>& data, int64_t length)
- : data_(data), length_(length) {
- out_ = data->Copy();
- }
-
- Status SwapType(const DataType& type) {
- RETURN_NOT_OK(VisitTypeInline(type, this));
- RETURN_NOT_OK(SwapChildren(type.fields()));
- if (internal::HasValidityBitmap(type.id())) {
- // Copy null bitmap
- out_->buffers[0] = data_->buffers[0];
- }
- return Status::OK();
- }
-
- Status SwapChildren(const FieldVector& child_fields) {
- for (size_t i = 0; i < child_fields.size(); i++) {
- ARROW_ASSIGN_OR_RAISE(out_->child_data[i],
- internal::SwapEndianArrayData(data_->child_data[i]));
- }
- return Status::OK();
- }
-
- template <typename T>
- Result<std::shared_ptr<Buffer>> ByteSwapBuffer(
- const std::shared_ptr<Buffer>& in_buffer) {
- if (sizeof(T) == 1) {
- // if the element size is 1 byte, no swap is needed; we can reuse the original buffer
- return in_buffer;
- }
- auto in_data = reinterpret_cast<const T*>(in_buffer->data());
- ARROW_ASSIGN_OR_RAISE(auto out_buffer, AllocateBuffer(in_buffer->size()));
- auto out_data = reinterpret_cast<T*>(out_buffer->mutable_data());
- int64_t length = in_buffer->size() / sizeof(T);
- for (int64_t i = 0; i < length; i++) {
- out_data[i] = BitUtil::ByteSwap(in_data[i]);
- }
- return std::move(out_buffer);
- }
-
- template <typename VALUE_TYPE>
- Status SwapOffsets(int index) {
- if (data_->buffers[index] == nullptr || data_->buffers[index]->size() == 0) {
- out_->buffers[index] = data_->buffers[index];
- return Status::OK();
- }
- // Except for unions, the offsets buffer has one more element than data->length
- ARROW_ASSIGN_OR_RAISE(out_->buffers[index],
- ByteSwapBuffer<VALUE_TYPE>(data_->buffers[index]));
- return Status::OK();
- }
-
- template <typename T>
- enable_if_t<std::is_base_of<FixedWidthType, T>::value &&
- !std::is_base_of<FixedSizeBinaryType, T>::value &&
- !std::is_base_of<DictionaryType, T>::value,
- Status>
- Visit(const T& type) {
- using value_type = typename T::c_type;
- ARROW_ASSIGN_OR_RAISE(out_->buffers[1],
- ByteSwapBuffer<value_type>(data_->buffers[1]));
- return Status::OK();
- }
-
- Status Visit(const Decimal128Type& type) {
- auto data = reinterpret_cast<const uint64_t*>(data_->buffers[1]->data());
- ARROW_ASSIGN_OR_RAISE(auto new_buffer, AllocateBuffer(data_->buffers[1]->size()));
- auto new_data = reinterpret_cast<uint64_t*>(new_buffer->mutable_data());
- int64_t length = length_;
- length = data_->buffers[1]->size() / (sizeof(uint64_t) * 2);
- for (int64_t i = 0; i < length; i++) {
- uint64_t tmp;
- auto idx = i * 2;
-#if ARROW_LITTLE_ENDIAN
- tmp = BitUtil::FromBigEndian(data[idx]);
- new_data[idx] = BitUtil::FromBigEndian(data[idx + 1]);
- new_data[idx + 1] = tmp;
-#else
- tmp = BitUtil::FromLittleEndian(data[idx]);
- new_data[idx] = BitUtil::FromLittleEndian(data[idx + 1]);
- new_data[idx + 1] = tmp;
-#endif
- }
- out_->buffers[1] = std::move(new_buffer);
- return Status::OK();
- }
-
- Status Visit(const Decimal256Type& type) {
- auto data = reinterpret_cast<const uint64_t*>(data_->buffers[1]->data());
- ARROW_ASSIGN_OR_RAISE(auto new_buffer, AllocateBuffer(data_->buffers[1]->size()));
- auto new_data = reinterpret_cast<uint64_t*>(new_buffer->mutable_data());
- int64_t length = length_;
- length = data_->buffers[1]->size() / (sizeof(uint64_t) * 4);
- for (int64_t i = 0; i < length; i++) {
- uint64_t tmp0, tmp1, tmp2;
- auto idx = i * 4;
-#if ARROW_LITTLE_ENDIAN
- tmp0 = BitUtil::FromBigEndian(data[idx]);
- tmp1 = BitUtil::FromBigEndian(data[idx + 1]);
- tmp2 = BitUtil::FromBigEndian(data[idx + 2]);
- new_data[idx] = BitUtil::FromBigEndian(data[idx + 3]);
- new_data[idx + 1] = tmp2;
- new_data[idx + 2] = tmp1;
- new_data[idx + 3] = tmp0;
-#else
- tmp0 = BitUtil::FromLittleEndian(data[idx]);
- tmp1 = BitUtil::FromLittleEndian(data[idx + 1]);
- tmp2 = BitUtil::FromLittleEndian(data[idx + 2]);
- new_data[idx] = BitUtil::FromLittleEndian(data[idx + 3]);
- new_data[idx + 1] = tmp2;
- new_data[idx + 2] = tmp1;
- new_data[idx + 3] = tmp0;
-#endif
- }
- out_->buffers[1] = std::move(new_buffer);
- return Status::OK();
- }
-
- Status Visit(const DayTimeIntervalType& type) {
- ARROW_ASSIGN_OR_RAISE(out_->buffers[1], ByteSwapBuffer<uint32_t>(data_->buffers[1]));
- return Status::OK();
- }
-
- Status Visit(const NullType& type) { return Status::OK(); }
- Status Visit(const BooleanType& type) { return Status::OK(); }
- Status Visit(const Int8Type& type) { return Status::OK(); }
- Status Visit(const UInt8Type& type) { return Status::OK(); }
- Status Visit(const FixedSizeBinaryType& type) { return Status::OK(); }
- Status Visit(const FixedSizeListType& type) { return Status::OK(); }
- Status Visit(const StructType& type) { return Status::OK(); }
- Status Visit(const UnionType& type) {
- out_->buffers[1] = data_->buffers[1];
- if (type.mode() == UnionMode::DENSE) {
- RETURN_NOT_OK(SwapOffsets<int32_t>(2));
- }
- return Status::OK();
- }
-
- template <typename T>
- enable_if_t<std::is_same<BinaryType, T>::value || std::is_same<StringType, T>::value,
- Status>
- Visit(const T& type) {
- RETURN_NOT_OK(SwapOffsets<int32_t>(1));
- out_->buffers[2] = data_->buffers[2];
- return Status::OK();
- }
-
- template <typename T>
- enable_if_t<std::is_same<LargeBinaryType, T>::value ||
- std::is_same<LargeStringType, T>::value,
- Status>
- Visit(const T& type) {
- RETURN_NOT_OK(SwapOffsets<int64_t>(1));
- out_->buffers[2] = data_->buffers[2];
- return Status::OK();
- }
-
- Status Visit(const ListType& type) {
- RETURN_NOT_OK(SwapOffsets<int32_t>(1));
- return Status::OK();
- }
- Status Visit(const LargeListType& type) {
- RETURN_NOT_OK(SwapOffsets<int64_t>(1));
- return Status::OK();
- }
-
- Status Visit(const DictionaryType& type) {
- // dictionary was already swapped in ReadDictionary() in ipc/reader.cc
- RETURN_NOT_OK(SwapType(*type.index_type()));
- return Status::OK();
- }
-
- Status Visit(const ExtensionType& type) {
- RETURN_NOT_OK(SwapType(*type.storage_type()));
- return Status::OK();
- }
-
- const std::shared_ptr<ArrayData>& data_;
- int64_t length_;
- std::shared_ptr<ArrayData> out_;
-};
-
-} // namespace
-
-namespace internal {
-
-Result<std::shared_ptr<ArrayData>> SwapEndianArrayData(
- const std::shared_ptr<ArrayData>& data) {
- if (data->offset != 0) {
- return Status::Invalid("Unsupported data format: data.offset != 0");
- }
- ArrayDataEndianSwapper swapper(data, data->length);
- RETURN_NOT_OK(swapper.SwapType(*data->type));
- return std::move(swapper.out_);
-}
-
+class ArrayDataEndianSwapper {
+ public:
+ ArrayDataEndianSwapper(const std::shared_ptr<ArrayData>& data, int64_t length)
+ : data_(data), length_(length) {
+ out_ = data->Copy();
+ }
+
+ Status SwapType(const DataType& type) {
+ RETURN_NOT_OK(VisitTypeInline(type, this));
+ RETURN_NOT_OK(SwapChildren(type.fields()));
+ if (internal::HasValidityBitmap(type.id())) {
+ // Copy null bitmap
+ out_->buffers[0] = data_->buffers[0];
+ }
+ return Status::OK();
+ }
+
+ Status SwapChildren(const FieldVector& child_fields) {
+ for (size_t i = 0; i < child_fields.size(); i++) {
+ ARROW_ASSIGN_OR_RAISE(out_->child_data[i],
+ internal::SwapEndianArrayData(data_->child_data[i]));
+ }
+ return Status::OK();
+ }
+
+ template <typename T>
+ Result<std::shared_ptr<Buffer>> ByteSwapBuffer(
+ const std::shared_ptr<Buffer>& in_buffer) {
+ if (sizeof(T) == 1) {
+ // if the element size is 1 byte, no swap is needed; we can reuse the original buffer
+ return in_buffer;
+ }
+ auto in_data = reinterpret_cast<const T*>(in_buffer->data());
+ ARROW_ASSIGN_OR_RAISE(auto out_buffer, AllocateBuffer(in_buffer->size()));
+ auto out_data = reinterpret_cast<T*>(out_buffer->mutable_data());
+ int64_t length = in_buffer->size() / sizeof(T);
+ for (int64_t i = 0; i < length; i++) {
+ out_data[i] = BitUtil::ByteSwap(in_data[i]);
+ }
+ return std::move(out_buffer);
+ }
+
+ template <typename VALUE_TYPE>
+ Status SwapOffsets(int index) {
+ if (data_->buffers[index] == nullptr || data_->buffers[index]->size() == 0) {
+ out_->buffers[index] = data_->buffers[index];
+ return Status::OK();
+ }
+ // Except for unions, the offsets buffer has one more element than data->length
+ ARROW_ASSIGN_OR_RAISE(out_->buffers[index],
+ ByteSwapBuffer<VALUE_TYPE>(data_->buffers[index]));
+ return Status::OK();
+ }
+
+ template <typename T>
+ enable_if_t<std::is_base_of<FixedWidthType, T>::value &&
+ !std::is_base_of<FixedSizeBinaryType, T>::value &&
+ !std::is_base_of<DictionaryType, T>::value,
+ Status>
+ Visit(const T& type) {
+ using value_type = typename T::c_type;
+ ARROW_ASSIGN_OR_RAISE(out_->buffers[1],
+ ByteSwapBuffer<value_type>(data_->buffers[1]));
+ return Status::OK();
+ }
+
+ Status Visit(const Decimal128Type& type) {
+ auto data = reinterpret_cast<const uint64_t*>(data_->buffers[1]->data());
+ ARROW_ASSIGN_OR_RAISE(auto new_buffer, AllocateBuffer(data_->buffers[1]->size()));
+ auto new_data = reinterpret_cast<uint64_t*>(new_buffer->mutable_data());
+ int64_t length = length_;
+ length = data_->buffers[1]->size() / (sizeof(uint64_t) * 2);
+ for (int64_t i = 0; i < length; i++) {
+ uint64_t tmp;
+ auto idx = i * 2;
+#if ARROW_LITTLE_ENDIAN
+ tmp = BitUtil::FromBigEndian(data[idx]);
+ new_data[idx] = BitUtil::FromBigEndian(data[idx + 1]);
+ new_data[idx + 1] = tmp;
+#else
+ tmp = BitUtil::FromLittleEndian(data[idx]);
+ new_data[idx] = BitUtil::FromLittleEndian(data[idx + 1]);
+ new_data[idx + 1] = tmp;
+#endif
+ }
+ out_->buffers[1] = std::move(new_buffer);
+ return Status::OK();
+ }
+
+ Status Visit(const Decimal256Type& type) {
+ auto data = reinterpret_cast<const uint64_t*>(data_->buffers[1]->data());
+ ARROW_ASSIGN_OR_RAISE(auto new_buffer, AllocateBuffer(data_->buffers[1]->size()));
+ auto new_data = reinterpret_cast<uint64_t*>(new_buffer->mutable_data());
+ int64_t length = length_;
+ length = data_->buffers[1]->size() / (sizeof(uint64_t) * 4);
+ for (int64_t i = 0; i < length; i++) {
+ uint64_t tmp0, tmp1, tmp2;
+ auto idx = i * 4;
+#if ARROW_LITTLE_ENDIAN
+ tmp0 = BitUtil::FromBigEndian(data[idx]);
+ tmp1 = BitUtil::FromBigEndian(data[idx + 1]);
+ tmp2 = BitUtil::FromBigEndian(data[idx + 2]);
+ new_data[idx] = BitUtil::FromBigEndian(data[idx + 3]);
+ new_data[idx + 1] = tmp2;
+ new_data[idx + 2] = tmp1;
+ new_data[idx + 3] = tmp0;
+#else
+ tmp0 = BitUtil::FromLittleEndian(data[idx]);
+ tmp1 = BitUtil::FromLittleEndian(data[idx + 1]);
+ tmp2 = BitUtil::FromLittleEndian(data[idx + 2]);
+ new_data[idx] = BitUtil::FromLittleEndian(data[idx + 3]);
+ new_data[idx + 1] = tmp2;
+ new_data[idx + 2] = tmp1;
+ new_data[idx + 3] = tmp0;
+#endif
+ }
+ out_->buffers[1] = std::move(new_buffer);
+ return Status::OK();
+ }
+
+ Status Visit(const DayTimeIntervalType& type) {
+ ARROW_ASSIGN_OR_RAISE(out_->buffers[1], ByteSwapBuffer<uint32_t>(data_->buffers[1]));
+ return Status::OK();
+ }
+
+ Status Visit(const NullType& type) { return Status::OK(); }
+ Status Visit(const BooleanType& type) { return Status::OK(); }
+ Status Visit(const Int8Type& type) { return Status::OK(); }
+ Status Visit(const UInt8Type& type) { return Status::OK(); }
+ Status Visit(const FixedSizeBinaryType& type) { return Status::OK(); }
+ Status Visit(const FixedSizeListType& type) { return Status::OK(); }
+ Status Visit(const StructType& type) { return Status::OK(); }
+ Status Visit(const UnionType& type) {
+ out_->buffers[1] = data_->buffers[1];
+ if (type.mode() == UnionMode::DENSE) {
+ RETURN_NOT_OK(SwapOffsets<int32_t>(2));
+ }
+ return Status::OK();
+ }
+
+ template <typename T>
+ enable_if_t<std::is_same<BinaryType, T>::value || std::is_same<StringType, T>::value,
+ Status>
+ Visit(const T& type) {
+ RETURN_NOT_OK(SwapOffsets<int32_t>(1));
+ out_->buffers[2] = data_->buffers[2];
+ return Status::OK();
+ }
+
+ template <typename T>
+ enable_if_t<std::is_same<LargeBinaryType, T>::value ||
+ std::is_same<LargeStringType, T>::value,
+ Status>
+ Visit(const T& type) {
+ RETURN_NOT_OK(SwapOffsets<int64_t>(1));
+ out_->buffers[2] = data_->buffers[2];
+ return Status::OK();
+ }
+
+ Status Visit(const ListType& type) {
+ RETURN_NOT_OK(SwapOffsets<int32_t>(1));
+ return Status::OK();
+ }
+ Status Visit(const LargeListType& type) {
+ RETURN_NOT_OK(SwapOffsets<int64_t>(1));
+ return Status::OK();
+ }
+
+ Status Visit(const DictionaryType& type) {
+    // The dictionary was already swapped in ReadDictionary() in ipc/reader.cc
+ RETURN_NOT_OK(SwapType(*type.index_type()));
+ return Status::OK();
+ }
+
+ Status Visit(const ExtensionType& type) {
+ RETURN_NOT_OK(SwapType(*type.storage_type()));
+ return Status::OK();
+ }
+
+ const std::shared_ptr<ArrayData>& data_;
+ int64_t length_;
+ std::shared_ptr<ArrayData> out_;
+};
+
+} // namespace
+
+namespace internal {
+
+Result<std::shared_ptr<ArrayData>> SwapEndianArrayData(
+ const std::shared_ptr<ArrayData>& data) {
+ if (data->offset != 0) {
+ return Status::Invalid("Unsupported data format: data.offset != 0");
+ }
+ ArrayDataEndianSwapper swapper(data, data->length);
+ RETURN_NOT_OK(swapper.SwapType(*data->type));
+ return std::move(swapper.out_);
+}
+
} // namespace internal
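
As a point of reference, a minimal usage sketch of the swapper restored above (illustrative only: SwapToOtherEndian is a hypothetical helper name, and the call assumes Arrow's internal headers are available):

    #include <arrow/array.h>
    #include <arrow/array/util.h>  // arrow::internal::SwapEndianArrayData, MakeArray
    #include <arrow/result.h>
    #include <memory>

    // Swap the byte order of every element, producing a new Array.
    // SwapEndianArrayData requires data->offset == 0, reuses the validity
    // bitmap as-is, and does not swap dictionaries (those are handled
    // separately, e.g. by ReadDictionary() in ipc/reader.cc).
    arrow::Result<std::shared_ptr<arrow::Array>> SwapToOtherEndian(
        const std::shared_ptr<arrow::Array>& array) {
      ARROW_ASSIGN_OR_RAISE(auto swapped,
                            arrow::internal::SwapEndianArrayData(array->data()));
      return arrow::MakeArray(swapped);
    }
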
std::shared_ptr<Array> MakeArray(const std::shared_ptr<ArrayData>& data) {
std::shared_ptr<Array> out;
- ArrayDataWrapper wrapper_visitor(data, &out);
+ ArrayDataWrapper wrapper_visitor(data, &out);
DCHECK_OK(VisitTypeInline(*data->type, &wrapper_visitor));
DCHECK(out);
return out;
@@ -286,7 +286,7 @@ std::shared_ptr<Array> MakeArray(const std::shared_ptr<ArrayData>& data) {
// ----------------------------------------------------------------------
// Misc APIs
-namespace {
+namespace {
// get the maximum buffer length required, then allocate a single zeroed buffer
// to use anywhere a buffer is required
@@ -496,9 +496,9 @@ class RepeatedArrayFactory {
return out_;
}
- Status Visit(const NullType& type) {
- DCHECK(false); // already forwarded to MakeArrayOfNull
- return Status::OK();
+ Status Visit(const NullType& type) {
+ DCHECK(false); // already forwarded to MakeArrayOfNull
+ return Status::OK();
}
Status Visit(const BooleanType&) {
@@ -510,29 +510,29 @@ class RepeatedArrayFactory {
}
template <typename T>
- enable_if_t<is_number_type<T>::value || is_temporal_type<T>::value, Status> Visit(
- const T&) {
+ enable_if_t<is_number_type<T>::value || is_temporal_type<T>::value, Status> Visit(
+ const T&) {
auto value = checked_cast<const typename TypeTraits<T>::ScalarType&>(scalar_).value;
return FinishFixedWidth(&value, sizeof(value));
}
- Status Visit(const FixedSizeBinaryType& type) {
- auto value = checked_cast<const FixedSizeBinaryScalar&>(scalar_).value;
- return FinishFixedWidth(value->data(), type.byte_width());
- }
-
- template <typename T>
- enable_if_decimal<T, Status> Visit(const T&) {
- using ScalarType = typename TypeTraits<T>::ScalarType;
- auto value = checked_cast<const ScalarType&>(scalar_).value.ToBytes();
+ Status Visit(const FixedSizeBinaryType& type) {
+ auto value = checked_cast<const FixedSizeBinaryScalar&>(scalar_).value;
+ return FinishFixedWidth(value->data(), type.byte_width());
+ }
+
+ template <typename T>
+ enable_if_decimal<T, Status> Visit(const T&) {
+ using ScalarType = typename TypeTraits<T>::ScalarType;
+ auto value = checked_cast<const ScalarType&>(scalar_).value.ToBytes();
+ return FinishFixedWidth(value.data(), value.size());
+ }
+
+ Status Visit(const Decimal256Type&) {
+ auto value = checked_cast<const Decimal256Scalar&>(scalar_).value.ToBytes();
return FinishFixedWidth(value.data(), value.size());
}
- Status Visit(const Decimal256Type&) {
- auto value = checked_cast<const Decimal256Scalar&>(scalar_).value.ToBytes();
- return FinishFixedWidth(value.data(), value.size());
- }
-
template <typename T>
enable_if_base_binary<T, Status> Visit(const T&) {
std::shared_ptr<Buffer> value =
@@ -613,18 +613,18 @@ class RepeatedArrayFactory {
return Status::OK();
}
- Status Visit(const ExtensionType& type) {
- return Status::NotImplemented("construction from scalar of type ", *scalar_.type);
- }
-
- Status Visit(const DenseUnionType& type) {
- return Status::NotImplemented("construction from scalar of type ", *scalar_.type);
- }
-
- Status Visit(const SparseUnionType& type) {
- return Status::NotImplemented("construction from scalar of type ", *scalar_.type);
- }
-
+ Status Visit(const ExtensionType& type) {
+ return Status::NotImplemented("construction from scalar of type ", *scalar_.type);
+ }
+
+ Status Visit(const DenseUnionType& type) {
+ return Status::NotImplemented("construction from scalar of type ", *scalar_.type);
+ }
+
+ Status Visit(const SparseUnionType& type) {
+ return Status::NotImplemented("construction from scalar of type ", *scalar_.type);
+ }
+
template <typename OffsetType>
Status CreateOffsetsBuffer(OffsetType value_length, std::shared_ptr<Buffer>* out) {
TypedBufferBuilder<OffsetType> builder(pool_);
@@ -660,11 +660,11 @@ class RepeatedArrayFactory {
std::shared_ptr<Array> out_;
};
-} // namespace
+} // namespace
Result<std::shared_ptr<Array>> MakeArrayOfNull(const std::shared_ptr<DataType>& type,
int64_t length, MemoryPool* pool) {
- ARROW_ASSIGN_OR_RAISE(auto data, NullArrayFactory(pool, type, length).Create());
+ ARROW_ASSIGN_OR_RAISE(auto data, NullArrayFactory(pool, type, length).Create());
return MakeArray(data);
}
@@ -673,7 +673,7 @@ Result<std::shared_ptr<Array>> MakeArrayFromScalar(const Scalar& scalar, int64_t
if (!scalar.is_valid) {
return MakeArrayOfNull(scalar.type, length, pool);
}
- return RepeatedArrayFactory(pool, scalar, length).Create();
+ return RepeatedArrayFactory(pool, scalar, length).Create();
}
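
A short sketch of how the two factories compose (illustrative; Demo is a hypothetical name):

    #include <arrow/array/util.h>  // MakeArrayOfNull, MakeArrayFromScalar
    #include <arrow/result.h>
    #include <arrow/scalar.h>
    #include <arrow/type.h>

    arrow::Status Demo() {
      // An int64 array of length 5 whose slots are all null.
      ARROW_ASSIGN_OR_RAISE(auto nulls, arrow::MakeArrayOfNull(arrow::int64(), 5));
      // An int64 array of length 5 repeating the value 42; an invalid (null)
      // scalar would have been forwarded to MakeArrayOfNull instead, as above.
      arrow::Int64Scalar value(42);
      ARROW_ASSIGN_OR_RAISE(auto repeated, arrow::MakeArrayFromScalar(value, 5));
      return arrow::Status::OK();
    }
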
namespace internal {
diff --git a/contrib/libs/apache/arrow/cpp/src/arrow/array/util.h b/contrib/libs/apache/arrow/cpp/src/arrow/array/util.h
index ac71c6d8570..3ef4e08828f 100644
--- a/contrib/libs/apache/arrow/cpp/src/arrow/array/util.h
+++ b/contrib/libs/apache/arrow/cpp/src/arrow/array/util.h
@@ -56,17 +56,17 @@ Result<std::shared_ptr<Array>> MakeArrayFromScalar(
namespace internal {
-/// \brief Swap endian of each element in a generic ArrayData
-///
-/// As dictionaries are often shared between different arrays, dictionaries
-/// are not swapped by this function and should be handled separately.
-///
-/// \param[in] data the array contents
-/// \return the resulting ArrayData whose elements were swapped
-ARROW_EXPORT
-Result<std::shared_ptr<ArrayData>> SwapEndianArrayData(
- const std::shared_ptr<ArrayData>& data);
-
+/// \brief Swap endian of each element in a generic ArrayData
+///
+/// As dictionaries are often shared between different arrays, dictionaries
+/// are not swapped by this function and should be handled separately.
+///
+/// \param[in] data the array contents
+/// \return the resulting ArrayData whose elements were swapped
+ARROW_EXPORT
+Result<std::shared_ptr<ArrayData>> SwapEndianArrayData(
+ const std::shared_ptr<ArrayData>& data);
+
/// Given a number of ArrayVectors, treat each ArrayVector as the
/// chunks of a chunked array. Then rechunk each ArrayVector such that
/// all ArrayVectors are chunked identically. It is mandatory that
diff --git a/contrib/libs/apache/arrow/cpp/src/arrow/array/validate.cc b/contrib/libs/apache/arrow/cpp/src/arrow/array/validate.cc
index 5adc18bd495..5cc3bacf282 100644
--- a/contrib/libs/apache/arrow/cpp/src/arrow/array/validate.cc
+++ b/contrib/libs/apache/arrow/cpp/src/arrow/array/validate.cc
@@ -23,12 +23,12 @@
#include "arrow/extension_type.h"
#include "arrow/type.h"
#include "arrow/type_traits.h"
-#include "arrow/util/bit_block_counter.h"
+#include "arrow/util/bit_block_counter.h"
#include "arrow/util/bit_util.h"
#include "arrow/util/checked_cast.h"
#include "arrow/util/int_util_internal.h"
#include "arrow/util/logging.h"
-#include "arrow/util/utf8.h"
+#include "arrow/util/utf8.h"
#include "arrow/visitor_inline.h"
namespace arrow {
@@ -39,172 +39,172 @@ namespace internal {
namespace {
-struct ValidateArrayImpl {
- const ArrayData& data;
+struct ValidateArrayImpl {
+ const ArrayData& data;
- Status Validate() { return ValidateWithType(*data.type); }
-
- Status ValidateWithType(const DataType& type) { return VisitTypeInline(type, this); }
-
- Status Visit(const NullType&) {
- if (data.null_count != data.length) {
- return Status::Invalid("Null array null_count unequal to its length");
+ Status Validate() { return ValidateWithType(*data.type); }
+
+ Status ValidateWithType(const DataType& type) { return VisitTypeInline(type, this); }
+
+ Status Visit(const NullType&) {
+ if (data.null_count != data.length) {
+ return Status::Invalid("Null array null_count unequal to its length");
}
return Status::OK();
}
- Status Visit(const FixedWidthType&) {
- if (data.length > 0) {
- if (!IsBufferValid(1)) {
- return Status::Invalid("Missing values buffer in non-empty array");
- }
+ Status Visit(const FixedWidthType&) {
+ if (data.length > 0) {
+ if (!IsBufferValid(1)) {
+ return Status::Invalid("Missing values buffer in non-empty array");
+ }
}
return Status::OK();
}
- Status Visit(const StringType& type) { return ValidateBinaryLike(type); }
+ Status Visit(const StringType& type) { return ValidateBinaryLike(type); }
- Status Visit(const BinaryType& type) { return ValidateBinaryLike(type); }
+ Status Visit(const BinaryType& type) { return ValidateBinaryLike(type); }
- Status Visit(const LargeStringType& type) { return ValidateBinaryLike(type); }
+ Status Visit(const LargeStringType& type) { return ValidateBinaryLike(type); }
- Status Visit(const LargeBinaryType& type) { return ValidateBinaryLike(type); }
+ Status Visit(const LargeBinaryType& type) { return ValidateBinaryLike(type); }
- Status Visit(const ListType& type) { return ValidateListLike(type); }
+ Status Visit(const ListType& type) { return ValidateListLike(type); }
- Status Visit(const LargeListType& type) { return ValidateListLike(type); }
+ Status Visit(const LargeListType& type) { return ValidateListLike(type); }
- Status Visit(const MapType& type) { return ValidateListLike(type); }
-
- Status Visit(const FixedSizeListType& type) {
- const ArrayData& values = *data.child_data[0];
- const int64_t list_size = type.list_size();
- if (list_size < 0) {
- return Status::Invalid("Fixed size list has negative list size");
+ Status Visit(const MapType& type) { return ValidateListLike(type); }
+
+ Status Visit(const FixedSizeListType& type) {
+ const ArrayData& values = *data.child_data[0];
+ const int64_t list_size = type.list_size();
+ if (list_size < 0) {
+ return Status::Invalid("Fixed size list has negative list size");
}
int64_t expected_values_length = -1;
- if (MultiplyWithOverflow(data.length, list_size, &expected_values_length) ||
- values.length != expected_values_length) {
- return Status::Invalid("Values length (", values.length,
- ") is not equal to the length (", data.length,
- ") multiplied by the value size (", list_size, ")");
- }
-
- const Status child_valid = ValidateArray(values);
- if (!child_valid.ok()) {
- return Status::Invalid("Fixed size list child array invalid: ",
- child_valid.ToString());
- }
-
+ if (MultiplyWithOverflow(data.length, list_size, &expected_values_length) ||
+ values.length != expected_values_length) {
+ return Status::Invalid("Values length (", values.length,
+ ") is not equal to the length (", data.length,
+ ") multiplied by the value size (", list_size, ")");
+ }
+
+ const Status child_valid = ValidateArray(values);
+ if (!child_valid.ok()) {
+ return Status::Invalid("Fixed size list child array invalid: ",
+ child_valid.ToString());
+ }
+
return Status::OK();
}
- Status Visit(const StructType& type) {
- for (int i = 0; i < type.num_fields(); ++i) {
- const auto& field_data = *data.child_data[i];
+ Status Visit(const StructType& type) {
+ for (int i = 0; i < type.num_fields(); ++i) {
+ const auto& field_data = *data.child_data[i];
- // Validate child first, to catch nonsensical length / offset etc.
- const Status field_valid = ValidateArray(field_data);
- if (!field_valid.ok()) {
+ // Validate child first, to catch nonsensical length / offset etc.
+ const Status field_valid = ValidateArray(field_data);
+ if (!field_valid.ok()) {
return Status::Invalid("Struct child array #", i,
- " invalid: ", field_valid.ToString());
+ " invalid: ", field_valid.ToString());
}
- if (field_data.length < data.length + data.offset) {
+ if (field_data.length < data.length + data.offset) {
return Status::Invalid("Struct child array #", i,
- " has length smaller than expected for struct array (",
- field_data.length, " < ", data.length + data.offset, ")");
+ " has length smaller than expected for struct array (",
+ field_data.length, " < ", data.length + data.offset, ")");
}
- const auto& field_type = type.field(i)->type();
- if (!field_data.type->Equals(*field_type)) {
- return Status::Invalid("Struct child array #", i, " does not match type field: ",
- field_data.type->ToString(), " vs ",
- field_type->ToString());
+ const auto& field_type = type.field(i)->type();
+ if (!field_data.type->Equals(*field_type)) {
+ return Status::Invalid("Struct child array #", i, " does not match type field: ",
+ field_data.type->ToString(), " vs ",
+ field_type->ToString());
}
}
return Status::OK();
}
- Status Visit(const UnionType& type) {
- for (int i = 0; i < type.num_fields(); ++i) {
- const auto& field_data = *data.child_data[i];
-
- // Validate child first, to catch nonsensical length / offset etc.
- const Status field_valid = ValidateArray(field_data);
- if (!field_valid.ok()) {
- return Status::Invalid("Union child array #", i,
- " invalid: ", field_valid.ToString());
+ Status Visit(const UnionType& type) {
+ for (int i = 0; i < type.num_fields(); ++i) {
+ const auto& field_data = *data.child_data[i];
+
+ // Validate child first, to catch nonsensical length / offset etc.
+ const Status field_valid = ValidateArray(field_data);
+ if (!field_valid.ok()) {
+ return Status::Invalid("Union child array #", i,
+ " invalid: ", field_valid.ToString());
}
- if (type.mode() == UnionMode::SPARSE &&
- field_data.length < data.length + data.offset) {
+ if (type.mode() == UnionMode::SPARSE &&
+ field_data.length < data.length + data.offset) {
return Status::Invalid("Sparse union child array #", i,
- " has length smaller than expected for union array (",
- field_data.length, " < ", data.length + data.offset, ")");
+ " has length smaller than expected for union array (",
+ field_data.length, " < ", data.length + data.offset, ")");
}
- const auto& field_type = type.field(i)->type();
- if (!field_data.type->Equals(*field_type)) {
- return Status::Invalid("Union child array #", i, " does not match type field: ",
- field_data.type->ToString(), " vs ",
- field_type->ToString());
+ const auto& field_type = type.field(i)->type();
+ if (!field_data.type->Equals(*field_type)) {
+ return Status::Invalid("Union child array #", i, " does not match type field: ",
+ field_data.type->ToString(), " vs ",
+ field_type->ToString());
}
}
return Status::OK();
}
- Status Visit(const DictionaryType& type) {
- Type::type index_type_id = type.index_type()->id();
+ Status Visit(const DictionaryType& type) {
+ Type::type index_type_id = type.index_type()->id();
if (!is_integer(index_type_id)) {
return Status::Invalid("Dictionary indices must be integer type");
}
- if (!data.dictionary) {
+ if (!data.dictionary) {
return Status::Invalid("Dictionary values must be non-null");
}
- const Status dict_valid = ValidateArray(*data.dictionary);
+ const Status dict_valid = ValidateArray(*data.dictionary);
if (!dict_valid.ok()) {
return Status::Invalid("Dictionary array invalid: ", dict_valid.ToString());
}
- // Visit indices
- return ValidateWithType(*type.index_type());
+ // Visit indices
+ return ValidateWithType(*type.index_type());
+ }
+
+ Status Visit(const ExtensionType& type) {
+ // Visit storage
+ return ValidateWithType(*type.storage_type());
}
- Status Visit(const ExtensionType& type) {
- // Visit storage
- return ValidateWithType(*type.storage_type());
- }
+ private:
+ bool IsBufferValid(int index) { return IsBufferValid(data, index); }
- private:
- bool IsBufferValid(int index) { return IsBufferValid(data, index); }
-
- static bool IsBufferValid(const ArrayData& data, int index) {
- return data.buffers[index] != nullptr && data.buffers[index]->address() != 0;
+ static bool IsBufferValid(const ArrayData& data, int index) {
+ return data.buffers[index] != nullptr && data.buffers[index]->address() != 0;
}
- template <typename BinaryType>
- Status ValidateBinaryLike(const BinaryType& type) {
- if (!IsBufferValid(2)) {
- return Status::Invalid("Value data buffer is null");
+ template <typename BinaryType>
+ Status ValidateBinaryLike(const BinaryType& type) {
+ if (!IsBufferValid(2)) {
+ return Status::Invalid("Value data buffer is null");
}
- // First validate offsets, to make sure the accesses below are valid
- RETURN_NOT_OK(ValidateOffsets(type));
+ // First validate offsets, to make sure the accesses below are valid
+ RETURN_NOT_OK(ValidateOffsets(type));
+
+ if (data.length > 0 && data.buffers[1]->is_cpu()) {
+ using offset_type = typename BinaryType::offset_type;
- if (data.length > 0 && data.buffers[1]->is_cpu()) {
- using offset_type = typename BinaryType::offset_type;
-
- const auto offsets = data.GetValues<offset_type>(1);
- const Buffer& values = *data.buffers[2];
-
- const auto first_offset = offsets[0];
- const auto last_offset = offsets[data.length];
+ const auto offsets = data.GetValues<offset_type>(1);
+ const Buffer& values = *data.buffers[2];
+
+ const auto first_offset = offsets[0];
+ const auto last_offset = offsets[data.length];
// This early test avoids undefined behaviour when computing `data_extent`
if (first_offset < 0 || last_offset < 0) {
return Status::Invalid("Negative offsets in binary array");
}
const auto data_extent = last_offset - first_offset;
- const auto values_length = values.size();
+ const auto values_length = values.size();
if (values_length < data_extent) {
return Status::Invalid("Length spanned by binary offsets (", data_extent,
") larger than values array (size ", values_length, ")");
@@ -221,27 +221,27 @@ struct ValidateArrayImpl {
return Status::OK();
}
- template <typename ListType>
- Status ValidateListLike(const ListType& type) {
+ template <typename ListType>
+ Status ValidateListLike(const ListType& type) {
// First validate offsets, to make sure the accesses below are valid
- RETURN_NOT_OK(ValidateOffsets(type));
+ RETURN_NOT_OK(ValidateOffsets(type));
+
+ const ArrayData& values = *data.child_data[0];
- const ArrayData& values = *data.child_data[0];
-
// An empty list array can have 0 offsets
- if (data.length > 0 && data.buffers[1]->is_cpu()) {
- using offset_type = typename ListType::offset_type;
-
- const auto offsets = data.GetValues<offset_type>(1);
-
- const auto first_offset = offsets[0];
- const auto last_offset = offsets[data.length];
+ if (data.length > 0 && data.buffers[1]->is_cpu()) {
+ using offset_type = typename ListType::offset_type;
+
+ const auto offsets = data.GetValues<offset_type>(1);
+
+ const auto first_offset = offsets[0];
+ const auto last_offset = offsets[data.length];
// This early test avoids undefined behaviour when computing `data_extent`
if (first_offset < 0 || last_offset < 0) {
return Status::Invalid("Negative offsets in list array");
}
const auto data_extent = last_offset - first_offset;
- const auto values_length = values.length;
+ const auto values_length = values.length;
if (values_length < data_extent) {
return Status::Invalid("Length spanned by list offsets (", data_extent,
") larger than values array (length ", values_length, ")");
@@ -256,32 +256,32 @@ struct ValidateArrayImpl {
}
}
- const Status child_valid = ValidateArray(values);
+ const Status child_valid = ValidateArray(values);
if (!child_valid.ok()) {
return Status::Invalid("List child array invalid: ", child_valid.ToString());
}
return Status::OK();
}
- template <typename TypeClass>
- Status ValidateOffsets(const TypeClass& type) {
- using offset_type = typename TypeClass::offset_type;
+ template <typename TypeClass>
+ Status ValidateOffsets(const TypeClass& type) {
+ using offset_type = typename TypeClass::offset_type;
- const Buffer* offsets = data.buffers[1].get();
- if (offsets == nullptr) {
- // For length 0, an empty offsets buffer seems accepted as a special case
- // (ARROW-544)
- if (data.length > 0) {
- return Status::Invalid("Non-empty array but offsets are null");
+ const Buffer* offsets = data.buffers[1].get();
+ if (offsets == nullptr) {
+ // For length 0, an empty offsets buffer seems accepted as a special case
+ // (ARROW-544)
+ if (data.length > 0) {
+ return Status::Invalid("Non-empty array but offsets are null");
}
return Status::OK();
}
// An empty list array can have 0 offsets
- auto required_offsets = (data.length > 0) ? data.length + data.offset + 1 : 0;
- if (offsets->size() / static_cast<int32_t>(sizeof(offset_type)) < required_offsets) {
- return Status::Invalid("Offsets buffer size (bytes): ", offsets->size(),
- " isn't large enough for length: ", data.length);
+ auto required_offsets = (data.length > 0) ? data.length + data.offset + 1 : 0;
+ if (offsets->size() / static_cast<int32_t>(sizeof(offset_type)) < required_offsets) {
+ return Status::Invalid("Offsets buffer size (bytes): ", offsets->size(),
+ " isn't large enough for length: ", data.length);
}
return Status::OK();
@@ -291,12 +291,12 @@ struct ValidateArrayImpl {
} // namespace
ARROW_EXPORT
-Status ValidateArray(const ArrayData& data) {
- // First check the data layout conforms to the spec
- const DataType& type = *data.type;
+Status ValidateArray(const ArrayData& data) {
+ // First check the data layout conforms to the spec
+ const DataType& type = *data.type;
const auto layout = type.layout();
- if (data.length < 0) {
+ if (data.length < 0) {
return Status::Invalid("Array length is negative");
}
@@ -306,14 +306,14 @@ Status ValidateArray(const ArrayData& data) {
"of type ",
type.ToString(), ", got ", data.buffers.size());
}
-
+
// This check is required to avoid addition overflow below
int64_t length_plus_offset = -1;
- if (AddWithOverflow(data.length, data.offset, &length_plus_offset)) {
+ if (AddWithOverflow(data.length, data.offset, &length_plus_offset)) {
return Status::Invalid("Array of type ", type.ToString(),
" has impossibly large length and offset");
}
-
+
for (int i = 0; i < static_cast<int>(data.buffers.size()); ++i) {
const auto& buffer = data.buffers[i];
const auto& spec = layout.buffers[i];
@@ -340,7 +340,7 @@ Status ValidateArray(const ArrayData& data) {
}
if (buffer->size() < min_buffer_size) {
return Status::Invalid("Buffer #", i, " too small in array of type ",
- type.ToString(), " and length ", data.length,
+ type.ToString(), " and length ", data.length,
": expected at least ", min_buffer_size, " byte(s), got ",
buffer->size());
}
@@ -352,12 +352,12 @@ Status ValidateArray(const ArrayData& data) {
// Check null_count() *after* validating the buffer sizes, to avoid
// reading out of bounds.
- if (data.null_count > data.length) {
+ if (data.null_count > data.length) {
return Status::Invalid("Null count exceeds array length");
}
- if (data.null_count < 0 && data.null_count != kUnknownNullCount) {
- return Status::Invalid("Negative null count");
- }
+ if (data.null_count < 0 && data.null_count != kUnknownNullCount) {
+ return Status::Invalid("Negative null count");
+ }
if (type.id() != Type::EXTENSION) {
if (data.child_data.size() != static_cast<size_t>(type.num_fields())) {
@@ -376,142 +376,142 @@ Status ValidateArray(const ArrayData& data) {
type.ToString());
}
- ValidateArrayImpl validator{data};
- return validator.Validate();
+ ValidateArrayImpl validator{data};
+ return validator.Validate();
}
-ARROW_EXPORT
-Status ValidateArray(const Array& array) { return ValidateArray(*array.data()); }
-
+ARROW_EXPORT
+Status ValidateArray(const Array& array) { return ValidateArray(*array.data()); }
+
///////////////////////////////////////////////////////////////////////////
-// ValidateArrayFull: expensive validation checks
+// ValidateArrayFull: expensive validation checks
namespace {
-struct UTF8DataValidator {
- const ArrayData& data;
+struct UTF8DataValidator {
+ const ArrayData& data;
- Status Visit(const DataType&) {
+ Status Visit(const DataType&) {
// Default, should be unreachable
return Status::NotImplemented("");
}
- template <typename StringType>
- enable_if_string<StringType, Status> Visit(const StringType&) {
- util::InitializeUTF8();
-
- int64_t i = 0;
- return VisitArrayDataInline<StringType>(
- data,
- [&](util::string_view v) {
- if (ARROW_PREDICT_FALSE(!util::ValidateUTF8(v))) {
- return Status::Invalid("Invalid UTF8 sequence at string index ", i);
- }
- ++i;
- return Status::OK();
- },
- [&]() {
- ++i;
- return Status::OK();
- });
+ template <typename StringType>
+ enable_if_string<StringType, Status> Visit(const StringType&) {
+ util::InitializeUTF8();
+
+ int64_t i = 0;
+ return VisitArrayDataInline<StringType>(
+ data,
+ [&](util::string_view v) {
+ if (ARROW_PREDICT_FALSE(!util::ValidateUTF8(v))) {
+ return Status::Invalid("Invalid UTF8 sequence at string index ", i);
+ }
+ ++i;
+ return Status::OK();
+ },
+ [&]() {
+ ++i;
+ return Status::OK();
+ });
}
};
-struct BoundsChecker {
- const ArrayData& data;
- int64_t min_value;
- int64_t max_value;
-
- Status Visit(const DataType&) {
- // Default, should be unreachable
- return Status::NotImplemented("");
- }
-
- template <typename IntegerType>
- enable_if_integer<IntegerType, Status> Visit(const IntegerType&) {
- using c_type = typename IntegerType::c_type;
-
- int64_t i = 0;
- return VisitArrayDataInline<IntegerType>(
- data,
- [&](c_type value) {
- const auto v = static_cast<int64_t>(value);
- if (ARROW_PREDICT_FALSE(v < min_value || v > max_value)) {
- return Status::Invalid("Value at position ", i, " out of bounds: ", v,
- " (should be in [", min_value, ", ", max_value, "])");
- }
- ++i;
- return Status::OK();
- },
- [&]() {
- ++i;
- return Status::OK();
- });
- }
-};
-
-struct ValidateArrayFullImpl {
- const ArrayData& data;
-
- Status Validate() { return ValidateWithType(*data.type); }
-
- Status ValidateWithType(const DataType& type) { return VisitTypeInline(type, this); }
-
- Status Visit(const NullType& type) { return Status::OK(); }
-
- Status Visit(const FixedWidthType& type) { return Status::OK(); }
-
- Status Visit(const StringType& type) {
- RETURN_NOT_OK(ValidateBinaryLike(type));
- return ValidateUTF8(data);
- }
-
- Status Visit(const LargeStringType& type) {
- RETURN_NOT_OK(ValidateBinaryLike(type));
- return ValidateUTF8(data);
- }
-
- Status Visit(const BinaryType& type) { return ValidateBinaryLike(type); }
-
- Status Visit(const LargeBinaryType& type) { return ValidateBinaryLike(type); }
-
- Status Visit(const ListType& type) { return ValidateListLike(type); }
-
- Status Visit(const LargeListType& type) { return ValidateListLike(type); }
-
- Status Visit(const MapType& type) { return ValidateListLike(type); }
-
- Status Visit(const FixedSizeListType& type) {
- const ArrayData& child = *data.child_data[0];
- const Status child_valid = ValidateArrayFull(child);
- if (!child_valid.ok()) {
- return Status::Invalid("Fixed size list child array invalid: ",
- child_valid.ToString());
- }
- return Status::OK();
- }
-
- Status Visit(const StructType& type) {
- // Validate children
- for (int64_t i = 0; i < type.num_fields(); ++i) {
- const ArrayData& field = *data.child_data[i];
- const Status field_valid = ValidateArrayFull(field);
- if (!field_valid.ok()) {
- return Status::Invalid("Struct child array #", i,
- " invalid: ", field_valid.ToString());
+struct BoundsChecker {
+ const ArrayData& data;
+ int64_t min_value;
+ int64_t max_value;
+
+ Status Visit(const DataType&) {
+ // Default, should be unreachable
+ return Status::NotImplemented("");
+ }
+
+ template <typename IntegerType>
+ enable_if_integer<IntegerType, Status> Visit(const IntegerType&) {
+ using c_type = typename IntegerType::c_type;
+
+ int64_t i = 0;
+ return VisitArrayDataInline<IntegerType>(
+ data,
+ [&](c_type value) {
+ const auto v = static_cast<int64_t>(value);
+ if (ARROW_PREDICT_FALSE(v < min_value || v > max_value)) {
+ return Status::Invalid("Value at position ", i, " out of bounds: ", v,
+ " (should be in [", min_value, ", ", max_value, "])");
+ }
+ ++i;
+ return Status::OK();
+ },
+ [&]() {
+ ++i;
+ return Status::OK();
+ });
+ }
+};
+
+struct ValidateArrayFullImpl {
+ const ArrayData& data;
+
+ Status Validate() { return ValidateWithType(*data.type); }
+
+ Status ValidateWithType(const DataType& type) { return VisitTypeInline(type, this); }
+
+ Status Visit(const NullType& type) { return Status::OK(); }
+
+ Status Visit(const FixedWidthType& type) { return Status::OK(); }
+
+ Status Visit(const StringType& type) {
+ RETURN_NOT_OK(ValidateBinaryLike(type));
+ return ValidateUTF8(data);
+ }
+
+ Status Visit(const LargeStringType& type) {
+ RETURN_NOT_OK(ValidateBinaryLike(type));
+ return ValidateUTF8(data);
+ }
+
+ Status Visit(const BinaryType& type) { return ValidateBinaryLike(type); }
+
+ Status Visit(const LargeBinaryType& type) { return ValidateBinaryLike(type); }
+
+ Status Visit(const ListType& type) { return ValidateListLike(type); }
+
+ Status Visit(const LargeListType& type) { return ValidateListLike(type); }
+
+ Status Visit(const MapType& type) { return ValidateListLike(type); }
+
+ Status Visit(const FixedSizeListType& type) {
+ const ArrayData& child = *data.child_data[0];
+ const Status child_valid = ValidateArrayFull(child);
+ if (!child_valid.ok()) {
+ return Status::Invalid("Fixed size list child array invalid: ",
+ child_valid.ToString());
+ }
+ return Status::OK();
+ }
+
+ Status Visit(const StructType& type) {
+ // Validate children
+ for (int64_t i = 0; i < type.num_fields(); ++i) {
+ const ArrayData& field = *data.child_data[i];
+ const Status field_valid = ValidateArrayFull(field);
+ if (!field_valid.ok()) {
+ return Status::Invalid("Struct child array #", i,
+ " invalid: ", field_valid.ToString());
}
- }
- return Status::OK();
- }
-
- Status Visit(const UnionType& type) {
- const auto& child_ids = type.child_ids();
- const auto& type_codes_map = type.type_codes();
-
- const int8_t* type_codes = data.GetValues<int8_t>(1);
-
- for (int64_t i = 0; i < data.length; ++i) {
- // Note that union arrays never have top-level nulls
+ }
+ return Status::OK();
+ }
+
+ Status Visit(const UnionType& type) {
+ const auto& child_ids = type.child_ids();
+ const auto& type_codes_map = type.type_codes();
+
+ const int8_t* type_codes = data.GetValues<int8_t>(1);
+
+ for (int64_t i = 0; i < data.length; ++i) {
+ // Note that union arrays never have top-level nulls
const int32_t code = type_codes[i];
if (code < 0 || child_ids[code] == UnionType::kInvalidChildId) {
return Status::Invalid("Union value at position ", i, " has invalid type id ",
@@ -519,17 +519,17 @@ struct ValidateArrayFullImpl {
}
}
- if (type.mode() == UnionMode::DENSE) {
+ if (type.mode() == UnionMode::DENSE) {
// Map logical type id to child length
std::vector<int64_t> child_lengths(256);
- for (int child_id = 0; child_id < type.num_fields(); ++child_id) {
- child_lengths[type_codes_map[child_id]] = data.child_data[child_id]->length;
+ for (int child_id = 0; child_id < type.num_fields(); ++child_id) {
+ child_lengths[type_codes_map[child_id]] = data.child_data[child_id]->length;
}
- // Check offsets are in bounds
- std::vector<int64_t> last_child_offsets(256, 0);
- const int32_t* offsets = data.GetValues<int32_t>(2);
- for (int64_t i = 0; i < data.length; ++i) {
+ // Check offsets are in bounds
+ std::vector<int64_t> last_child_offsets(256, 0);
+ const int32_t* offsets = data.GetValues<int32_t>(2);
+ for (int64_t i = 0; i < data.length; ++i) {
const int32_t code = type_codes[i];
const int32_t offset = offsets[i];
if (offset < 0) {
@@ -542,78 +542,78 @@ struct ValidateArrayFullImpl {
"than child length (",
offset, " >= ", child_lengths[code], ")");
}
- if (offset < last_child_offsets[code]) {
- return Status::Invalid("Union value at position ", i,
- " has non-monotonic offset ", offset);
- }
- last_child_offsets[code] = offset;
+ if (offset < last_child_offsets[code]) {
+ return Status::Invalid("Union value at position ", i,
+ " has non-monotonic offset ", offset);
+ }
+ last_child_offsets[code] = offset;
+ }
+ }
+
+ // Validate children
+ for (int64_t i = 0; i < type.num_fields(); ++i) {
+ const ArrayData& field = *data.child_data[i];
+ const Status field_valid = ValidateArrayFull(field);
+ if (!field_valid.ok()) {
+ return Status::Invalid("Union child array #", i,
+ " invalid: ", field_valid.ToString());
}
}
-
- // Validate children
- for (int64_t i = 0; i < type.num_fields(); ++i) {
- const ArrayData& field = *data.child_data[i];
- const Status field_valid = ValidateArrayFull(field);
- if (!field_valid.ok()) {
- return Status::Invalid("Union child array #", i,
- " invalid: ", field_valid.ToString());
- }
- }
return Status::OK();
}
- Status Visit(const DictionaryType& type) {
+ Status Visit(const DictionaryType& type) {
const Status indices_status =
- CheckBounds(*type.index_type(), 0, data.dictionary->length - 1);
+ CheckBounds(*type.index_type(), 0, data.dictionary->length - 1);
if (!indices_status.ok()) {
return Status::Invalid("Dictionary indices invalid: ", indices_status.ToString());
}
- return ValidateArrayFull(*data.dictionary);
+ return ValidateArrayFull(*data.dictionary);
}
- Status Visit(const ExtensionType& type) {
- return ValidateWithType(*type.storage_type());
+ Status Visit(const ExtensionType& type) {
+ return ValidateWithType(*type.storage_type());
}
protected:
- template <typename BinaryType>
- Status ValidateBinaryLike(const BinaryType& type) {
- const auto& data_buffer = data.buffers[2];
- if (data_buffer == nullptr) {
- return Status::Invalid("Binary data buffer is null");
+ template <typename BinaryType>
+ Status ValidateBinaryLike(const BinaryType& type) {
+ const auto& data_buffer = data.buffers[2];
+ if (data_buffer == nullptr) {
+ return Status::Invalid("Binary data buffer is null");
}
- return ValidateOffsets(type, data_buffer->size());
+ return ValidateOffsets(type, data_buffer->size());
}
- template <typename ListType>
- Status ValidateListLike(const ListType& type) {
- const ArrayData& child = *data.child_data[0];
- const Status child_valid = ValidateArrayFull(child);
+ template <typename ListType>
+ Status ValidateListLike(const ListType& type) {
+ const ArrayData& child = *data.child_data[0];
+ const Status child_valid = ValidateArrayFull(child);
if (!child_valid.ok()) {
return Status::Invalid("List child array invalid: ", child_valid.ToString());
}
- return ValidateOffsets(type, child.offset + child.length);
+ return ValidateOffsets(type, child.offset + child.length);
}
- template <typename TypeClass>
- Status ValidateOffsets(const TypeClass& type, int64_t offset_limit) {
- using offset_type = typename TypeClass::offset_type;
- if (data.length == 0) {
+ template <typename TypeClass>
+ Status ValidateOffsets(const TypeClass& type, int64_t offset_limit) {
+ using offset_type = typename TypeClass::offset_type;
+ if (data.length == 0) {
return Status::OK();
}
-
- const offset_type* offsets = data.GetValues<offset_type>(1);
- if (offsets == nullptr) {
- return Status::Invalid("Non-empty array but offsets are null");
+
+ const offset_type* offsets = data.GetValues<offset_type>(1);
+ if (offsets == nullptr) {
+ return Status::Invalid("Non-empty array but offsets are null");
}
- auto prev_offset = offsets[0];
+ auto prev_offset = offsets[0];
if (prev_offset < 0) {
- return Status::Invalid("Offset invariant failure: array starts at negative offset ",
- prev_offset);
+ return Status::Invalid("Offset invariant failure: array starts at negative offset ",
+ prev_offset);
}
- for (int64_t i = 1; i <= data.length; ++i) {
- const auto current_offset = offsets[i];
+ for (int64_t i = 1; i <= data.length; ++i) {
+ const auto current_offset = offsets[i];
if (current_offset < prev_offset) {
return Status::Invalid("Offset invariant failure: non-monotonic offset at slot ",
i, ": ", current_offset, " < ", prev_offset);
@@ -627,31 +627,31 @@ struct ValidateArrayFullImpl {
return Status::OK();
}
- Status CheckBounds(const DataType& type, int64_t min_value, int64_t max_value) {
- BoundsChecker checker{data, min_value, max_value};
- return VisitTypeInline(type, &checker);
+ Status CheckBounds(const DataType& type, int64_t min_value, int64_t max_value) {
+ BoundsChecker checker{data, min_value, max_value};
+ return VisitTypeInline(type, &checker);
}
};
} // namespace
ARROW_EXPORT
-Status ValidateArrayFull(const ArrayData& data) {
- return ValidateArrayFullImpl{data}.Validate();
+Status ValidateArrayFull(const ArrayData& data) {
+ return ValidateArrayFullImpl{data}.Validate();
+}
+
+ARROW_EXPORT
+Status ValidateArrayFull(const Array& array) { return ValidateArrayFull(*array.data()); }
+
+ARROW_EXPORT
+Status ValidateUTF8(const ArrayData& data) {
+ DCHECK(data.type->id() == Type::STRING || data.type->id() == Type::LARGE_STRING);
+ UTF8DataValidator validator{data};
+ return VisitTypeInline(*data.type, &validator);
}
-ARROW_EXPORT
-Status ValidateArrayFull(const Array& array) { return ValidateArrayFull(*array.data()); }
-
-ARROW_EXPORT
-Status ValidateUTF8(const ArrayData& data) {
- DCHECK(data.type->id() == Type::STRING || data.type->id() == Type::LARGE_STRING);
- UTF8DataValidator validator{data};
- return VisitTypeInline(*data.type, &validator);
-}
-
-ARROW_EXPORT
-Status ValidateUTF8(const Array& array) { return ValidateUTF8(*array.data()); }
-
+ARROW_EXPORT
+Status ValidateUTF8(const Array& array) { return ValidateUTF8(*array.data()); }
+
} // namespace internal
} // namespace arrow
diff --git a/contrib/libs/apache/arrow/cpp/src/arrow/array/validate.h b/contrib/libs/apache/arrow/cpp/src/arrow/array/validate.h
index 7e07100e1fc..cae3e16b3c5 100644
--- a/contrib/libs/apache/arrow/cpp/src/arrow/array/validate.h
+++ b/contrib/libs/apache/arrow/cpp/src/arrow/array/validate.h
@@ -18,7 +18,7 @@
#pragma once
#include "arrow/status.h"
-#include "arrow/type_fwd.h"
+#include "arrow/type_fwd.h"
#include "arrow/util/visibility.h"
namespace arrow {
@@ -26,30 +26,30 @@ namespace internal {
// Internal functions implementing Array::Validate() and friends.
-// O(1) array metadata validation
-
+// O(1) array metadata validation
+
ARROW_EXPORT
Status ValidateArray(const Array& array);
ARROW_EXPORT
-Status ValidateArray(const ArrayData& data);
-
-// O(N) array data validation.
-// Note the "full" routines don't validate metadata. It should be done
-// beforehand using ValidateArray(), otherwise invalid memory accesses
-// may occur.
-
-ARROW_EXPORT
-Status ValidateArrayFull(const Array& array);
-
-ARROW_EXPORT
-Status ValidateArrayFull(const ArrayData& data);
-
-ARROW_EXPORT
-Status ValidateUTF8(const Array& array);
-
-ARROW_EXPORT
-Status ValidateUTF8(const ArrayData& data);
-
+Status ValidateArray(const ArrayData& data);
+
+// O(N) array data validation.
+// Note the "full" routines don't validate metadata. It should be done
+// beforehand using ValidateArray(), otherwise invalid memory accesses
+// may occur.
+
+ARROW_EXPORT
+Status ValidateArrayFull(const Array& array);
+
+ARROW_EXPORT
+Status ValidateArrayFull(const ArrayData& data);
+
+ARROW_EXPORT
+Status ValidateUTF8(const Array& array);
+
+ARROW_EXPORT
+Status ValidateUTF8(const ArrayData& data);
+
} // namespace internal
} // namespace arrow
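
Taken together, the comments above prescribe a two-step protocol: run the O(1) metadata checks first, and only then the O(N) data checks. A sketch (CheckArray is a hypothetical helper; internal headers assumed):

    #include <arrow/array.h>
    #include <arrow/array/validate.h>
    #include <arrow/status.h>

    arrow::Status CheckArray(const arrow::Array& array) {
      // O(1): buffer counts and sizes, length/offset/null_count sanity.
      ARROW_RETURN_NOT_OK(arrow::internal::ValidateArray(array));
      // O(N): offset monotonicity, UTF-8 well-formedness, dictionary index
      // bounds; only safe once the metadata checks above have passed.
      return arrow::internal::ValidateArrayFull(array);
    }
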
diff --git a/contrib/libs/apache/arrow/cpp/src/arrow/buffer.h b/contrib/libs/apache/arrow/cpp/src/arrow/buffer.h
index 7d71846d9ab..6c47a464b1d 100644
--- a/contrib/libs/apache/arrow/cpp/src/arrow/buffer.h
+++ b/contrib/libs/apache/arrow/cpp/src/arrow/buffer.h
@@ -56,13 +56,13 @@ class ARROW_EXPORT Buffer {
///
/// \note The passed memory must be kept alive through some other means
Buffer(const uint8_t* data, int64_t size)
- : is_mutable_(false), is_cpu_(true), data_(data), size_(size), capacity_(size) {
+ : is_mutable_(false), is_cpu_(true), data_(data), size_(size), capacity_(size) {
SetMemoryManager(default_cpu_memory_manager());
}
Buffer(const uint8_t* data, int64_t size, std::shared_ptr<MemoryManager> mm,
std::shared_ptr<Buffer> parent = NULLPTR)
- : is_mutable_(false), data_(data), size_(size), capacity_(size), parent_(parent) {
+ : is_mutable_(false), data_(data), size_(size), capacity_(size), parent_(parent) {
SetMemoryManager(std::move(mm));
}
@@ -121,7 +121,7 @@ class ARROW_EXPORT Buffer {
#endif
// A zero-capacity buffer can have a null data pointer
if (capacity_ != 0) {
- memset(mutable_data() + size_, 0, static_cast<size_t>(capacity_ - size_));
+ memset(mutable_data() + size_, 0, static_cast<size_t>(capacity_ - size_));
}
}
@@ -195,8 +195,8 @@ class ARROW_EXPORT Buffer {
CheckCPU();
CheckMutable();
#endif
- return ARROW_PREDICT_TRUE(is_cpu_ && is_mutable_) ? const_cast<uint8_t*>(data_)
- : NULLPTR;
+ return ARROW_PREDICT_TRUE(is_cpu_ && is_mutable_) ? const_cast<uint8_t*>(data_)
+ : NULLPTR;
}
/// \brief Return the device address of the buffer's data
@@ -210,7 +210,7 @@ class ARROW_EXPORT Buffer {
#ifndef NDEBUG
CheckMutable();
#endif
- return ARROW_PREDICT_TRUE(is_mutable_) ? reinterpret_cast<uintptr_t>(data_) : 0;
+ return ARROW_PREDICT_TRUE(is_mutable_) ? reinterpret_cast<uintptr_t>(data_) : 0;
}
/// \brief Return the buffer's size in bytes
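
A sketch of the non-owning constructor's contract (illustrative; per the note above, storage must be kept alive for as long as the Buffer is used):

    #include <arrow/buffer.h>
    #include <cstdint>
    #include <string>

    std::string storage = "hello arrow";
    // Wraps the bytes without copying them and without taking ownership.
    arrow::Buffer buf(reinterpret_cast<const uint8_t*>(storage.data()),
                      static_cast<int64_t>(storage.size()));
    // The wrapper is immutable: buf.is_mutable() is false and, as shown
    // above, buf.mutable_data() yields NULLPTR rather than a writable pointer.
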
diff --git a/contrib/libs/apache/arrow/cpp/src/arrow/buffer_builder.h b/contrib/libs/apache/arrow/cpp/src/arrow/buffer_builder.h
index cebaa5db510..c6250ae2b76 100644
--- a/contrib/libs/apache/arrow/cpp/src/arrow/buffer_builder.h
+++ b/contrib/libs/apache/arrow/cpp/src/arrow/buffer_builder.h
@@ -64,10 +64,10 @@ class ARROW_EXPORT BufferBuilder {
/// \brief Resize the buffer to the nearest multiple of 64 bytes
///
/// \param new_capacity the new capacity of the builder. Will be
- /// rounded up to a multiple of 64 bytes for padding
- /// \param shrink_to_fit if new capacity is smaller than the existing,
- /// reallocate internal buffer. Set to false to avoid reallocations when
- /// shrinking the builder.
+ /// rounded up to a multiple of 64 bytes for padding
+ /// \param shrink_to_fit if new capacity is smaller than the existing,
+ /// reallocate internal buffer. Set to false to avoid reallocations when
+ /// shrinking the builder.
/// \return Status
Status Resize(const int64_t new_capacity, bool shrink_to_fit = true) {
if (buffer_ == NULLPTR) {
@@ -159,23 +159,23 @@ class ARROW_EXPORT BufferBuilder {
return Status::OK();
}
- Result<std::shared_ptr<Buffer>> Finish(bool shrink_to_fit = true) {
- std::shared_ptr<Buffer> out;
- ARROW_RETURN_NOT_OK(Finish(&out, shrink_to_fit));
- return out;
- }
-
- /// \brief Like Finish, but override the final buffer size
- ///
- /// This is useful after writing data directly into the builder memory
- /// without calling the Append methods (basically, when using BufferBuilder
- /// mostly for memory allocation).
- Result<std::shared_ptr<Buffer>> FinishWithLength(int64_t final_length,
- bool shrink_to_fit = true) {
- size_ = final_length;
- return Finish(shrink_to_fit);
- }
-
+ Result<std::shared_ptr<Buffer>> Finish(bool shrink_to_fit = true) {
+ std::shared_ptr<Buffer> out;
+ ARROW_RETURN_NOT_OK(Finish(&out, shrink_to_fit));
+ return out;
+ }
+
+ /// \brief Like Finish, but override the final buffer size
+ ///
+ /// This is useful after writing data directly into the builder memory
+ /// without calling the Append methods (basically, when using BufferBuilder
+ /// mostly for memory allocation).
+ Result<std::shared_ptr<Buffer>> FinishWithLength(int64_t final_length,
+ bool shrink_to_fit = true) {
+ size_ = final_length;
+ return Finish(shrink_to_fit);
+ }
+
void Reset() {
buffer_ = NULLPTR;
capacity_ = size_ = 0;
@@ -216,11 +216,11 @@ class TypedBufferBuilder<
MemoryPool* pool = default_memory_pool())
: bytes_builder_(std::move(buffer), pool) {}
- explicit TypedBufferBuilder(BufferBuilder builder)
- : bytes_builder_(std::move(builder)) {}
-
- BufferBuilder* bytes_builder() { return &bytes_builder_; }
-
+ explicit TypedBufferBuilder(BufferBuilder builder)
+ : bytes_builder_(std::move(builder)) {}
+
+ BufferBuilder* bytes_builder() { return &bytes_builder_; }
+
Status Append(T value) {
return bytes_builder_.Append(reinterpret_cast<uint8_t*>(&value), sizeof(T));
}
@@ -275,22 +275,22 @@ class TypedBufferBuilder<
return bytes_builder_.Finish(out, shrink_to_fit);
}
- Result<std::shared_ptr<Buffer>> Finish(bool shrink_to_fit = true) {
- std::shared_ptr<Buffer> out;
- ARROW_RETURN_NOT_OK(Finish(&out, shrink_to_fit));
- return out;
- }
-
- /// \brief Like Finish, but override the final buffer size
- ///
- /// This is useful after writing data directly into the builder memory
- /// without calling the Append methods (basically, when using TypedBufferBuilder
- /// only for memory allocation).
- Result<std::shared_ptr<Buffer>> FinishWithLength(int64_t final_length,
- bool shrink_to_fit = true) {
- return bytes_builder_.FinishWithLength(final_length * sizeof(T), shrink_to_fit);
- }
-
+ Result<std::shared_ptr<Buffer>> Finish(bool shrink_to_fit = true) {
+ std::shared_ptr<Buffer> out;
+ ARROW_RETURN_NOT_OK(Finish(&out, shrink_to_fit));
+ return out;
+ }
+
+ /// \brief Like Finish, but override the final buffer size
+ ///
+ /// This is useful after writing data directly into the builder memory
+ /// without calling the Append methods (basically, when using TypedBufferBuilder
+ /// only for memory allocation).
+ Result<std::shared_ptr<Buffer>> FinishWithLength(int64_t final_length,
+ bool shrink_to_fit = true) {
+ return bytes_builder_.FinishWithLength(final_length * sizeof(T), shrink_to_fit);
+ }
+
void Reset() { bytes_builder_.Reset(); }
int64_t length() const { return bytes_builder_.length() / sizeof(T); }
@@ -309,11 +309,11 @@ class TypedBufferBuilder<bool> {
explicit TypedBufferBuilder(MemoryPool* pool = default_memory_pool())
: bytes_builder_(pool) {}
- explicit TypedBufferBuilder(BufferBuilder builder)
- : bytes_builder_(std::move(builder)) {}
-
- BufferBuilder* bytes_builder() { return &bytes_builder_; }
-
+ explicit TypedBufferBuilder(BufferBuilder builder)
+ : bytes_builder_(std::move(builder)) {}
+
+ BufferBuilder* bytes_builder() { return &bytes_builder_; }
+
Status Append(bool value) {
ARROW_RETURN_NOT_OK(Reserve(1));
UnsafeAppend(value);
@@ -411,25 +411,25 @@ class TypedBufferBuilder<bool> {
return bytes_builder_.Finish(out, shrink_to_fit);
}
- Result<std::shared_ptr<Buffer>> Finish(bool shrink_to_fit = true) {
- std::shared_ptr<Buffer> out;
- ARROW_RETURN_NOT_OK(Finish(&out, shrink_to_fit));
- return out;
- }
-
- /// \brief Like Finish, but override the final buffer size
- ///
- /// This is useful after writing data directly into the builder memory
- /// without calling the Append methods (basically, when using TypedBufferBuilder
- /// only for memory allocation).
- Result<std::shared_ptr<Buffer>> FinishWithLength(int64_t final_length,
- bool shrink_to_fit = true) {
- const auto final_byte_length = BitUtil::BytesForBits(final_length);
- bytes_builder_.UnsafeAdvance(final_byte_length - bytes_builder_.length());
- bit_length_ = false_count_ = 0;
- return bytes_builder_.FinishWithLength(final_byte_length, shrink_to_fit);
- }
-
+ Result<std::shared_ptr<Buffer>> Finish(bool shrink_to_fit = true) {
+ std::shared_ptr<Buffer> out;
+ ARROW_RETURN_NOT_OK(Finish(&out, shrink_to_fit));
+ return out;
+ }
+
+ /// \brief Like Finish, but override the final buffer size
+ ///
+ /// This is useful after writing data directly into the builder memory
+ /// without calling the Append methods (basically, when using TypedBufferBuilder
+ /// only for memory allocation).
+ Result<std::shared_ptr<Buffer>> FinishWithLength(int64_t final_length,
+ bool shrink_to_fit = true) {
+ const auto final_byte_length = BitUtil::BytesForBits(final_length);
+ bytes_builder_.UnsafeAdvance(final_byte_length - bytes_builder_.length());
+ bit_length_ = false_count_ = 0;
+ return bytes_builder_.FinishWithLength(final_byte_length, shrink_to_fit);
+ }
+
void Reset() {
bytes_builder_.Reset();
bit_length_ = false_count_ = 0;
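
A sketch of the FinishWithLength() pattern described above: reserve capacity, write through the raw pointer instead of calling Append(), then stamp the final length (BuildDirect is a hypothetical helper):

    #include <arrow/buffer_builder.h>
    #include <arrow/result.h>

    arrow::Result<std::shared_ptr<arrow::Buffer>> BuildDirect(int64_t n) {
      arrow::TypedBufferBuilder<int32_t> builder;
      ARROW_RETURN_NOT_OK(builder.Reserve(n));
      // Write into the builder's memory directly, bypassing Append().
      auto* raw =
          reinterpret_cast<int32_t*>(builder.bytes_builder()->mutable_data());
      for (int64_t i = 0; i < n; ++i) raw[i] = static_cast<int32_t>(i);
      // No Append() calls were made, so override the length when finishing;
      // the typed builder scales it by sizeof(int32_t) internally.
      return builder.FinishWithLength(n);
    }
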
diff --git a/contrib/libs/apache/arrow/cpp/src/arrow/builder.cc b/contrib/libs/apache/arrow/cpp/src/arrow/builder.cc
index e46661b4b42..f22228a4588 100644
--- a/contrib/libs/apache/arrow/cpp/src/arrow/builder.cc
+++ b/contrib/libs/apache/arrow/cpp/src/arrow/builder.cc
@@ -51,7 +51,7 @@ struct DictionaryBuilderCase {
}
Status Visit(const FixedSizeBinaryType&) { return CreateFor<FixedSizeBinaryType>(); }
Status Visit(const Decimal128Type&) { return CreateFor<Decimal128Type>(); }
- Status Visit(const Decimal256Type&) { return CreateFor<Decimal256Type>(); }
+ Status Visit(const Decimal256Type&) { return CreateFor<Decimal256Type>(); }
Status Visit(const DataType& value_type) { return NotImplemented(value_type); }
Status Visit(const HalfFloatType& value_type) { return NotImplemented(value_type); }
@@ -139,7 +139,7 @@ Status MakeBuilder(MemoryPool* pool, const std::shared_ptr<DataType>& type,
BUILDER_CASE(LargeBinary);
BUILDER_CASE(FixedSizeBinary);
BUILDER_CASE(Decimal128);
- BUILDER_CASE(Decimal256);
+ BUILDER_CASE(Decimal256);
case Type::DICTIONARY: {
const auto& dict_type = static_cast<const DictionaryType&>(*type);
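
For context, a sketch of the dispatch this case list feeds, including the Decimal256 case restored above (BuildDecimal is a hypothetical helper):

    #include <arrow/builder.h>
    #include <arrow/memory_pool.h>
    #include <arrow/status.h>
    #include <arrow/type.h>
    #include <memory>

    arrow::Status BuildDecimal() {
      std::unique_ptr<arrow::ArrayBuilder> builder;
      // MakeBuilder dispatches on the runtime type id, so this resolves
      // to a Decimal256Builder.
      ARROW_RETURN_NOT_OK(arrow::MakeBuilder(arrow::default_memory_pool(),
                                             arrow::decimal256(40, 4), &builder));
      return builder->AppendNull();
    }
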
diff --git a/contrib/libs/apache/arrow/cpp/src/arrow/c/bridge.cc b/contrib/libs/apache/arrow/cpp/src/arrow/c/bridge.cc
index ccd780fa687..a43bf8104f2 100644
--- a/contrib/libs/apache/arrow/cpp/src/arrow/c/bridge.cc
+++ b/contrib/libs/apache/arrow/cpp/src/arrow/c/bridge.cc
@@ -304,16 +304,16 @@ struct SchemaExporter {
return SetFormat("w:" + std::to_string(type.byte_width()));
}
- Status Visit(const DecimalType& type) {
- if (type.bit_width() == 128) {
- // 128 is the default bit-width
- return SetFormat("d:" + std::to_string(type.precision()) + "," +
- std::to_string(type.scale()));
- } else {
- return SetFormat("d:" + std::to_string(type.precision()) + "," +
- std::to_string(type.scale()) + "," +
- std::to_string(type.bit_width()));
- }
+ Status Visit(const DecimalType& type) {
+ if (type.bit_width() == 128) {
+ // 128 is the default bit-width
+ return SetFormat("d:" + std::to_string(type.precision()) + "," +
+ std::to_string(type.scale()));
+ } else {
+ return SetFormat("d:" + std::to_string(type.precision()) + "," +
+ std::to_string(type.scale()) + "," +
+ std::to_string(type.bit_width()));
+ }
}
Status Visit(const BinaryType& type) { return SetFormat("z"); }
@@ -980,20 +980,20 @@ struct SchemaImporter {
Status ProcessDecimal() {
RETURN_NOT_OK(f_parser_.CheckNext(':'));
ARROW_ASSIGN_OR_RAISE(auto prec_scale, f_parser_.ParseInts(f_parser_.Rest()));
- // 3 elements indicates bit width was communicated as well.
- if (prec_scale.size() != 2 && prec_scale.size() != 3) {
+    // 3 elements indicate that the bit width was communicated as well.
+ if (prec_scale.size() != 2 && prec_scale.size() != 3) {
return f_parser_.Invalid();
}
- if (prec_scale[0] <= 0) {
+ if (prec_scale[0] <= 0) {
+ return f_parser_.Invalid();
+ }
+ if (prec_scale.size() == 2 || prec_scale[2] == 128) {
+ type_ = decimal128(prec_scale[0], prec_scale[1]);
+ } else if (prec_scale[2] == 256) {
+ type_ = decimal256(prec_scale[0], prec_scale[1]);
+ } else {
return f_parser_.Invalid();
}
- if (prec_scale.size() == 2 || prec_scale[2] == 128) {
- type_ = decimal128(prec_scale[0], prec_scale[1]);
- } else if (prec_scale[2] == 256) {
- type_ = decimal256(prec_scale[0], prec_scale[1]);
- } else {
- return f_parser_.Invalid();
- }
return Status::OK();
}
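
The exporter and importer above agree on a d:precision,scale[,bitWidth] format string; the concrete mappings implied by the branches are:

    // decimal128(19, 4)  exports as  "d:19,4"     (128 is the default width)
    // "d:19,4,128"       also imports as decimal128(19, 4)
    // decimal256(40, 4)  round-trips as  "d:40,4,256"
    // any other width (e.g. "d:19,4,64") is rejected by the importer
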
diff --git a/contrib/libs/apache/arrow/cpp/src/arrow/chunked_array.cc b/contrib/libs/apache/arrow/cpp/src/arrow/chunked_array.cc
index 20c63c78959..142bd0d8c89 100644
--- a/contrib/libs/apache/arrow/cpp/src/arrow/chunked_array.cc
+++ b/contrib/libs/apache/arrow/cpp/src/arrow/chunked_array.cc
@@ -118,33 +118,33 @@ bool ChunkedArray::Equals(const std::shared_ptr<ChunkedArray>& other) const {
return Equals(*other.get());
}
-bool ChunkedArray::ApproxEquals(const ChunkedArray& other,
- const EqualOptions& equal_options) const {
- if (length_ != other.length()) {
- return false;
- }
- if (null_count_ != other.null_count()) {
- return false;
- }
- // We cannot toggle check_metadata here yet, so we don't check it
- if (!type_->Equals(*other.type_, /*check_metadata=*/false)) {
- return false;
- }
-
- // Check contents of the underlying arrays. This checks for equality of
- // the underlying data independently of the chunk size.
- return internal::ApplyBinaryChunked(
- *this, other,
- [&](const Array& left_piece, const Array& right_piece,
- int64_t ARROW_ARG_UNUSED(position)) {
- if (!left_piece.ApproxEquals(right_piece, equal_options)) {
- return Status::Invalid("Unequal piece");
- }
- return Status::OK();
- })
- .ok();
-}
-
+bool ChunkedArray::ApproxEquals(const ChunkedArray& other,
+ const EqualOptions& equal_options) const {
+ if (length_ != other.length()) {
+ return false;
+ }
+ if (null_count_ != other.null_count()) {
+ return false;
+ }
+ // We cannot toggle check_metadata here yet, so we don't check it
+ if (!type_->Equals(*other.type_, /*check_metadata=*/false)) {
+ return false;
+ }
+
+ // Check contents of the underlying arrays. This checks for equality of
+ // the underlying data independently of the chunk size.
+ return internal::ApplyBinaryChunked(
+ *this, other,
+ [&](const Array& left_piece, const Array& right_piece,
+ int64_t ARROW_ARG_UNUSED(position)) {
+ if (!left_piece.ApproxEquals(right_piece, equal_options)) {
+ return Status::Invalid("Unequal piece");
+ }
+ return Status::OK();
+ })
+ .ok();
+}
+
std::shared_ptr<ChunkedArray> ChunkedArray::Slice(int64_t offset, int64_t length) const {
ARROW_CHECK_LE(offset, length_) << "Slice offset greater than array length";
bool offset_equals_length = offset == length_;
@@ -246,7 +246,7 @@ Status ChunkedArray::ValidateFull() const {
RETURN_NOT_OK(Validate());
for (size_t i = 0; i < chunks_.size(); ++i) {
const Array& chunk = *chunks_[i];
- const Status st = internal::ValidateArrayFull(chunk);
+ const Status st = internal::ValidateArrayFull(chunk);
if (!st.ok()) {
return Status::Invalid("In chunk ", i, ": ", st.ToString());
}
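
A sketch of the chunk-layout independence noted in the comments above (Demo is a hypothetical helper):

    #include <arrow/builder.h>
    #include <arrow/chunked_array.h>
    #include <arrow/result.h>
    #include <memory>

    arrow::Result<bool> Demo() {
      arrow::DoubleBuilder b;
      ARROW_RETURN_NOT_OK(b.AppendValues({1.0, 2.0, 3.0}));
      ARROW_ASSIGN_OR_RAISE(auto whole, b.Finish());
      auto one_chunk = std::make_shared<arrow::ChunkedArray>(whole);
      auto two_chunks = std::make_shared<arrow::ChunkedArray>(
          arrow::ArrayVector{whole->Slice(0, 1), whole->Slice(1, 2)});
      // true: the underlying values match even though the chunking differs.
      return one_chunk->ApproxEquals(*two_chunks);
    }
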
diff --git a/contrib/libs/apache/arrow/cpp/src/arrow/chunked_array.h b/contrib/libs/apache/arrow/cpp/src/arrow/chunked_array.h
index 892ae637545..2ace045c2bf 100644
--- a/contrib/libs/apache/arrow/cpp/src/arrow/chunked_array.h
+++ b/contrib/libs/apache/arrow/cpp/src/arrow/chunked_array.h
@@ -23,7 +23,7 @@
#include <utility>
#include <vector>
-#include "arrow/compare.h"
+#include "arrow/compare.h"
#include "arrow/result.h"
#include "arrow/status.h"
#include "arrow/type_fwd.h"
@@ -73,9 +73,9 @@ class ARROW_EXPORT ChunkedArray {
/// data type.
explicit ChunkedArray(ArrayVector chunks);
- ChunkedArray(ChunkedArray&&) = default;
- ChunkedArray& operator=(ChunkedArray&&) = default;
-
+ ChunkedArray(ChunkedArray&&) = default;
+ ChunkedArray& operator=(ChunkedArray&&) = default;
+
/// \brief Construct a chunked array from a single Array
explicit ChunkedArray(std::shared_ptr<Array> chunk)
: ChunkedArray(ArrayVector{std::move(chunk)}) {}
@@ -137,9 +137,9 @@ class ARROW_EXPORT ChunkedArray {
bool Equals(const ChunkedArray& other) const;
/// \brief Determine if two chunked arrays are equal.
bool Equals(const std::shared_ptr<ChunkedArray>& other) const;
- /// \brief Determine if two chunked arrays approximately equal
- bool ApproxEquals(const ChunkedArray& other,
- const EqualOptions& = EqualOptions::Defaults()) const;
+ /// \brief Determine if two chunked arrays approximately equal
+ bool ApproxEquals(const ChunkedArray& other,
+ const EqualOptions& = EqualOptions::Defaults()) const;
/// \return PrettyPrint representation suitable for debugging
std::string ToString() const;
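
A small illustrative sketch (not from the diff) of the defaulted move operations declared above: a ChunkedArray can be returned and reassigned without copying its chunk vector. MakeChunked is a hypothetical helper.

#include <utility>

#include "arrow/chunked_array.h"

arrow::ChunkedArray MakeChunked(arrow::ArrayVector chunks) {
  // The defaulted move constructor makes this return cheap: the chunk
  // vector is moved into the result rather than deep-copied.
  return arrow::ChunkedArray(std::move(chunks));
}
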
diff --git a/contrib/libs/apache/arrow/cpp/src/arrow/compare.cc b/contrib/libs/apache/arrow/cpp/src/arrow/compare.cc
index 51fec14e768..4c6f97faf95 100644
--- a/contrib/libs/apache/arrow/cpp/src/arrow/compare.cc
+++ b/contrib/libs/apache/arrow/cpp/src/arrow/compare.cc
@@ -38,10 +38,10 @@
#include "arrow/tensor.h"
#include "arrow/type.h"
#include "arrow/type_traits.h"
-#include "arrow/util/bit_run_reader.h"
+#include "arrow/util/bit_run_reader.h"
#include "arrow/util/bit_util.h"
#include "arrow/util/bitmap_ops.h"
-#include "arrow/util/bitmap_reader.h"
+#include "arrow/util/bitmap_reader.h"
#include "arrow/util/checked_cast.h"
#include "arrow/util/logging.h"
#include "arrow/util/macros.h"
@@ -51,499 +51,499 @@
namespace arrow {
using internal::BitmapEquals;
-using internal::BitmapReader;
-using internal::BitmapUInt64Reader;
+using internal::BitmapReader;
+using internal::BitmapUInt64Reader;
using internal::checked_cast;
-using internal::OptionalBitmapEquals;
+using internal::OptionalBitmapEquals;
// ----------------------------------------------------------------------
// Public method implementations
namespace {
-// TODO also handle HALF_FLOAT NaNs
-
-enum FloatingEqualityFlags : int8_t { Approximate = 1, NansEqual = 2 };
-
-template <typename T, int8_t Flags>
-struct FloatingEquality {
- bool operator()(T x, T y) { return x == y; }
-};
-
-template <typename T>
-struct FloatingEquality<T, NansEqual> {
- bool operator()(T x, T y) { return (x == y) || (std::isnan(x) && std::isnan(y)); }
-};
-
-template <typename T>
-struct FloatingEquality<T, Approximate> {
- explicit FloatingEquality(const EqualOptions& options)
- : epsilon(static_cast<T>(options.atol())) {}
-
- bool operator()(T x, T y) { return (fabs(x - y) <= epsilon) || (x == y); }
-
- const T epsilon;
-};
-
-template <typename T>
-struct FloatingEquality<T, Approximate | NansEqual> {
- explicit FloatingEquality(const EqualOptions& options)
- : epsilon(static_cast<T>(options.atol())) {}
-
- bool operator()(T x, T y) {
- return (fabs(x - y) <= epsilon) || (x == y) || (std::isnan(x) && std::isnan(y));
- }
-
- const T epsilon;
-};
-
-template <typename T, typename Visitor>
-void VisitFloatingEquality(const EqualOptions& options, bool floating_approximate,
- Visitor&& visit) {
- if (options.nans_equal()) {
- if (floating_approximate) {
- visit(FloatingEquality<T, NansEqual | Approximate>{options});
- } else {
- visit(FloatingEquality<T, NansEqual>{});
+// TODO also handle HALF_FLOAT NaNs
+
+enum FloatingEqualityFlags : int8_t { Approximate = 1, NansEqual = 2 };
+
+template <typename T, int8_t Flags>
+struct FloatingEquality {
+ bool operator()(T x, T y) { return x == y; }
+};
+
+template <typename T>
+struct FloatingEquality<T, NansEqual> {
+ bool operator()(T x, T y) { return (x == y) || (std::isnan(x) && std::isnan(y)); }
+};
+
+template <typename T>
+struct FloatingEquality<T, Approximate> {
+ explicit FloatingEquality(const EqualOptions& options)
+ : epsilon(static_cast<T>(options.atol())) {}
+
+ bool operator()(T x, T y) { return (fabs(x - y) <= epsilon) || (x == y); }
+
+ const T epsilon;
+};
+
+template <typename T>
+struct FloatingEquality<T, Approximate | NansEqual> {
+ explicit FloatingEquality(const EqualOptions& options)
+ : epsilon(static_cast<T>(options.atol())) {}
+
+ bool operator()(T x, T y) {
+ return (fabs(x - y) <= epsilon) || (x == y) || (std::isnan(x) && std::isnan(y));
+ }
+
+ const T epsilon;
+};
+
+template <typename T, typename Visitor>
+void VisitFloatingEquality(const EqualOptions& options, bool floating_approximate,
+ Visitor&& visit) {
+ if (options.nans_equal()) {
+ if (floating_approximate) {
+ visit(FloatingEquality<T, NansEqual | Approximate>{options});
+ } else {
+ visit(FloatingEquality<T, NansEqual>{});
}
} else {
- if (floating_approximate) {
- visit(FloatingEquality<T, Approximate>{options});
- } else {
- visit(FloatingEquality<T, 0>{});
+ if (floating_approximate) {
+ visit(FloatingEquality<T, Approximate>{options});
+ } else {
+ visit(FloatingEquality<T, 0>{});
}
}
}
-inline bool IdentityImpliesEqualityNansNotEqual(const DataType& type) {
- if (type.id() == Type::FLOAT || type.id() == Type::DOUBLE) {
- return false;
+inline bool IdentityImpliesEqualityNansNotEqual(const DataType& type) {
+ if (type.id() == Type::FLOAT || type.id() == Type::DOUBLE) {
+ return false;
+ }
+ for (const auto& child : type.fields()) {
+ if (!IdentityImpliesEqualityNansNotEqual(*child->type())) {
+ return false;
+ }
}
- for (const auto& child : type.fields()) {
- if (!IdentityImpliesEqualityNansNotEqual(*child->type())) {
- return false;
- }
- }
- return true;
+ return true;
}
-inline bool IdentityImpliesEquality(const DataType& type, const EqualOptions& options) {
- if (options.nans_equal()) {
- return true;
+inline bool IdentityImpliesEquality(const DataType& type, const EqualOptions& options) {
+ if (options.nans_equal()) {
+ return true;
}
- return IdentityImpliesEqualityNansNotEqual(type);
+ return IdentityImpliesEqualityNansNotEqual(type);
}
-bool CompareArrayRanges(const ArrayData& left, const ArrayData& right,
- int64_t left_start_idx, int64_t left_end_idx,
- int64_t right_start_idx, const EqualOptions& options,
- bool floating_approximate);
+bool CompareArrayRanges(const ArrayData& left, const ArrayData& right,
+ int64_t left_start_idx, int64_t left_end_idx,
+ int64_t right_start_idx, const EqualOptions& options,
+ bool floating_approximate);
-class RangeDataEqualsImpl {
+class RangeDataEqualsImpl {
public:
- // PRE-CONDITIONS:
- // - the types are equal
- // - the ranges are in bounds
- RangeDataEqualsImpl(const EqualOptions& options, bool floating_approximate,
- const ArrayData& left, const ArrayData& right,
- int64_t left_start_idx, int64_t right_start_idx,
- int64_t range_length)
- : options_(options),
- floating_approximate_(floating_approximate),
- left_(left),
- right_(right),
+ // PRE-CONDITIONS:
+ // - the types are equal
+ // - the ranges are in bounds
+ RangeDataEqualsImpl(const EqualOptions& options, bool floating_approximate,
+ const ArrayData& left, const ArrayData& right,
+ int64_t left_start_idx, int64_t right_start_idx,
+ int64_t range_length)
+ : options_(options),
+ floating_approximate_(floating_approximate),
+ left_(left),
+ right_(right),
left_start_idx_(left_start_idx),
right_start_idx_(right_start_idx),
- range_length_(range_length),
+ range_length_(range_length),
result_(false) {}
- bool Compare() {
- // Compare null bitmaps
- if (left_start_idx_ == 0 && right_start_idx_ == 0 && range_length_ == left_.length &&
- range_length_ == right_.length) {
- // If we're comparing entire arrays, we can first compare the cached null counts
- if (left_.GetNullCount() != right_.GetNullCount()) {
- return false;
+ bool Compare() {
+ // Compare null bitmaps
+ if (left_start_idx_ == 0 && right_start_idx_ == 0 && range_length_ == left_.length &&
+ range_length_ == right_.length) {
+ // If we're comparing entire arrays, we can first compare the cached null counts
+ if (left_.GetNullCount() != right_.GetNullCount()) {
+ return false;
}
}
- if (!OptionalBitmapEquals(left_.buffers[0], left_.offset + left_start_idx_,
- right_.buffers[0], right_.offset + right_start_idx_,
- range_length_)) {
- return false;
- }
- // Compare values
- return CompareWithType(*left_.type);
- }
-
- bool CompareWithType(const DataType& type) {
- result_ = true;
- if (range_length_ != 0) {
- ARROW_CHECK_OK(VisitTypeInline(type, this));
+ if (!OptionalBitmapEquals(left_.buffers[0], left_.offset + left_start_idx_,
+ right_.buffers[0], right_.offset + right_start_idx_,
+ range_length_)) {
+ return false;
}
- return result_;
+ // Compare values
+ return CompareWithType(*left_.type);
}
- Status Visit(const NullType&) { return Status::OK(); }
+ bool CompareWithType(const DataType& type) {
+ result_ = true;
+ if (range_length_ != 0) {
+ ARROW_CHECK_OK(VisitTypeInline(type, this));
+ }
+ return result_;
+ }
- template <typename TypeClass>
- enable_if_primitive_ctype<TypeClass, Status> Visit(const TypeClass& type) {
- return ComparePrimitive(type);
+ Status Visit(const NullType&) { return Status::OK(); }
+
+ template <typename TypeClass>
+ enable_if_primitive_ctype<TypeClass, Status> Visit(const TypeClass& type) {
+ return ComparePrimitive(type);
}
- template <typename TypeClass>
- enable_if_t<is_temporal_type<TypeClass>::value, Status> Visit(const TypeClass& type) {
- return ComparePrimitive(type);
- }
+ template <typename TypeClass>
+ enable_if_t<is_temporal_type<TypeClass>::value, Status> Visit(const TypeClass& type) {
+ return ComparePrimitive(type);
+ }
- Status Visit(const BooleanType&) {
- const uint8_t* left_bits = left_.GetValues<uint8_t>(1, 0);
- const uint8_t* right_bits = right_.GetValues<uint8_t>(1, 0);
- auto compare_runs = [&](int64_t i, int64_t length) -> bool {
- if (length <= 8) {
- // Avoid the BitmapUInt64Reader overhead for very small runs
- for (int64_t j = i; j < i + length; ++j) {
- if (BitUtil::GetBit(left_bits, left_start_idx_ + left_.offset + j) !=
- BitUtil::GetBit(right_bits, right_start_idx_ + right_.offset + j)) {
- return false;
- }
- }
+ Status Visit(const BooleanType&) {
+ const uint8_t* left_bits = left_.GetValues<uint8_t>(1, 0);
+ const uint8_t* right_bits = right_.GetValues<uint8_t>(1, 0);
+ auto compare_runs = [&](int64_t i, int64_t length) -> bool {
+ if (length <= 8) {
+ // Avoid the BitmapUInt64Reader overhead for very small runs
+ for (int64_t j = i; j < i + length; ++j) {
+ if (BitUtil::GetBit(left_bits, left_start_idx_ + left_.offset + j) !=
+ BitUtil::GetBit(right_bits, right_start_idx_ + right_.offset + j)) {
+ return false;
+ }
+ }
return true;
- } else if (length <= 1024) {
- BitmapUInt64Reader left_reader(left_bits, left_start_idx_ + left_.offset + i,
- length);
- BitmapUInt64Reader right_reader(right_bits, right_start_idx_ + right_.offset + i,
- length);
- while (left_reader.position() < length) {
- if (left_reader.NextWord() != right_reader.NextWord()) {
- return false;
- }
- }
- DCHECK_EQ(right_reader.position(), length);
- } else {
- // BitmapEquals is the fastest method on large runs
- return BitmapEquals(left_bits, left_start_idx_ + left_.offset + i, right_bits,
- right_start_idx_ + right_.offset + i, length);
+ } else if (length <= 1024) {
+ BitmapUInt64Reader left_reader(left_bits, left_start_idx_ + left_.offset + i,
+ length);
+ BitmapUInt64Reader right_reader(right_bits, right_start_idx_ + right_.offset + i,
+ length);
+ while (left_reader.position() < length) {
+ if (left_reader.NextWord() != right_reader.NextWord()) {
+ return false;
+ }
+ }
+ DCHECK_EQ(right_reader.position(), length);
+ } else {
+ // BitmapEquals is the fastest method on large runs
+ return BitmapEquals(left_bits, left_start_idx_ + left_.offset + i, right_bits,
+ right_start_idx_ + right_.offset + i, length);
}
- return true;
+ return true;
};
- VisitValidRuns(compare_runs);
- return Status::OK();
- }
-
- Status Visit(const FloatType& type) { return CompareFloating(type); }
-
- Status Visit(const DoubleType& type) { return CompareFloating(type); }
-
- // Also matches StringType
- Status Visit(const BinaryType& type) { return CompareBinary(type); }
-
- // Also matches LargeStringType
- Status Visit(const LargeBinaryType& type) { return CompareBinary(type); }
-
- Status Visit(const FixedSizeBinaryType& type) {
- const auto byte_width = type.byte_width();
- const uint8_t* left_data = left_.GetValues<uint8_t>(1, 0);
- const uint8_t* right_data = right_.GetValues<uint8_t>(1, 0);
-
- if (left_data != nullptr && right_data != nullptr) {
- auto compare_runs = [&](int64_t i, int64_t length) -> bool {
- return memcmp(left_data + (left_start_idx_ + left_.offset + i) * byte_width,
- right_data + (right_start_idx_ + right_.offset + i) * byte_width,
- length * byte_width) == 0;
- };
- VisitValidRuns(compare_runs);
- } else {
- auto compare_runs = [&](int64_t i, int64_t length) -> bool { return true; };
- VisitValidRuns(compare_runs);
+ VisitValidRuns(compare_runs);
+ return Status::OK();
+ }
+
+ Status Visit(const FloatType& type) { return CompareFloating(type); }
+
+ Status Visit(const DoubleType& type) { return CompareFloating(type); }
+
+ // Also matches StringType
+ Status Visit(const BinaryType& type) { return CompareBinary(type); }
+
+ // Also matches LargeStringType
+ Status Visit(const LargeBinaryType& type) { return CompareBinary(type); }
+
+ Status Visit(const FixedSizeBinaryType& type) {
+ const auto byte_width = type.byte_width();
+ const uint8_t* left_data = left_.GetValues<uint8_t>(1, 0);
+ const uint8_t* right_data = right_.GetValues<uint8_t>(1, 0);
+
+ if (left_data != nullptr && right_data != nullptr) {
+ auto compare_runs = [&](int64_t i, int64_t length) -> bool {
+ return memcmp(left_data + (left_start_idx_ + left_.offset + i) * byte_width,
+ right_data + (right_start_idx_ + right_.offset + i) * byte_width,
+ length * byte_width) == 0;
+ };
+ VisitValidRuns(compare_runs);
+ } else {
+ auto compare_runs = [&](int64_t i, int64_t length) -> bool { return true; };
+ VisitValidRuns(compare_runs);
}
- return Status::OK();
- }
-
- // Also matches MapType
- Status Visit(const ListType& type) { return CompareList(type); }
-
- Status Visit(const LargeListType& type) { return CompareList(type); }
-
- Status Visit(const FixedSizeListType& type) {
- const auto list_size = type.list_size();
- const ArrayData& left_data = *left_.child_data[0];
- const ArrayData& right_data = *right_.child_data[0];
-
- auto compare_runs = [&](int64_t i, int64_t length) -> bool {
- RangeDataEqualsImpl impl(options_, floating_approximate_, left_data, right_data,
- (left_start_idx_ + left_.offset + i) * list_size,
- (right_start_idx_ + right_.offset + i) * list_size,
- length * list_size);
- return impl.Compare();
- };
- VisitValidRuns(compare_runs);
- return Status::OK();
- }
-
- Status Visit(const StructType& type) {
- const int32_t num_fields = type.num_fields();
-
- auto compare_runs = [&](int64_t i, int64_t length) -> bool {
- for (int32_t f = 0; f < num_fields; ++f) {
- RangeDataEqualsImpl impl(options_, floating_approximate_, *left_.child_data[f],
- *right_.child_data[f],
- left_start_idx_ + left_.offset + i,
- right_start_idx_ + right_.offset + i, length);
- if (!impl.Compare()) {
+ return Status::OK();
+ }
+
+ // Also matches MapType
+ Status Visit(const ListType& type) { return CompareList(type); }
+
+ Status Visit(const LargeListType& type) { return CompareList(type); }
+
+ Status Visit(const FixedSizeListType& type) {
+ const auto list_size = type.list_size();
+ const ArrayData& left_data = *left_.child_data[0];
+ const ArrayData& right_data = *right_.child_data[0];
+
+ auto compare_runs = [&](int64_t i, int64_t length) -> bool {
+ RangeDataEqualsImpl impl(options_, floating_approximate_, left_data, right_data,
+ (left_start_idx_ + left_.offset + i) * list_size,
+ (right_start_idx_ + right_.offset + i) * list_size,
+ length * list_size);
+ return impl.Compare();
+ };
+ VisitValidRuns(compare_runs);
+ return Status::OK();
+ }
+
+ Status Visit(const StructType& type) {
+ const int32_t num_fields = type.num_fields();
+
+ auto compare_runs = [&](int64_t i, int64_t length) -> bool {
+ for (int32_t f = 0; f < num_fields; ++f) {
+ RangeDataEqualsImpl impl(options_, floating_approximate_, *left_.child_data[f],
+ *right_.child_data[f],
+ left_start_idx_ + left_.offset + i,
+ right_start_idx_ + right_.offset + i, length);
+ if (!impl.Compare()) {
return false;
}
}
- return true;
- };
- VisitValidRuns(compare_runs);
+ return true;
+ };
+ VisitValidRuns(compare_runs);
return Status::OK();
}
- Status Visit(const SparseUnionType& type) {
- const auto& child_ids = type.child_ids();
- const int8_t* left_codes = left_.GetValues<int8_t>(1);
- const int8_t* right_codes = right_.GetValues<int8_t>(1);
+ Status Visit(const SparseUnionType& type) {
+ const auto& child_ids = type.child_ids();
+ const int8_t* left_codes = left_.GetValues<int8_t>(1);
+ const int8_t* right_codes = right_.GetValues<int8_t>(1);
- // Unions don't have a null bitmap
- for (int64_t i = 0; i < range_length_; ++i) {
- const auto type_id = left_codes[left_start_idx_ + i];
- if (type_id != right_codes[right_start_idx_ + i]) {
+ // Unions don't have a null bitmap
+ for (int64_t i = 0; i < range_length_; ++i) {
+ const auto type_id = left_codes[left_start_idx_ + i];
+ if (type_id != right_codes[right_start_idx_ + i]) {
result_ = false;
- break;
+ break;
}
- const auto child_num = child_ids[type_id];
- // XXX can we instead detect runs of same-child union values?
- RangeDataEqualsImpl impl(
- options_, floating_approximate_, *left_.child_data[child_num],
- *right_.child_data[child_num], left_start_idx_ + left_.offset + i,
- right_start_idx_ + right_.offset + i, 1);
- if (!impl.Compare()) {
+ const auto child_num = child_ids[type_id];
+ // XXX can we instead detect runs of same-child union values?
+ RangeDataEqualsImpl impl(
+ options_, floating_approximate_, *left_.child_data[child_num],
+ *right_.child_data[child_num], left_start_idx_ + left_.offset + i,
+ right_start_idx_ + right_.offset + i, 1);
+ if (!impl.Compare()) {
result_ = false;
- break;
+ break;
}
}
return Status::OK();
}
- Status Visit(const DenseUnionType& type) {
- const auto& child_ids = type.child_ids();
- const int8_t* left_codes = left_.GetValues<int8_t>(1);
- const int8_t* right_codes = right_.GetValues<int8_t>(1);
- const int32_t* left_offsets = left_.GetValues<int32_t>(2);
- const int32_t* right_offsets = right_.GetValues<int32_t>(2);
-
- for (int64_t i = 0; i < range_length_; ++i) {
- const auto type_id = left_codes[left_start_idx_ + i];
- if (type_id != right_codes[right_start_idx_ + i]) {
- result_ = false;
- break;
- }
- const auto child_num = child_ids[type_id];
- RangeDataEqualsImpl impl(
- options_, floating_approximate_, *left_.child_data[child_num],
- *right_.child_data[child_num], left_offsets[left_start_idx_ + i],
- right_offsets[right_start_idx_ + i], 1);
- if (!impl.Compare()) {
- result_ = false;
- break;
- }
- }
+ Status Visit(const DenseUnionType& type) {
+ const auto& child_ids = type.child_ids();
+ const int8_t* left_codes = left_.GetValues<int8_t>(1);
+ const int8_t* right_codes = right_.GetValues<int8_t>(1);
+ const int32_t* left_offsets = left_.GetValues<int32_t>(2);
+ const int32_t* right_offsets = right_.GetValues<int32_t>(2);
+
+ for (int64_t i = 0; i < range_length_; ++i) {
+ const auto type_id = left_codes[left_start_idx_ + i];
+ if (type_id != right_codes[right_start_idx_ + i]) {
+ result_ = false;
+ break;
+ }
+ const auto child_num = child_ids[type_id];
+ RangeDataEqualsImpl impl(
+ options_, floating_approximate_, *left_.child_data[child_num],
+ *right_.child_data[child_num], left_offsets[left_start_idx_ + i],
+ right_offsets[right_start_idx_ + i], 1);
+ if (!impl.Compare()) {
+ result_ = false;
+ break;
+ }
+ }
return Status::OK();
}
- Status Visit(const DictionaryType& type) {
- // Compare dictionaries
- result_ &= CompareArrayRanges(
- *left_.dictionary, *right_.dictionary,
- /*left_start_idx=*/0,
- /*left_end_idx=*/std::max(left_.dictionary->length, right_.dictionary->length),
- /*right_start_idx=*/0, options_, floating_approximate_);
- if (result_) {
- // Compare indices
- result_ &= CompareWithType(*type.index_type());
+ Status Visit(const DictionaryType& type) {
+ // Compare dictionaries
+ result_ &= CompareArrayRanges(
+ *left_.dictionary, *right_.dictionary,
+ /*left_start_idx=*/0,
+ /*left_end_idx=*/std::max(left_.dictionary->length, right_.dictionary->length),
+ /*right_start_idx=*/0, options_, floating_approximate_);
+ if (result_) {
+ // Compare indices
+ result_ &= CompareWithType(*type.index_type());
}
return Status::OK();
}
- Status Visit(const ExtensionType& type) {
- // Compare storages
- result_ &= CompareWithType(*type.storage_type());
+ Status Visit(const ExtensionType& type) {
+ // Compare storages
+ result_ &= CompareWithType(*type.storage_type());
return Status::OK();
}
protected:
- // For CompareFloating (templated local classes or lambdas not supported in C++11)
- template <typename CType>
- struct ComparatorVisitor {
- RangeDataEqualsImpl* impl;
- const CType* left_values;
- const CType* right_values;
-
- template <typename CompareFunction>
- void operator()(CompareFunction&& compare) {
- impl->VisitValues([&](int64_t i) {
- const CType x = left_values[i + impl->left_start_idx_];
- const CType y = right_values[i + impl->right_start_idx_];
- return compare(x, y);
- });
- }
- };
-
- template <typename CType>
- friend struct ComparatorVisitor;
-
- template <typename TypeClass, typename CType = typename TypeClass::c_type>
- Status ComparePrimitive(const TypeClass&) {
- const CType* left_values = left_.GetValues<CType>(1);
- const CType* right_values = right_.GetValues<CType>(1);
- VisitValidRuns([&](int64_t i, int64_t length) {
- return memcmp(left_values + left_start_idx_ + i,
- right_values + right_start_idx_ + i, length * sizeof(CType)) == 0;
- });
- return Status::OK();
- }
-
- template <typename TypeClass>
- Status CompareFloating(const TypeClass&) {
- using CType = typename TypeClass::c_type;
- const CType* left_values = left_.GetValues<CType>(1);
- const CType* right_values = right_.GetValues<CType>(1);
-
- ComparatorVisitor<CType> visitor{this, left_values, right_values};
- VisitFloatingEquality<CType>(options_, floating_approximate_, visitor);
+ // For CompareFloating (templated local classes or lambdas not supported in C++11)
+ template <typename CType>
+ struct ComparatorVisitor {
+ RangeDataEqualsImpl* impl;
+ const CType* left_values;
+ const CType* right_values;
+
+ template <typename CompareFunction>
+ void operator()(CompareFunction&& compare) {
+ impl->VisitValues([&](int64_t i) {
+ const CType x = left_values[i + impl->left_start_idx_];
+ const CType y = right_values[i + impl->right_start_idx_];
+ return compare(x, y);
+ });
+ }
+ };
+
+ template <typename CType>
+ friend struct ComparatorVisitor;
+
+ template <typename TypeClass, typename CType = typename TypeClass::c_type>
+ Status ComparePrimitive(const TypeClass&) {
+ const CType* left_values = left_.GetValues<CType>(1);
+ const CType* right_values = right_.GetValues<CType>(1);
+ VisitValidRuns([&](int64_t i, int64_t length) {
+ return memcmp(left_values + left_start_idx_ + i,
+ right_values + right_start_idx_ + i, length * sizeof(CType)) == 0;
+ });
+ return Status::OK();
+ }
+
+ template <typename TypeClass>
+ Status CompareFloating(const TypeClass&) {
+ using CType = typename TypeClass::c_type;
+ const CType* left_values = left_.GetValues<CType>(1);
+ const CType* right_values = right_.GetValues<CType>(1);
+
+ ComparatorVisitor<CType> visitor{this, left_values, right_values};
+ VisitFloatingEquality<CType>(options_, floating_approximate_, visitor);
return Status::OK();
}
- template <typename TypeClass>
- Status CompareBinary(const TypeClass&) {
- const uint8_t* left_data = left_.GetValues<uint8_t>(2, 0);
- const uint8_t* right_data = right_.GetValues<uint8_t>(2, 0);
+ template <typename TypeClass>
+ Status CompareBinary(const TypeClass&) {
+ const uint8_t* left_data = left_.GetValues<uint8_t>(2, 0);
+ const uint8_t* right_data = right_.GetValues<uint8_t>(2, 0);
- if (left_data != nullptr && right_data != nullptr) {
- const auto compare_ranges = [&](int64_t left_offset, int64_t right_offset,
- int64_t length) -> bool {
- return memcmp(left_data + left_offset, right_data + right_offset, length) == 0;
- };
- CompareWithOffsets<typename TypeClass::offset_type>(1, compare_ranges);
+ if (left_data != nullptr && right_data != nullptr) {
+ const auto compare_ranges = [&](int64_t left_offset, int64_t right_offset,
+ int64_t length) -> bool {
+ return memcmp(left_data + left_offset, right_data + right_offset, length) == 0;
+ };
+ CompareWithOffsets<typename TypeClass::offset_type>(1, compare_ranges);
} else {
- // One of the arrays is an array of empty strings and nulls.
- // We just need to compare the offsets.
- // (note we must not call memcmp() with null data pointers)
- CompareWithOffsets<typename TypeClass::offset_type>(1, [](...) { return true; });
+ // One of the arrays is an array of empty strings and nulls.
+ // We just need to compare the offsets.
+ // (note we must not call memcmp() with null data pointers)
+ CompareWithOffsets<typename TypeClass::offset_type>(1, [](...) { return true; });
}
return Status::OK();
}
- template <typename TypeClass>
- Status CompareList(const TypeClass&) {
- const ArrayData& left_data = *left_.child_data[0];
- const ArrayData& right_data = *right_.child_data[0];
+ template <typename TypeClass>
+ Status CompareList(const TypeClass&) {
+ const ArrayData& left_data = *left_.child_data[0];
+ const ArrayData& right_data = *right_.child_data[0];
- const auto compare_ranges = [&](int64_t left_offset, int64_t right_offset,
- int64_t length) -> bool {
- RangeDataEqualsImpl impl(options_, floating_approximate_, left_data, right_data,
- left_offset, right_offset, length);
- return impl.Compare();
- };
+ const auto compare_ranges = [&](int64_t left_offset, int64_t right_offset,
+ int64_t length) -> bool {
+ RangeDataEqualsImpl impl(options_, floating_approximate_, left_data, right_data,
+ left_offset, right_offset, length);
+ return impl.Compare();
+ };
- CompareWithOffsets<typename TypeClass::offset_type>(1, compare_ranges);
+ CompareWithOffsets<typename TypeClass::offset_type>(1, compare_ranges);
return Status::OK();
}
- template <typename offset_type, typename CompareRanges>
- void CompareWithOffsets(int offsets_buffer_index, CompareRanges&& compare_ranges) {
- const offset_type* left_offsets =
- left_.GetValues<offset_type>(offsets_buffer_index) + left_start_idx_;
- const offset_type* right_offsets =
- right_.GetValues<offset_type>(offsets_buffer_index) + right_start_idx_;
+ template <typename offset_type, typename CompareRanges>
+ void CompareWithOffsets(int offsets_buffer_index, CompareRanges&& compare_ranges) {
+ const offset_type* left_offsets =
+ left_.GetValues<offset_type>(offsets_buffer_index) + left_start_idx_;
+ const offset_type* right_offsets =
+ right_.GetValues<offset_type>(offsets_buffer_index) + right_start_idx_;
- const auto compare_runs = [&](int64_t i, int64_t length) {
- for (int64_t j = i; j < i + length; ++j) {
- if (left_offsets[j + 1] - left_offsets[j] !=
- right_offsets[j + 1] - right_offsets[j]) {
+ const auto compare_runs = [&](int64_t i, int64_t length) {
+ for (int64_t j = i; j < i + length; ++j) {
+ if (left_offsets[j + 1] - left_offsets[j] !=
+ right_offsets[j + 1] - right_offsets[j]) {
return false;
}
}
- if (!compare_ranges(left_offsets[i], right_offsets[i],
- left_offsets[i + length] - left_offsets[i])) {
- return false;
- }
+ if (!compare_ranges(left_offsets[i], right_offsets[i],
+ left_offsets[i + length] - left_offsets[i])) {
+ return false;
+ }
return true;
- };
-
- VisitValidRuns(compare_runs);
- }
-
- template <typename CompareValues>
- void VisitValues(CompareValues&& compare_values) {
- internal::VisitSetBitRunsVoid(left_.buffers[0], left_.offset + left_start_idx_,
- range_length_, [&](int64_t position, int64_t length) {
- for (int64_t i = 0; i < length; ++i) {
- result_ &= compare_values(position + i);
- }
- });
- }
-
- // Visit and compare runs of non-null values
- template <typename CompareRuns>
- void VisitValidRuns(CompareRuns&& compare_runs) {
- const uint8_t* left_null_bitmap = left_.GetValues<uint8_t>(0, 0);
- if (left_null_bitmap == nullptr) {
- result_ = compare_runs(0, range_length_);
- return;
+ };
+
+ VisitValidRuns(compare_runs);
+ }
+
+ template <typename CompareValues>
+ void VisitValues(CompareValues&& compare_values) {
+ internal::VisitSetBitRunsVoid(left_.buffers[0], left_.offset + left_start_idx_,
+ range_length_, [&](int64_t position, int64_t length) {
+ for (int64_t i = 0; i < length; ++i) {
+ result_ &= compare_values(position + i);
+ }
+ });
+ }
+
+ // Visit and compare runs of non-null values
+ template <typename CompareRuns>
+ void VisitValidRuns(CompareRuns&& compare_runs) {
+ const uint8_t* left_null_bitmap = left_.GetValues<uint8_t>(0, 0);
+ if (left_null_bitmap == nullptr) {
+ result_ = compare_runs(0, range_length_);
+ return;
}
- internal::SetBitRunReader reader(left_null_bitmap, left_.offset + left_start_idx_,
- range_length_);
- while (true) {
- const auto run = reader.NextRun();
- if (run.length == 0) {
- return;
+ internal::SetBitRunReader reader(left_null_bitmap, left_.offset + left_start_idx_,
+ range_length_);
+ while (true) {
+ const auto run = reader.NextRun();
+ if (run.length == 0) {
+ return;
}
- if (!compare_runs(run.position, run.length)) {
- result_ = false;
- return;
+ if (!compare_runs(run.position, run.length)) {
+ result_ = false;
+ return;
}
}
}
- const EqualOptions& options_;
- const bool floating_approximate_;
- const ArrayData& left_;
- const ArrayData& right_;
- const int64_t left_start_idx_;
- const int64_t right_start_idx_;
- const int64_t range_length_;
+ const EqualOptions& options_;
+ const bool floating_approximate_;
+ const ArrayData& left_;
+ const ArrayData& right_;
+ const int64_t left_start_idx_;
+ const int64_t right_start_idx_;
+ const int64_t range_length_;
- bool result_;
+ bool result_;
};
-bool CompareArrayRanges(const ArrayData& left, const ArrayData& right,
- int64_t left_start_idx, int64_t left_end_idx,
- int64_t right_start_idx, const EqualOptions& options,
- bool floating_approximate) {
- if (left.type->id() != right.type->id() ||
- !TypeEquals(*left.type, *right.type, false /* check_metadata */)) {
- return false;
+bool CompareArrayRanges(const ArrayData& left, const ArrayData& right,
+ int64_t left_start_idx, int64_t left_end_idx,
+ int64_t right_start_idx, const EqualOptions& options,
+ bool floating_approximate) {
+ if (left.type->id() != right.type->id() ||
+ !TypeEquals(*left.type, *right.type, false /* check_metadata */)) {
+ return false;
}
- const int64_t range_length = left_end_idx - left_start_idx;
- DCHECK_GE(range_length, 0);
- if (left_start_idx + range_length > left.length) {
- // Left range too small
+ const int64_t range_length = left_end_idx - left_start_idx;
+ DCHECK_GE(range_length, 0);
+ if (left_start_idx + range_length > left.length) {
+ // Left range too small
return false;
}
- if (right_start_idx + range_length > right.length) {
- // Right range too small
+ if (right_start_idx + range_length > right.length) {
+ // Right range too small
return false;
}
- if (&left == &right && left_start_idx == right_start_idx &&
- IdentityImpliesEquality(*left.type, options)) {
- return true;
+ if (&left == &right && left_start_idx == right_start_idx &&
+ IdentityImpliesEquality(*left.type, options)) {
+ return true;
}
- // Compare values
- RangeDataEqualsImpl impl(options, floating_approximate, left, right, left_start_idx,
- right_start_idx, range_length);
- return impl.Compare();
+ // Compare values
+ RangeDataEqualsImpl impl(options, floating_approximate, left, right, left_start_idx,
+ right_start_idx, range_length);
+ return impl.Compare();
}
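
The FloatingEquality/VisitFloatingEquality machinery above folds the two EqualOptions bits (approximate, NaNs-equal) into a compile-time flag, so each specialization's operator() carries no option branches inside the value loop. Below is a standalone sketch of the same dispatch pattern, simplified from the code above; it uses a C++14 generic lambda for brevity where the original sticks to C++11 and routes through a ComparatorVisitor struct instead.

#include <cmath>
#include <cstdint>
#include <iostream>

enum Flags : int8_t { kApproximate = 1, kNansEqual = 2 };

template <typename T, int8_t F>
struct Eq {  // primary template: exact comparison (F == 0)
  bool operator()(T x, T y) const { return x == y; }
};

template <typename T>
struct Eq<T, kNansEqual> {
  bool operator()(T x, T y) const {
    return x == y || (std::isnan(x) && std::isnan(y));
  }
};

template <typename T>
struct Eq<T, kApproximate> {
  T eps;
  bool operator()(T x, T y) const { return std::fabs(x - y) <= eps || x == y; }
};

template <typename T>
struct Eq<T, kApproximate | kNansEqual> {
  T eps;
  bool operator()(T x, T y) const {
    return std::fabs(x - y) <= eps || x == y || (std::isnan(x) && std::isnan(y));
  }
};

// Runtime options select one statically-typed comparator, once, up front.
template <typename T, typename Visitor>
void VisitEq(bool approx, bool nans_equal, T eps, Visitor&& visit) {
  if (nans_equal) {
    if (approx) {
      visit(Eq<T, kApproximate | kNansEqual>{eps});
    } else {
      visit(Eq<T, kNansEqual>{});
    }
  } else {
    if (approx) {
      visit(Eq<T, kApproximate>{eps});
    } else {
      visit(Eq<T, 0>{});
    }
  }
}

int main() {
  // NaN == NaN is false under exact comparison, true once kNansEqual is selected.
  VisitEq<double>(/*approx=*/false, /*nans_equal=*/true, 0.0, [](auto eq) {
    std::cout << eq(std::nan(""), std::nan("")) << "\n";  // prints 1
  });
}
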
class TypeEqualsVisitor {
@@ -611,12 +611,12 @@ class TypeEqualsVisitor {
return Status::OK();
}
- Status Visit(const Decimal256Type& left) {
- const auto& right = checked_cast<const Decimal256Type&>(right_);
- result_ = left.precision() == right.precision() && left.scale() == right.scale();
- return Status::OK();
- }
-
+ Status Visit(const Decimal256Type& left) {
+ const auto& right = checked_cast<const Decimal256Type&>(right_);
+ result_ = left.precision() == right.precision() && left.scale() == right.scale();
+ return Status::OK();
+ }
+
template <typename T>
enable_if_t<is_list_like_type<T>::value || is_struct_type<T>::value, Status> Visit(
const T& left) {
@@ -671,22 +671,22 @@ class TypeEqualsVisitor {
bool result_;
};
-bool ArrayEquals(const Array& left, const Array& right, const EqualOptions& opts,
- bool floating_approximate);
-bool ScalarEquals(const Scalar& left, const Scalar& right, const EqualOptions& options,
- bool floating_approximate);
-
+bool ArrayEquals(const Array& left, const Array& right, const EqualOptions& opts,
+ bool floating_approximate);
+bool ScalarEquals(const Scalar& left, const Scalar& right, const EqualOptions& options,
+ bool floating_approximate);
+
class ScalarEqualsVisitor {
public:
- // PRE-CONDITIONS:
- // - the types are equal
- // - the scalars are non-null
- explicit ScalarEqualsVisitor(const Scalar& right, const EqualOptions& opts,
- bool floating_approximate)
- : right_(right),
- options_(opts),
- floating_approximate_(floating_approximate),
- result_(false) {}
+ // PRE-CONDITIONS:
+ // - the types are equal
+ // - the scalars are non-null
+ explicit ScalarEqualsVisitor(const Scalar& right, const EqualOptions& opts,
+ bool floating_approximate)
+ : right_(right),
+ options_(opts),
+ floating_approximate_(floating_approximate),
+ result_(false) {}
Status Visit(const NullScalar& left) {
result_ = true;
@@ -700,8 +700,8 @@ class ScalarEqualsVisitor {
}
template <typename T>
- typename std::enable_if<(is_primitive_ctype<typename T::TypeClass>::value ||
- is_temporal_type<typename T::TypeClass>::value),
+ typename std::enable_if<(is_primitive_ctype<typename T::TypeClass>::value ||
+ is_temporal_type<typename T::TypeClass>::value),
Status>::type
Visit(const T& left_) {
const auto& right = checked_cast<const T&>(right_);
@@ -709,10 +709,10 @@ class ScalarEqualsVisitor {
return Status::OK();
}
- Status Visit(const FloatScalar& left) { return CompareFloating(left); }
-
- Status Visit(const DoubleScalar& left) { return CompareFloating(left); }
-
+ Status Visit(const FloatScalar& left) { return CompareFloating(left); }
+
+ Status Visit(const DoubleScalar& left) { return CompareFloating(left); }
+
template <typename T>
typename std::enable_if<std::is_base_of<BaseBinaryScalar, T>::value, Status>::type
Visit(const T& left) {
@@ -727,33 +727,33 @@ class ScalarEqualsVisitor {
return Status::OK();
}
- Status Visit(const Decimal256Scalar& left) {
- const auto& right = checked_cast<const Decimal256Scalar&>(right_);
- result_ = left.value == right.value;
- return Status::OK();
- }
-
+ Status Visit(const Decimal256Scalar& left) {
+ const auto& right = checked_cast<const Decimal256Scalar&>(right_);
+ result_ = left.value == right.value;
+ return Status::OK();
+ }
+
Status Visit(const ListScalar& left) {
const auto& right = checked_cast<const ListScalar&>(right_);
- result_ = ArrayEquals(*left.value, *right.value, options_, floating_approximate_);
+ result_ = ArrayEquals(*left.value, *right.value, options_, floating_approximate_);
return Status::OK();
}
Status Visit(const LargeListScalar& left) {
const auto& right = checked_cast<const LargeListScalar&>(right_);
- result_ = ArrayEquals(*left.value, *right.value, options_, floating_approximate_);
+ result_ = ArrayEquals(*left.value, *right.value, options_, floating_approximate_);
return Status::OK();
}
Status Visit(const MapScalar& left) {
const auto& right = checked_cast<const MapScalar&>(right_);
- result_ = ArrayEquals(*left.value, *right.value, options_, floating_approximate_);
+ result_ = ArrayEquals(*left.value, *right.value, options_, floating_approximate_);
return Status::OK();
}
Status Visit(const FixedSizeListScalar& left) {
const auto& right = checked_cast<const FixedSizeListScalar&>(right_);
- result_ = ArrayEquals(*left.value, *right.value, options_, floating_approximate_);
+ result_ = ArrayEquals(*left.value, *right.value, options_, floating_approximate_);
return Status::OK();
}
@@ -765,8 +765,8 @@ class ScalarEqualsVisitor {
} else {
bool all_equals = true;
for (size_t i = 0; i < left.value.size() && all_equals; i++) {
- all_equals &= ScalarEquals(*left.value[i], *right.value[i], options_,
- floating_approximate_);
+ all_equals &= ScalarEquals(*left.value[i], *right.value[i], options_,
+ floating_approximate_);
}
result_ = all_equals;
}
@@ -777,7 +777,7 @@ class ScalarEqualsVisitor {
Status Visit(const UnionScalar& left) {
const auto& right = checked_cast<const UnionScalar&>(right_);
if (left.is_valid && right.is_valid) {
- result_ = ScalarEquals(*left.value, *right.value, options_, floating_approximate_);
+ result_ = ScalarEquals(*left.value, *right.value, options_, floating_approximate_);
} else if (!left.is_valid && !right.is_valid) {
result_ = true;
} else {
@@ -788,10 +788,10 @@ class ScalarEqualsVisitor {
Status Visit(const DictionaryScalar& left) {
const auto& right = checked_cast<const DictionaryScalar&>(right_);
- result_ = ScalarEquals(*left.value.index, *right.value.index, options_,
- floating_approximate_) &&
- ArrayEquals(*left.value.dictionary, *right.value.dictionary, options_,
- floating_approximate_);
+ result_ = ScalarEquals(*left.value.index, *right.value.index, options_,
+ floating_approximate_) &&
+ ArrayEquals(*left.value.dictionary, *right.value.dictionary, options_,
+ floating_approximate_);
return Status::OK();
}
@@ -802,40 +802,40 @@ class ScalarEqualsVisitor {
bool result() const { return result_; }
protected:
- // For CompareFloating (templated local classes or lambdas not supported in C++11)
- template <typename ScalarType>
- struct ComparatorVisitor {
- const ScalarType& left;
- const ScalarType& right;
- bool* result;
-
- template <typename CompareFunction>
- void operator()(CompareFunction&& compare) {
- *result = compare(left.value, right.value);
- }
- };
-
- template <typename ScalarType>
- Status CompareFloating(const ScalarType& left) {
- using CType = decltype(left.value);
-
- ComparatorVisitor<ScalarType> visitor{left, checked_cast<const ScalarType&>(right_),
- &result_};
- VisitFloatingEquality<CType>(options_, floating_approximate_, visitor);
- return Status::OK();
- }
-
+ // For CompareFloating (templated local classes or lambdas not supported in C++11)
+ template <typename ScalarType>
+ struct ComparatorVisitor {
+ const ScalarType& left;
+ const ScalarType& right;
+ bool* result;
+
+ template <typename CompareFunction>
+ void operator()(CompareFunction&& compare) {
+ *result = compare(left.value, right.value);
+ }
+ };
+
+ template <typename ScalarType>
+ Status CompareFloating(const ScalarType& left) {
+ using CType = decltype(left.value);
+
+ ComparatorVisitor<ScalarType> visitor{left, checked_cast<const ScalarType&>(right_),
+ &result_};
+ VisitFloatingEquality<CType>(options_, floating_approximate_, visitor);
+ return Status::OK();
+ }
+
const Scalar& right_;
- const EqualOptions options_;
- const bool floating_approximate_;
+ const EqualOptions options_;
+ const bool floating_approximate_;
bool result_;
};
-Status PrintDiff(const Array& left, const Array& right, std::ostream* os);
-
-Status PrintDiff(const Array& left, const Array& right, int64_t left_offset,
- int64_t left_length, int64_t right_offset, int64_t right_length,
- std::ostream* os) {
+Status PrintDiff(const Array& left, const Array& right, std::ostream* os);
+
+Status PrintDiff(const Array& left, const Array& right, int64_t left_offset,
+ int64_t left_length, int64_t right_offset, int64_t right_length,
+ std::ostream* os) {
if (os == nullptr) {
return Status::OK();
}
@@ -868,100 +868,100 @@ Status PrintDiff(const Array& left, const Array& right, int64_t left_offset,
return Status::OK();
}
- const auto left_slice = left.Slice(left_offset, left_length);
- const auto right_slice = right.Slice(right_offset, right_length);
- ARROW_ASSIGN_OR_RAISE(auto edits,
- Diff(*left_slice, *right_slice, default_memory_pool()));
+ const auto left_slice = left.Slice(left_offset, left_length);
+ const auto right_slice = right.Slice(right_offset, right_length);
+ ARROW_ASSIGN_OR_RAISE(auto edits,
+ Diff(*left_slice, *right_slice, default_memory_pool()));
ARROW_ASSIGN_OR_RAISE(auto formatter, MakeUnifiedDiffFormatter(*left.type(), os));
- return formatter(*edits, *left_slice, *right_slice);
+ return formatter(*edits, *left_slice, *right_slice);
}
-Status PrintDiff(const Array& left, const Array& right, std::ostream* os) {
- return PrintDiff(left, right, 0, left.length(), 0, right.length(), os);
-}
+Status PrintDiff(const Array& left, const Array& right, std::ostream* os) {
+ return PrintDiff(left, right, 0, left.length(), 0, right.length(), os);
+}
-bool ArrayRangeEquals(const Array& left, const Array& right, int64_t left_start_idx,
- int64_t left_end_idx, int64_t right_start_idx,
- const EqualOptions& options, bool floating_approximate) {
- bool are_equal =
- CompareArrayRanges(*left.data(), *right.data(), left_start_idx, left_end_idx,
- right_start_idx, options, floating_approximate);
+bool ArrayRangeEquals(const Array& left, const Array& right, int64_t left_start_idx,
+ int64_t left_end_idx, int64_t right_start_idx,
+ const EqualOptions& options, bool floating_approximate) {
+ bool are_equal =
+ CompareArrayRanges(*left.data(), *right.data(), left_start_idx, left_end_idx,
+ right_start_idx, options, floating_approximate);
if (!are_equal) {
- ARROW_IGNORE_EXPR(PrintDiff(
- left, right, left_start_idx, left_end_idx, right_start_idx,
- right_start_idx + (left_end_idx - left_start_idx), options.diff_sink()));
+ ARROW_IGNORE_EXPR(PrintDiff(
+ left, right, left_start_idx, left_end_idx, right_start_idx,
+ right_start_idx + (left_end_idx - left_start_idx), options.diff_sink()));
}
return are_equal;
}
-bool ArrayEquals(const Array& left, const Array& right, const EqualOptions& opts,
- bool floating_approximate) {
- if (left.length() != right.length()) {
- ARROW_IGNORE_EXPR(PrintDiff(left, right, opts.diff_sink()));
- return false;
+bool ArrayEquals(const Array& left, const Array& right, const EqualOptions& opts,
+ bool floating_approximate) {
+ if (left.length() != right.length()) {
+ ARROW_IGNORE_EXPR(PrintDiff(left, right, opts.diff_sink()));
+ return false;
+ }
+ return ArrayRangeEquals(left, right, 0, left.length(), 0, opts, floating_approximate);
+}
+
+bool ScalarEquals(const Scalar& left, const Scalar& right, const EqualOptions& options,
+ bool floating_approximate) {
+ if (&left == &right && IdentityImpliesEquality(*left.type, options)) {
+ return true;
+ }
+ if (!left.type->Equals(right.type)) {
+ return false;
+ }
+ if (left.is_valid != right.is_valid) {
+ return false;
+ }
+ if (!left.is_valid) {
+ return true;
}
- return ArrayRangeEquals(left, right, 0, left.length(), 0, opts, floating_approximate);
+ ScalarEqualsVisitor visitor(right, options, floating_approximate);
+ auto error = VisitScalarInline(left, &visitor);
+ DCHECK_OK(error);
+ return visitor.result();
+}
+
+} // namespace
+
+bool ArrayRangeEquals(const Array& left, const Array& right, int64_t left_start_idx,
+ int64_t left_end_idx, int64_t right_start_idx,
+ const EqualOptions& options) {
+ const bool floating_approximate = false;
+ return ArrayRangeEquals(left, right, left_start_idx, left_end_idx, right_start_idx,
+ options, floating_approximate);
+}
+
+bool ArrayRangeApproxEquals(const Array& left, const Array& right, int64_t left_start_idx,
+ int64_t left_end_idx, int64_t right_start_idx,
+ const EqualOptions& options) {
+ const bool floating_approximate = true;
+ return ArrayRangeEquals(left, right, left_start_idx, left_end_idx, right_start_idx,
+ options, floating_approximate);
+}
+
+bool ArrayEquals(const Array& left, const Array& right, const EqualOptions& opts) {
+ const bool floating_approximate = false;
+ return ArrayEquals(left, right, opts, floating_approximate);
+}
+
+bool ArrayApproxEquals(const Array& left, const Array& right, const EqualOptions& opts) {
+ const bool floating_approximate = true;
+ return ArrayEquals(left, right, opts, floating_approximate);
+}
+
+bool ScalarEquals(const Scalar& left, const Scalar& right, const EqualOptions& options) {
+ const bool floating_approximate = false;
+ return ScalarEquals(left, right, options, floating_approximate);
}
-bool ScalarEquals(const Scalar& left, const Scalar& right, const EqualOptions& options,
- bool floating_approximate) {
- if (&left == &right && IdentityImpliesEquality(*left.type, options)) {
- return true;
- }
- if (!left.type->Equals(right.type)) {
- return false;
- }
- if (left.is_valid != right.is_valid) {
- return false;
- }
- if (!left.is_valid) {
- return true;
- }
- ScalarEqualsVisitor visitor(right, options, floating_approximate);
- auto error = VisitScalarInline(left, &visitor);
- DCHECK_OK(error);
- return visitor.result();
+bool ScalarApproxEquals(const Scalar& left, const Scalar& right,
+ const EqualOptions& options) {
+ const bool floating_approximate = true;
+ return ScalarEquals(left, right, options, floating_approximate);
}
-} // namespace
-
-bool ArrayRangeEquals(const Array& left, const Array& right, int64_t left_start_idx,
- int64_t left_end_idx, int64_t right_start_idx,
- const EqualOptions& options) {
- const bool floating_approximate = false;
- return ArrayRangeEquals(left, right, left_start_idx, left_end_idx, right_start_idx,
- options, floating_approximate);
-}
-
-bool ArrayRangeApproxEquals(const Array& left, const Array& right, int64_t left_start_idx,
- int64_t left_end_idx, int64_t right_start_idx,
- const EqualOptions& options) {
- const bool floating_approximate = true;
- return ArrayRangeEquals(left, right, left_start_idx, left_end_idx, right_start_idx,
- options, floating_approximate);
-}
-
-bool ArrayEquals(const Array& left, const Array& right, const EqualOptions& opts) {
- const bool floating_approximate = false;
- return ArrayEquals(left, right, opts, floating_approximate);
-}
-
-bool ArrayApproxEquals(const Array& left, const Array& right, const EqualOptions& opts) {
- const bool floating_approximate = true;
- return ArrayEquals(left, right, opts, floating_approximate);
-}
-
-bool ScalarEquals(const Scalar& left, const Scalar& right, const EqualOptions& options) {
- const bool floating_approximate = false;
- return ScalarEquals(left, right, options, floating_approximate);
-}
-
-bool ScalarApproxEquals(const Scalar& left, const Scalar& right,
- const EqualOptions& options) {
- const bool floating_approximate = true;
- return ScalarEquals(left, right, options, floating_approximate);
-}
-
namespace {
bool StridedIntegerTensorContentEquals(const int dim_index, int64_t left_offset,
diff --git a/contrib/libs/apache/arrow/cpp/src/arrow/compare.h b/contrib/libs/apache/arrow/cpp/src/arrow/compare.h
index 3acd6b1b33e..6769b23867b 100644
--- a/contrib/libs/apache/arrow/cpp/src/arrow/compare.h
+++ b/contrib/libs/apache/arrow/cpp/src/arrow/compare.h
@@ -71,7 +71,7 @@ class EqualOptions {
return res;
}
- static EqualOptions Defaults() { return {}; }
+ static EqualOptions Defaults() { return {}; }
protected:
double atol_ = kDefaultAbsoluteTolerance;
@@ -88,25 +88,25 @@ bool ARROW_EXPORT ArrayEquals(const Array& left, const Array& right,
bool ARROW_EXPORT ArrayApproxEquals(const Array& left, const Array& right,
const EqualOptions& = EqualOptions::Defaults());
-/// Returns true if indicated equal-length segment of arrays are exactly equal
+/// Returns true if indicated equal-length segment of arrays are exactly equal
bool ARROW_EXPORT ArrayRangeEquals(const Array& left, const Array& right,
int64_t start_idx, int64_t end_idx,
- int64_t other_start_idx,
- const EqualOptions& = EqualOptions::Defaults());
-
-/// Returns true if indicated equal-length segment of arrays are approximately equal
-bool ARROW_EXPORT ArrayRangeApproxEquals(const Array& left, const Array& right,
- int64_t start_idx, int64_t end_idx,
- int64_t other_start_idx,
- const EqualOptions& = EqualOptions::Defaults());
-
-bool ARROW_EXPORT TensorEquals(const Tensor& left, const Tensor& right,
- const EqualOptions& = EqualOptions::Defaults());
-
-/// EXPERIMENTAL: Returns true if the given sparse tensors are exactly equal
-bool ARROW_EXPORT SparseTensorEquals(const SparseTensor& left, const SparseTensor& right,
- const EqualOptions& = EqualOptions::Defaults());
-
+ int64_t other_start_idx,
+ const EqualOptions& = EqualOptions::Defaults());
+
+/// Returns true if indicated equal-length segment of arrays are approximately equal
+bool ARROW_EXPORT ArrayRangeApproxEquals(const Array& left, const Array& right,
+ int64_t start_idx, int64_t end_idx,
+ int64_t other_start_idx,
+ const EqualOptions& = EqualOptions::Defaults());
+
+bool ARROW_EXPORT TensorEquals(const Tensor& left, const Tensor& right,
+ const EqualOptions& = EqualOptions::Defaults());
+
+/// EXPERIMENTAL: Returns true if the given sparse tensors are exactly equal
+bool ARROW_EXPORT SparseTensorEquals(const SparseTensor& left, const SparseTensor& right,
+ const EqualOptions& = EqualOptions::Defaults());
+
/// Returns true if the type metadata are exactly equal
/// \param[in] left a DataType
/// \param[in] right a DataType
@@ -122,12 +122,12 @@ bool ARROW_EXPORT TypeEquals(const DataType& left, const DataType& right,
bool ARROW_EXPORT ScalarEquals(const Scalar& left, const Scalar& right,
const EqualOptions& options = EqualOptions::Defaults());
-/// Returns true if scalars are approximately equal
-/// \param[in] left a Scalar
-/// \param[in] right a Scalar
-/// \param[in] options comparison options
-bool ARROW_EXPORT
-ScalarApproxEquals(const Scalar& left, const Scalar& right,
- const EqualOptions& options = EqualOptions::Defaults());
-
+/// Returns true if scalars are approximately equal
+/// \param[in] left a Scalar
+/// \param[in] right a Scalar
+/// \param[in] options comparison options
+bool ARROW_EXPORT
+ScalarApproxEquals(const Scalar& left, const Scalar& right,
+ const EqualOptions& options = EqualOptions::Defaults());
+
} // namespace arrow
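
A usage sketch of the exact vs. approximate entry points declared above; per compare.cc, the Approx variants forward to the same comparison core with floating_approximate = true. The builder construction and the Demo wrapper are illustrative assumptions.

#include <iostream>

#include "arrow/api.h"
#include "arrow/compare.h"

arrow::Status Demo() {
  arrow::DoubleBuilder b1, b2;
  ARROW_RETURN_NOT_OK(b1.AppendValues({1.0, 2.0}));
  ARROW_RETURN_NOT_OK(b2.AppendValues({1.0, 2.0 + 1e-7}));
  ARROW_ASSIGN_OR_RAISE(auto left, b1.Finish());
  ARROW_ASSIGN_OR_RAISE(auto right, b2.Finish());

  auto opts = arrow::EqualOptions::Defaults().atol(1e-5);
  // Exact comparison ignores atol; the approximate variant honors it.
  std::cout << arrow::ArrayEquals(*left, *right, opts) << "\n";        // 0
  std::cout << arrow::ArrayApproxEquals(*left, *right, opts) << "\n";  // 1

  arrow::DoubleScalar s1(1.0), s2(1.0 + 1e-7);
  std::cout << arrow::ScalarApproxEquals(s1, s2, opts) << "\n";        // 1
  return arrow::Status::OK();
}
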
diff --git a/contrib/libs/apache/arrow/cpp/src/arrow/compute/api_aggregate.cc b/contrib/libs/apache/arrow/cpp/src/arrow/compute/api_aggregate.cc
index 2f26520c22a..1b00c366bfd 100644
--- a/contrib/libs/apache/arrow/cpp/src/arrow/compute/api_aggregate.cc
+++ b/contrib/libs/apache/arrow/cpp/src/arrow/compute/api_aggregate.cc
@@ -18,157 +18,157 @@
#include "arrow/compute/api_aggregate.h"
#include "arrow/compute/exec.h"
-#include "arrow/compute/function_internal.h"
-#include "arrow/compute/registry.h"
-#include "arrow/compute/util_internal.h"
-#include "arrow/util/checked_cast.h"
-#include "arrow/util/logging.h"
+#include "arrow/compute/function_internal.h"
+#include "arrow/compute/registry.h"
+#include "arrow/compute/util_internal.h"
+#include "arrow/util/checked_cast.h"
+#include "arrow/util/logging.h"
namespace arrow {
-
-namespace internal {
-template <>
-struct EnumTraits<compute::QuantileOptions::Interpolation>
- : BasicEnumTraits<compute::QuantileOptions::Interpolation,
- compute::QuantileOptions::LINEAR, compute::QuantileOptions::LOWER,
- compute::QuantileOptions::HIGHER, compute::QuantileOptions::NEAREST,
- compute::QuantileOptions::MIDPOINT> {
- static std::string name() { return "QuantileOptions::Interpolation"; }
- static std::string value_name(compute::QuantileOptions::Interpolation value) {
- switch (value) {
- case compute::QuantileOptions::LINEAR:
- return "LINEAR";
- case compute::QuantileOptions::LOWER:
- return "LOWER";
- case compute::QuantileOptions::HIGHER:
- return "HIGHER";
- case compute::QuantileOptions::NEAREST:
- return "NEAREST";
- case compute::QuantileOptions::MIDPOINT:
- return "MIDPOINT";
- }
- return "<INVALID>";
- }
-};
-} // namespace internal
-
+
+namespace internal {
+template <>
+struct EnumTraits<compute::QuantileOptions::Interpolation>
+ : BasicEnumTraits<compute::QuantileOptions::Interpolation,
+ compute::QuantileOptions::LINEAR, compute::QuantileOptions::LOWER,
+ compute::QuantileOptions::HIGHER, compute::QuantileOptions::NEAREST,
+ compute::QuantileOptions::MIDPOINT> {
+ static std::string name() { return "QuantileOptions::Interpolation"; }
+ static std::string value_name(compute::QuantileOptions::Interpolation value) {
+ switch (value) {
+ case compute::QuantileOptions::LINEAR:
+ return "LINEAR";
+ case compute::QuantileOptions::LOWER:
+ return "LOWER";
+ case compute::QuantileOptions::HIGHER:
+ return "HIGHER";
+ case compute::QuantileOptions::NEAREST:
+ return "NEAREST";
+ case compute::QuantileOptions::MIDPOINT:
+ return "MIDPOINT";
+ }
+ return "<INVALID>";
+ }
+};
+} // namespace internal
+
namespace compute {
// ----------------------------------------------------------------------
-// Function options
-
-using ::arrow::internal::checked_cast;
-
-namespace internal {
-namespace {
-using ::arrow::internal::DataMember;
-static auto kScalarAggregateOptionsType = GetFunctionOptionsType<ScalarAggregateOptions>(
- DataMember("skip_nulls", &ScalarAggregateOptions::skip_nulls),
- DataMember("min_count", &ScalarAggregateOptions::min_count));
-static auto kModeOptionsType =
- GetFunctionOptionsType<ModeOptions>(DataMember("n", &ModeOptions::n));
-static auto kVarianceOptionsType =
- GetFunctionOptionsType<VarianceOptions>(DataMember("ddof", &VarianceOptions::ddof));
-static auto kQuantileOptionsType = GetFunctionOptionsType<QuantileOptions>(
- DataMember("q", &QuantileOptions::q),
- DataMember("interpolation", &QuantileOptions::interpolation));
-static auto kTDigestOptionsType = GetFunctionOptionsType<TDigestOptions>(
- DataMember("q", &TDigestOptions::q), DataMember("delta", &TDigestOptions::delta),
- DataMember("buffer_size", &TDigestOptions::buffer_size));
-static auto kIndexOptionsType =
- GetFunctionOptionsType<IndexOptions>(DataMember("value", &IndexOptions::value));
-} // namespace
-} // namespace internal
-
-ScalarAggregateOptions::ScalarAggregateOptions(bool skip_nulls, uint32_t min_count)
- : FunctionOptions(internal::kScalarAggregateOptionsType),
- skip_nulls(skip_nulls),
- min_count(min_count) {}
-constexpr char ScalarAggregateOptions::kTypeName[];
-
-ModeOptions::ModeOptions(int64_t n) : FunctionOptions(internal::kModeOptionsType), n(n) {}
-constexpr char ModeOptions::kTypeName[];
-
-VarianceOptions::VarianceOptions(int ddof)
- : FunctionOptions(internal::kVarianceOptionsType), ddof(ddof) {}
-constexpr char VarianceOptions::kTypeName[];
-
-QuantileOptions::QuantileOptions(double q, enum Interpolation interpolation)
- : FunctionOptions(internal::kQuantileOptionsType),
- q{q},
- interpolation{interpolation} {}
-QuantileOptions::QuantileOptions(std::vector<double> q, enum Interpolation interpolation)
- : FunctionOptions(internal::kQuantileOptionsType),
- q{std::move(q)},
- interpolation{interpolation} {}
-constexpr char QuantileOptions::kTypeName[];
-
-TDigestOptions::TDigestOptions(double q, uint32_t delta, uint32_t buffer_size)
- : FunctionOptions(internal::kTDigestOptionsType),
- q{q},
- delta{delta},
- buffer_size{buffer_size} {}
-TDigestOptions::TDigestOptions(std::vector<double> q, uint32_t delta,
- uint32_t buffer_size)
- : FunctionOptions(internal::kTDigestOptionsType),
- q{std::move(q)},
- delta{delta},
- buffer_size{buffer_size} {}
-constexpr char TDigestOptions::kTypeName[];
-
-IndexOptions::IndexOptions(std::shared_ptr<Scalar> value)
- : FunctionOptions(internal::kIndexOptionsType), value{std::move(value)} {}
-IndexOptions::IndexOptions() : IndexOptions(std::make_shared<NullScalar>()) {}
-constexpr char IndexOptions::kTypeName[];
-
-namespace internal {
-void RegisterAggregateOptions(FunctionRegistry* registry) {
- DCHECK_OK(registry->AddFunctionOptionsType(kScalarAggregateOptionsType));
- DCHECK_OK(registry->AddFunctionOptionsType(kModeOptionsType));
- DCHECK_OK(registry->AddFunctionOptionsType(kVarianceOptionsType));
- DCHECK_OK(registry->AddFunctionOptionsType(kQuantileOptionsType));
- DCHECK_OK(registry->AddFunctionOptionsType(kTDigestOptionsType));
- DCHECK_OK(registry->AddFunctionOptionsType(kIndexOptionsType));
-}
-} // namespace internal
-
-// ----------------------------------------------------------------------
+// Function options
+
+using ::arrow::internal::checked_cast;
+
+namespace internal {
+namespace {
+using ::arrow::internal::DataMember;
+static auto kScalarAggregateOptionsType = GetFunctionOptionsType<ScalarAggregateOptions>(
+ DataMember("skip_nulls", &ScalarAggregateOptions::skip_nulls),
+ DataMember("min_count", &ScalarAggregateOptions::min_count));
+static auto kModeOptionsType =
+ GetFunctionOptionsType<ModeOptions>(DataMember("n", &ModeOptions::n));
+static auto kVarianceOptionsType =
+ GetFunctionOptionsType<VarianceOptions>(DataMember("ddof", &VarianceOptions::ddof));
+static auto kQuantileOptionsType = GetFunctionOptionsType<QuantileOptions>(
+ DataMember("q", &QuantileOptions::q),
+ DataMember("interpolation", &QuantileOptions::interpolation));
+static auto kTDigestOptionsType = GetFunctionOptionsType<TDigestOptions>(
+ DataMember("q", &TDigestOptions::q), DataMember("delta", &TDigestOptions::delta),
+ DataMember("buffer_size", &TDigestOptions::buffer_size));
+static auto kIndexOptionsType =
+ GetFunctionOptionsType<IndexOptions>(DataMember("value", &IndexOptions::value));
+} // namespace
+} // namespace internal
+
+ScalarAggregateOptions::ScalarAggregateOptions(bool skip_nulls, uint32_t min_count)
+ : FunctionOptions(internal::kScalarAggregateOptionsType),
+ skip_nulls(skip_nulls),
+ min_count(min_count) {}
+constexpr char ScalarAggregateOptions::kTypeName[];
+
+ModeOptions::ModeOptions(int64_t n) : FunctionOptions(internal::kModeOptionsType), n(n) {}
+constexpr char ModeOptions::kTypeName[];
+
+VarianceOptions::VarianceOptions(int ddof)
+ : FunctionOptions(internal::kVarianceOptionsType), ddof(ddof) {}
+constexpr char VarianceOptions::kTypeName[];
+
+QuantileOptions::QuantileOptions(double q, enum Interpolation interpolation)
+ : FunctionOptions(internal::kQuantileOptionsType),
+ q{q},
+ interpolation{interpolation} {}
+QuantileOptions::QuantileOptions(std::vector<double> q, enum Interpolation interpolation)
+ : FunctionOptions(internal::kQuantileOptionsType),
+ q{std::move(q)},
+ interpolation{interpolation} {}
+constexpr char QuantileOptions::kTypeName[];
+
+TDigestOptions::TDigestOptions(double q, uint32_t delta, uint32_t buffer_size)
+ : FunctionOptions(internal::kTDigestOptionsType),
+ q{q},
+ delta{delta},
+ buffer_size{buffer_size} {}
+TDigestOptions::TDigestOptions(std::vector<double> q, uint32_t delta,
+ uint32_t buffer_size)
+ : FunctionOptions(internal::kTDigestOptionsType),
+ q{std::move(q)},
+ delta{delta},
+ buffer_size{buffer_size} {}
+constexpr char TDigestOptions::kTypeName[];
+
+IndexOptions::IndexOptions(std::shared_ptr<Scalar> value)
+ : FunctionOptions(internal::kIndexOptionsType), value{std::move(value)} {}
+IndexOptions::IndexOptions() : IndexOptions(std::make_shared<NullScalar>()) {}
+constexpr char IndexOptions::kTypeName[];
+
+namespace internal {
+void RegisterAggregateOptions(FunctionRegistry* registry) {
+ DCHECK_OK(registry->AddFunctionOptionsType(kScalarAggregateOptionsType));
+ DCHECK_OK(registry->AddFunctionOptionsType(kModeOptionsType));
+ DCHECK_OK(registry->AddFunctionOptionsType(kVarianceOptionsType));
+ DCHECK_OK(registry->AddFunctionOptionsType(kQuantileOptionsType));
+ DCHECK_OK(registry->AddFunctionOptionsType(kTDigestOptionsType));
+ DCHECK_OK(registry->AddFunctionOptionsType(kIndexOptionsType));
+}
+} // namespace internal
+
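
The DataMember descriptors above are what make these option types self-describing: generic equality, printing, and serialization are derived from the registered member list instead of being hand-written per class. A minimal sketch of what that buys callers (the printed format is illustrative, not verbatim):

    #include <iostream>
    #include <arrow/compute/api_aggregate.h>

    int main() {
      arrow::compute::ScalarAggregateOptions a(/*skip_nulls=*/true, /*min_count=*/1);
      arrow::compute::ScalarAggregateOptions b(/*skip_nulls=*/false, /*min_count=*/3);
      // ToString() and Equals() are generated from the DataMember list above.
      std::cout << a.ToString() << "\n";  // e.g. "ScalarAggregateOptions(skip_nulls=true, min_count=1)"
      std::cout << a.Equals(b) << "\n";   // 0: the members differ
      return 0;
    }
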
+// ----------------------------------------------------------------------
// Scalar aggregates
-Result<Datum> Count(const Datum& value, const ScalarAggregateOptions& options,
- ExecContext* ctx) {
+Result<Datum> Count(const Datum& value, const ScalarAggregateOptions& options,
+ ExecContext* ctx) {
return CallFunction("count", {value}, &options, ctx);
}
-Result<Datum> Mean(const Datum& value, const ScalarAggregateOptions& options,
- ExecContext* ctx) {
- return CallFunction("mean", {value}, &options, ctx);
+Result<Datum> Mean(const Datum& value, const ScalarAggregateOptions& options,
+ ExecContext* ctx) {
+ return CallFunction("mean", {value}, &options, ctx);
}
-Result<Datum> Sum(const Datum& value, const ScalarAggregateOptions& options,
- ExecContext* ctx) {
- return CallFunction("sum", {value}, &options, ctx);
+Result<Datum> Sum(const Datum& value, const ScalarAggregateOptions& options,
+ ExecContext* ctx) {
+ return CallFunction("sum", {value}, &options, ctx);
}
-Result<Datum> MinMax(const Datum& value, const ScalarAggregateOptions& options,
- ExecContext* ctx) {
+Result<Datum> MinMax(const Datum& value, const ScalarAggregateOptions& options,
+ ExecContext* ctx) {
return CallFunction("min_max", {value}, &options, ctx);
}
-Result<Datum> Any(const Datum& value, const ScalarAggregateOptions& options,
- ExecContext* ctx) {
- return CallFunction("any", {value}, &options, ctx);
+Result<Datum> Any(const Datum& value, const ScalarAggregateOptions& options,
+ ExecContext* ctx) {
+ return CallFunction("any", {value}, &options, ctx);
+}
+
+Result<Datum> All(const Datum& value, const ScalarAggregateOptions& options,
+ ExecContext* ctx) {
+ return CallFunction("all", {value}, &options, ctx);
+}
+
+Result<Datum> Mode(const Datum& value, const ModeOptions& options, ExecContext* ctx) {
+ return CallFunction("mode", {value}, &options, ctx);
}
-Result<Datum> All(const Datum& value, const ScalarAggregateOptions& options,
- ExecContext* ctx) {
- return CallFunction("all", {value}, &options, ctx);
-}
-
-Result<Datum> Mode(const Datum& value, const ModeOptions& options, ExecContext* ctx) {
- return CallFunction("mode", {value}, &options, ctx);
-}
-
Result<Datum> Stddev(const Datum& value, const VarianceOptions& options,
ExecContext* ctx) {
return CallFunction("stddev", {value}, &options, ctx);
@@ -179,19 +179,19 @@ Result<Datum> Variance(const Datum& value, const VarianceOptions& options,
return CallFunction("variance", {value}, &options, ctx);
}
-Result<Datum> Quantile(const Datum& value, const QuantileOptions& options,
- ExecContext* ctx) {
- return CallFunction("quantile", {value}, &options, ctx);
-}
-
-Result<Datum> TDigest(const Datum& value, const TDigestOptions& options,
- ExecContext* ctx) {
- return CallFunction("tdigest", {value}, &options, ctx);
-}
-
-Result<Datum> Index(const Datum& value, const IndexOptions& options, ExecContext* ctx) {
- return CallFunction("index", {value}, &options, ctx);
-}
-
+Result<Datum> Quantile(const Datum& value, const QuantileOptions& options,
+ ExecContext* ctx) {
+ return CallFunction("quantile", {value}, &options, ctx);
+}
+
+Result<Datum> TDigest(const Datum& value, const TDigestOptions& options,
+ ExecContext* ctx) {
+ return CallFunction("tdigest", {value}, &options, ctx);
+}
+
+Result<Datum> Index(const Datum& value, const IndexOptions& options, ExecContext* ctx) {
+ return CallFunction("index", {value}, &options, ctx);
+}
+
} // namespace compute
} // namespace arrow
diff --git a/contrib/libs/apache/arrow/cpp/src/arrow/compute/api_aggregate.h b/contrib/libs/apache/arrow/cpp/src/arrow/compute/api_aggregate.h
index 37296779b2f..7a6c44bd923 100644
--- a/contrib/libs/apache/arrow/cpp/src/arrow/compute/api_aggregate.h
+++ b/contrib/libs/apache/arrow/cpp/src/arrow/compute/api_aggregate.h
@@ -40,108 +40,108 @@ class ExecContext;
/// \addtogroup compute-concrete-options
/// @{
-/// \brief Control general scalar aggregate kernel behavior
-///
-/// By default, null values are ignored
-class ARROW_EXPORT ScalarAggregateOptions : public FunctionOptions {
- public:
- explicit ScalarAggregateOptions(bool skip_nulls = true, uint32_t min_count = 1);
- constexpr static char const kTypeName[] = "ScalarAggregateOptions";
- static ScalarAggregateOptions Defaults() { return ScalarAggregateOptions{}; }
-
- bool skip_nulls;
- uint32_t min_count;
-};
-
-/// \brief Control Mode kernel behavior
-///
-/// Returns top-n common values and counts.
-/// By default, returns the most common value and count.
-class ARROW_EXPORT ModeOptions : public FunctionOptions {
- public:
- explicit ModeOptions(int64_t n = 1);
- constexpr static char const kTypeName[] = "ModeOptions";
- static ModeOptions Defaults() { return ModeOptions{}; }
-
- int64_t n = 1;
+/// \brief Control general scalar aggregate kernel behavior
+///
+/// By default, null values are ignored
+class ARROW_EXPORT ScalarAggregateOptions : public FunctionOptions {
+ public:
+ explicit ScalarAggregateOptions(bool skip_nulls = true, uint32_t min_count = 1);
+ constexpr static char const kTypeName[] = "ScalarAggregateOptions";
+ static ScalarAggregateOptions Defaults() { return ScalarAggregateOptions{}; }
+
+ bool skip_nulls;
+ uint32_t min_count;
};
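
For illustration, a minimal sketch of how skip_nulls and min_count interact with an aggregation; the data values are invented:

    #include <iostream>
    #include <arrow/api.h>
    #include <arrow/compute/api.h>

    int main() {
      arrow::Int64Builder builder;
      (void)builder.AppendValues({1, 2, 3});
      (void)builder.AppendNull();
      auto array = builder.Finish().ValueOrDie();

      // min_count = 5 exceeds the number of non-null values (3),
      // so Sum yields a null scalar instead of 6.
      arrow::compute::ScalarAggregateOptions opts(/*skip_nulls=*/true, /*min_count=*/5);
      auto sum = arrow::compute::Sum(array, opts).ValueOrDie();
      std::cout << sum.scalar()->ToString() << "\n";  // prints a null scalar
      return 0;
    }
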
-/// \brief Control Delta Degrees of Freedom (ddof) of Variance and Stddev kernel
-///
-/// The divisor used in calculations is N - ddof, where N is the number of elements.
-/// By default, ddof is zero, and population variance or stddev is returned.
-class ARROW_EXPORT VarianceOptions : public FunctionOptions {
- public:
- explicit VarianceOptions(int ddof = 0);
- constexpr static char const kTypeName[] = "VarianceOptions";
- static VarianceOptions Defaults() { return VarianceOptions{}; }
-
- int ddof = 0;
-};
-
-/// \brief Control Quantile kernel behavior
-///
-/// By default, returns the median value.
-class ARROW_EXPORT QuantileOptions : public FunctionOptions {
- public:
- /// Interpolation method to use when quantile lies between two data points
- enum Interpolation {
- LINEAR = 0,
- LOWER,
- HIGHER,
- NEAREST,
- MIDPOINT,
+/// \brief Control Mode kernel behavior
+///
+/// Returns top-n common values and counts.
+/// By default, returns the most common value and count.
+class ARROW_EXPORT ModeOptions : public FunctionOptions {
+ public:
+ explicit ModeOptions(int64_t n = 1);
+ constexpr static char const kTypeName[] = "ModeOptions";
+ static ModeOptions Defaults() { return ModeOptions{}; }
+
+ int64_t n = 1;
+};
+
+/// \brief Control Delta Degrees of Freedom (ddof) of Variance and Stddev kernel
+///
+/// The divisor used in calculations is N - ddof, where N is the number of elements.
+/// By default, ddof is zero, and population variance or stddev is returned.
+class ARROW_EXPORT VarianceOptions : public FunctionOptions {
+ public:
+ explicit VarianceOptions(int ddof = 0);
+ constexpr static char const kTypeName[] = "VarianceOptions";
+ static VarianceOptions Defaults() { return VarianceOptions{}; }
+
+ int ddof = 0;
+};
+
+/// \brief Control Quantile kernel behavior
+///
+/// By default, returns the median value.
+class ARROW_EXPORT QuantileOptions : public FunctionOptions {
+ public:
+ /// Interpolation method to use when quantile lies between two data points
+ enum Interpolation {
+ LINEAR = 0,
+ LOWER,
+ HIGHER,
+ NEAREST,
+ MIDPOINT,
};
- explicit QuantileOptions(double q = 0.5, enum Interpolation interpolation = LINEAR);
+ explicit QuantileOptions(double q = 0.5, enum Interpolation interpolation = LINEAR);
+
+ explicit QuantileOptions(std::vector<double> q,
+ enum Interpolation interpolation = LINEAR);
+
+ constexpr static char const kTypeName[] = "QuantileOptions";
+ static QuantileOptions Defaults() { return QuantileOptions{}; }
- explicit QuantileOptions(std::vector<double> q,
- enum Interpolation interpolation = LINEAR);
+ /// quantile must be between 0 and 1 inclusive
+ std::vector<double> q;
+ enum Interpolation interpolation;
+};
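
A sketch of requesting several quantiles in one call with a non-default interpolation; input values are invented and the printed result is what MIDPOINT should produce for them:

    #include <iostream>
    #include <arrow/api.h>
    #include <arrow/compute/api.h>

    int main() {
      arrow::DoubleBuilder builder;
      (void)builder.AppendValues({1.0, 2.0, 3.0, 4.0});
      auto array = builder.Finish().ValueOrDie();

      // Three quantiles at once; MIDPOINT averages the two bracketing values.
      arrow::compute::QuantileOptions opts(
          std::vector<double>{0.25, 0.5, 0.75},
          arrow::compute::QuantileOptions::MIDPOINT);
      auto result = arrow::compute::Quantile(array, opts).ValueOrDie();
      std::cout << result.make_array()->ToString() << "\n";  // [1.5, 2.5, 3.5]
      return 0;
    }
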
+
+/// \brief Control TDigest approximate quantile kernel behavior
+///
+/// By default, returns the median value.
+class ARROW_EXPORT TDigestOptions : public FunctionOptions {
+ public:
+ explicit TDigestOptions(double q = 0.5, uint32_t delta = 100,
+ uint32_t buffer_size = 500);
+ explicit TDigestOptions(std::vector<double> q, uint32_t delta = 100,
+ uint32_t buffer_size = 500);
+ constexpr static char const kTypeName[] = "TDigestOptions";
+ static TDigestOptions Defaults() { return TDigestOptions{}; }
- constexpr static char const kTypeName[] = "QuantileOptions";
- static QuantileOptions Defaults() { return QuantileOptions{}; }
-
- /// quantile must be between 0 and 1 inclusive
- std::vector<double> q;
- enum Interpolation interpolation;
+ /// quantile must be between 0 and 1 inclusive
+ std::vector<double> q;
+ /// compression parameter, default 100
+ uint32_t delta;
+ /// input buffer size, default 500
+ uint32_t buffer_size;
};
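
A sketch of tuning the t-digest: a larger delta keeps more centroids (better accuracy, more memory), while buffer_size only controls input batching. The data is invented and the result is approximate by construction:

    #include <iostream>
    #include <arrow/api.h>
    #include <arrow/compute/api.h>

    int main() {
      arrow::DoubleBuilder builder;
      for (int i = 0; i < 1000; ++i) (void)builder.Append(i * 0.1);
      auto array = builder.Finish().ValueOrDie();

      // Double the default compression for a tighter tail estimate.
      arrow::compute::TDigestOptions opts(/*q=*/0.99, /*delta=*/200,
                                          /*buffer_size=*/500);
      auto result = arrow::compute::TDigest(array, opts).ValueOrDie();
      std::cout << result.make_array()->ToString() << "\n";  // approximately [98.9]
      return 0;
    }
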
-/// \brief Control TDigest approximate quantile kernel behavior
-///
-/// By default, returns the median value.
-class ARROW_EXPORT TDigestOptions : public FunctionOptions {
- public:
- explicit TDigestOptions(double q = 0.5, uint32_t delta = 100,
- uint32_t buffer_size = 500);
- explicit TDigestOptions(std::vector<double> q, uint32_t delta = 100,
- uint32_t buffer_size = 500);
- constexpr static char const kTypeName[] = "TDigestOptions";
- static TDigestOptions Defaults() { return TDigestOptions{}; }
-
- /// quantile must be between 0 and 1 inclusive
- std::vector<double> q;
- /// compression parameter, default 100
- uint32_t delta;
- /// input buffer size, default 500
- uint32_t buffer_size;
-};
-
-/// \brief Control Index kernel behavior
-class ARROW_EXPORT IndexOptions : public FunctionOptions {
- public:
- explicit IndexOptions(std::shared_ptr<Scalar> value);
- // Default constructor for serialization
- IndexOptions();
- constexpr static char const kTypeName[] = "IndexOptions";
-
- std::shared_ptr<Scalar> value;
+/// \brief Control Index kernel behavior
+class ARROW_EXPORT IndexOptions : public FunctionOptions {
+ public:
+ explicit IndexOptions(std::shared_ptr<Scalar> value);
+ // Default constructor for serialization
+ IndexOptions();
+ constexpr static char const kTypeName[] = "IndexOptions";
+
+ std::shared_ptr<Scalar> value;
};
/// @}
/// \brief Count non-null (or null) values in an array.
///
-/// \param[in] options counting options, see ScalarAggregateOptions for more information
+/// \param[in] options counting options, see ScalarAggregateOptions for more information
/// \param[in] datum to count
/// \param[in] ctx the function execution context, optional
/// \return out resulting datum
@@ -149,40 +149,40 @@ class ARROW_EXPORT IndexOptions : public FunctionOptions {
/// \since 1.0.0
/// \note API not yet finalized
ARROW_EXPORT
-Result<Datum> Count(
- const Datum& datum,
- const ScalarAggregateOptions& options = ScalarAggregateOptions::Defaults(),
- ExecContext* ctx = NULLPTR);
+Result<Datum> Count(
+ const Datum& datum,
+ const ScalarAggregateOptions& options = ScalarAggregateOptions::Defaults(),
+ ExecContext* ctx = NULLPTR);
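
In this snapshot Count is configured through ScalarAggregateOptions: with skip_nulls = true it counts non-null values, and with skip_nulls = false it counts the nulls instead. A minimal sketch (data invented):

    #include <iostream>
    #include <arrow/api.h>
    #include <arrow/compute/api.h>

    int main() {
      arrow::Int64Builder builder;
      (void)builder.AppendValues({1, 2, 3});
      (void)builder.AppendNull();
      auto array = builder.Finish().ValueOrDie();

      arrow::compute::ScalarAggregateOptions non_nulls(/*skip_nulls=*/true);
      arrow::compute::ScalarAggregateOptions nulls(/*skip_nulls=*/false);
      std::cout << arrow::compute::Count(array, non_nulls).ValueOrDie()
                       .scalar()->ToString() << "\n";  // 3
      std::cout << arrow::compute::Count(array, nulls).ValueOrDie()
                       .scalar()->ToString() << "\n";  // 1
      return 0;
    }
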
/// \brief Compute the mean of a numeric array.
///
/// \param[in] value datum to compute the mean, expecting Array
-/// \param[in] options see ScalarAggregateOptions for more information
+/// \param[in] options see ScalarAggregateOptions for more information
/// \param[in] ctx the function execution context, optional
/// \return datum of the computed mean as a DoubleScalar
///
/// \since 1.0.0
/// \note API not yet finalized
ARROW_EXPORT
-Result<Datum> Mean(
- const Datum& value,
- const ScalarAggregateOptions& options = ScalarAggregateOptions::Defaults(),
- ExecContext* ctx = NULLPTR);
+Result<Datum> Mean(
+ const Datum& value,
+ const ScalarAggregateOptions& options = ScalarAggregateOptions::Defaults(),
+ ExecContext* ctx = NULLPTR);
/// \brief Sum values of a numeric array.
///
/// \param[in] value datum to sum, expecting Array or ChunkedArray
-/// \param[in] options see ScalarAggregateOptions for more information
+/// \param[in] options see ScalarAggregateOptions for more information
/// \param[in] ctx the function execution context, optional
/// \return datum of the computed sum as a Scalar
///
/// \since 1.0.0
/// \note API not yet finalized
ARROW_EXPORT
-Result<Datum> Sum(
- const Datum& value,
- const ScalarAggregateOptions& options = ScalarAggregateOptions::Defaults(),
- ExecContext* ctx = NULLPTR);
+Result<Datum> Sum(
+ const Datum& value,
+ const ScalarAggregateOptions& options = ScalarAggregateOptions::Defaults(),
+ ExecContext* ctx = NULLPTR);
/// \brief Calculate the min / max of a numeric array
///
@@ -190,78 +190,78 @@ Result<Datum> Sum(
/// struct<min: T, max: T>, where T is the input type
///
/// \param[in] value input datum, expecting Array or ChunkedArray
-/// \param[in] options see ScalarAggregateOptions for more information
+/// \param[in] options see ScalarAggregateOptions for more information
/// \param[in] ctx the function execution context, optional
/// \return resulting datum as a struct<min: T, max: T> scalar
///
/// \since 1.0.0
/// \note API not yet finalized
ARROW_EXPORT
-Result<Datum> MinMax(
- const Datum& value,
- const ScalarAggregateOptions& options = ScalarAggregateOptions::Defaults(),
- ExecContext* ctx = NULLPTR);
-
-/// \brief Test whether any element in a boolean array evaluates to true.
-///
-/// This function returns true if any element in the array evaluates to true,
-/// and false otherwise. Null values are ignored by default. If null values
-/// are taken into account (by setting the ScalarAggregateOptions parameter
-/// skip_nulls = false), Kleene logic is used.
-/// See KleeneOr for more details on Kleene logic.
-///
-/// \param[in] value input datum, expecting a boolean array
-/// \param[in] options see ScalarAggregateOptions for more information
-/// \param[in] ctx the function execution context, optional
-/// \return resulting datum as a BooleanScalar
-///
-/// \since 3.0.0
-/// \note API not yet finalized
-ARROW_EXPORT
-Result<Datum> Any(
- const Datum& value,
- const ScalarAggregateOptions& options = ScalarAggregateOptions::Defaults(),
- ExecContext* ctx = NULLPTR);
-
-/// \brief Test whether all elements in a boolean array evaluate to true.
-///
-/// This function returns true if all elements in the array evaluate to true,
-/// and false otherwise. Null values are ignored by default. If null values
-/// are taken into account (by setting the ScalarAggregateOptions parameter
-/// skip_nulls = false), Kleene logic is used.
-/// See KleeneAnd for more details on Kleene logic.
-///
-/// \param[in] value input datum, expecting a boolean array
-/// \param[in] options see ScalarAggregateOptions for more information
-/// \param[in] ctx the function execution context, optional
-/// \return resulting datum as a BooleanScalar
-///
-/// \since 3.0.0
-/// \note API not yet finalized
-ARROW_EXPORT
-Result<Datum> All(
- const Datum& value,
- const ScalarAggregateOptions& options = ScalarAggregateOptions::Defaults(),
- ExecContext* ctx = NULLPTR);
-
+Result<Datum> MinMax(
+ const Datum& value,
+ const ScalarAggregateOptions& options = ScalarAggregateOptions::Defaults(),
+ ExecContext* ctx = NULLPTR);
+
+/// \brief Test whether any element in a boolean array evaluates to true.
+///
+/// This function returns true if any element in the array evaluates to true,
+/// and false otherwise. Null values are ignored by default. If null values
+/// are taken into account (by setting the ScalarAggregateOptions parameter
+/// skip_nulls = false), Kleene logic is used.
+/// See KleeneOr for more details on Kleene logic.
+///
+/// \param[in] value input datum, expecting a boolean array
+/// \param[in] options see ScalarAggregateOptions for more information
+/// \param[in] ctx the function execution context, optional
+/// \return resulting datum as a BooleanScalar
+///
+/// \since 3.0.0
+/// \note API not yet finalized
+ARROW_EXPORT
+Result<Datum> Any(
+ const Datum& value,
+ const ScalarAggregateOptions& options = ScalarAggregateOptions::Defaults(),
+ ExecContext* ctx = NULLPTR);
+
+/// \brief Test whether all elements in a boolean array evaluate to true.
+///
+/// This function returns true if all elements in the array evaluate to true,
+/// and false otherwise. Null values are ignored by default. If null values
+/// are taken into account (by setting the ScalarAggregateOptions parameter
+/// skip_nulls = false), Kleene logic is used.
+/// See KleeneAnd for more details on Kleene logic.
+///
+/// \param[in] value input datum, expecting a boolean array
+/// \param[in] options see ScalarAggregateOptions for more information
+/// \param[in] ctx the function execution context, optional
+/// \return resulting datum as a BooleanScalar
+///
+/// \since 3.0.0
+/// \note API not yet finalized
+ARROW_EXPORT
+Result<Datum> All(
+ const Datum& value,
+ const ScalarAggregateOptions& options = ScalarAggregateOptions::Defaults(),
+ ExecContext* ctx = NULLPTR);
+
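
A sketch of the Kleene behavior described above: with skip_nulls = false, a null cannot change the outcome of Any once a true is present, but it does make All indeterminate. Data invented:

    #include <iostream>
    #include <arrow/api.h>
    #include <arrow/compute/api.h>

    int main() {
      arrow::BooleanBuilder builder;
      (void)builder.Append(true);
      (void)builder.AppendNull();
      auto array = builder.Finish().ValueOrDie();

      arrow::compute::ScalarAggregateOptions kleene(/*skip_nulls=*/false);
      // Kleene logic: true OR null == true, but true AND null == null.
      std::cout << arrow::compute::Any(array, kleene).ValueOrDie()
                       .scalar()->ToString() << "\n";  // true
      std::cout << arrow::compute::All(array, kleene).ValueOrDie()
                       .scalar()->ToString() << "\n";  // null
      return 0;
    }
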
/// \brief Calculate the modal (most common) value of a numeric array
///
-/// This function returns the top-n most common values and the number of times they
-/// occur as an array of `struct<mode: T, count: int64>`, where T is the input type.
-/// Values with larger counts are returned before smaller ones.
-/// If more than one value has the same count, the smaller value is returned first.
+/// This function returns the top-n most common values and the number of times they
+/// occur as an array of `struct<mode: T, count: int64>`, where T is the input type.
+/// Values with larger counts are returned before smaller ones.
+/// If more than one value has the same count, the smaller value is returned first.
///
/// \param[in] value input datum, expecting Array or ChunkedArray
-/// \param[in] options see ModeOptions for more information
+/// \param[in] options see ModeOptions for more information
/// \param[in] ctx the function execution context, optional
-/// \return resulting datum as an array of struct<mode: T, count: int64>
+/// \return resulting datum as an array of struct<mode: T, count: int64>
///
/// \since 2.0.0
/// \note API not yet finalized
ARROW_EXPORT
-Result<Datum> Mode(const Datum& value,
- const ModeOptions& options = ModeOptions::Defaults(),
- ExecContext* ctx = NULLPTR);
+Result<Datum> Mode(const Datum& value,
+ const ModeOptions& options = ModeOptions::Defaults(),
+ ExecContext* ctx = NULLPTR);
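
A minimal sketch of asking for the two most common values; data invented, output abbreviated:

    #include <iostream>
    #include <arrow/api.h>
    #include <arrow/compute/api.h>

    int main() {
      arrow::Int64Builder builder;
      (void)builder.AppendValues({5, 1, 5, 1, 5, 3});
      auto array = builder.Finish().ValueOrDie();

      // n = 2: 5 appears three times, 1 twice.
      auto result = arrow::compute::Mode(array, arrow::compute::ModeOptions(2))
                        .ValueOrDie();
      std::cout << result.make_array()->ToString() << "\n";
      // roughly: [{mode: 5, count: 3}, {mode: 1, count: 2}]
      return 0;
    }
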
/// \brief Calculate the standard deviation of a numeric array
///
@@ -291,143 +291,143 @@ Result<Datum> Variance(const Datum& value,
const VarianceOptions& options = VarianceOptions::Defaults(),
ExecContext* ctx = NULLPTR);
-/// \brief Calculate the quantiles of a numeric array
-///
-/// \param[in] value input datum, expecting Array or ChunkedArray
-/// \param[in] options see QuantileOptions for more information
-/// \param[in] ctx the function execution context, optional
-/// \return resulting datum as an array
-///
-/// \since 4.0.0
-/// \note API not yet finalized
-ARROW_EXPORT
-Result<Datum> Quantile(const Datum& value,
- const QuantileOptions& options = QuantileOptions::Defaults(),
- ExecContext* ctx = NULLPTR);
-
-/// \brief Calculate the approximate quantiles of a numeric array with T-Digest algorithm
-///
-/// \param[in] value input datum, expecting Array or ChunkedArray
-/// \param[in] options see TDigestOptions for more information
-/// \param[in] ctx the function execution context, optional
-/// \return resulting datum as an array
-///
-/// \since 4.0.0
-/// \note API not yet finalized
-ARROW_EXPORT
-Result<Datum> TDigest(const Datum& value,
- const TDigestOptions& options = TDigestOptions::Defaults(),
- ExecContext* ctx = NULLPTR);
-
-/// \brief Find the first index of a value in an array.
-///
-/// \param[in] value The array to search.
-/// \param[in] options The value to search for, wrapped in IndexOptions.
-/// \param[in] ctx the function execution context, optional
-/// \return a Scalar containing the index (or -1 if not found).
-///
-/// \since 5.0.0
-/// \note API not yet finalized
-ARROW_EXPORT
-Result<Datum> Index(const Datum& value, const IndexOptions& options,
- ExecContext* ctx = NULLPTR);
-
-namespace internal {
-
-/// Internal use only: streaming group identifier.
-/// Consumes batches of keys and yields batches of the group ids.
-class ARROW_EXPORT Grouper {
- public:
- virtual ~Grouper() = default;
-
- /// Construct a Grouper which receives the specified key types
- static Result<std::unique_ptr<Grouper>> Make(const std::vector<ValueDescr>& descrs,
- ExecContext* ctx = default_exec_context());
-
- /// Consume a batch of keys, producing the corresponding group ids as an integer array.
- /// Currently only uint32 indices will be produced, eventually the bit width will only
- /// be as wide as necessary.
- virtual Result<Datum> Consume(const ExecBatch& batch) = 0;
-
- /// Get current unique keys. May be called multiple times.
- virtual Result<ExecBatch> GetUniques() = 0;
-
- /// Get the current number of groups.
- virtual uint32_t num_groups() const = 0;
-
- /// \brief Assemble lists of indices of identical elements.
- ///
- /// \param[in] ids An unsigned, all-valid integral array which will be
- /// used as grouping criteria.
- /// \param[in] num_groups An upper bound for the elements of ids
- /// \return A num_groups-long ListArray where the slot at i contains a
- /// list of indices where i appears in ids.
- ///
- /// MakeGroupings([
- /// 2,
- /// 2,
- /// 5,
- /// 5,
- /// 2,
- /// 3
- /// ], 8) == [
- /// [],
- /// [],
- /// [0, 1, 4],
- /// [5],
- /// [],
- /// [2, 3],
- /// [],
- /// []
- /// ]
- static Result<std::shared_ptr<ListArray>> MakeGroupings(
- const UInt32Array& ids, uint32_t num_groups,
- ExecContext* ctx = default_exec_context());
-
- /// \brief Produce a ListArray whose slots are selections of `array` which correspond to
- /// the provided groupings.
- ///
- /// For example,
- /// ApplyGroupings([
- /// [],
- /// [],
- /// [0, 1, 4],
- /// [5],
- /// [],
- /// [2, 3],
- /// [],
- /// []
- /// ], [2, 2, 5, 5, 2, 3]) == [
- /// [],
- /// [],
- /// [2, 2, 2],
- /// [3],
- /// [],
- /// [5, 5],
- /// [],
- /// []
- /// ]
- static Result<std::shared_ptr<ListArray>> ApplyGroupings(
- const ListArray& groupings, const Array& array,
- ExecContext* ctx = default_exec_context());
-};
-
-/// \brief Configure a grouped aggregation
-struct ARROW_EXPORT Aggregate {
- /// the name of the aggregation function
- std::string function;
-
- /// options for the aggregation function
- const FunctionOptions* options;
-};
-
-/// Internal use only: helper function for testing HashAggregateKernels.
-/// This will be replaced by streaming execution operators.
-ARROW_EXPORT
-Result<Datum> GroupBy(const std::vector<Datum>& arguments, const std::vector<Datum>& keys,
- const std::vector<Aggregate>& aggregates,
- ExecContext* ctx = default_exec_context());
-
-} // namespace internal
+/// \brief Calculate the quantiles of a numeric array
+///
+/// \param[in] value input datum, expecting Array or ChunkedArray
+/// \param[in] options see QuantileOptions for more information
+/// \param[in] ctx the function execution context, optional
+/// \return resulting datum as an array
+///
+/// \since 4.0.0
+/// \note API not yet finalized
+ARROW_EXPORT
+Result<Datum> Quantile(const Datum& value,
+ const QuantileOptions& options = QuantileOptions::Defaults(),
+ ExecContext* ctx = NULLPTR);
+
+/// \brief Calculate the approximate quantiles of a numeric array with T-Digest algorithm
+///
+/// \param[in] value input datum, expecting Array or ChunkedArray
+/// \param[in] options see TDigestOptions for more information
+/// \param[in] ctx the function execution context, optional
+/// \return resulting datum as an array
+///
+/// \since 4.0.0
+/// \note API not yet finalized
+ARROW_EXPORT
+Result<Datum> TDigest(const Datum& value,
+ const TDigestOptions& options = TDigestOptions::Defaults(),
+ ExecContext* ctx = NULLPTR);
+
+/// \brief Find the first index of a value in an array.
+///
+/// \param[in] value The array to search.
+/// \param[in] options The value to search for, wrapped in IndexOptions.
+/// \param[in] ctx the function execution context, optional
+/// \return a Scalar containing the index (or -1 if not found).
+///
+/// \since 5.0.0
+/// \note API not yet finalized
+ARROW_EXPORT
+Result<Datum> Index(const Datum& value, const IndexOptions& options,
+ ExecContext* ctx = NULLPTR);
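
A minimal sketch of Index: the searched-for value is wrapped in a Scalar via IndexOptions. Data invented:

    #include <iostream>
    #include <arrow/api.h>
    #include <arrow/compute/api.h>

    int main() {
      arrow::Int64Builder builder;
      (void)builder.AppendValues({7, 8, 9, 8});
      auto array = builder.Finish().ValueOrDie();

      // First occurrence of 8 is at index 1; a missing value would yield -1.
      arrow::compute::IndexOptions opts(arrow::MakeScalar(int64_t(8)));
      auto result = arrow::compute::Index(array, opts).ValueOrDie();
      std::cout << result.scalar()->ToString() << "\n";  // 1
      return 0;
    }
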
+
+namespace internal {
+
+/// Internal use only: streaming group identifier.
+/// Consumes batches of keys and yields batches of the group ids.
+class ARROW_EXPORT Grouper {
+ public:
+ virtual ~Grouper() = default;
+
+ /// Construct a Grouper which receives the specified key types
+ static Result<std::unique_ptr<Grouper>> Make(const std::vector<ValueDescr>& descrs,
+ ExecContext* ctx = default_exec_context());
+
+ /// Consume a batch of keys, producing the corresponding group ids as an integer array.
+ /// Currently only uint32 indices will be produced, eventually the bit width will only
+ /// be as wide as necessary.
+ virtual Result<Datum> Consume(const ExecBatch& batch) = 0;
+
+ /// Get current unique keys. May be called multiple times.
+ virtual Result<ExecBatch> GetUniques() = 0;
+
+ /// Get the current number of groups.
+ virtual uint32_t num_groups() const = 0;
+
+ /// \brief Assemble lists of indices of identical elements.
+ ///
+ /// \param[in] ids An unsigned, all-valid integral array which will be
+ /// used as grouping criteria.
+ /// \param[in] num_groups An upper bound for the elements of ids
+ /// \return A num_groups-long ListArray where the slot at i contains a
+ /// list of indices where i appears in ids.
+ ///
+ /// MakeGroupings([
+ /// 2,
+ /// 2,
+ /// 5,
+ /// 5,
+ /// 2,
+ /// 3
+ /// ], 8) == [
+ /// [],
+ /// [],
+ /// [0, 1, 4],
+ /// [5],
+ /// [],
+ /// [2, 3],
+ /// [],
+ /// []
+ /// ]
+ static Result<std::shared_ptr<ListArray>> MakeGroupings(
+ const UInt32Array& ids, uint32_t num_groups,
+ ExecContext* ctx = default_exec_context());
+
+ /// \brief Produce a ListArray whose slots are selections of `array` which correspond to
+ /// the provided groupings.
+ ///
+ /// For example,
+ /// ApplyGroupings([
+ /// [],
+ /// [],
+ /// [0, 1, 4],
+ /// [5],
+ /// [],
+ /// [2, 3],
+ /// [],
+ /// []
+ /// ], [2, 2, 5, 5, 2, 3]) == [
+ /// [],
+ /// [],
+ /// [2, 2, 2],
+ /// [3],
+ /// [],
+ /// [5, 5],
+ /// [],
+ /// []
+ /// ]
+ static Result<std::shared_ptr<ListArray>> ApplyGroupings(
+ const ListArray& groupings, const Array& array,
+ ExecContext* ctx = default_exec_context());
+};
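
The MakeGroupings example above, written out as code. Grouper is an internal API, so this is an illustrative sketch against this snapshot's headers, not a stable interface:

    #include <iostream>
    #include <arrow/api.h>
    #include <arrow/compute/api_aggregate.h>

    int main() {
      arrow::UInt32Builder ids;
      (void)ids.AppendValues(std::vector<uint32_t>{2, 2, 5, 5, 2, 3});
      auto id_array = std::static_pointer_cast<arrow::UInt32Array>(
          ids.Finish().ValueOrDie());

      // 8 groups; slot 2 collects indices [0, 1, 4], slot 5 collects [2, 3].
      auto groupings = arrow::compute::internal::Grouper::MakeGroupings(
                           *id_array, /*num_groups=*/8)
                           .ValueOrDie();
      std::cout << groupings->ToString() << "\n";
      return 0;
    }
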
+
+/// \brief Configure a grouped aggregation
+struct ARROW_EXPORT Aggregate {
+ /// the name of the aggregation function
+ std::string function;
+
+ /// options for the aggregation function
+ const FunctionOptions* options;
+};
+
+/// Internal use only: helper function for testing HashAggregateKernels.
+/// This will be replaced by streaming execution operators.
+ARROW_EXPORT
+Result<Datum> GroupBy(const std::vector<Datum>& arguments, const std::vector<Datum>& keys,
+ const std::vector<Aggregate>& aggregates,
+ ExecContext* ctx = default_exec_context());
+
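
A hedged sketch of the test-only helper above. The "hash_sum" kernel name and the shape of the returned Datum are assumptions about this snapshot's registry, and the API is internal:

    #include <iostream>
    #include <arrow/api.h>
    #include <arrow/compute/api.h>

    int main() {
      arrow::Int64Builder v, k;
      (void)v.AppendValues({10, 20, 30, 40});
      (void)k.AppendValues({1, 2, 1, 2});
      auto values = v.Finish().ValueOrDie();
      auto keys = k.Finish().ValueOrDie();

      // Sum `values` per distinct key: key 1 -> 40, key 2 -> 60.
      arrow::compute::ScalarAggregateOptions opts;
      auto grouped = arrow::compute::internal::GroupBy(
                         {values}, {keys}, {{"hash_sum", &opts}})
                         .ValueOrDie();
      std::cout << grouped.make_array()->ToString() << "\n";
      return 0;
    }
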
+} // namespace internal
} // namespace compute
} // namespace arrow
diff --git a/contrib/libs/apache/arrow/cpp/src/arrow/compute/api_scalar.cc b/contrib/libs/apache/arrow/cpp/src/arrow/compute/api_scalar.cc
index 989ca2b3937..1feb4e7eee0 100644
--- a/contrib/libs/apache/arrow/cpp/src/arrow/compute/api_scalar.cc
+++ b/contrib/libs/apache/arrow/cpp/src/arrow/compute/api_scalar.cc
@@ -21,287 +21,287 @@
#include <sstream>
#include <string>
-#include "arrow/array/array_base.h"
+#include "arrow/array/array_base.h"
#include "arrow/compute/exec.h"
-#include "arrow/compute/function_internal.h"
-#include "arrow/compute/registry.h"
-#include "arrow/compute/util_internal.h"
+#include "arrow/compute/function_internal.h"
+#include "arrow/compute/registry.h"
+#include "arrow/compute/util_internal.h"
#include "arrow/status.h"
#include "arrow/type.h"
-#include "arrow/util/checked_cast.h"
-#include "arrow/util/logging.h"
+#include "arrow/util/checked_cast.h"
+#include "arrow/util/logging.h"
namespace arrow {
-
-namespace internal {
-template <>
-struct EnumTraits<compute::JoinOptions::NullHandlingBehavior>
- : BasicEnumTraits<compute::JoinOptions::NullHandlingBehavior,
- compute::JoinOptions::NullHandlingBehavior::EMIT_NULL,
- compute::JoinOptions::NullHandlingBehavior::SKIP,
- compute::JoinOptions::NullHandlingBehavior::REPLACE> {
- static std::string name() { return "JoinOptions::NullHandlingBehavior"; }
- static std::string value_name(compute::JoinOptions::NullHandlingBehavior value) {
- switch (value) {
- case compute::JoinOptions::NullHandlingBehavior::EMIT_NULL:
- return "EMIT_NULL";
- case compute::JoinOptions::NullHandlingBehavior::SKIP:
- return "SKIP";
- case compute::JoinOptions::NullHandlingBehavior::REPLACE:
- return "REPLACE";
- }
- return "<INVALID>";
- }
-};
-template <>
-struct EnumTraits<TimeUnit::type>
- : BasicEnumTraits<TimeUnit::type, TimeUnit::type::SECOND, TimeUnit::type::MILLI,
- TimeUnit::type::MICRO, TimeUnit::type::NANO> {
- static std::string name() { return "TimeUnit::type"; }
- static std::string value_name(TimeUnit::type value) {
- switch (value) {
- case TimeUnit::type::SECOND:
- return "SECOND";
- case TimeUnit::type::MILLI:
- return "MILLI";
- case TimeUnit::type::MICRO:
- return "MICRO";
- case TimeUnit::type::NANO:
- return "NANO";
- }
- return "<INVALID>";
- }
-};
-template <>
-struct EnumTraits<compute::CompareOperator>
- : BasicEnumTraits<
- compute::CompareOperator, compute::CompareOperator::EQUAL,
- compute::CompareOperator::NOT_EQUAL, compute::CompareOperator::GREATER,
- compute::CompareOperator::GREATER_EQUAL, compute::CompareOperator::LESS,
- compute::CompareOperator::LESS_EQUAL> {
- static std::string name() { return "compute::CompareOperator"; }
- static std::string value_name(compute::CompareOperator value) {
- switch (value) {
- case compute::CompareOperator::EQUAL:
- return "EQUAL";
- case compute::CompareOperator::NOT_EQUAL:
- return "NOT_EQUAL";
- case compute::CompareOperator::GREATER:
- return "GREATER";
- case compute::CompareOperator::GREATER_EQUAL:
- return "GREATER_EQUAL";
- case compute::CompareOperator::LESS:
- return "LESS";
- case compute::CompareOperator::LESS_EQUAL:
- return "LESS_EQUAL";
- }
- return "<INVALID>";
- }
-};
-} // namespace internal
-
+
+namespace internal {
+template <>
+struct EnumTraits<compute::JoinOptions::NullHandlingBehavior>
+ : BasicEnumTraits<compute::JoinOptions::NullHandlingBehavior,
+ compute::JoinOptions::NullHandlingBehavior::EMIT_NULL,
+ compute::JoinOptions::NullHandlingBehavior::SKIP,
+ compute::JoinOptions::NullHandlingBehavior::REPLACE> {
+ static std::string name() { return "JoinOptions::NullHandlingBehavior"; }
+ static std::string value_name(compute::JoinOptions::NullHandlingBehavior value) {
+ switch (value) {
+ case compute::JoinOptions::NullHandlingBehavior::EMIT_NULL:
+ return "EMIT_NULL";
+ case compute::JoinOptions::NullHandlingBehavior::SKIP:
+ return "SKIP";
+ case compute::JoinOptions::NullHandlingBehavior::REPLACE:
+ return "REPLACE";
+ }
+ return "<INVALID>";
+ }
+};
+template <>
+struct EnumTraits<TimeUnit::type>
+ : BasicEnumTraits<TimeUnit::type, TimeUnit::type::SECOND, TimeUnit::type::MILLI,
+ TimeUnit::type::MICRO, TimeUnit::type::NANO> {
+ static std::string name() { return "TimeUnit::type"; }
+ static std::string value_name(TimeUnit::type value) {
+ switch (value) {
+ case TimeUnit::type::SECOND:
+ return "SECOND";
+ case TimeUnit::type::MILLI:
+ return "MILLI";
+ case TimeUnit::type::MICRO:
+ return "MICRO";
+ case TimeUnit::type::NANO:
+ return "NANO";
+ }
+ return "<INVALID>";
+ }
+};
+template <>
+struct EnumTraits<compute::CompareOperator>
+ : BasicEnumTraits<
+ compute::CompareOperator, compute::CompareOperator::EQUAL,
+ compute::CompareOperator::NOT_EQUAL, compute::CompareOperator::GREATER,
+ compute::CompareOperator::GREATER_EQUAL, compute::CompareOperator::LESS,
+ compute::CompareOperator::LESS_EQUAL> {
+ static std::string name() { return "compute::CompareOperator"; }
+ static std::string value_name(compute::CompareOperator value) {
+ switch (value) {
+ case compute::CompareOperator::EQUAL:
+ return "EQUAL";
+ case compute::CompareOperator::NOT_EQUAL:
+ return "NOT_EQUAL";
+ case compute::CompareOperator::GREATER:
+ return "GREATER";
+ case compute::CompareOperator::GREATER_EQUAL:
+ return "GREATER_EQUAL";
+ case compute::CompareOperator::LESS:
+ return "LESS";
+ case compute::CompareOperator::LESS_EQUAL:
+ return "LESS_EQUAL";
+ }
+ return "<INVALID>";
+ }
+};
+} // namespace internal
+
namespace compute {
-// ----------------------------------------------------------------------
-// Function options
-
-using ::arrow::internal::checked_cast;
-
-namespace internal {
-namespace {
-using ::arrow::internal::DataMember;
-static auto kArithmeticOptionsType = GetFunctionOptionsType<ArithmeticOptions>(
- DataMember("check_overflow", &ArithmeticOptions::check_overflow));
-static auto kElementWiseAggregateOptionsType =
- GetFunctionOptionsType<ElementWiseAggregateOptions>(
- DataMember("skip_nulls", &ElementWiseAggregateOptions::skip_nulls));
-static auto kJoinOptionsType = GetFunctionOptionsType<JoinOptions>(
- DataMember("null_handling", &JoinOptions::null_handling),
- DataMember("null_replacement", &JoinOptions::null_replacement));
-static auto kMatchSubstringOptionsType = GetFunctionOptionsType<MatchSubstringOptions>(
- DataMember("pattern", &MatchSubstringOptions::pattern),
- DataMember("ignore_case", &MatchSubstringOptions::ignore_case));
-static auto kSplitOptionsType = GetFunctionOptionsType<SplitOptions>(
- DataMember("max_splits", &SplitOptions::max_splits),
- DataMember("reverse", &SplitOptions::reverse));
-static auto kSplitPatternOptionsType = GetFunctionOptionsType<SplitPatternOptions>(
- DataMember("pattern", &SplitPatternOptions::pattern),
- DataMember("max_splits", &SplitPatternOptions::max_splits),
- DataMember("reverse", &SplitPatternOptions::reverse));
-static auto kReplaceSliceOptionsType = GetFunctionOptionsType<ReplaceSliceOptions>(
- DataMember("start", &ReplaceSliceOptions::start),
- DataMember("stop", &ReplaceSliceOptions::stop),
- DataMember("replacement", &ReplaceSliceOptions::replacement));
-static auto kReplaceSubstringOptionsType =
- GetFunctionOptionsType<ReplaceSubstringOptions>(
- DataMember("pattern", &ReplaceSubstringOptions::pattern),
- DataMember("replacement", &ReplaceSubstringOptions::replacement),
- DataMember("max_replacements", &ReplaceSubstringOptions::max_replacements));
-static auto kExtractRegexOptionsType = GetFunctionOptionsType<ExtractRegexOptions>(
- DataMember("pattern", &ExtractRegexOptions::pattern));
-static auto kSetLookupOptionsType = GetFunctionOptionsType<SetLookupOptions>(
- DataMember("value_set", &SetLookupOptions::value_set),
- DataMember("skip_nulls", &SetLookupOptions::skip_nulls));
-static auto kStrptimeOptionsType = GetFunctionOptionsType<StrptimeOptions>(
- DataMember("format", &StrptimeOptions::format),
- DataMember("unit", &StrptimeOptions::unit));
-static auto kPadOptionsType = GetFunctionOptionsType<PadOptions>(
- DataMember("width", &PadOptions::width), DataMember("padding", &PadOptions::padding));
-static auto kTrimOptionsType = GetFunctionOptionsType<TrimOptions>(
- DataMember("characters", &TrimOptions::characters));
-static auto kSliceOptionsType = GetFunctionOptionsType<SliceOptions>(
- DataMember("start", &SliceOptions::start), DataMember("stop", &SliceOptions::stop),
- DataMember("step", &SliceOptions::step));
-static auto kMakeStructOptionsType = GetFunctionOptionsType<MakeStructOptions>(
- DataMember("field_names", &MakeStructOptions::field_names),
- DataMember("field_nullability", &MakeStructOptions::field_nullability),
- DataMember("field_metadata", &MakeStructOptions::field_metadata));
-static auto kDayOfWeekOptionsType = GetFunctionOptionsType<DayOfWeekOptions>(
- DataMember("one_based_numbering", &DayOfWeekOptions::one_based_numbering),
- DataMember("week_start", &DayOfWeekOptions::week_start));
-} // namespace
-} // namespace internal
-
-ArithmeticOptions::ArithmeticOptions(bool check_overflow)
- : FunctionOptions(internal::kArithmeticOptionsType), check_overflow(check_overflow) {}
-constexpr char ArithmeticOptions::kTypeName[];
-
-ElementWiseAggregateOptions::ElementWiseAggregateOptions(bool skip_nulls)
- : FunctionOptions(internal::kElementWiseAggregateOptionsType),
- skip_nulls(skip_nulls) {}
-constexpr char ElementWiseAggregateOptions::kTypeName[];
-
-JoinOptions::JoinOptions(NullHandlingBehavior null_handling, std::string null_replacement)
- : FunctionOptions(internal::kJoinOptionsType),
- null_handling(null_handling),
- null_replacement(std::move(null_replacement)) {}
-constexpr char JoinOptions::kTypeName[];
-
-MatchSubstringOptions::MatchSubstringOptions(std::string pattern, bool ignore_case)
- : FunctionOptions(internal::kMatchSubstringOptionsType),
- pattern(std::move(pattern)),
- ignore_case(ignore_case) {}
-MatchSubstringOptions::MatchSubstringOptions() : MatchSubstringOptions("", false) {}
-constexpr char MatchSubstringOptions::kTypeName[];
-
-SplitOptions::SplitOptions(int64_t max_splits, bool reverse)
- : FunctionOptions(internal::kSplitOptionsType),
- max_splits(max_splits),
- reverse(reverse) {}
-constexpr char SplitOptions::kTypeName[];
-
-SplitPatternOptions::SplitPatternOptions(std::string pattern, int64_t max_splits,
- bool reverse)
- : FunctionOptions(internal::kSplitPatternOptionsType),
- pattern(std::move(pattern)),
- max_splits(max_splits),
- reverse(reverse) {}
-SplitPatternOptions::SplitPatternOptions() : SplitPatternOptions("", -1, false) {}
-constexpr char SplitPatternOptions::kTypeName[];
-
-ReplaceSliceOptions::ReplaceSliceOptions(int64_t start, int64_t stop,
- std::string replacement)
- : FunctionOptions(internal::kReplaceSliceOptionsType),
- start(start),
- stop(stop),
- replacement(std::move(replacement)) {}
-ReplaceSliceOptions::ReplaceSliceOptions() : ReplaceSliceOptions(0, 0, "") {}
-constexpr char ReplaceSliceOptions::kTypeName[];
-
-ReplaceSubstringOptions::ReplaceSubstringOptions(std::string pattern,
- std::string replacement,
- int64_t max_replacements)
- : FunctionOptions(internal::kReplaceSubstringOptionsType),
- pattern(std::move(pattern)),
- replacement(std::move(replacement)),
- max_replacements(max_replacements) {}
-ReplaceSubstringOptions::ReplaceSubstringOptions()
- : ReplaceSubstringOptions("", "", -1) {}
-constexpr char ReplaceSubstringOptions::kTypeName[];
-
-ExtractRegexOptions::ExtractRegexOptions(std::string pattern)
- : FunctionOptions(internal::kExtractRegexOptionsType), pattern(std::move(pattern)) {}
-ExtractRegexOptions::ExtractRegexOptions() : ExtractRegexOptions("") {}
-constexpr char ExtractRegexOptions::kTypeName[];
-
-SetLookupOptions::SetLookupOptions(Datum value_set, bool skip_nulls)
- : FunctionOptions(internal::kSetLookupOptionsType),
- value_set(std::move(value_set)),
- skip_nulls(skip_nulls) {}
-SetLookupOptions::SetLookupOptions() : SetLookupOptions({}, false) {}
-constexpr char SetLookupOptions::kTypeName[];
-
-StrptimeOptions::StrptimeOptions(std::string format, TimeUnit::type unit)
- : FunctionOptions(internal::kStrptimeOptionsType),
- format(std::move(format)),
- unit(unit) {}
-StrptimeOptions::StrptimeOptions() : StrptimeOptions("", TimeUnit::SECOND) {}
-constexpr char StrptimeOptions::kTypeName[];
-
-PadOptions::PadOptions(int64_t width, std::string padding)
- : FunctionOptions(internal::kPadOptionsType),
- width(width),
- padding(std::move(padding)) {}
-PadOptions::PadOptions() : PadOptions(0, " ") {}
-constexpr char PadOptions::kTypeName[];
-
-TrimOptions::TrimOptions(std::string characters)
- : FunctionOptions(internal::kTrimOptionsType), characters(std::move(characters)) {}
-TrimOptions::TrimOptions() : TrimOptions("") {}
-constexpr char TrimOptions::kTypeName[];
-
-SliceOptions::SliceOptions(int64_t start, int64_t stop, int64_t step)
- : FunctionOptions(internal::kSliceOptionsType),
- start(start),
- stop(stop),
- step(step) {}
-SliceOptions::SliceOptions() : SliceOptions(0, 0, 1) {}
-constexpr char SliceOptions::kTypeName[];
-
-MakeStructOptions::MakeStructOptions(
- std::vector<std::string> n, std::vector<bool> r,
- std::vector<std::shared_ptr<const KeyValueMetadata>> m)
- : FunctionOptions(internal::kMakeStructOptionsType),
- field_names(std::move(n)),
- field_nullability(std::move(r)),
- field_metadata(std::move(m)) {}
-
-MakeStructOptions::MakeStructOptions(std::vector<std::string> n)
- : FunctionOptions(internal::kMakeStructOptionsType),
- field_names(std::move(n)),
- field_nullability(field_names.size(), true),
- field_metadata(field_names.size(), NULLPTR) {}
-
-MakeStructOptions::MakeStructOptions() : MakeStructOptions(std::vector<std::string>()) {}
-constexpr char MakeStructOptions::kTypeName[];
-
-DayOfWeekOptions::DayOfWeekOptions(bool one_based_numbering, uint32_t week_start)
- : FunctionOptions(internal::kDayOfWeekOptionsType),
- one_based_numbering(one_based_numbering),
- week_start(week_start) {}
-constexpr char DayOfWeekOptions::kTypeName[];
-
-namespace internal {
-void RegisterScalarOptions(FunctionRegistry* registry) {
- DCHECK_OK(registry->AddFunctionOptionsType(kArithmeticOptionsType));
- DCHECK_OK(registry->AddFunctionOptionsType(kElementWiseAggregateOptionsType));
- DCHECK_OK(registry->AddFunctionOptionsType(kJoinOptionsType));
- DCHECK_OK(registry->AddFunctionOptionsType(kMatchSubstringOptionsType));
- DCHECK_OK(registry->AddFunctionOptionsType(kSplitOptionsType));
- DCHECK_OK(registry->AddFunctionOptionsType(kSplitPatternOptionsType));
- DCHECK_OK(registry->AddFunctionOptionsType(kReplaceSliceOptionsType));
- DCHECK_OK(registry->AddFunctionOptionsType(kReplaceSubstringOptionsType));
- DCHECK_OK(registry->AddFunctionOptionsType(kExtractRegexOptionsType));
- DCHECK_OK(registry->AddFunctionOptionsType(kSetLookupOptionsType));
- DCHECK_OK(registry->AddFunctionOptionsType(kStrptimeOptionsType));
- DCHECK_OK(registry->AddFunctionOptionsType(kPadOptionsType));
- DCHECK_OK(registry->AddFunctionOptionsType(kTrimOptionsType));
- DCHECK_OK(registry->AddFunctionOptionsType(kSliceOptionsType));
- DCHECK_OK(registry->AddFunctionOptionsType(kMakeStructOptionsType));
- DCHECK_OK(registry->AddFunctionOptionsType(kDayOfWeekOptionsType));
-}
-} // namespace internal
-
+// ----------------------------------------------------------------------
+// Function options
+
+using ::arrow::internal::checked_cast;
+
+namespace internal {
+namespace {
+using ::arrow::internal::DataMember;
+static auto kArithmeticOptionsType = GetFunctionOptionsType<ArithmeticOptions>(
+ DataMember("check_overflow", &ArithmeticOptions::check_overflow));
+static auto kElementWiseAggregateOptionsType =
+ GetFunctionOptionsType<ElementWiseAggregateOptions>(
+ DataMember("skip_nulls", &ElementWiseAggregateOptions::skip_nulls));
+static auto kJoinOptionsType = GetFunctionOptionsType<JoinOptions>(
+ DataMember("null_handling", &JoinOptions::null_handling),
+ DataMember("null_replacement", &JoinOptions::null_replacement));
+static auto kMatchSubstringOptionsType = GetFunctionOptionsType<MatchSubstringOptions>(
+ DataMember("pattern", &MatchSubstringOptions::pattern),
+ DataMember("ignore_case", &MatchSubstringOptions::ignore_case));
+static auto kSplitOptionsType = GetFunctionOptionsType<SplitOptions>(
+ DataMember("max_splits", &SplitOptions::max_splits),
+ DataMember("reverse", &SplitOptions::reverse));
+static auto kSplitPatternOptionsType = GetFunctionOptionsType<SplitPatternOptions>(
+ DataMember("pattern", &SplitPatternOptions::pattern),
+ DataMember("max_splits", &SplitPatternOptions::max_splits),
+ DataMember("reverse", &SplitPatternOptions::reverse));
+static auto kReplaceSliceOptionsType = GetFunctionOptionsType<ReplaceSliceOptions>(
+ DataMember("start", &ReplaceSliceOptions::start),
+ DataMember("stop", &ReplaceSliceOptions::stop),
+ DataMember("replacement", &ReplaceSliceOptions::replacement));
+static auto kReplaceSubstringOptionsType =
+ GetFunctionOptionsType<ReplaceSubstringOptions>(
+ DataMember("pattern", &ReplaceSubstringOptions::pattern),
+ DataMember("replacement", &ReplaceSubstringOptions::replacement),
+ DataMember("max_replacements", &ReplaceSubstringOptions::max_replacements));
+static auto kExtractRegexOptionsType = GetFunctionOptionsType<ExtractRegexOptions>(
+ DataMember("pattern", &ExtractRegexOptions::pattern));
+static auto kSetLookupOptionsType = GetFunctionOptionsType<SetLookupOptions>(
+ DataMember("value_set", &SetLookupOptions::value_set),
+ DataMember("skip_nulls", &SetLookupOptions::skip_nulls));
+static auto kStrptimeOptionsType = GetFunctionOptionsType<StrptimeOptions>(
+ DataMember("format", &StrptimeOptions::format),
+ DataMember("unit", &StrptimeOptions::unit));
+static auto kPadOptionsType = GetFunctionOptionsType<PadOptions>(
+ DataMember("width", &PadOptions::width), DataMember("padding", &PadOptions::padding));
+static auto kTrimOptionsType = GetFunctionOptionsType<TrimOptions>(
+ DataMember("characters", &TrimOptions::characters));
+static auto kSliceOptionsType = GetFunctionOptionsType<SliceOptions>(
+ DataMember("start", &SliceOptions::start), DataMember("stop", &SliceOptions::stop),
+ DataMember("step", &SliceOptions::step));
+static auto kMakeStructOptionsType = GetFunctionOptionsType<MakeStructOptions>(
+ DataMember("field_names", &MakeStructOptions::field_names),
+ DataMember("field_nullability", &MakeStructOptions::field_nullability),
+ DataMember("field_metadata", &MakeStructOptions::field_metadata));
+static auto kDayOfWeekOptionsType = GetFunctionOptionsType<DayOfWeekOptions>(
+ DataMember("one_based_numbering", &DayOfWeekOptions::one_based_numbering),
+ DataMember("week_start", &DayOfWeekOptions::week_start));
+} // namespace
+} // namespace internal
+
+ArithmeticOptions::ArithmeticOptions(bool check_overflow)
+ : FunctionOptions(internal::kArithmeticOptionsType), check_overflow(check_overflow) {}
+constexpr char ArithmeticOptions::kTypeName[];
+
+ElementWiseAggregateOptions::ElementWiseAggregateOptions(bool skip_nulls)
+ : FunctionOptions(internal::kElementWiseAggregateOptionsType),
+ skip_nulls(skip_nulls) {}
+constexpr char ElementWiseAggregateOptions::kTypeName[];
+
+JoinOptions::JoinOptions(NullHandlingBehavior null_handling, std::string null_replacement)
+ : FunctionOptions(internal::kJoinOptionsType),
+ null_handling(null_handling),
+ null_replacement(std::move(null_replacement)) {}
+constexpr char JoinOptions::kTypeName[];
+
+MatchSubstringOptions::MatchSubstringOptions(std::string pattern, bool ignore_case)
+ : FunctionOptions(internal::kMatchSubstringOptionsType),
+ pattern(std::move(pattern)),
+ ignore_case(ignore_case) {}
+MatchSubstringOptions::MatchSubstringOptions() : MatchSubstringOptions("", false) {}
+constexpr char MatchSubstringOptions::kTypeName[];
+
+SplitOptions::SplitOptions(int64_t max_splits, bool reverse)
+ : FunctionOptions(internal::kSplitOptionsType),
+ max_splits(max_splits),
+ reverse(reverse) {}
+constexpr char SplitOptions::kTypeName[];
+
+SplitPatternOptions::SplitPatternOptions(std::string pattern, int64_t max_splits,
+ bool reverse)
+ : FunctionOptions(internal::kSplitPatternOptionsType),
+ pattern(std::move(pattern)),
+ max_splits(max_splits),
+ reverse(reverse) {}
+SplitPatternOptions::SplitPatternOptions() : SplitPatternOptions("", -1, false) {}
+constexpr char SplitPatternOptions::kTypeName[];
+
+ReplaceSliceOptions::ReplaceSliceOptions(int64_t start, int64_t stop,
+ std::string replacement)
+ : FunctionOptions(internal::kReplaceSliceOptionsType),
+ start(start),
+ stop(stop),
+ replacement(std::move(replacement)) {}
+ReplaceSliceOptions::ReplaceSliceOptions() : ReplaceSliceOptions(0, 0, "") {}
+constexpr char ReplaceSliceOptions::kTypeName[];
+
+ReplaceSubstringOptions::ReplaceSubstringOptions(std::string pattern,
+ std::string replacement,
+ int64_t max_replacements)
+ : FunctionOptions(internal::kReplaceSubstringOptionsType),
+ pattern(std::move(pattern)),
+ replacement(std::move(replacement)),
+ max_replacements(max_replacements) {}
+ReplaceSubstringOptions::ReplaceSubstringOptions()
+ : ReplaceSubstringOptions("", "", -1) {}
+constexpr char ReplaceSubstringOptions::kTypeName[];
+
+ExtractRegexOptions::ExtractRegexOptions(std::string pattern)
+ : FunctionOptions(internal::kExtractRegexOptionsType), pattern(std::move(pattern)) {}
+ExtractRegexOptions::ExtractRegexOptions() : ExtractRegexOptions("") {}
+constexpr char ExtractRegexOptions::kTypeName[];
+
+SetLookupOptions::SetLookupOptions(Datum value_set, bool skip_nulls)
+ : FunctionOptions(internal::kSetLookupOptionsType),
+ value_set(std::move(value_set)),
+ skip_nulls(skip_nulls) {}
+SetLookupOptions::SetLookupOptions() : SetLookupOptions({}, false) {}
+constexpr char SetLookupOptions::kTypeName[];
+
+StrptimeOptions::StrptimeOptions(std::string format, TimeUnit::type unit)
+ : FunctionOptions(internal::kStrptimeOptionsType),
+ format(std::move(format)),
+ unit(unit) {}
+StrptimeOptions::StrptimeOptions() : StrptimeOptions("", TimeUnit::SECOND) {}
+constexpr char StrptimeOptions::kTypeName[];
+
+PadOptions::PadOptions(int64_t width, std::string padding)
+ : FunctionOptions(internal::kPadOptionsType),
+ width(width),
+ padding(std::move(padding)) {}
+PadOptions::PadOptions() : PadOptions(0, " ") {}
+constexpr char PadOptions::kTypeName[];
+
+TrimOptions::TrimOptions(std::string characters)
+ : FunctionOptions(internal::kTrimOptionsType), characters(std::move(characters)) {}
+TrimOptions::TrimOptions() : TrimOptions("") {}
+constexpr char TrimOptions::kTypeName[];
+
+SliceOptions::SliceOptions(int64_t start, int64_t stop, int64_t step)
+ : FunctionOptions(internal::kSliceOptionsType),
+ start(start),
+ stop(stop),
+ step(step) {}
+SliceOptions::SliceOptions() : SliceOptions(0, 0, 1) {}
+constexpr char SliceOptions::kTypeName[];
+
+MakeStructOptions::MakeStructOptions(
+ std::vector<std::string> n, std::vector<bool> r,
+ std::vector<std::shared_ptr<const KeyValueMetadata>> m)
+ : FunctionOptions(internal::kMakeStructOptionsType),
+ field_names(std::move(n)),
+ field_nullability(std::move(r)),
+ field_metadata(std::move(m)) {}
+
+MakeStructOptions::MakeStructOptions(std::vector<std::string> n)
+ : FunctionOptions(internal::kMakeStructOptionsType),
+ field_names(std::move(n)),
+ field_nullability(field_names.size(), true),
+ field_metadata(field_names.size(), NULLPTR) {}
+
+MakeStructOptions::MakeStructOptions() : MakeStructOptions(std::vector<std::string>()) {}
+constexpr char MakeStructOptions::kTypeName[];
+
+DayOfWeekOptions::DayOfWeekOptions(bool one_based_numbering, uint32_t week_start)
+ : FunctionOptions(internal::kDayOfWeekOptionsType),
+ one_based_numbering(one_based_numbering),
+ week_start(week_start) {}
+constexpr char DayOfWeekOptions::kTypeName[];
+
+namespace internal {
+void RegisterScalarOptions(FunctionRegistry* registry) {
+ DCHECK_OK(registry->AddFunctionOptionsType(kArithmeticOptionsType));
+ DCHECK_OK(registry->AddFunctionOptionsType(kElementWiseAggregateOptionsType));
+ DCHECK_OK(registry->AddFunctionOptionsType(kJoinOptionsType));
+ DCHECK_OK(registry->AddFunctionOptionsType(kMatchSubstringOptionsType));
+ DCHECK_OK(registry->AddFunctionOptionsType(kSplitOptionsType));
+ DCHECK_OK(registry->AddFunctionOptionsType(kSplitPatternOptionsType));
+ DCHECK_OK(registry->AddFunctionOptionsType(kReplaceSliceOptionsType));
+ DCHECK_OK(registry->AddFunctionOptionsType(kReplaceSubstringOptionsType));
+ DCHECK_OK(registry->AddFunctionOptionsType(kExtractRegexOptionsType));
+ DCHECK_OK(registry->AddFunctionOptionsType(kSetLookupOptionsType));
+ DCHECK_OK(registry->AddFunctionOptionsType(kStrptimeOptionsType));
+ DCHECK_OK(registry->AddFunctionOptionsType(kPadOptionsType));
+ DCHECK_OK(registry->AddFunctionOptionsType(kTrimOptionsType));
+ DCHECK_OK(registry->AddFunctionOptionsType(kSliceOptionsType));
+ DCHECK_OK(registry->AddFunctionOptionsType(kMakeStructOptionsType));
+ DCHECK_OK(registry->AddFunctionOptionsType(kDayOfWeekOptionsType));
+}
+} // namespace internal
+
#define SCALAR_EAGER_UNARY(NAME, REGISTRY_NAME) \
Result<Datum> NAME(const Datum& value, ExecContext* ctx) { \
return CallFunction(REGISTRY_NAME, {value}, ctx); \
@@ -315,26 +315,26 @@ void RegisterScalarOptions(FunctionRegistry* registry) {
// ----------------------------------------------------------------------
// Arithmetic
-#define SCALAR_ARITHMETIC_UNARY(NAME, REGISTRY_NAME, REGISTRY_CHECKED_NAME) \
- Result<Datum> NAME(const Datum& arg, ArithmeticOptions options, ExecContext* ctx) { \
- auto func_name = (options.check_overflow) ? REGISTRY_CHECKED_NAME : REGISTRY_NAME; \
- return CallFunction(func_name, {arg}, ctx); \
- }
-
-SCALAR_ARITHMETIC_UNARY(AbsoluteValue, "abs", "abs_checked")
-SCALAR_ARITHMETIC_UNARY(Negate, "negate", "negate_checked")
-SCALAR_EAGER_UNARY(Sign, "sign")
-SCALAR_ARITHMETIC_UNARY(Sin, "sin", "sin_checked")
-SCALAR_ARITHMETIC_UNARY(Cos, "cos", "cos_checked")
-SCALAR_ARITHMETIC_UNARY(Asin, "asin", "asin_checked")
-SCALAR_ARITHMETIC_UNARY(Acos, "acos", "acos_checked")
-SCALAR_ARITHMETIC_UNARY(Tan, "tan", "tan_checked")
-SCALAR_EAGER_UNARY(Atan, "atan")
-SCALAR_ARITHMETIC_UNARY(Ln, "ln", "ln_checked")
-SCALAR_ARITHMETIC_UNARY(Log10, "log10", "log10_checked")
-SCALAR_ARITHMETIC_UNARY(Log2, "log2", "log2_checked")
-SCALAR_ARITHMETIC_UNARY(Log1p, "log1p", "log1p_checked")
-
+#define SCALAR_ARITHMETIC_UNARY(NAME, REGISTRY_NAME, REGISTRY_CHECKED_NAME) \
+ Result<Datum> NAME(const Datum& arg, ArithmeticOptions options, ExecContext* ctx) { \
+ auto func_name = (options.check_overflow) ? REGISTRY_CHECKED_NAME : REGISTRY_NAME; \
+ return CallFunction(func_name, {arg}, ctx); \
+ }
+
+SCALAR_ARITHMETIC_UNARY(AbsoluteValue, "abs", "abs_checked")
+SCALAR_ARITHMETIC_UNARY(Negate, "negate", "negate_checked")
+SCALAR_EAGER_UNARY(Sign, "sign")
+SCALAR_ARITHMETIC_UNARY(Sin, "sin", "sin_checked")
+SCALAR_ARITHMETIC_UNARY(Cos, "cos", "cos_checked")
+SCALAR_ARITHMETIC_UNARY(Asin, "asin", "asin_checked")
+SCALAR_ARITHMETIC_UNARY(Acos, "acos", "acos_checked")
+SCALAR_ARITHMETIC_UNARY(Tan, "tan", "tan_checked")
+SCALAR_EAGER_UNARY(Atan, "atan")
+SCALAR_ARITHMETIC_UNARY(Ln, "ln", "ln_checked")
+SCALAR_ARITHMETIC_UNARY(Log10, "log10", "log10_checked")
+SCALAR_ARITHMETIC_UNARY(Log2, "log2", "log2_checked")
+SCALAR_ARITHMETIC_UNARY(Log1p, "log1p", "log1p_checked")
+
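
A sketch of the checked/unchecked dispatch this macro encodes: the same wrapper calls "ln" or "ln_checked" depending on ArithmeticOptions::check_overflow. The exact error message is illustrative:

    #include <iostream>
    #include <arrow/api.h>
    #include <arrow/compute/api.h>

    int main() {
      arrow::DoubleBuilder builder;
      (void)builder.AppendValues({1.0, 0.0});
      auto array = builder.Finish().ValueOrDie();

      // Unchecked: ln(0) quietly becomes -inf.
      auto lax = arrow::compute::Ln(array, arrow::compute::ArithmeticOptions());
      std::cout << lax.ValueOrDie().make_array()->ToString() << "\n";

      // Checked: the "ln_checked" kernel returns an error Status instead.
      arrow::compute::ArithmeticOptions strict(/*check_overflow=*/true);
      auto checked = arrow::compute::Ln(array, strict);
      std::cout << checked.status().ToString() << "\n";  // e.g. Invalid: logarithm of zero
      return 0;
    }
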
#define SCALAR_ARITHMETIC_BINARY(NAME, REGISTRY_NAME, REGISTRY_CHECKED_NAME) \
Result<Datum> NAME(const Datum& left, const Datum& right, ArithmeticOptions options, \
ExecContext* ctx) { \
@@ -346,65 +346,65 @@ SCALAR_ARITHMETIC_BINARY(Add, "add", "add_checked")
SCALAR_ARITHMETIC_BINARY(Subtract, "subtract", "subtract_checked")
SCALAR_ARITHMETIC_BINARY(Multiply, "multiply", "multiply_checked")
SCALAR_ARITHMETIC_BINARY(Divide, "divide", "divide_checked")
-SCALAR_ARITHMETIC_BINARY(Power, "power", "power_checked")
-SCALAR_ARITHMETIC_BINARY(ShiftLeft, "shift_left", "shift_left_checked")
-SCALAR_ARITHMETIC_BINARY(ShiftRight, "shift_right", "shift_right_checked")
-SCALAR_EAGER_BINARY(Atan2, "atan2")
-SCALAR_EAGER_UNARY(Floor, "floor")
-SCALAR_EAGER_UNARY(Ceil, "ceil")
-SCALAR_EAGER_UNARY(Trunc, "trunc")
-
-Result<Datum> MaxElementWise(const std::vector<Datum>& args,
- ElementWiseAggregateOptions options, ExecContext* ctx) {
- return CallFunction("max_element_wise", args, &options, ctx);
-}
-
-Result<Datum> MinElementWise(const std::vector<Datum>& args,
- ElementWiseAggregateOptions options, ExecContext* ctx) {
- return CallFunction("min_element_wise", args, &options, ctx);
-}
-
+SCALAR_ARITHMETIC_BINARY(Power, "power", "power_checked")
+SCALAR_ARITHMETIC_BINARY(ShiftLeft, "shift_left", "shift_left_checked")
+SCALAR_ARITHMETIC_BINARY(ShiftRight, "shift_right", "shift_right_checked")
+SCALAR_EAGER_BINARY(Atan2, "atan2")
+SCALAR_EAGER_UNARY(Floor, "floor")
+SCALAR_EAGER_UNARY(Ceil, "ceil")
+SCALAR_EAGER_UNARY(Trunc, "trunc")
+
+Result<Datum> MaxElementWise(const std::vector<Datum>& args,
+ ElementWiseAggregateOptions options, ExecContext* ctx) {
+ return CallFunction("max_element_wise", args, &options, ctx);
+}
+
+Result<Datum> MinElementWise(const std::vector<Datum>& args,
+ ElementWiseAggregateOptions options, ExecContext* ctx) {
+ return CallFunction("min_element_wise", args, &options, ctx);
+}
+
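// Illustrative sketch, not from the diff: the element-wise aggregates defined
// just above accept any number of equal-length inputs. (PairwiseMax is a
// hypothetical helper name.)
#include <arrow/compute/api_scalar.h>

arrow::Result<arrow::Datum> PairwiseMax(const arrow::Datum& a,
                                        const arrow::Datum& b) {
  // With the default skip_nulls=true, a null in one input defers to the
  // other input's value instead of forcing a null result.
  return arrow::compute::MaxElementWise(
      {a, b}, arrow::compute::ElementWiseAggregateOptions::Defaults());
}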
// ----------------------------------------------------------------------
// Set-related operations
static Result<Datum> ExecSetLookup(const std::string& func_name, const Datum& data,
- const SetLookupOptions& options, ExecContext* ctx) {
- if (!options.value_set.is_arraylike()) {
+ const SetLookupOptions& options, ExecContext* ctx) {
+ if (!options.value_set.is_arraylike()) {
return Status::Invalid("Set lookup value set must be Array or ChunkedArray");
}
- std::shared_ptr<DataType> data_type;
- if (data.type()->id() == Type::DICTIONARY) {
- data_type =
- arrow::internal::checked_pointer_cast<DictionaryType>(data.type())->value_type();
- } else {
- data_type = data.type();
- }
-
- if (options.value_set.length() > 0 && !data_type->Equals(options.value_set.type())) {
+ std::shared_ptr<DataType> data_type;
+ if (data.type()->id() == Type::DICTIONARY) {
+ data_type =
+ arrow::internal::checked_pointer_cast<DictionaryType>(data.type())->value_type();
+ } else {
+ data_type = data.type();
+ }
+
+ if (options.value_set.length() > 0 && !data_type->Equals(options.value_set.type())) {
std::stringstream ss;
- ss << "Array type didn't match type of values set: " << data_type->ToString()
- << " vs " << options.value_set.type()->ToString();
+ ss << "Array type didn't match type of values set: " << data_type->ToString()
+ << " vs " << options.value_set.type()->ToString();
return Status::Invalid(ss.str());
}
return CallFunction(func_name, {data}, &options, ctx);
}
-Result<Datum> IsIn(const Datum& values, const SetLookupOptions& options,
- ExecContext* ctx) {
- return ExecSetLookup("is_in", values, options, ctx);
-}
-
+Result<Datum> IsIn(const Datum& values, const SetLookupOptions& options,
+ ExecContext* ctx) {
+ return ExecSetLookup("is_in", values, options, ctx);
+}
+
Result<Datum> IsIn(const Datum& values, const Datum& value_set, ExecContext* ctx) {
- return ExecSetLookup("is_in", values, SetLookupOptions{value_set}, ctx);
+ return ExecSetLookup("is_in", values, SetLookupOptions{value_set}, ctx);
+}
+
+Result<Datum> IndexIn(const Datum& values, const SetLookupOptions& options,
+ ExecContext* ctx) {
+ return ExecSetLookup("index_in", values, options, ctx);
}
-Result<Datum> IndexIn(const Datum& values, const SetLookupOptions& options,
- ExecContext* ctx) {
- return ExecSetLookup("index_in", values, options, ctx);
-}
-
Result<Datum> IndexIn(const Datum& values, const Datum& value_set, ExecContext* ctx) {
- return ExecSetLookup("index_in", values, SetLookupOptions{value_set}, ctx);
+ return ExecSetLookup("index_in", values, SetLookupOptions{value_set}, ctx);
}
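// Illustrative sketch, not from the diff: both wrappers above funnel into
// ExecSetLookup, which validates that value_set is array-like and
// type-compatible before dispatching. (MembershipMask is a hypothetical
// helper name.)
#include <arrow/compute/api_scalar.h>

arrow::Result<arrow::Datum> MembershipMask(const arrow::Datum& values,
                                           const arrow::Datum& value_set) {
  // skip_nulls=false keeps nulls in `values` as nulls in the boolean output.
  arrow::compute::SetLookupOptions options(value_set, /*skip_nulls=*/false);
  return arrow::compute::IsIn(values, options);
}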
// ----------------------------------------------------------------------
@@ -416,8 +416,8 @@ SCALAR_EAGER_BINARY(KleeneAnd, "and_kleene")
SCALAR_EAGER_BINARY(Or, "or")
SCALAR_EAGER_BINARY(KleeneOr, "or_kleene")
SCALAR_EAGER_BINARY(Xor, "xor")
-SCALAR_EAGER_BINARY(AndNot, "and_not")
-SCALAR_EAGER_BINARY(KleeneAndNot, "and_not_kleene")
+SCALAR_EAGER_BINARY(AndNot, "and_not")
+SCALAR_EAGER_BINARY(KleeneAndNot, "and_not_kleene")
// ----------------------------------------------------------------------
@@ -444,7 +444,7 @@ Result<Datum> Compare(const Datum& left, const Datum& right, CompareOptions opti
func_name = "less_equal";
break;
}
- return CallFunction(func_name, {left, right}, nullptr, ctx);
+ return CallFunction(func_name, {left, right}, nullptr, ctx);
}
// ----------------------------------------------------------------------
@@ -452,47 +452,47 @@ Result<Datum> Compare(const Datum& left, const Datum& right, CompareOptions opti
SCALAR_EAGER_UNARY(IsValid, "is_valid")
SCALAR_EAGER_UNARY(IsNull, "is_null")
-SCALAR_EAGER_UNARY(IsNan, "is_nan")
+SCALAR_EAGER_UNARY(IsNan, "is_nan")
Result<Datum> FillNull(const Datum& values, const Datum& fill_value, ExecContext* ctx) {
return CallFunction("fill_null", {values, fill_value}, ctx);
}
-Result<Datum> IfElse(const Datum& cond, const Datum& if_true, const Datum& if_false,
- ExecContext* ctx) {
- return CallFunction("if_else", {cond, if_true, if_false}, ctx);
-}
-
-Result<Datum> CaseWhen(const Datum& cond, const std::vector<Datum>& cases,
- ExecContext* ctx) {
- std::vector<Datum> args = {cond};
- args.reserve(cases.size() + 1);
- args.insert(args.end(), cases.begin(), cases.end());
- return CallFunction("case_when", args, ctx);
-}
-
-// ----------------------------------------------------------------------
-// Temporal functions
-
-SCALAR_EAGER_UNARY(Year, "year")
-SCALAR_EAGER_UNARY(Month, "month")
-SCALAR_EAGER_UNARY(Day, "day")
-SCALAR_EAGER_UNARY(DayOfYear, "day_of_year")
-SCALAR_EAGER_UNARY(ISOYear, "iso_year")
-SCALAR_EAGER_UNARY(ISOWeek, "iso_week")
-SCALAR_EAGER_UNARY(ISOCalendar, "iso_calendar")
-SCALAR_EAGER_UNARY(Quarter, "quarter")
-SCALAR_EAGER_UNARY(Hour, "hour")
-SCALAR_EAGER_UNARY(Minute, "minute")
-SCALAR_EAGER_UNARY(Second, "second")
-SCALAR_EAGER_UNARY(Millisecond, "millisecond")
-SCALAR_EAGER_UNARY(Microsecond, "microsecond")
-SCALAR_EAGER_UNARY(Nanosecond, "nanosecond")
-SCALAR_EAGER_UNARY(Subsecond, "subsecond")
-
-Result<Datum> DayOfWeek(const Datum& arg, DayOfWeekOptions options, ExecContext* ctx) {
- return CallFunction("day_of_week", {arg}, &options, ctx);
-}
-
+Result<Datum> IfElse(const Datum& cond, const Datum& if_true, const Datum& if_false,
+ ExecContext* ctx) {
+ return CallFunction("if_else", {cond, if_true, if_false}, ctx);
+}
+
+Result<Datum> CaseWhen(const Datum& cond, const std::vector<Datum>& cases,
+ ExecContext* ctx) {
+ std::vector<Datum> args = {cond};
+ args.reserve(cases.size() + 1);
+ args.insert(args.end(), cases.begin(), cases.end());
+ return CallFunction("case_when", args, ctx);
+}
+
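// Illustrative sketch, not from the diff: CaseWhen above merely prepends the
// condition datum to the case values before dispatching to "case_when";
// IfElse is the simpler two-branch form. (TwoBranch is a hypothetical helper
// name.)
#include <arrow/compute/api_scalar.h>

arrow::Result<arrow::Datum> TwoBranch(const arrow::Datum& cond,
                                      const arrow::Datum& if_true,
                                      const arrow::Datum& if_false) {
  // Nulls in `cond` propagate to the output rather than picking a branch.
  return arrow::compute::IfElse(cond, if_true, if_false);
}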
+// ----------------------------------------------------------------------
+// Temporal functions
+
+SCALAR_EAGER_UNARY(Year, "year")
+SCALAR_EAGER_UNARY(Month, "month")
+SCALAR_EAGER_UNARY(Day, "day")
+SCALAR_EAGER_UNARY(DayOfYear, "day_of_year")
+SCALAR_EAGER_UNARY(ISOYear, "iso_year")
+SCALAR_EAGER_UNARY(ISOWeek, "iso_week")
+SCALAR_EAGER_UNARY(ISOCalendar, "iso_calendar")
+SCALAR_EAGER_UNARY(Quarter, "quarter")
+SCALAR_EAGER_UNARY(Hour, "hour")
+SCALAR_EAGER_UNARY(Minute, "minute")
+SCALAR_EAGER_UNARY(Second, "second")
+SCALAR_EAGER_UNARY(Millisecond, "millisecond")
+SCALAR_EAGER_UNARY(Microsecond, "microsecond")
+SCALAR_EAGER_UNARY(Nanosecond, "nanosecond")
+SCALAR_EAGER_UNARY(Subsecond, "subsecond")
+
+Result<Datum> DayOfWeek(const Datum& arg, DayOfWeekOptions options, ExecContext* ctx) {
+ return CallFunction("day_of_week", {arg}, &options, ctx);
+}
+
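// Illustrative sketch, not from the diff: DayOfWeek above is the one temporal
// wrapper here that threads options to its kernel. (IsoDayNumbers is a
// hypothetical helper name.)
#include <arrow/compute/api_scalar.h>

arrow::Result<arrow::Datum> IsoDayNumbers(const arrow::Datum& timestamps) {
  // one_based_numbering=true with week_start=1 (Monday) yields ISO numbering:
  // Monday=1 .. Sunday=7.
  arrow::compute::DayOfWeekOptions options(/*one_based_numbering=*/true,
                                           /*week_start=*/1);
  return arrow::compute::DayOfWeek(timestamps, options);
}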
} // namespace compute
} // namespace arrow
diff --git a/contrib/libs/apache/arrow/cpp/src/arrow/compute/api_scalar.h b/contrib/libs/apache/arrow/cpp/src/arrow/compute/api_scalar.h
index 8486cb0126f..e07e41569a1 100644
--- a/contrib/libs/apache/arrow/cpp/src/arrow/compute/api_scalar.h
+++ b/contrib/libs/apache/arrow/cpp/src/arrow/compute/api_scalar.h
@@ -37,125 +37,125 @@ namespace compute {
///
/// @{
-class ARROW_EXPORT ArithmeticOptions : public FunctionOptions {
- public:
- explicit ArithmeticOptions(bool check_overflow = false);
- constexpr static char const kTypeName[] = "ArithmeticOptions";
+class ARROW_EXPORT ArithmeticOptions : public FunctionOptions {
+ public:
+ explicit ArithmeticOptions(bool check_overflow = false);
+ constexpr static char const kTypeName[] = "ArithmeticOptions";
bool check_overflow;
};
-class ARROW_EXPORT ElementWiseAggregateOptions : public FunctionOptions {
- public:
- explicit ElementWiseAggregateOptions(bool skip_nulls = true);
- constexpr static char const kTypeName[] = "ElementWiseAggregateOptions";
- static ElementWiseAggregateOptions Defaults() { return ElementWiseAggregateOptions{}; }
-
- bool skip_nulls;
-};
-
-/// Options for var_args_join.
-class ARROW_EXPORT JoinOptions : public FunctionOptions {
- public:
- /// How to handle null values. (A null separator always results in a null output.)
- enum NullHandlingBehavior {
- /// A null in any input results in a null in the output.
- EMIT_NULL,
- /// Nulls in inputs are skipped.
- SKIP,
- /// Nulls in inputs are replaced with the replacement string.
- REPLACE,
- };
- explicit JoinOptions(NullHandlingBehavior null_handling = EMIT_NULL,
- std::string null_replacement = "");
- constexpr static char const kTypeName[] = "JoinOptions";
- static JoinOptions Defaults() { return JoinOptions(); }
- NullHandlingBehavior null_handling;
- std::string null_replacement;
-};
-
-class ARROW_EXPORT MatchSubstringOptions : public FunctionOptions {
- public:
- explicit MatchSubstringOptions(std::string pattern, bool ignore_case = false);
- MatchSubstringOptions();
- constexpr static char const kTypeName[] = "MatchSubstringOptions";
-
- /// The exact substring (or regex, depending on kernel) to look for inside input values.
+class ARROW_EXPORT ElementWiseAggregateOptions : public FunctionOptions {
+ public:
+ explicit ElementWiseAggregateOptions(bool skip_nulls = true);
+ constexpr static char const kTypeName[] = "ElementWiseAggregateOptions";
+ static ElementWiseAggregateOptions Defaults() { return ElementWiseAggregateOptions{}; }
+
+ bool skip_nulls;
+};
+
+/// Options for var_args_join.
+class ARROW_EXPORT JoinOptions : public FunctionOptions {
+ public:
+ /// How to handle null values. (A null separator always results in a null output.)
+ enum NullHandlingBehavior {
+ /// A null in any input results in a null in the output.
+ EMIT_NULL,
+ /// Nulls in inputs are skipped.
+ SKIP,
+ /// Nulls in inputs are replaced with the replacement string.
+ REPLACE,
+ };
+ explicit JoinOptions(NullHandlingBehavior null_handling = EMIT_NULL,
+ std::string null_replacement = "");
+ constexpr static char const kTypeName[] = "JoinOptions";
+ static JoinOptions Defaults() { return JoinOptions(); }
+ NullHandlingBehavior null_handling;
+ std::string null_replacement;
+};
+
+class ARROW_EXPORT MatchSubstringOptions : public FunctionOptions {
+ public:
+ explicit MatchSubstringOptions(std::string pattern, bool ignore_case = false);
+ MatchSubstringOptions();
+ constexpr static char const kTypeName[] = "MatchSubstringOptions";
+
+ /// The exact substring (or regex, depending on kernel) to look for inside input values.
+ std::string pattern;
+ /// Whether to perform a case-insensitive match.
+ bool ignore_case = false;
+};
+
+class ARROW_EXPORT SplitOptions : public FunctionOptions {
+ public:
+ explicit SplitOptions(int64_t max_splits = -1, bool reverse = false);
+ constexpr static char const kTypeName[] = "SplitOptions";
+
+ /// Maximum number of splits allowed, or unlimited when -1
+ int64_t max_splits;
+ /// Start splitting from the end of the string (only relevant when max_splits != -1)
+ bool reverse;
+};
+
+class ARROW_EXPORT SplitPatternOptions : public FunctionOptions {
+ public:
+ explicit SplitPatternOptions(std::string pattern, int64_t max_splits = -1,
+ bool reverse = false);
+ SplitPatternOptions();
+ constexpr static char const kTypeName[] = "SplitPatternOptions";
+
+ /// The exact substring to split on.
+ std::string pattern;
+ /// Maximum number of splits allowed, or unlimited when -1
+ int64_t max_splits;
+ /// Start splitting from the end of the string (only relevant when max_splits != -1)
+ bool reverse;
+};
+
+class ARROW_EXPORT ReplaceSliceOptions : public FunctionOptions {
+ public:
+ explicit ReplaceSliceOptions(int64_t start, int64_t stop, std::string replacement);
+ ReplaceSliceOptions();
+ constexpr static char const kTypeName[] = "ReplaceSliceOptions";
+
+ /// Index to start slicing at
+ int64_t start;
+ /// Index to stop slicing at
+ int64_t stop;
+ /// String to replace the slice with
+ std::string replacement;
+};
+
+class ARROW_EXPORT ReplaceSubstringOptions : public FunctionOptions {
+ public:
+ explicit ReplaceSubstringOptions(std::string pattern, std::string replacement,
+ int64_t max_replacements = -1);
+ ReplaceSubstringOptions();
+ constexpr static char const kTypeName[] = "ReplaceSubstringOptions";
+
+ /// Pattern to match, literal, or regular expression depending on which kernel is used
+ std::string pattern;
+ /// String to replace the pattern with
+ std::string replacement;
+ /// Max number of substrings to replace (-1 means unbounded)
+ int64_t max_replacements;
+};
+
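// Illustrative sketch, not from the diff: options classes such as
// ReplaceSubstringOptions above are passed by pointer to the generic
// CallFunction entry point when no typed wrapper exists. (StripDashes is a
// hypothetical helper name.)
#include <arrow/compute/api.h>

arrow::Result<arrow::Datum> StripDashes(const arrow::Datum& strings) {
  // max_replacements=-1 replaces every occurrence of "-" with "".
  arrow::compute::ReplaceSubstringOptions options("-", "",
                                                  /*max_replacements=*/-1);
  return arrow::compute::CallFunction("replace_substring", {strings}, &options);
}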
+class ARROW_EXPORT ExtractRegexOptions : public FunctionOptions {
+ public:
+ explicit ExtractRegexOptions(std::string pattern);
+ ExtractRegexOptions();
+ constexpr static char const kTypeName[] = "ExtractRegexOptions";
+
+ /// Regular expression with named capture fields
std::string pattern;
- /// Whether to perform a case-insensitive match.
- bool ignore_case = false;
};
-class ARROW_EXPORT SplitOptions : public FunctionOptions {
- public:
- explicit SplitOptions(int64_t max_splits = -1, bool reverse = false);
- constexpr static char const kTypeName[] = "SplitOptions";
-
- /// Maximum number of splits allowed, or unlimited when -1
- int64_t max_splits;
- /// Start splitting from the end of the string (only relevant when max_splits != -1)
- bool reverse;
-};
-
-class ARROW_EXPORT SplitPatternOptions : public FunctionOptions {
- public:
- explicit SplitPatternOptions(std::string pattern, int64_t max_splits = -1,
- bool reverse = false);
- SplitPatternOptions();
- constexpr static char const kTypeName[] = "SplitPatternOptions";
-
- /// The exact substring to split on.
- std::string pattern;
- /// Maximum number of splits allowed, or unlimited when -1
- int64_t max_splits;
- /// Start splitting from the end of the string (only relevant when max_splits != -1)
- bool reverse;
-};
-
-class ARROW_EXPORT ReplaceSliceOptions : public FunctionOptions {
- public:
- explicit ReplaceSliceOptions(int64_t start, int64_t stop, std::string replacement);
- ReplaceSliceOptions();
- constexpr static char const kTypeName[] = "ReplaceSliceOptions";
-
- /// Index to start slicing at
- int64_t start;
- /// Index to stop slicing at
- int64_t stop;
- /// String to replace the slice with
- std::string replacement;
-};
-
-class ARROW_EXPORT ReplaceSubstringOptions : public FunctionOptions {
- public:
- explicit ReplaceSubstringOptions(std::string pattern, std::string replacement,
- int64_t max_replacements = -1);
- ReplaceSubstringOptions();
- constexpr static char const kTypeName[] = "ReplaceSubstringOptions";
-
- /// Pattern to match, literal, or regular expression depending on which kernel is used
- std::string pattern;
- /// String to replace the pattern with
- std::string replacement;
- /// Max number of substrings to replace (-1 means unbounded)
- int64_t max_replacements;
-};
-
-class ARROW_EXPORT ExtractRegexOptions : public FunctionOptions {
- public:
- explicit ExtractRegexOptions(std::string pattern);
- ExtractRegexOptions();
- constexpr static char const kTypeName[] = "ExtractRegexOptions";
-
- /// Regular expression with named capture fields
- std::string pattern;
-};
-
/// Options for IsIn and IndexIn functions
-class ARROW_EXPORT SetLookupOptions : public FunctionOptions {
- public:
- explicit SetLookupOptions(Datum value_set, bool skip_nulls = false);
- SetLookupOptions();
- constexpr static char const kTypeName[] = "SetLookupOptions";
+class ARROW_EXPORT SetLookupOptions : public FunctionOptions {
+ public:
+ explicit SetLookupOptions(Datum value_set, bool skip_nulls = false);
+ SetLookupOptions();
+ constexpr static char const kTypeName[] = "SetLookupOptions";
/// The set of values to look up input values into.
Datum value_set;
@@ -168,47 +168,47 @@ class ARROW_EXPORT SetLookupOptions : public FunctionOptions {
bool skip_nulls;
};
-class ARROW_EXPORT StrptimeOptions : public FunctionOptions {
- public:
- explicit StrptimeOptions(std::string format, TimeUnit::type unit);
- StrptimeOptions();
- constexpr static char const kTypeName[] = "StrptimeOptions";
+class ARROW_EXPORT StrptimeOptions : public FunctionOptions {
+ public:
+ explicit StrptimeOptions(std::string format, TimeUnit::type unit);
+ StrptimeOptions();
+ constexpr static char const kTypeName[] = "StrptimeOptions";
std::string format;
TimeUnit::type unit;
};
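// Illustrative sketch, not from the diff: StrptimeOptions above drives the
// "strptime" kernel through CallFunction. (ParseIsoDates is a hypothetical
// helper name.)
#include <arrow/compute/api.h>

arrow::Result<arrow::Datum> ParseIsoDates(const arrow::Datum& strings) {
  // Parses "2021-07-01"-style strings into second-resolution timestamps.
  arrow::compute::StrptimeOptions options("%Y-%m-%d", arrow::TimeUnit::SECOND);
  return arrow::compute::CallFunction("strptime", {strings}, &options);
}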
-class ARROW_EXPORT PadOptions : public FunctionOptions {
- public:
- explicit PadOptions(int64_t width, std::string padding = " ");
- PadOptions();
- constexpr static char const kTypeName[] = "PadOptions";
-
- /// The desired string length.
- int64_t width;
- /// What to pad the string with. Should be one codepoint (Unicode)/byte (ASCII).
- std::string padding;
-};
-
-class ARROW_EXPORT TrimOptions : public FunctionOptions {
- public:
- explicit TrimOptions(std::string characters);
- TrimOptions();
- constexpr static char const kTypeName[] = "TrimOptions";
-
- /// The individual characters that can be trimmed from the string.
- std::string characters;
-};
-
-class ARROW_EXPORT SliceOptions : public FunctionOptions {
- public:
- explicit SliceOptions(int64_t start, int64_t stop = std::numeric_limits<int64_t>::max(),
- int64_t step = 1);
- SliceOptions();
- constexpr static char const kTypeName[] = "SliceOptions";
- int64_t start, stop, step;
-};
-
+class ARROW_EXPORT PadOptions : public FunctionOptions {
+ public:
+ explicit PadOptions(int64_t width, std::string padding = " ");
+ PadOptions();
+ constexpr static char const kTypeName[] = "PadOptions";
+
+ /// The desired string length.
+ int64_t width;
+ /// What to pad the string with. Should be one codepoint (Unicode)/byte (ASCII).
+ std::string padding;
+};
+
+class ARROW_EXPORT TrimOptions : public FunctionOptions {
+ public:
+ explicit TrimOptions(std::string characters);
+ TrimOptions();
+ constexpr static char const kTypeName[] = "TrimOptions";
+
+ /// The individual characters that can be trimmed from the string.
+ std::string characters;
+};
+
+class ARROW_EXPORT SliceOptions : public FunctionOptions {
+ public:
+ explicit SliceOptions(int64_t start, int64_t stop = std::numeric_limits<int64_t>::max(),
+ int64_t step = 1);
+ SliceOptions();
+ constexpr static char const kTypeName[] = "SliceOptions";
+ int64_t start, stop, step;
+};
+
enum CompareOperator : int8_t {
EQUAL,
NOT_EQUAL,
@@ -218,57 +218,57 @@ enum CompareOperator : int8_t {
LESS_EQUAL,
};
-struct ARROW_EXPORT CompareOptions {
+struct ARROW_EXPORT CompareOptions {
explicit CompareOptions(CompareOperator op) : op(op) {}
- CompareOptions() : CompareOptions(CompareOperator::EQUAL) {}
+ CompareOptions() : CompareOptions(CompareOperator::EQUAL) {}
enum CompareOperator op;
};
-class ARROW_EXPORT MakeStructOptions : public FunctionOptions {
- public:
- MakeStructOptions(std::vector<std::string> n, std::vector<bool> r,
- std::vector<std::shared_ptr<const KeyValueMetadata>> m);
- explicit MakeStructOptions(std::vector<std::string> n);
- MakeStructOptions();
- constexpr static char const kTypeName[] = "MakeStructOptions";
-
- /// Names for wrapped columns
- std::vector<std::string> field_names;
-
- /// Nullability bits for wrapped columns
- std::vector<bool> field_nullability;
-
- /// Metadata attached to wrapped columns
- std::vector<std::shared_ptr<const KeyValueMetadata>> field_metadata;
-};
-
-struct ARROW_EXPORT DayOfWeekOptions : public FunctionOptions {
- public:
- explicit DayOfWeekOptions(bool one_based_numbering = false, uint32_t week_start = 1);
- constexpr static char const kTypeName[] = "DayOfWeekOptions";
- static DayOfWeekOptions Defaults() { return DayOfWeekOptions{}; }
-
- /// Number days from 1 if true and from 0 if false
- bool one_based_numbering;
- /// What day does the week start with (Monday=1, Sunday=7)
- uint32_t week_start;
-};
-
+class ARROW_EXPORT MakeStructOptions : public FunctionOptions {
+ public:
+ MakeStructOptions(std::vector<std::string> n, std::vector<bool> r,
+ std::vector<std::shared_ptr<const KeyValueMetadata>> m);
+ explicit MakeStructOptions(std::vector<std::string> n);
+ MakeStructOptions();
+ constexpr static char const kTypeName[] = "MakeStructOptions";
+
+ /// Names for wrapped columns
+ std::vector<std::string> field_names;
+
+ /// Nullability bits for wrapped columns
+ std::vector<bool> field_nullability;
+
+ /// Metadata attached to wrapped columns
+ std::vector<std::shared_ptr<const KeyValueMetadata>> field_metadata;
+};
+
+struct ARROW_EXPORT DayOfWeekOptions : public FunctionOptions {
+ public:
+ explicit DayOfWeekOptions(bool one_based_numbering = false, uint32_t week_start = 1);
+ constexpr static char const kTypeName[] = "DayOfWeekOptions";
+ static DayOfWeekOptions Defaults() { return DayOfWeekOptions{}; }
+
+ /// Number days from 1 if true and from 0 if false
+ bool one_based_numbering;
+ /// What day does the week start with (Monday=1, Sunday=7)
+ uint32_t week_start;
+};
+
/// @}
-/// \brief Get the absolute value of a value.
-///
-/// If argument is null the result will be null.
-///
-/// \param[in] arg the value transformed
-/// \param[in] options arithmetic options (overflow handling), optional
-/// \param[in] ctx the function execution context, optional
-/// \return the elementwise absolute value
-ARROW_EXPORT
-Result<Datum> AbsoluteValue(const Datum& arg,
- ArithmeticOptions options = ArithmeticOptions(),
- ExecContext* ctx = NULLPTR);
-
+/// \brief Get the absolute value of a value.
+///
+/// If argument is null the result will be null.
+///
+/// \param[in] arg the value transformed
+/// \param[in] options arithmetic options (overflow handling), optional
+/// \param[in] ctx the function execution context, optional
+/// \return the elementwise absolute value
+ARROW_EXPORT
+Result<Datum> AbsoluteValue(const Datum& arg,
+ ArithmeticOptions options = ArithmeticOptions(),
+ ExecContext* ctx = NULLPTR);
+
/// \brief Add two values together. Array values must be the same length. If
/// either addend is null the result will be null.
///
@@ -322,233 +322,233 @@ Result<Datum> Divide(const Datum& left, const Datum& right,
ArithmeticOptions options = ArithmeticOptions(),
ExecContext* ctx = NULLPTR);
-/// \brief Negate values.
-///
-/// If argument is null the result will be null.
-///
-/// \param[in] arg the value negated
-/// \param[in] options arithmetic options (overflow handling), optional
-/// \param[in] ctx the function execution context, optional
-/// \return the elementwise negation
-ARROW_EXPORT
-Result<Datum> Negate(const Datum& arg, ArithmeticOptions options = ArithmeticOptions(),
- ExecContext* ctx = NULLPTR);
-
-/// \brief Raise the values of base array to the power of the exponent array values.
-/// Array values must be the same length. If either base or exponent is null the result
-/// will be null.
-///
-/// \param[in] left the base
-/// \param[in] right the exponent
-/// \param[in] options arithmetic options (enable/disable overflow checking), optional
-/// \param[in] ctx the function execution context, optional
-/// \return the elementwise base value raised to the power of exponent
-ARROW_EXPORT
-Result<Datum> Power(const Datum& left, const Datum& right,
- ArithmeticOptions options = ArithmeticOptions(),
- ExecContext* ctx = NULLPTR);
-
-/// \brief Left shift the left array by the right array. Array values must be the
-/// same length. If either operand is null, the result will be null.
-///
-/// \param[in] left the value to shift
-/// \param[in] right the value to shift by
-/// \param[in] options arithmetic options (enable/disable overflow checking), optional
-/// \param[in] ctx the function execution context, optional
-/// \return the elementwise left value shifted left by the right value
-ARROW_EXPORT
-Result<Datum> ShiftLeft(const Datum& left, const Datum& right,
- ArithmeticOptions options = ArithmeticOptions(),
- ExecContext* ctx = NULLPTR);
-
-/// \brief Right shift the left array by the right array. Array values must be the
-/// same length. If either operand is null, the result will be null. Performs a
-/// logical shift for unsigned values, and an arithmetic shift for signed values.
-///
-/// \param[in] left the value to shift
-/// \param[in] right the value to shift by
-/// \param[in] options arithmetic options (enable/disable overflow checking), optional
-/// \param[in] ctx the function execution context, optional
-/// \return the elementwise left value shifted right by the right value
-ARROW_EXPORT
-Result<Datum> ShiftRight(const Datum& left, const Datum& right,
- ArithmeticOptions options = ArithmeticOptions(),
- ExecContext* ctx = NULLPTR);
-
-/// \brief Compute the sine of the array values.
-/// \param[in] arg The values to compute the sine for.
-/// \param[in] options arithmetic options (enable/disable overflow checking), optional
-/// \param[in] ctx the function execution context, optional
-/// \return the elementwise sine of the values
-ARROW_EXPORT
-Result<Datum> Sin(const Datum& arg, ArithmeticOptions options = ArithmeticOptions(),
- ExecContext* ctx = NULLPTR);
-
-/// \brief Compute the cosine of the array values.
-/// \param[in] arg The values to compute the cosine for.
-/// \param[in] options arithmetic options (enable/disable overflow checking), optional
-/// \param[in] ctx the function execution context, optional
-/// \return the elementwise cosine of the values
-ARROW_EXPORT
-Result<Datum> Cos(const Datum& arg, ArithmeticOptions options = ArithmeticOptions(),
- ExecContext* ctx = NULLPTR);
-
-/// \brief Compute the inverse sine (arcsine) of the array values.
-/// \param[in] arg The values to compute the inverse sine for.
-/// \param[in] options arithmetic options (enable/disable overflow checking), optional
-/// \param[in] ctx the function execution context, optional
-/// \return the elementwise inverse sine of the values
-ARROW_EXPORT
-Result<Datum> Asin(const Datum& arg, ArithmeticOptions options = ArithmeticOptions(),
- ExecContext* ctx = NULLPTR);
-
-/// \brief Compute the inverse cosine (arccosine) of the array values.
-/// \param[in] arg The values to compute the inverse cosine for.
-/// \param[in] options arithmetic options (enable/disable overflow checking), optional
-/// \param[in] ctx the function execution context, optional
-/// \return the elementwise inverse cosine of the values
-ARROW_EXPORT
-Result<Datum> Acos(const Datum& arg, ArithmeticOptions options = ArithmeticOptions(),
- ExecContext* ctx = NULLPTR);
-
-/// \brief Compute the tangent of the array values.
-/// \param[in] arg The values to compute the tangent for.
-/// \param[in] options arithmetic options (enable/disable overflow checking), optional
-/// \param[in] ctx the function execution context, optional
-/// \return the elementwise tangent of the values
-ARROW_EXPORT
-Result<Datum> Tan(const Datum& arg, ArithmeticOptions options = ArithmeticOptions(),
- ExecContext* ctx = NULLPTR);
-
-/// \brief Compute the inverse tangent (arctangent) of the array values.
-/// \param[in] arg The values to compute the inverse tangent for.
-/// \param[in] ctx the function execution context, optional
-/// \return the elementwise inverse tangent of the values
-ARROW_EXPORT
-Result<Datum> Atan(const Datum& arg, ExecContext* ctx = NULLPTR);
-
-/// \brief Compute the inverse tangent (arctangent) of y/x, using the
-/// argument signs to determine the correct quadrant.
-/// \param[in] y The y-values to compute the inverse tangent for.
-/// \param[in] x The x-values to compute the inverse tangent for.
-/// \param[in] ctx the function execution context, optional
-/// \return the elementwise inverse tangent of the values
-ARROW_EXPORT
-Result<Datum> Atan2(const Datum& y, const Datum& x, ExecContext* ctx = NULLPTR);
-
-/// \brief Get the natural log of a value.
-///
-/// If argument is null the result will be null.
-///
-/// \param[in] arg The values to compute the logarithm for.
-/// \param[in] options arithmetic options (overflow handling), optional
-/// \param[in] ctx the function execution context, optional
-/// \return the elementwise natural log
-ARROW_EXPORT
-Result<Datum> Ln(const Datum& arg, ArithmeticOptions options = ArithmeticOptions(),
- ExecContext* ctx = NULLPTR);
-
-/// \brief Get the log base 10 of a value.
-///
-/// If argument is null the result will be null.
-///
-/// \param[in] arg The values to compute the logarithm for.
-/// \param[in] options arithmetic options (overflow handling), optional
-/// \param[in] ctx the function execution context, optional
-/// \return the elementwise log base 10
-ARROW_EXPORT
-Result<Datum> Log10(const Datum& arg, ArithmeticOptions options = ArithmeticOptions(),
- ExecContext* ctx = NULLPTR);
-
-/// \brief Get the log base 2 of a value.
-///
-/// If argument is null the result will be null.
-///
-/// \param[in] arg The values to compute the logarithm for.
-/// \param[in] options arithmetic options (overflow handling), optional
-/// \param[in] ctx the function execution context, optional
-/// \return the elementwise log base 2
-ARROW_EXPORT
-Result<Datum> Log2(const Datum& arg, ArithmeticOptions options = ArithmeticOptions(),
- ExecContext* ctx = NULLPTR);
-
-/// \brief Get the natural log of (1 + value).
-///
-/// If argument is null the result will be null.
-/// This function may be more accurate than Log(1 + value) for values close to zero.
-///
-/// \param[in] arg The values to compute the logarithm for.
-/// \param[in] options arithmetic options (overflow handling), optional
-/// \param[in] ctx the function execution context, optional
-/// \return the elementwise natural log
-ARROW_EXPORT
-Result<Datum> Log1p(const Datum& arg, ArithmeticOptions options = ArithmeticOptions(),
- ExecContext* ctx = NULLPTR);
-
-/// \brief Round to the nearest integer less than or equal to the
-/// argument. Array values can be of arbitrary length. If argument is null the
-/// result will be null.
-///
-/// \param[in] arg the value to round
-/// \param[in] ctx the function execution context, optional
-/// \return the rounded value
-ARROW_EXPORT
-Result<Datum> Floor(const Datum& arg, ExecContext* ctx = NULLPTR);
-
-/// \brief Round to the nearest integer greater than or equal to the
-/// argument. Array values can be of arbitrary length. If argument is null the
-/// result will be null.
-///
-/// \param[in] arg the value to round
-/// \param[in] ctx the function execution context, optional
-/// \return the rounded value
-ARROW_EXPORT
-Result<Datum> Ceil(const Datum& arg, ExecContext* ctx = NULLPTR);
-
-/// \brief Get the integral part without fractional digits. Array values can be
-/// of arbitrary length. If argument is null the result will be null.
-///
-/// \param[in] arg the value to truncate
-/// \param[in] ctx the function execution context, optional
-/// \return the truncated value
-ARROW_EXPORT
-Result<Datum> Trunc(const Datum& arg, ExecContext* ctx = NULLPTR);
-
-/// \brief Find the element-wise maximum of any number of arrays or scalars.
-/// Array values must be the same length.
-///
-/// \param[in] args arrays or scalars to operate on.
-/// \param[in] options options for handling nulls, optional
-/// \param[in] ctx the function execution context, optional
-/// \return the element-wise maximum
-ARROW_EXPORT
-Result<Datum> MaxElementWise(
- const std::vector<Datum>& args,
- ElementWiseAggregateOptions options = ElementWiseAggregateOptions::Defaults(),
- ExecContext* ctx = NULLPTR);
-
-/// \brief Find the element-wise minimum of any number of arrays or scalars.
-/// Array values must be the same length.
-///
-/// \param[in] args arrays or scalars to operate on.
-/// \param[in] options options for handling nulls, optional
-/// \param[in] ctx the function execution context, optional
-/// \return the element-wise minimum
-ARROW_EXPORT
-Result<Datum> MinElementWise(
- const std::vector<Datum>& args,
- ElementWiseAggregateOptions options = ElementWiseAggregateOptions::Defaults(),
- ExecContext* ctx = NULLPTR);
-
-/// \brief Get the sign of a value. Array values can be of arbitrary length. If argument
-/// is null the result will be null.
-///
-/// \param[in] arg the value to extract sign from
-/// \param[in] ctx the function execution context, optional
-/// \return the elementwise sign function
-ARROW_EXPORT
-Result<Datum> Sign(const Datum& arg, ExecContext* ctx = NULLPTR);
-
+/// \brief Negate values.
+///
+/// If argument is null the result will be null.
+///
+/// \param[in] arg the value negated
+/// \param[in] options arithmetic options (overflow handling), optional
+/// \param[in] ctx the function execution context, optional
+/// \return the elementwise negation
+ARROW_EXPORT
+Result<Datum> Negate(const Datum& arg, ArithmeticOptions options = ArithmeticOptions(),
+ ExecContext* ctx = NULLPTR);
+
+/// \brief Raise the values of base array to the power of the exponent array values.
+/// Array values must be the same length. If either base or exponent is null the result
+/// will be null.
+///
+/// \param[in] left the base
+/// \param[in] right the exponent
+/// \param[in] options arithmetic options (enable/disable overflow checking), optional
+/// \param[in] ctx the function execution context, optional
+/// \return the elementwise base value raised to the power of exponent
+ARROW_EXPORT
+Result<Datum> Power(const Datum& left, const Datum& right,
+ ArithmeticOptions options = ArithmeticOptions(),
+ ExecContext* ctx = NULLPTR);
+
+/// \brief Left shift the left array by the right array. Array values must be the
+/// same length. If either operand is null, the result will be null.
+///
+/// \param[in] left the value to shift
+/// \param[in] right the value to shift by
+/// \param[in] options arithmetic options (enable/disable overflow checking), optional
+/// \param[in] ctx the function execution context, optional
+/// \return the elementwise left value shifted left by the right value
+ARROW_EXPORT
+Result<Datum> ShiftLeft(const Datum& left, const Datum& right,
+ ArithmeticOptions options = ArithmeticOptions(),
+ ExecContext* ctx = NULLPTR);
+
+/// \brief Right shift the left array by the right array. Array values must be the
+/// same length. If either operand is null, the result will be null. Performs a
+/// logical shift for unsigned values, and an arithmetic shift for signed values.
+///
+/// \param[in] left the value to shift
+/// \param[in] right the value to shift by
+/// \param[in] options arithmetic options (enable/disable overflow checking), optional
+/// \param[in] ctx the function execution context, optional
+/// \return the elementwise left value shifted right by the right value
+ARROW_EXPORT
+Result<Datum> ShiftRight(const Datum& left, const Datum& right,
+ ArithmeticOptions options = ArithmeticOptions(),
+ ExecContext* ctx = NULLPTR);
+
+/// \brief Compute the sine of the array values.
+/// \param[in] arg The values to compute the sine for.
+/// \param[in] options arithmetic options (enable/disable overflow checking), optional
+/// \param[in] ctx the function execution context, optional
+/// \return the elementwise sine of the values
+ARROW_EXPORT
+Result<Datum> Sin(const Datum& arg, ArithmeticOptions options = ArithmeticOptions(),
+ ExecContext* ctx = NULLPTR);
+
+/// \brief Compute the cosine of the array values.
+/// \param[in] arg The values to compute the cosine for.
+/// \param[in] options arithmetic options (enable/disable overflow checking), optional
+/// \param[in] ctx the function execution context, optional
+/// \return the elementwise cosine of the values
+ARROW_EXPORT
+Result<Datum> Cos(const Datum& arg, ArithmeticOptions options = ArithmeticOptions(),
+ ExecContext* ctx = NULLPTR);
+
+/// \brief Compute the inverse sine (arcsine) of the array values.
+/// \param[in] arg The values to compute the inverse sine for.
+/// \param[in] options arithmetic options (enable/disable overflow checking), optional
+/// \param[in] ctx the function execution context, optional
+/// \return the elementwise inverse sine of the values
+ARROW_EXPORT
+Result<Datum> Asin(const Datum& arg, ArithmeticOptions options = ArithmeticOptions(),
+ ExecContext* ctx = NULLPTR);
+
+/// \brief Compute the inverse cosine (arccosine) of the array values.
+/// \param[in] arg The values to compute the inverse cosine for.
+/// \param[in] options arithmetic options (enable/disable overflow checking), optional
+/// \param[in] ctx the function execution context, optional
+/// \return the elementwise inverse cosine of the values
+ARROW_EXPORT
+Result<Datum> Acos(const Datum& arg, ArithmeticOptions options = ArithmeticOptions(),
+ ExecContext* ctx = NULLPTR);
+
+/// \brief Compute the tangent of the array values.
+/// \param[in] arg The values to compute the tangent for.
+/// \param[in] options arithmetic options (enable/disable overflow checking), optional
+/// \param[in] ctx the function execution context, optional
+/// \return the elementwise tangent of the values
+ARROW_EXPORT
+Result<Datum> Tan(const Datum& arg, ArithmeticOptions options = ArithmeticOptions(),
+ ExecContext* ctx = NULLPTR);
+
+/// \brief Compute the inverse tangent (arctangent) of the array values.
+/// \param[in] arg The values to compute the inverse tangent for.
+/// \param[in] ctx the function execution context, optional
+/// \return the elementwise inverse tangent of the values
+ARROW_EXPORT
+Result<Datum> Atan(const Datum& arg, ExecContext* ctx = NULLPTR);
+
+/// \brief Compute the inverse tangent (arctangent) of y/x, using the
+/// argument signs to determine the correct quadrant.
+/// \param[in] y The y-values to compute the inverse tangent for.
+/// \param[in] x The x-values to compute the inverse tangent for.
+/// \param[in] ctx the function execution context, optional
+/// \return the elementwise inverse tangent of the values
+ARROW_EXPORT
+Result<Datum> Atan2(const Datum& y, const Datum& x, ExecContext* ctx = NULLPTR);
+
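// Illustrative sketch, not from the diff: contrasting the two arctangent
// entry points declared above. (Heading is a hypothetical helper name.)
#include <arrow/compute/api_scalar.h>

arrow::Result<arrow::Datum> Heading(const arrow::Datum& y, const arrow::Datum& x) {
  // Unlike Atan(y/x), Atan2 sees both signs, so quadrant information
  // survives: (y=-1, x=-1) maps to -3*pi/4, not pi/4.
  return arrow::compute::Atan2(y, x);
}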
+/// \brief Get the natural log of a value.
+///
+/// If argument is null the result will be null.
+///
+/// \param[in] arg The values to compute the logarithm for.
+/// \param[in] options arithmetic options (overflow handling), optional
+/// \param[in] ctx the function execution context, optional
+/// \return the elementwise natural log
+ARROW_EXPORT
+Result<Datum> Ln(const Datum& arg, ArithmeticOptions options = ArithmeticOptions(),
+ ExecContext* ctx = NULLPTR);
+
+/// \brief Get the log base 10 of a value.
+///
+/// If argument is null the result will be null.
+///
+/// \param[in] arg The values to compute the logarithm for.
+/// \param[in] options arithmetic options (overflow handling), optional
+/// \param[in] ctx the function execution context, optional
+/// \return the elementwise log base 10
+ARROW_EXPORT
+Result<Datum> Log10(const Datum& arg, ArithmeticOptions options = ArithmeticOptions(),
+ ExecContext* ctx = NULLPTR);
+
+/// \brief Get the log base 2 of a value.
+///
+/// If argument is null the result will be null.
+///
+/// \param[in] arg The values to compute the logarithm for.
+/// \param[in] options arithmetic options (overflow handling), optional
+/// \param[in] ctx the function execution context, optional
+/// \return the elementwise log base 2
+ARROW_EXPORT
+Result<Datum> Log2(const Datum& arg, ArithmeticOptions options = ArithmeticOptions(),
+ ExecContext* ctx = NULLPTR);
+
+/// \brief Get the natural log of (1 + value).
+///
+/// If argument is null the result will be null.
+/// This function may be more accurate than Log(1 + value) for values close to zero.
+///
+/// \param[in] arg The values to compute the logarithm for.
+/// \param[in] options arithmetic options (overflow handling), optional
+/// \param[in] ctx the function execution context, optional
+/// \return the elementwise natural log
+ARROW_EXPORT
+Result<Datum> Log1p(const Datum& arg, ArithmeticOptions options = ArithmeticOptions(),
+ ExecContext* ctx = NULLPTR);
+
+/// \brief Round to the nearest integer less than or equal to the
+/// argument. Array values can be of arbitrary length. If argument is null the
+/// result will be null.
+///
+/// \param[in] arg the value to round
+/// \param[in] ctx the function execution context, optional
+/// \return the rounded value
+ARROW_EXPORT
+Result<Datum> Floor(const Datum& arg, ExecContext* ctx = NULLPTR);
+
+/// \brief Round to the nearest integer greater than or equal to the
+/// argument. Array values can be of arbitrary length. If argument is null the
+/// result will be null.
+///
+/// \param[in] arg the value to round
+/// \param[in] ctx the function execution context, optional
+/// \return the rounded value
+ARROW_EXPORT
+Result<Datum> Ceil(const Datum& arg, ExecContext* ctx = NULLPTR);
+
+/// \brief Get the integral part without fractional digits. Array values can be
+/// of arbitrary length. If argument is null the result will be null.
+///
+/// \param[in] arg the value to truncate
+/// \param[in] ctx the function execution context, optional
+/// \return the truncated value
+ARROW_EXPORT
+Result<Datum> Trunc(const Datum& arg, ExecContext* ctx = NULLPTR);
+
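// Illustrative sketch, not from the diff: the three rounding functions
// declared above differ only in direction; on -2.5 they yield Floor -> -3,
// Ceil -> -2, Trunc -> -2. (RoundTowardZero is a hypothetical helper name.)
#include <arrow/compute/api_scalar.h>

arrow::Result<arrow::Datum> RoundTowardZero(const arrow::Datum& values) {
  // Trunc keeps the integral part, i.e. rounds toward zero for both signs.
  return arrow::compute::Trunc(values);
}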
+/// \brief Find the element-wise maximum of any number of arrays or scalars.
+/// Array values must be the same length.
+///
+/// \param[in] args arrays or scalars to operate on.
+/// \param[in] options options for handling nulls, optional
+/// \param[in] ctx the function execution context, optional
+/// \return the element-wise maximum
+ARROW_EXPORT
+Result<Datum> MaxElementWise(
+ const std::vector<Datum>& args,
+ ElementWiseAggregateOptions options = ElementWiseAggregateOptions::Defaults(),
+ ExecContext* ctx = NULLPTR);
+
+/// \brief Find the element-wise minimum of any number of arrays or scalars.
+/// Array values must be the same length.
+///
+/// \param[in] args arrays or scalars to operate on.
+/// \param[in] options options for handling nulls, optional
+/// \param[in] ctx the function execution context, optional
+/// \return the element-wise minimum
+ARROW_EXPORT
+Result<Datum> MinElementWise(
+ const std::vector<Datum>& args,
+ ElementWiseAggregateOptions options = ElementWiseAggregateOptions::Defaults(),
+ ExecContext* ctx = NULLPTR);
+
+/// \brief Get the sign of a value. Array values can be of arbitrary length. If argument
+/// is null the result will be null.
+///
+/// \param[in] arg the value to extract sign from
+/// \param[in] ctx the function execution context, optional
+/// \return the elementwise sign function
+ARROW_EXPORT
+Result<Datum> Sign(const Datum& arg, ExecContext* ctx = NULLPTR);
+
/// \brief Compare a numeric array with a scalar.
///
/// \param[in] left datum to compare, must be an Array
@@ -562,10 +562,10 @@ Result<Datum> Sign(const Datum& arg, ExecContext* ctx = NULLPTR);
///
/// \since 1.0.0
/// \note API not yet finalized
-ARROW_DEPRECATED("Deprecated in 5.0.0. Use each compare function directly")
+ARROW_DEPRECATED("Deprecated in 5.0.0. Use each compare function directly")
ARROW_EXPORT
-Result<Datum> Compare(const Datum& left, const Datum& right, CompareOptions options,
- ExecContext* ctx = NULLPTR);
+Result<Datum> Compare(const Datum& left, const Datum& right, CompareOptions options,
+ ExecContext* ctx = NULLPTR);
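// Illustrative sketch, not from the diff: with Compare deprecated, callers
// invoke the named kernels directly, as the implementation above does.
// (LessEqualMask is a hypothetical helper name.)
#include <arrow/compute/api.h>

arrow::Result<arrow::Datum> LessEqualMask(const arrow::Datum& left,
                                          const arrow::Datum& right) {
  // Same result as Compare(left, right, CompareOptions(LESS_EQUAL)).
  return arrow::compute::CallFunction("less_equal", {left, right});
}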
/// \brief Invert the values of a boolean datum
/// \param[in] value datum to invert
@@ -580,8 +580,8 @@ Result<Datum> Invert(const Datum& value, ExecContext* ctx = NULLPTR);
/// \brief Element-wise AND of two boolean datums which always propagates nulls
/// (null and false is null).
///
-/// \param[in] left left operand
-/// \param[in] right right operand
+/// \param[in] left left operand
+/// \param[in] right right operand
/// \param[in] ctx the function execution context, optional
/// \return the resulting datum
///
@@ -593,8 +593,8 @@ Result<Datum> And(const Datum& left, const Datum& right, ExecContext* ctx = NULL
/// \brief Element-wise AND of two boolean datums with a Kleene truth table
/// (null and false is false).
///
-/// \param[in] left left operand
-/// \param[in] right right operand
+/// \param[in] left left operand
+/// \param[in] right right operand
/// \param[in] ctx the function execution context, optional
/// \return the resulting datum
///
@@ -607,8 +607,8 @@ Result<Datum> KleeneAnd(const Datum& left, const Datum& right,
/// \brief Element-wise OR of two boolean datums which always propagates nulls
/// (null or true is null).
///
-/// \param[in] left left operand
-/// \param[in] right right operand
+/// \param[in] left left operand
+/// \param[in] right right operand
/// \param[in] ctx the function execution context, optional
/// \return the resulting datum
///
@@ -620,8 +620,8 @@ Result<Datum> Or(const Datum& left, const Datum& right, ExecContext* ctx = NULLP
/// \brief Element-wise OR of two boolean datums with a Kleene truth table
/// (null or true is true).
///
-/// \param[in] left left operand
-/// \param[in] right right operand
+/// \param[in] left left operand
+/// \param[in] right right operand
/// \param[in] ctx the function execution context, optional
/// \return the resulting datum
///
@@ -631,8 +631,8 @@ ARROW_EXPORT
Result<Datum> KleeneOr(const Datum& left, const Datum& right, ExecContext* ctx = NULLPTR);
/// \brief Element-wise XOR of two boolean datums
-/// \param[in] left left operand
-/// \param[in] right right operand
+/// \param[in] left left operand
+/// \param[in] right right operand
/// \param[in] ctx the function execution context, optional
/// \return the resulting datum
///
@@ -641,49 +641,49 @@ Result<Datum> KleeneOr(const Datum& left, const Datum& right, ExecContext* ctx =
ARROW_EXPORT
Result<Datum> Xor(const Datum& left, const Datum& right, ExecContext* ctx = NULLPTR);
-/// \brief Element-wise AND NOT of two boolean datums which always propagates nulls
-/// (null and not true is null).
-///
-/// \param[in] left left operand
-/// \param[in] right right operand
-/// \param[in] ctx the function execution context, optional
-/// \return the resulting datum
-///
-/// \since 3.0.0
-/// \note API not yet finalized
-ARROW_EXPORT
-Result<Datum> AndNot(const Datum& left, const Datum& right, ExecContext* ctx = NULLPTR);
-
-/// \brief Element-wise AND NOT of two boolean datums with a Kleene truth table
-/// (false and not null is false, null and not true is false).
-///
-/// \param[in] left left operand
-/// \param[in] right right operand
-/// \param[in] ctx the function execution context, optional
-/// \return the resulting datum
-///
-/// \since 3.0.0
-/// \note API not yet finalized
-ARROW_EXPORT
-Result<Datum> KleeneAndNot(const Datum& left, const Datum& right,
- ExecContext* ctx = NULLPTR);
-
+/// \brief Element-wise AND NOT of two boolean datums which always propagates nulls
+/// (null and not true is null).
+///
+/// \param[in] left left operand
+/// \param[in] right right operand
+/// \param[in] ctx the function execution context, optional
+/// \return the resulting datum
+///
+/// \since 3.0.0
+/// \note API not yet finalized
+ARROW_EXPORT
+Result<Datum> AndNot(const Datum& left, const Datum& right, ExecContext* ctx = NULLPTR);
+
+/// \brief Element-wise AND NOT of two boolean datums with a Kleene truth table
+/// (false and not null is false, null and not true is false).
+///
+/// \param[in] left left operand
+/// \param[in] right right operand
+/// \param[in] ctx the function execution context, optional
+/// \return the resulting datum
+///
+/// \since 3.0.0
+/// \note API not yet finalized
+ARROW_EXPORT
+Result<Datum> KleeneAndNot(const Datum& left, const Datum& right,
+ ExecContext* ctx = NULLPTR);
+
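// Illustrative sketch, not from the diff: the two and_not flavours declared
// above differ only in null handling. (KeepUnlessDropped is a hypothetical
// helper name.)
#include <arrow/compute/api_scalar.h>

arrow::Result<arrow::Datum> KeepUnlessDropped(const arrow::Datum& keep,
                                              const arrow::Datum& drop) {
  // Kleene semantics: once the outcome is decided (keep == false or
  // drop == true), a null on the other side no longer yields a null result.
  return arrow::compute::KleeneAndNot(keep, drop);
}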
/// \brief IsIn returns true for each element of `values` that is contained in
/// `value_set`
///
-/// Behaviour of nulls is governed by SetLookupOptions::skip_nulls.
+/// Behaviour of nulls is governed by SetLookupOptions::skip_nulls.
///
/// \param[in] values array-like input to look up in value_set
-/// \param[in] options SetLookupOptions
+/// \param[in] options SetLookupOptions
/// \param[in] ctx the function execution context, optional
/// \return the resulting datum
///
/// \since 1.0.0
/// \note API not yet finalized
ARROW_EXPORT
-Result<Datum> IsIn(const Datum& values, const SetLookupOptions& options,
- ExecContext* ctx = NULLPTR);
-ARROW_EXPORT
+Result<Datum> IsIn(const Datum& values, const SetLookupOptions& options,
+ ExecContext* ctx = NULLPTR);
+ARROW_EXPORT
Result<Datum> IsIn(const Datum& values, const Datum& value_set,
ExecContext* ctx = NULLPTR);
@@ -695,19 +695,19 @@ Result<Datum> IsIn(const Datum& values, const Datum& value_set,
/// For example given values = [99, 42, 3, null] and
/// value_set = [3, 3, 99], the output will be = [1, null, 0, null]
///
-/// Behaviour of nulls is governed by SetLookupOptions::skip_nulls.
+/// Behaviour of nulls is governed by SetLookupOptions::skip_nulls.
///
/// \param[in] values array-like input
-/// \param[in] options SetLookupOptions
+/// \param[in] options SetLookupOptions
/// \param[in] ctx the function execution context, optional
/// \return the resulting datum
///
/// \since 1.0.0
/// \note API not yet finalized
ARROW_EXPORT
-Result<Datum> IndexIn(const Datum& values, const SetLookupOptions& options,
- ExecContext* ctx = NULLPTR);
-ARROW_EXPORT
+Result<Datum> IndexIn(const Datum& values, const SetLookupOptions& options,
+ ExecContext* ctx = NULLPTR);
+ARROW_EXPORT
Result<Datum> IndexIn(const Datum& values, const Datum& value_set,
ExecContext* ctx = NULLPTR);
@@ -735,18 +735,18 @@ Result<Datum> IsValid(const Datum& values, ExecContext* ctx = NULLPTR);
ARROW_EXPORT
Result<Datum> IsNull(const Datum& values, ExecContext* ctx = NULLPTR);
-/// \brief IsNan returns true for each element of `values` that is NaN,
-/// false otherwise
-///
-/// \param[in] values input to look for NaN
-/// \param[in] ctx the function execution context, optional
-/// \return the resulting datum
-///
-/// \since 3.0.0
-/// \note API not yet finalized
-ARROW_EXPORT
-Result<Datum> IsNan(const Datum& values, ExecContext* ctx = NULLPTR);
-
+/// \brief IsNan returns true for each element of `values` that is NaN,
+/// false otherwise
+///
+/// \param[in] values input to look for NaN
+/// \param[in] ctx the function execution context, optional
+/// \return the resulting datum
+///
+/// \since 3.0.0
+/// \note API not yet finalized
+ARROW_EXPORT
+Result<Datum> IsNan(const Datum& values, ExecContext* ctx = NULLPTR);
+
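// Illustrative sketch, not from the diff: IsNan complements IsNull above,
// since NaN is a valid (non-null) floating-point value. (NanMask is a
// hypothetical helper name.)
#include <arrow/compute/api_scalar.h>

arrow::Result<arrow::Datum> NanMask(const arrow::Datum& values) {
  // Emits true exactly for the NaN slots of a floating-point input.
  return arrow::compute::IsNan(values);
}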
/// \brief FillNull replaces each null element in `values`
/// with `fill_value`
///
@@ -762,228 +762,228 @@ ARROW_EXPORT
Result<Datum> FillNull(const Datum& values, const Datum& fill_value,
ExecContext* ctx = NULLPTR);
-/// \brief IfElse returns elements chosen from `left` or `right`
-/// depending on `cond`. `null` values in `cond` will be propagated to the result
-///
-/// \param[in] cond `Boolean` condition Scalar/Array
-/// \param[in] left Scalar/Array
-/// \param[in] right Scalar/Array
-/// \param[in] ctx the function execution context, optional
-///
-/// \return the resulting datum
-///
-/// \since 5.0.0
-/// \note API not yet finalized
-ARROW_EXPORT
-Result<Datum> IfElse(const Datum& cond, const Datum& left, const Datum& right,
- ExecContext* ctx = NULLPTR);
-
-/// \brief CaseWhen behaves like a switch/case or if-else if-else statement: for
-/// each row, select the first value for which the corresponding condition is
-/// true, or (if given) select the 'else' value, else emit null. Note that a
-/// null condition is the same as false.
-///
-/// \param[in] cond Conditions (Boolean)
-/// \param[in] cases Values (any type), along with an optional 'else' value.
-/// \param[in] ctx the function execution context, optional
-///
-/// \return the resulting datum
-///
-/// \since 5.0.0
-/// \note API not yet finalized
-ARROW_EXPORT
-Result<Datum> CaseWhen(const Datum& cond, const std::vector<Datum>& cases,
- ExecContext* ctx = NULLPTR);
-
-/// \brief Year returns year for each element of `values`
-///
-/// \param[in] values input to extract year from
-/// \param[in] ctx the function execution context, optional
-/// \return the resulting datum
-///
-/// \since 5.0.0
-/// \note API not yet finalized
-ARROW_EXPORT
-Result<Datum> Year(const Datum& values, ExecContext* ctx = NULLPTR);
-
-/// \brief Month returns month for each element of `values`.
-/// Month is encoded as January=1, December=12
-///
-/// \param[in] values input to extract month from
-/// \param[in] ctx the function execution context, optional
-/// \return the resulting datum
-///
-/// \since 5.0.0
-/// \note API not yet finalized
-ARROW_EXPORT
-Result<Datum> Month(const Datum& values, ExecContext* ctx = NULLPTR);
-
-/// \brief Day returns day number for each element of `values`
-///
-/// \param[in] values input to extract day from
-/// \param[in] ctx the function execution context, optional
-/// \return the resulting datum
-///
-/// \since 5.0.0
-/// \note API not yet finalized
-ARROW_EXPORT
-Result<Datum> Day(const Datum& values, ExecContext* ctx = NULLPTR);
-
-/// \brief DayOfWeek returns the day of the week number for each element of
-/// `values`.
-///
-/// By default the week starts on Monday, denoted by 0, and ends on Sunday,
-/// denoted by 6. The start day of the week (Monday=1, Sunday=7) and the
-/// numbering base (0 or 1) can be set using DayOfWeekOptions.
-///
-/// \param[in] values input to extract number of the day of the week from
-/// \param[in] options for setting start of the week and day numbering
-/// \param[in] ctx the function execution context, optional
-/// \return the resulting datum
-///
-/// \since 5.0.0
-/// \note API not yet finalized
-ARROW_EXPORT Result<Datum> DayOfWeek(const Datum& values,
- DayOfWeekOptions options = DayOfWeekOptions(),
- ExecContext* ctx = NULLPTR);
-
-/// \brief DayOfYear returns the day of the year number for each element of `values`.
-/// January 1st maps to day number 1, February 1st to 32, etc.
-///
-/// \param[in] values input to extract number of day of the year from
-/// \param[in] ctx the function execution context, optional
-/// \return the resulting datum
-///
-/// \since 5.0.0
-/// \note API not yet finalized
-ARROW_EXPORT Result<Datum> DayOfYear(const Datum& values, ExecContext* ctx = NULLPTR);
-
-/// \brief ISOYear returns ISO year number for each element of `values`.
-/// First week of an ISO year has the majority (4 or more) of its days in January.
-///
-/// \param[in] values input to extract ISO year from
-/// \param[in] ctx the function execution context, optional
-/// \return the resulting datum
-///
-/// \since 5.0.0
-/// \note API not yet finalized
-ARROW_EXPORT
-Result<Datum> ISOYear(const Datum& values, ExecContext* ctx = NULLPTR);
-
-/// \brief ISOWeek returns ISO week of year number for each element of `values`.
-/// First ISO week has the majority (4 or more) of its days in January.
-/// Week of the year starts with 1 and can run up to 53.
-///
-/// \param[in] values input to extract ISO week of year from
-/// \param[in] ctx the function execution context, optional
-/// \return the resulting datum
-///
-/// \since 5.0.0
-/// \note API not yet finalized
-ARROW_EXPORT Result<Datum> ISOWeek(const Datum& values, ExecContext* ctx = NULLPTR);
-
-/// \brief ISOCalendar returns a (ISO year, ISO week, ISO day of week) struct for
-/// each element of `values`.
-/// ISO week starts on Monday denoted by 1 and ends on Sunday denoted by 7.
-///
-/// \param[in] values input to extract the ISO calendar struct from
-/// \param[in] ctx the function execution context, optional
-/// \return the resulting datum
-///
-/// \since 5.0.0
-/// \note API not yet finalized
-ARROW_EXPORT Result<Datum> ISOCalendar(const Datum& values, ExecContext* ctx = NULLPTR);
-
-/// \brief Quarter returns the quarter of year number for each element of `values`
-/// First quarter maps to 1 and fourth quarter maps to 4.
-///
-/// \param[in] values input to extract quarter of year from
-/// \param[in] ctx the function execution context, optional
-/// \return the resulting datum
-///
-/// \since 5.0.0
-/// \note API not yet finalized
-ARROW_EXPORT Result<Datum> Quarter(const Datum& values, ExecContext* ctx = NULLPTR);
-
-/// \brief Hour returns hour value for each element of `values`
-///
-/// \param[in] values input to extract hour from
-/// \param[in] ctx the function execution context, optional
-/// \return the resulting datum
-///
-/// \since 5.0.0
-/// \note API not yet finalized
-ARROW_EXPORT
-Result<Datum> Hour(const Datum& values, ExecContext* ctx = NULLPTR);
-
-/// \brief Minute returns minutes value for each element of `values`
-///
-/// \param[in] values input to extract minutes from
-/// \param[in] ctx the function execution context, optional
-/// \return the resulting datum
-///
-/// \since 5.0.0
-/// \note API not yet finalized
-ARROW_EXPORT
-Result<Datum> Minute(const Datum& values, ExecContext* ctx = NULLPTR);
-
-/// \brief Second returns seconds value for each element of `values`
-///
-/// \param[in] values input to extract seconds from
-/// \param[in] ctx the function execution context, optional
-/// \return the resulting datum
-///
-/// \since 5.0.0
-/// \note API not yet finalized
-ARROW_EXPORT
-Result<Datum> Second(const Datum& values, ExecContext* ctx = NULLPTR);
-
-/// \brief Millisecond returns number of milliseconds since the last full second
-/// for each element of `values`
-///
-/// \param[in] values input to extract milliseconds from
-/// \param[in] ctx the function execution context, optional
-/// \return the resulting datum
-///
-/// \since 5.0.0
-/// \note API not yet finalized
-ARROW_EXPORT
-Result<Datum> Millisecond(const Datum& values, ExecContext* ctx = NULLPTR);
-
-/// \brief Microsecond returns number of microseconds since the last full millisecond
-/// for each element of `values`
-///
-/// \param[in] values input to extract microseconds from
-/// \param[in] ctx the function execution context, optional
-/// \return the resulting datum
-///
-/// \since 5.0.0
-/// \note API not yet finalized
-ARROW_EXPORT
-Result<Datum> Microsecond(const Datum& values, ExecContext* ctx = NULLPTR);
-
-/// \brief Nanosecond returns number of nanoseconds since the last full microsecond
-/// for each element of `values`
-///
-/// \param[in] values input to extract nanoseconds from
-/// \param[in] ctx the function execution context, optional
-/// \return the resulting datum
-///
-/// \since 5.0.0
-/// \note API not yet finalized
-ARROW_EXPORT
-Result<Datum> Nanosecond(const Datum& values, ExecContext* ctx = NULLPTR);
-
-/// \brief Subsecond returns the fraction of the second elapsed since the last
-/// full second as a float for each element of `values`
-///
-/// \param[in] values input to extract subsecond from
-/// \param[in] ctx the function execution context, optional
-/// \return the resulting datum
-///
-/// \since 5.0.0
-/// \note API not yet finalized
-ARROW_EXPORT Result<Datum> Subsecond(const Datum& values, ExecContext* ctx = NULLPTR);
-
+/// \brief IfElse returns elements chosen from `left` or `right`
+/// depending on `cond`. `null` values in `cond` yield `null` in the result
+///
+/// \param[in] cond `Boolean` condition Scalar/Array
+/// \param[in] left Scalar/Array
+/// \param[in] right Scalar/Array
+/// \param[in] ctx the function execution context, optional
+///
+/// \return the resulting datum
+///
+/// \since 5.0.0
+/// \note API not yet finalized
+ARROW_EXPORT
+Result<Datum> IfElse(const Datum& cond, const Datum& left, const Datum& right,
+ ExecContext* ctx = NULLPTR);
+
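
A minimal usage sketch (not part of this diff; assumes the builder API declared
elsewhere in this tree and the usual Arrow Status/Result error plumbing):

    #include <arrow/api.h>
    #include <arrow/compute/api.h>

    arrow::Status IfElseExample() {
      arrow::BooleanBuilder cond;
      arrow::DoubleBuilder left, right;
      ARROW_RETURN_NOT_OK(cond.AppendValues({true, false, true}));
      ARROW_RETURN_NOT_OK(left.AppendValues({1.0, 2.0, 3.0}));
      ARROW_RETURN_NOT_OK(right.AppendValues({-1.0, -2.0, -3.0}));
      ARROW_ASSIGN_OR_RAISE(auto c, cond.Finish());
      ARROW_ASSIGN_OR_RAISE(auto l, left.Finish());
      ARROW_ASSIGN_OR_RAISE(auto r, right.Finish());
      // Takes `left` where cond is true, `right` where false: [1, -2, 3]
      ARROW_ASSIGN_OR_RAISE(arrow::Datum out, arrow::compute::IfElse(c, l, r));
      return arrow::Status::OK();
    }
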
+/// \brief CaseWhen behaves like a switch/case or if-else if-else statement: for
+/// each row, select the first value for which the corresponding condition is
+/// true, or (if given) select the 'else' value, else emit null. Note that a
+/// null condition is the same as false.
+///
+/// \param[in] cond Conditions (Boolean)
+/// \param[in] cases Values (any type), along with an optional 'else' value.
+/// \param[in] ctx the function execution context, optional
+///
+/// \return the resulting datum
+///
+/// \since 5.0.0
+/// \note API not yet finalized
+ARROW_EXPORT
+Result<Datum> CaseWhen(const Datum& cond, const std::vector<Datum>& cases,
+ ExecContext* ctx = NULLPTR);
+
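
A hedged sketch of the calling convention: the first argument is a struct whose
Boolean children are the branch conditions in priority order. All names below
(is_small, is_large, small_vals, large_vals, other_vals) are hypothetical
std::shared_ptr<arrow::Array> inputs of equal length:

    ARROW_ASSIGN_OR_RAISE(auto conds, arrow::StructArray::Make(
        {is_small, is_large}, std::vector<std::string>{"small", "large"}));
    // Two branch values plus a trailing optional 'else' value.
    ARROW_ASSIGN_OR_RAISE(arrow::Datum out, arrow::compute::CaseWhen(
        arrow::Datum(conds), {arrow::Datum(small_vals), arrow::Datum(large_vals),
                              arrow::Datum(other_vals)}));
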
+/// \brief Year returns year for each element of `values`
+///
+/// \param[in] values input to extract year from
+/// \param[in] ctx the function execution context, optional
+/// \return the resulting datum
+///
+/// \since 5.0.0
+/// \note API not yet finalized
+ARROW_EXPORT
+Result<Datum> Year(const Datum& values, ExecContext* ctx = NULLPTR);
+
+/// \brief Month returns month for each element of `values`.
+/// Month is encoded as January=1, December=12
+///
+/// \param[in] values input to extract month from
+/// \param[in] ctx the function execution context, optional
+/// \return the resulting datum
+///
+/// \since 5.0.0
+/// \note API not yet finalized
+ARROW_EXPORT
+Result<Datum> Month(const Datum& values, ExecContext* ctx = NULLPTR);
+
+/// \brief Day returns day number for each element of `values`
+///
+/// \param[in] values input to extract day from
+/// \param[in] ctx the function execution context, optional
+/// \return the resulting datum
+///
+/// \since 5.0.0
+/// \note API not yet finalized
+ARROW_EXPORT
+Result<Datum> Day(const Datum& values, ExecContext* ctx = NULLPTR);
+
+/// \brief DayOfWeek returns the day of the week number for each element of
+/// `values`.
+///
+/// By default week starts on Monday denoted by 0 and ends on Sunday denoted
+/// by 6. Start day of the week (Monday=1, Sunday=7) and numbering base (0 or 1) can be
+/// set using DayOfWeekOptions
+///
+/// \param[in] values input to extract number of the day of the week from
+/// \param[in] options for setting start of the week and day numbering
+/// \param[in] ctx the function execution context, optional
+/// \return the resulting datum
+///
+/// \since 5.0.0
+/// \note API not yet finalized
+ARROW_EXPORT Result<Datum> DayOfWeek(const Datum& values,
+ DayOfWeekOptions options = DayOfWeekOptions(),
+ ExecContext* ctx = NULLPTR);
+
+/// \brief DayOfYear returns the day of the year number for each element of `values`.
+/// January 1st maps to day number 1, February 1st to 32, etc.
+///
+/// \param[in] values input to extract number of day of the year from
+/// \param[in] ctx the function execution context, optional
+/// \return the resulting datum
+///
+/// \since 5.0.0
+/// \note API not yet finalized
+ARROW_EXPORT Result<Datum> DayOfYear(const Datum& values, ExecContext* ctx = NULLPTR);
+
+/// \brief ISOYear returns ISO year number for each element of `values`.
+/// First week of an ISO year has the majority (4 or more) of its days in January.
+///
+/// \param[in] values input to extract ISO year from
+/// \param[in] ctx the function execution context, optional
+/// \return the resulting datum
+///
+/// \since 5.0.0
+/// \note API not yet finalized
+ARROW_EXPORT
+Result<Datum> ISOYear(const Datum& values, ExecContext* ctx = NULLPTR);
+
+/// \brief ISOWeek returns ISO week of year number for each element of `values`.
+/// First ISO week has the majority (4 or more) of its days in January.
+/// Week of the year starts with 1 and can run up to 53.
+///
+/// \param[in] values input to extract ISO week of year from
+/// \param[in] ctx the function execution context, optional
+/// \return the resulting datum
+///
+/// \since 5.0.0
+/// \note API not yet finalized
+ARROW_EXPORT Result<Datum> ISOWeek(const Datum& values, ExecContext* ctx = NULLPTR);
+
+/// \brief ISOCalendar returns a (ISO year, ISO week, ISO day of week) struct for
+/// each element of `values`.
+/// ISO week starts on Monday denoted by 1 and ends on Sunday denoted by 7.
+///
+/// \param[in] values input to extract the ISO calendar struct from
+/// \param[in] ctx the function execution context, optional
+/// \return the resulting datum
+///
+/// \since 5.0.0
+/// \note API not yet finalized
+ARROW_EXPORT Result<Datum> ISOCalendar(const Datum& values, ExecContext* ctx = NULLPTR);
+
+/// \brief Quarter returns the quarter of the year number for each element of `values`.
+/// First quarter maps to 1 and fourth quarter maps to 4.
+///
+/// \param[in] values input to extract quarter of year from
+/// \param[in] ctx the function execution context, optional
+/// \return the resulting datum
+///
+/// \since 5.0.0
+/// \note API not yet finalized
+ARROW_EXPORT Result<Datum> Quarter(const Datum& values, ExecContext* ctx = NULLPTR);
+
+/// \brief Hour returns hour value for each element of `values`
+///
+/// \param[in] values input to extract hour from
+/// \param[in] ctx the function execution context, optional
+/// \return the resulting datum
+///
+/// \since 5.0.0
+/// \note API not yet finalized
+ARROW_EXPORT
+Result<Datum> Hour(const Datum& values, ExecContext* ctx = NULLPTR);
+
+/// \brief Minute returns minutes value for each element of `values`
+///
+/// \param[in] values input to extract minutes from
+/// \param[in] ctx the function execution context, optional
+/// \return the resulting datum
+///
+/// \since 5.0.0
+/// \note API not yet finalized
+ARROW_EXPORT
+Result<Datum> Minute(const Datum& values, ExecContext* ctx = NULLPTR);
+
+/// \brief Second returns seconds value for each element of `values`
+///
+/// \param[in] values input to extract seconds from
+/// \param[in] ctx the function execution context, optional
+/// \return the resulting datum
+///
+/// \since 5.0.0
+/// \note API not yet finalized
+ARROW_EXPORT
+Result<Datum> Second(const Datum& values, ExecContext* ctx = NULLPTR);
+
+/// \brief Millisecond returns number of milliseconds since the last full second
+/// for each element of `values`
+///
+/// \param[in] values input to extract milliseconds from
+/// \param[in] ctx the function execution context, optional
+/// \return the resulting datum
+///
+/// \since 5.0.0
+/// \note API not yet finalized
+ARROW_EXPORT
+Result<Datum> Millisecond(const Datum& values, ExecContext* ctx = NULLPTR);
+
+/// \brief Microsecond returns number of microseconds since the last full millisecond
+/// for each element of `values`
+///
+/// \param[in] values input to extract microseconds from
+/// \param[in] ctx the function execution context, optional
+/// \return the resulting datum
+///
+/// \since 5.0.0
+/// \note API not yet finalized
+ARROW_EXPORT
+Result<Datum> Microsecond(const Datum& values, ExecContext* ctx = NULLPTR);
+
+/// \brief Nanosecond returns number of nanoseconds since the last full microsecond
+/// for each element of `values`
+///
+/// \param[in] values input to extract nanoseconds from
+/// \param[in] ctx the function execution context, optional
+/// \return the resulting datum
+///
+/// \since 5.0.0
+/// \note API not yet finalized
+ARROW_EXPORT
+Result<Datum> Nanosecond(const Datum& values, ExecContext* ctx = NULLPTR);
+
+/// \brief Subsecond returns the fraction of the second elapsed since the last
+/// full second as a float for each element of `values`
+///
+/// \param[in] values input to extract subsecond from
+/// \param[in] ctx the function execution context, optional
+/// \return the resulting datum
+///
+/// \since 5.0.0
+/// \note API not yet finalized
+ARROW_EXPORT Result<Datum> Subsecond(const Datum& values, ExecContext* ctx = NULLPTR);
+
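
All of these extractors share the same one-shot call shape; a sketch assuming
`ts` is a std::shared_ptr<arrow::Array> of timestamp type and this version's
DayOfWeekOptions(one_based_numbering, week_start) constructor:

    ARROW_ASSIGN_OR_RAISE(arrow::Datum years, arrow::compute::Year(ts));
    ARROW_ASSIGN_OR_RAISE(arrow::Datum quarters, arrow::compute::Quarter(ts));
    // Monday=1 .. Sunday=7 numbering:
    arrow::compute::DayOfWeekOptions opts(/*one_based_numbering=*/true,
                                          /*week_start=*/1);
    ARROW_ASSIGN_OR_RAISE(arrow::Datum dow, arrow::compute::DayOfWeek(ts, opts));
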
} // namespace compute
} // namespace arrow
diff --git a/contrib/libs/apache/arrow/cpp/src/arrow/compute/api_vector.cc b/contrib/libs/apache/arrow/cpp/src/arrow/compute/api_vector.cc
index 4b875ddaf04..a68969b2ee5 100644
--- a/contrib/libs/apache/arrow/cpp/src/arrow/compute/api_vector.cc
+++ b/contrib/libs/apache/arrow/cpp/src/arrow/compute/api_vector.cc
@@ -18,140 +18,140 @@
#include "arrow/compute/api_vector.h"
#include <memory>
-#include <sstream>
+#include <sstream>
#include <utility>
#include <vector>
#include "arrow/array/array_nested.h"
#include "arrow/array/builder_primitive.h"
#include "arrow/compute/exec.h"
-#include "arrow/compute/function_internal.h"
-#include "arrow/compute/registry.h"
+#include "arrow/compute/function_internal.h"
+#include "arrow/compute/registry.h"
#include "arrow/datum.h"
#include "arrow/record_batch.h"
#include "arrow/result.h"
#include "arrow/util/checked_cast.h"
-#include "arrow/util/logging.h"
+#include "arrow/util/logging.h"
namespace arrow {
-using internal::checked_cast;
+using internal::checked_cast;
using internal::checked_pointer_cast;
-namespace internal {
-using compute::DictionaryEncodeOptions;
-using compute::FilterOptions;
-template <>
-struct EnumTraits<FilterOptions::NullSelectionBehavior>
- : BasicEnumTraits<FilterOptions::NullSelectionBehavior, FilterOptions::DROP,
- FilterOptions::EMIT_NULL> {
- static std::string name() { return "FilterOptions::NullSelectionBehavior"; }
- static std::string value_name(FilterOptions::NullSelectionBehavior value) {
- switch (value) {
- case FilterOptions::DROP:
- return "DROP";
- case FilterOptions::EMIT_NULL:
- return "EMIT_NULL";
- }
- return "<INVALID>";
- }
-};
-template <>
-struct EnumTraits<DictionaryEncodeOptions::NullEncodingBehavior>
- : BasicEnumTraits<DictionaryEncodeOptions::NullEncodingBehavior,
- DictionaryEncodeOptions::ENCODE, DictionaryEncodeOptions::MASK> {
- static std::string name() { return "DictionaryEncodeOptions::NullEncodingBehavior"; }
- static std::string value_name(DictionaryEncodeOptions::NullEncodingBehavior value) {
- switch (value) {
- case DictionaryEncodeOptions::ENCODE:
- return "ENCODE";
- case DictionaryEncodeOptions::MASK:
- return "MASK";
- }
- return "<INVALID>";
- }
-};
-} // namespace internal
-
+namespace internal {
+using compute::DictionaryEncodeOptions;
+using compute::FilterOptions;
+template <>
+struct EnumTraits<FilterOptions::NullSelectionBehavior>
+ : BasicEnumTraits<FilterOptions::NullSelectionBehavior, FilterOptions::DROP,
+ FilterOptions::EMIT_NULL> {
+ static std::string name() { return "FilterOptions::NullSelectionBehavior"; }
+ static std::string value_name(FilterOptions::NullSelectionBehavior value) {
+ switch (value) {
+ case FilterOptions::DROP:
+ return "DROP";
+ case FilterOptions::EMIT_NULL:
+ return "EMIT_NULL";
+ }
+ return "<INVALID>";
+ }
+};
+template <>
+struct EnumTraits<DictionaryEncodeOptions::NullEncodingBehavior>
+ : BasicEnumTraits<DictionaryEncodeOptions::NullEncodingBehavior,
+ DictionaryEncodeOptions::ENCODE, DictionaryEncodeOptions::MASK> {
+ static std::string name() { return "DictionaryEncodeOptions::NullEncodingBehavior"; }
+ static std::string value_name(DictionaryEncodeOptions::NullEncodingBehavior value) {
+ switch (value) {
+ case DictionaryEncodeOptions::ENCODE:
+ return "ENCODE";
+ case DictionaryEncodeOptions::MASK:
+ return "MASK";
+ }
+ return "<INVALID>";
+ }
+};
+} // namespace internal
+
namespace compute {
// ----------------------------------------------------------------------
-// Function options
-
-bool SortKey::Equals(const SortKey& other) const {
- return name == other.name && order == other.order;
-}
-std::string SortKey::ToString() const {
- std::stringstream ss;
- ss << name << ' ';
- switch (order) {
- case SortOrder::Ascending:
- ss << "ASC";
- break;
- case SortOrder::Descending:
- ss << "DESC";
- break;
- }
- return ss.str();
-}
-
-namespace internal {
-namespace {
-using ::arrow::internal::DataMember;
-static auto kFilterOptionsType = GetFunctionOptionsType<FilterOptions>(
- DataMember("null_selection_behavior", &FilterOptions::null_selection_behavior));
-static auto kTakeOptionsType = GetFunctionOptionsType<TakeOptions>(
- DataMember("boundscheck", &TakeOptions::boundscheck));
-static auto kDictionaryEncodeOptionsType =
- GetFunctionOptionsType<DictionaryEncodeOptions>(DataMember(
- "null_encoding_behavior", &DictionaryEncodeOptions::null_encoding_behavior));
-static auto kArraySortOptionsType = GetFunctionOptionsType<ArraySortOptions>(
- DataMember("order", &ArraySortOptions::order));
-static auto kSortOptionsType =
- GetFunctionOptionsType<SortOptions>(DataMember("sort_keys", &SortOptions::sort_keys));
-static auto kPartitionNthOptionsType = GetFunctionOptionsType<PartitionNthOptions>(
- DataMember("pivot", &PartitionNthOptions::pivot));
-} // namespace
-} // namespace internal
-
-FilterOptions::FilterOptions(NullSelectionBehavior null_selection)
- : FunctionOptions(internal::kFilterOptionsType),
- null_selection_behavior(null_selection) {}
-constexpr char FilterOptions::kTypeName[];
-
-TakeOptions::TakeOptions(bool boundscheck)
- : FunctionOptions(internal::kTakeOptionsType), boundscheck(boundscheck) {}
-constexpr char TakeOptions::kTypeName[];
-
-DictionaryEncodeOptions::DictionaryEncodeOptions(NullEncodingBehavior null_encoding)
- : FunctionOptions(internal::kDictionaryEncodeOptionsType),
- null_encoding_behavior(null_encoding) {}
-constexpr char DictionaryEncodeOptions::kTypeName[];
-
-ArraySortOptions::ArraySortOptions(SortOrder order)
- : FunctionOptions(internal::kArraySortOptionsType), order(order) {}
-constexpr char ArraySortOptions::kTypeName[];
-
-SortOptions::SortOptions(std::vector<SortKey> sort_keys)
- : FunctionOptions(internal::kSortOptionsType), sort_keys(std::move(sort_keys)) {}
-constexpr char SortOptions::kTypeName[];
-
-PartitionNthOptions::PartitionNthOptions(int64_t pivot)
- : FunctionOptions(internal::kPartitionNthOptionsType), pivot(pivot) {}
-constexpr char PartitionNthOptions::kTypeName[];
-
-namespace internal {
-void RegisterVectorOptions(FunctionRegistry* registry) {
- DCHECK_OK(registry->AddFunctionOptionsType(kFilterOptionsType));
- DCHECK_OK(registry->AddFunctionOptionsType(kTakeOptionsType));
- DCHECK_OK(registry->AddFunctionOptionsType(kDictionaryEncodeOptionsType));
- DCHECK_OK(registry->AddFunctionOptionsType(kArraySortOptionsType));
- DCHECK_OK(registry->AddFunctionOptionsType(kSortOptionsType));
- DCHECK_OK(registry->AddFunctionOptionsType(kPartitionNthOptionsType));
-}
-} // namespace internal
-
-// ----------------------------------------------------------------------
+// Function options
+
+bool SortKey::Equals(const SortKey& other) const {
+ return name == other.name && order == other.order;
+}
+std::string SortKey::ToString() const {
+ std::stringstream ss;
+ ss << name << ' ';
+ switch (order) {
+ case SortOrder::Ascending:
+ ss << "ASC";
+ break;
+ case SortOrder::Descending:
+ ss << "DESC";
+ break;
+ }
+ return ss.str();
+}
+
+namespace internal {
+namespace {
+using ::arrow::internal::DataMember;
+static auto kFilterOptionsType = GetFunctionOptionsType<FilterOptions>(
+ DataMember("null_selection_behavior", &FilterOptions::null_selection_behavior));
+static auto kTakeOptionsType = GetFunctionOptionsType<TakeOptions>(
+ DataMember("boundscheck", &TakeOptions::boundscheck));
+static auto kDictionaryEncodeOptionsType =
+ GetFunctionOptionsType<DictionaryEncodeOptions>(DataMember(
+ "null_encoding_behavior", &DictionaryEncodeOptions::null_encoding_behavior));
+static auto kArraySortOptionsType = GetFunctionOptionsType<ArraySortOptions>(
+ DataMember("order", &ArraySortOptions::order));
+static auto kSortOptionsType =
+ GetFunctionOptionsType<SortOptions>(DataMember("sort_keys", &SortOptions::sort_keys));
+static auto kPartitionNthOptionsType = GetFunctionOptionsType<PartitionNthOptions>(
+ DataMember("pivot", &PartitionNthOptions::pivot));
+} // namespace
+} // namespace internal
+
+FilterOptions::FilterOptions(NullSelectionBehavior null_selection)
+ : FunctionOptions(internal::kFilterOptionsType),
+ null_selection_behavior(null_selection) {}
+constexpr char FilterOptions::kTypeName[];
+
+TakeOptions::TakeOptions(bool boundscheck)
+ : FunctionOptions(internal::kTakeOptionsType), boundscheck(boundscheck) {}
+constexpr char TakeOptions::kTypeName[];
+
+DictionaryEncodeOptions::DictionaryEncodeOptions(NullEncodingBehavior null_encoding)
+ : FunctionOptions(internal::kDictionaryEncodeOptionsType),
+ null_encoding_behavior(null_encoding) {}
+constexpr char DictionaryEncodeOptions::kTypeName[];
+
+ArraySortOptions::ArraySortOptions(SortOrder order)
+ : FunctionOptions(internal::kArraySortOptionsType), order(order) {}
+constexpr char ArraySortOptions::kTypeName[];
+
+SortOptions::SortOptions(std::vector<SortKey> sort_keys)
+ : FunctionOptions(internal::kSortOptionsType), sort_keys(std::move(sort_keys)) {}
+constexpr char SortOptions::kTypeName[];
+
+PartitionNthOptions::PartitionNthOptions(int64_t pivot)
+ : FunctionOptions(internal::kPartitionNthOptionsType), pivot(pivot) {}
+constexpr char PartitionNthOptions::kTypeName[];
+
+namespace internal {
+void RegisterVectorOptions(FunctionRegistry* registry) {
+ DCHECK_OK(registry->AddFunctionOptionsType(kFilterOptionsType));
+ DCHECK_OK(registry->AddFunctionOptionsType(kTakeOptionsType));
+ DCHECK_OK(registry->AddFunctionOptionsType(kDictionaryEncodeOptionsType));
+ DCHECK_OK(registry->AddFunctionOptionsType(kArraySortOptionsType));
+ DCHECK_OK(registry->AddFunctionOptionsType(kSortOptionsType));
+ DCHECK_OK(registry->AddFunctionOptionsType(kPartitionNthOptionsType));
+}
+} // namespace internal
+
+// ----------------------------------------------------------------------
// Direct exec interface to kernels
Result<std::shared_ptr<Array>> NthToIndices(const Array& values, int64_t n,
@@ -162,42 +162,42 @@ Result<std::shared_ptr<Array>> NthToIndices(const Array& values, int64_t n,
return result.make_array();
}
-Result<Datum> ReplaceWithMask(const Datum& values, const Datum& mask,
- const Datum& replacements, ExecContext* ctx) {
- return CallFunction("replace_with_mask", {values, mask, replacements}, ctx);
-}
-
-Result<std::shared_ptr<Array>> SortIndices(const Array& values, SortOrder order,
- ExecContext* ctx) {
- ArraySortOptions options(order);
- ARROW_ASSIGN_OR_RAISE(
- Datum result, CallFunction("array_sort_indices", {Datum(values)}, &options, ctx));
+Result<Datum> ReplaceWithMask(const Datum& values, const Datum& mask,
+ const Datum& replacements, ExecContext* ctx) {
+ return CallFunction("replace_with_mask", {values, mask, replacements}, ctx);
+}
+
+Result<std::shared_ptr<Array>> SortIndices(const Array& values, SortOrder order,
+ ExecContext* ctx) {
+ ArraySortOptions options(order);
+ ARROW_ASSIGN_OR_RAISE(
+ Datum result, CallFunction("array_sort_indices", {Datum(values)}, &options, ctx));
+ return result.make_array();
+}
+
+Result<std::shared_ptr<Array>> SortIndices(const ChunkedArray& chunked_array,
+ SortOrder order, ExecContext* ctx) {
+ SortOptions options({SortKey("not-used", order)});
+ ARROW_ASSIGN_OR_RAISE(
+ Datum result, CallFunction("sort_indices", {Datum(chunked_array)}, &options, ctx));
+ return result.make_array();
+}
+
+Result<std::shared_ptr<Array>> SortIndices(const Datum& datum, const SortOptions& options,
+ ExecContext* ctx) {
+ ARROW_ASSIGN_OR_RAISE(Datum result,
+ CallFunction("sort_indices", {datum}, &options, ctx));
return result.make_array();
}
-Result<std::shared_ptr<Array>> SortIndices(const ChunkedArray& chunked_array,
- SortOrder order, ExecContext* ctx) {
- SortOptions options({SortKey("not-used", order)});
- ARROW_ASSIGN_OR_RAISE(
- Datum result, CallFunction("sort_indices", {Datum(chunked_array)}, &options, ctx));
- return result.make_array();
-}
-
-Result<std::shared_ptr<Array>> SortIndices(const Datum& datum, const SortOptions& options,
- ExecContext* ctx) {
- ARROW_ASSIGN_OR_RAISE(Datum result,
- CallFunction("sort_indices", {datum}, &options, ctx));
- return result.make_array();
-}
-
Result<std::shared_ptr<Array>> Unique(const Datum& value, ExecContext* ctx) {
ARROW_ASSIGN_OR_RAISE(Datum result, CallFunction("unique", {value}, ctx));
return result.make_array();
}
-Result<Datum> DictionaryEncode(const Datum& value, const DictionaryEncodeOptions& options,
- ExecContext* ctx) {
- return CallFunction("dictionary_encode", {value}, &options, ctx);
+Result<Datum> DictionaryEncode(const Datum& value, const DictionaryEncodeOptions& options,
+ ExecContext* ctx) {
+ return CallFunction("dictionary_encode", {value}, &options, ctx);
}
const char kValuesFieldName[] = "values";
@@ -275,9 +275,9 @@ Result<std::shared_ptr<Table>> Take(const Table& table, const ChunkedArray& indi
return result.table();
}
-Result<std::shared_ptr<Array>> SortToIndices(const Array& values, ExecContext* ctx) {
- return SortIndices(values, SortOrder::Ascending, ctx);
-}
-
+Result<std::shared_ptr<Array>> SortToIndices(const Array& values, ExecContext* ctx) {
+ return SortIndices(values, SortOrder::Ascending, ctx);
+}
+
} // namespace compute
} // namespace arrow
diff --git a/contrib/libs/apache/arrow/cpp/src/arrow/compute/api_vector.h b/contrib/libs/apache/arrow/cpp/src/arrow/compute/api_vector.h
index c3a81542b76..9d8d4271db8 100644
--- a/contrib/libs/apache/arrow/cpp/src/arrow/compute/api_vector.h
+++ b/contrib/libs/apache/arrow/cpp/src/arrow/compute/api_vector.h
@@ -32,8 +32,8 @@ class ExecContext;
/// \addtogroup compute-concrete-options
/// @{
-class ARROW_EXPORT FilterOptions : public FunctionOptions {
- public:
+class ARROW_EXPORT FilterOptions : public FunctionOptions {
+ public:
/// Configure the action taken when a slot of the selection mask is null
enum NullSelectionBehavior {
/// the corresponding filtered value will be removed in the output
@@ -42,89 +42,89 @@ class ARROW_EXPORT FilterOptions : public FunctionOptions {
EMIT_NULL,
};
- explicit FilterOptions(NullSelectionBehavior null_selection = DROP);
- constexpr static char const kTypeName[] = "FilterOptions";
+ explicit FilterOptions(NullSelectionBehavior null_selection = DROP);
+ constexpr static char const kTypeName[] = "FilterOptions";
static FilterOptions Defaults() { return FilterOptions(); }
NullSelectionBehavior null_selection_behavior = DROP;
};
-class ARROW_EXPORT TakeOptions : public FunctionOptions {
- public:
- explicit TakeOptions(bool boundscheck = true);
- constexpr static char const kTypeName[] = "TakeOptions";
+class ARROW_EXPORT TakeOptions : public FunctionOptions {
+ public:
+ explicit TakeOptions(bool boundscheck = true);
+ constexpr static char const kTypeName[] = "TakeOptions";
static TakeOptions BoundsCheck() { return TakeOptions(true); }
static TakeOptions NoBoundsCheck() { return TakeOptions(false); }
static TakeOptions Defaults() { return BoundsCheck(); }
-
- bool boundscheck = true;
+
+ bool boundscheck = true;
+};
+
+/// \brief Options for the dictionary encode function
+class ARROW_EXPORT DictionaryEncodeOptions : public FunctionOptions {
+ public:
+ /// Configure how null values will be encoded
+ enum NullEncodingBehavior {
+ /// the null value will be added to the dictionary with a proper index
+ ENCODE,
+ /// the null value will be masked in the indices array
+ MASK
+ };
+
+ explicit DictionaryEncodeOptions(NullEncodingBehavior null_encoding = MASK);
+ constexpr static char const kTypeName[] = "DictionaryEncodeOptions";
+ static DictionaryEncodeOptions Defaults() { return DictionaryEncodeOptions(); }
+
+ NullEncodingBehavior null_encoding_behavior = MASK;
+};
+
+enum class SortOrder {
+ Ascending,
+ Descending,
+};
+
+/// \brief One sort key for PartitionNthIndices (TODO) and SortIndices
+class ARROW_EXPORT SortKey : public util::EqualityComparable<SortKey> {
+ public:
+ explicit SortKey(std::string name, SortOrder order = SortOrder::Ascending)
+ : name(name), order(order) {}
+
+ using util::EqualityComparable<SortKey>::Equals;
+ using util::EqualityComparable<SortKey>::operator==;
+ using util::EqualityComparable<SortKey>::operator!=;
+ bool Equals(const SortKey& other) const;
+ std::string ToString() const;
+
+ /// The name of the sort column.
+ std::string name;
+ /// How to order by this sort key.
+ SortOrder order;
+};
+
+class ARROW_EXPORT ArraySortOptions : public FunctionOptions {
+ public:
+ explicit ArraySortOptions(SortOrder order = SortOrder::Ascending);
+ constexpr static char const kTypeName[] = "ArraySortOptions";
+ static ArraySortOptions Defaults() { return ArraySortOptions{}; }
+
+ SortOrder order;
+};
+
+class ARROW_EXPORT SortOptions : public FunctionOptions {
+ public:
+ explicit SortOptions(std::vector<SortKey> sort_keys = {});
+ constexpr static char const kTypeName[] = "SortOptions";
+ static SortOptions Defaults() { return SortOptions{}; }
+
+ std::vector<SortKey> sort_keys;
};
-/// \brief Options for the dictionary encode function
-class ARROW_EXPORT DictionaryEncodeOptions : public FunctionOptions {
- public:
- /// Configure how null values will be encoded
- enum NullEncodingBehavior {
- /// the null value will be added to the dictionary with a proper index
- ENCODE,
- /// the null value will be masked in the indices array
- MASK
- };
-
- explicit DictionaryEncodeOptions(NullEncodingBehavior null_encoding = MASK);
- constexpr static char const kTypeName[] = "DictionaryEncodeOptions";
- static DictionaryEncodeOptions Defaults() { return DictionaryEncodeOptions(); }
-
- NullEncodingBehavior null_encoding_behavior = MASK;
-};
-
-enum class SortOrder {
- Ascending,
- Descending,
-};
-
-/// \brief One sort key for PartitionNthIndices (TODO) and SortIndices
-class ARROW_EXPORT SortKey : public util::EqualityComparable<SortKey> {
- public:
- explicit SortKey(std::string name, SortOrder order = SortOrder::Ascending)
- : name(name), order(order) {}
-
- using util::EqualityComparable<SortKey>::Equals;
- using util::EqualityComparable<SortKey>::operator==;
- using util::EqualityComparable<SortKey>::operator!=;
- bool Equals(const SortKey& other) const;
- std::string ToString() const;
-
- /// The name of the sort column.
- std::string name;
- /// How to order by this sort key.
- SortOrder order;
-};
-
-class ARROW_EXPORT ArraySortOptions : public FunctionOptions {
- public:
- explicit ArraySortOptions(SortOrder order = SortOrder::Ascending);
- constexpr static char const kTypeName[] = "ArraySortOptions";
- static ArraySortOptions Defaults() { return ArraySortOptions{}; }
-
- SortOrder order;
-};
-
-class ARROW_EXPORT SortOptions : public FunctionOptions {
- public:
- explicit SortOptions(std::vector<SortKey> sort_keys = {});
- constexpr static char const kTypeName[] = "SortOptions";
- static SortOptions Defaults() { return SortOptions{}; }
-
- std::vector<SortKey> sort_keys;
-};
-
/// \brief Partitioning options for NthToIndices
-class ARROW_EXPORT PartitionNthOptions : public FunctionOptions {
- public:
- explicit PartitionNthOptions(int64_t pivot);
- PartitionNthOptions() : PartitionNthOptions(0) {}
- constexpr static char const kTypeName[] = "PartitionNthOptions";
+class ARROW_EXPORT PartitionNthOptions : public FunctionOptions {
+ public:
+ explicit PartitionNthOptions(int64_t pivot);
+ PartitionNthOptions() : PartitionNthOptions(0) {}
+ constexpr static char const kTypeName[] = "PartitionNthOptions";
/// The index into the equivalent sorted array of the partition pivot element.
int64_t pivot;
@@ -171,23 +171,23 @@ Result<std::shared_ptr<ArrayData>> GetTakeIndices(
} // namespace internal
-/// \brief ReplaceWithMask replaces each value in the array corresponding
-/// to a true value in the mask with the next element from `replacements`.
-///
-/// \param[in] values Array input to replace
-/// \param[in] mask Array or Scalar of Boolean mask values
-/// \param[in] replacements The replacement values to draw from. There must
-/// be as many replacement values as true values in the mask.
-/// \param[in] ctx the function execution context, optional
-///
-/// \return the resulting datum
-///
-/// \since 5.0.0
-/// \note API not yet finalized
-ARROW_EXPORT
-Result<Datum> ReplaceWithMask(const Datum& values, const Datum& mask,
- const Datum& replacements, ExecContext* ctx = NULLPTR);
-
+/// \brief ReplaceWithMask replaces each value in the array corresponding
+/// to a true value in the mask with the next element from `replacements`.
+///
+/// \param[in] values Array input to replace
+/// \param[in] mask Array or Scalar of Boolean mask values
+/// \param[in] replacements The replacement values to draw from. There must
+/// be as many replacement values as true values in the mask.
+/// \param[in] ctx the function execution context, optional
+///
+/// \return the resulting datum
+///
+/// \since 5.0.0
+/// \note API not yet finalized
+ARROW_EXPORT
+Result<Datum> ReplaceWithMask(const Datum& values, const Datum& mask,
+ const Datum& replacements, ExecContext* ctx = NULLPTR);
+
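
A sketch of the documented semantics (hypothetical arrays; replacements are
consumed left to right, one per true slot in the mask):

    // values:       [1, 2, 3, 4]
    // mask:         [false, true, false, true]
    // replacements: [20, 40]
    // result:       [1, 20, 3, 40]
    ARROW_ASSIGN_OR_RAISE(arrow::Datum patched,
        arrow::compute::ReplaceWithMask(values, mask, replacements));
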
/// \brief Take from an array of values at indices in another array
///
/// The output array will be of the same type as the input values
@@ -233,73 +233,73 @@ ARROW_EXPORT
Result<std::shared_ptr<Array>> NthToIndices(const Array& values, int64_t n,
ExecContext* ctx = NULLPTR);
-/// \brief Returns the indices that would sort an array in the
-/// specified order.
+/// \brief Returns the indices that would sort an array in the
+/// specified order.
///
/// Perform an indirect sort of array. The output array will contain
/// indices that would sort an array, which would be the same length
-/// as input. Nulls will be stably partitioned to the end of the output
-/// regardless of order.
+/// as input. Nulls will be stably partitioned to the end of the output
+/// regardless of order.
///
-/// For example given array = [null, 1, 3.3, null, 2, 5.3] and order
-/// = SortOrder::DESCENDING, the output will be [5, 2, 4, 1, 0,
-/// 3].
+/// For example given array = [null, 1, 3.3, null, 2, 5.3] and order
+/// = SortOrder::DESCENDING, the output will be [5, 2, 4, 1, 0,
+/// 3].
///
-/// \param[in] array array to sort
-/// \param[in] order ascending or descending
+/// \param[in] array array to sort
+/// \param[in] order ascending or descending
/// \param[in] ctx the function execution context, optional
/// \return offsets indices that would sort an array
ARROW_EXPORT
-Result<std::shared_ptr<Array>> SortIndices(const Array& array,
- SortOrder order = SortOrder::Ascending,
- ExecContext* ctx = NULLPTR);
-
-/// \brief Returns the indices that would sort a chunked array in the
-/// specified order.
-///
-/// Perform an indirect sort of chunked array. The output array will
-/// contain indices that would sort a chunked array, which would be
-/// the same length as input. Nulls will be stably partitioned to the
-/// end of the output regardless of order.
-///
-/// For example given chunked_array = [[null, 1], [3.3], [null, 2,
-/// 5.3]] and order = SortOrder::DESCENDING, the output will be [5, 2,
-/// 4, 1, 0, 3].
-///
-/// \param[in] chunked_array chunked array to sort
-/// \param[in] order ascending or descending
-/// \param[in] ctx the function execution context, optional
-/// \return offsets indices that would sort an array
-ARROW_EXPORT
-Result<std::shared_ptr<Array>> SortIndices(const ChunkedArray& chunked_array,
- SortOrder order = SortOrder::Ascending,
- ExecContext* ctx = NULLPTR);
-
-/// \brief Returns the indices that would sort an input in the
-/// specified order. Input is one of array, chunked array, record batch
-/// or table.
-///
-/// Perform an indirect sort of input. The output array will contain
-/// indices that would sort an input, which would be the same length
-/// as input. Nulls will be stably partitioned to the end of the
-/// output regardless of order.
-///
-/// For example given input (table) = {
-/// "column1": [[null, 1], [ 3, null, 2, 1]],
-/// "column2": [[ 5], [3, null, null, 5, 5]],
-/// } and options = {
-/// {"column1", SortOrder::Ascending},
-/// {"column2", SortOrder::Descending},
-/// }, the output will be [5, 1, 4, 2, 0, 3].
-///
-/// \param[in] datum array, chunked array, record batch or table to sort
-/// \param[in] options options
-/// \param[in] ctx the function execution context, optional
-/// \return offsets indices that would sort a table
-ARROW_EXPORT
-Result<std::shared_ptr<Array>> SortIndices(const Datum& datum, const SortOptions& options,
- ExecContext* ctx = NULLPTR);
-
+Result<std::shared_ptr<Array>> SortIndices(const Array& array,
+ SortOrder order = SortOrder::Ascending,
+ ExecContext* ctx = NULLPTR);
+
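
The returned indices are typically fed straight into Take() to materialize the
sorted array; a sketch with `array` a hypothetical std::shared_ptr<arrow::Array>:

    ARROW_ASSIGN_OR_RAISE(auto indices, arrow::compute::SortIndices(
        *array, arrow::compute::SortOrder::Descending));
    ARROW_ASSIGN_OR_RAISE(arrow::Datum sorted, arrow::compute::Take(array, indices));
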
+/// \brief Returns the indices that would sort a chunked array in the
+/// specified order.
+///
+/// Perform an indirect sort of chunked array. The output array will
+/// contain indices that would sort a chunked array, which would be
+/// the same length as input. Nulls will be stably partitioned to the
+/// end of the output regardless of order.
+///
+/// For example given chunked_array = [[null, 1], [3.3], [null, 2,
+/// 5.3]] and order = SortOrder::DESCENDING, the output will be [5, 2,
+/// 4, 1, 0, 3].
+///
+/// \param[in] chunked_array chunked array to sort
+/// \param[in] order ascending or descending
+/// \param[in] ctx the function execution context, optional
+/// \return offsets indices that would sort an array
+ARROW_EXPORT
+Result<std::shared_ptr<Array>> SortIndices(const ChunkedArray& chunked_array,
+ SortOrder order = SortOrder::Ascending,
+ ExecContext* ctx = NULLPTR);
+
+/// \brief Returns the indices that would sort an input in the
+/// specified order. Input is one of array, chunked array, record batch
+/// or table.
+///
+/// Perform an indirect sort of input. The output array will contain
+/// indices that would sort an input, which would be the same length
+/// as input. Nulls will be stably partitioned to the end of the
+/// output regardless of order.
+///
+/// For example given input (table) = {
+/// "column1": [[null, 1], [ 3, null, 2, 1]],
+/// "column2": [[ 5], [3, null, null, 5, 5]],
+/// } and options = {
+/// {"column1", SortOrder::Ascending},
+/// {"column2", SortOrder::Descending},
+/// }, the output will be [5, 1, 4, 2, 0, 3].
+///
+/// \param[in] datum array, chunked array, record batch or table to sort
+/// \param[in] options options
+/// \param[in] ctx the function execution context, optional
+/// \return offsets indices that would sort a table
+ARROW_EXPORT
+Result<std::shared_ptr<Array>> SortIndices(const Datum& datum, const SortOptions& options,
+ ExecContext* ctx = NULLPTR);
+
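
A multi-key table sort matching the example in the comment above (assuming
`table` is a std::shared_ptr<arrow::Table> with those columns):

    arrow::compute::SortOptions options(
        {arrow::compute::SortKey("column1", arrow::compute::SortOrder::Ascending),
         arrow::compute::SortKey("column2", arrow::compute::SortOrder::Descending)});
    ARROW_ASSIGN_OR_RAISE(auto indices,
        arrow::compute::SortIndices(arrow::Datum(table), options));
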
/// \brief Compute unique elements from an array-like object
///
/// Note if a null occurs in the input it will NOT be included in the output.
@@ -338,29 +338,29 @@ Result<std::shared_ptr<StructArray>> ValueCounts(const Datum& value,
ExecContext* ctx = NULLPTR);
/// \brief Dictionary-encode values in an array-like object
-///
-/// Any nulls encountered in the dictionary will be handled according to the
-/// specified null encoding behavior.
-///
-/// For example, given values ["a", "b", null, "a", null] the output will be
-/// (null_encoding == ENCODE) Indices: [0, 1, 2, 0, 2] / Dict: ["a", "b", null]
-/// (null_encoding == MASK) Indices: [0, 1, null, 0, null] / Dict: ["a", "b"]
-///
-/// If the input is already dictionary encoded this function is a no-op unless
-/// it needs to modify the null_encoding (TODO)
-///
+///
+/// Any nulls encountered in the dictionary will be handled according to the
+/// specified null encoding behavior.
+///
+/// For example, given values ["a", "b", null, "a", null] the output will be
+/// (null_encoding == ENCODE) Indices: [0, 1, 2, 0, 2] / Dict: ["a", "b", null]
+/// (null_encoding == MASK) Indices: [0, 1, null, 0, null] / Dict: ["a", "b"]
+///
+/// If the input is already dictionary encoded this function is a no-op unless
+/// it needs to modify the null_encoding (TODO)
+///
/// \param[in] data array-like input
/// \param[in] ctx the function execution context, optional
-/// \param[in] options configures null encoding behavior
+/// \param[in] options configures null encoding behavior
/// \return result with same shape and type as input
///
/// \since 1.0.0
/// \note API not yet finalized
ARROW_EXPORT
-Result<Datum> DictionaryEncode(
- const Datum& data,
- const DictionaryEncodeOptions& options = DictionaryEncodeOptions::Defaults(),
- ExecContext* ctx = NULLPTR);
+Result<Datum> DictionaryEncode(
+ const Datum& data,
+ const DictionaryEncodeOptions& options = DictionaryEncodeOptions::Defaults(),
+ ExecContext* ctx = NULLPTR);
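
With the default MASK behavior this reproduces the second line of the example
above (`values` being a hypothetical string array):

    // ["a", "b", null, "a", null] -> indices [0, 1, null, 0, null],
    //                                dictionary ["a", "b"]
    ARROW_ASSIGN_OR_RAISE(arrow::Datum encoded,
        arrow::compute::DictionaryEncode(values));
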
// ----------------------------------------------------------------------
// Deprecated functions
@@ -401,10 +401,10 @@ Result<std::shared_ptr<Table>> Take(const Table& table, const ChunkedArray& indi
const TakeOptions& options = TakeOptions::Defaults(),
ExecContext* context = NULLPTR);
-ARROW_DEPRECATED("Deprecated in 3.0.0. Use SortIndices()")
-ARROW_EXPORT
-Result<std::shared_ptr<Array>> SortToIndices(const Array& values,
- ExecContext* ctx = NULLPTR);
-
+ARROW_DEPRECATED("Deprecated in 3.0.0. Use SortIndices()")
+ARROW_EXPORT
+Result<std::shared_ptr<Array>> SortToIndices(const Array& values,
+ ExecContext* ctx = NULLPTR);
+
} // namespace compute
} // namespace arrow
diff --git a/contrib/libs/apache/arrow/cpp/src/arrow/compute/cast.cc b/contrib/libs/apache/arrow/cpp/src/arrow/compute/cast.cc
index db3b2e05da4..4de68ba8d90 100644
--- a/contrib/libs/apache/arrow/cpp/src/arrow/compute/cast.cc
+++ b/contrib/libs/apache/arrow/cpp/src/arrow/compute/cast.cc
@@ -18,7 +18,7 @@
#include "arrow/compute/cast.h"
#include <mutex>
-#include <sstream>
+#include <sstream>
#include <string>
#include <unordered_map>
#include <unordered_set>
@@ -27,12 +27,12 @@
#include "arrow/compute/cast_internal.h"
#include "arrow/compute/exec.h"
-#include "arrow/compute/function_internal.h"
+#include "arrow/compute/function_internal.h"
#include "arrow/compute/kernel.h"
#include "arrow/compute/kernels/codegen_internal.h"
#include "arrow/compute/registry.h"
#include "arrow/util/logging.h"
-#include "arrow/util/reflection_internal.h"
+#include "arrow/util/reflection_internal.h"
namespace arrow {
@@ -41,13 +41,13 @@ using internal::ToTypeName;
namespace compute {
namespace internal {
-// ----------------------------------------------------------------------
-// Function options
-
-namespace {
-
+// ----------------------------------------------------------------------
+// Function options
+
+namespace {
+
std::unordered_map<int, std::shared_ptr<CastFunction>> g_cast_table;
-std::once_flag cast_table_initialized;
+std::once_flag cast_table_initialized;
void AddCastFunctions(const std::vector<std::shared_ptr<CastFunction>>& funcs) {
for (const auto& func : funcs) {
@@ -61,7 +61,7 @@ void InitCastTable() {
AddCastFunctions(GetNestedCasts());
AddCastFunctions(GetNumericCasts());
AddCastFunctions(GetTemporalCasts());
- AddCastFunctions(GetDictionaryCasts());
+ AddCastFunctions(GetDictionaryCasts());
}
void EnsureInitCastTable() { std::call_once(cast_table_initialized, InitCastTable); }
@@ -85,17 +85,17 @@ Result<std::shared_ptr<CastFunction>> GetCastFunctionInternal(
return it->second;
}
-const FunctionDoc cast_doc{"Cast values to another data type",
- ("Behavior when values wouldn't fit in the target type\n"
- "can be controlled through CastOptions."),
- {"input"},
- "CastOptions"};
+const FunctionDoc cast_doc{"Cast values to another data type",
+ ("Behavior when values wouldn't fit in the target type\n"
+ "can be controlled through CastOptions."),
+ {"input"},
+ "CastOptions"};
-// Metafunction for dispatching to appropriate CastFunction. This corresponds
+// Metafunction for dispatching to appropriate CastFunction. This corresponds
// to the standard SQL CAST(expr AS target_type)
class CastMetaFunction : public MetaFunction {
public:
- CastMetaFunction() : MetaFunction("cast", Arity::Unary(), &cast_doc) {}
+ CastMetaFunction() : MetaFunction("cast", Arity::Unary(), &cast_doc) {}
Result<const CastOptions*> ValidateOptions(const FunctionOptions* options) const {
auto cast_options = static_cast<const CastOptions*>(options);
@@ -123,44 +123,44 @@ class CastMetaFunction : public MetaFunction {
}
};
-static auto kCastOptionsType = GetFunctionOptionsType<CastOptions>(
- arrow::internal::DataMember("to_type", &CastOptions::to_type),
- arrow::internal::DataMember("allow_int_overflow", &CastOptions::allow_int_overflow),
- arrow::internal::DataMember("allow_time_truncate", &CastOptions::allow_time_truncate),
- arrow::internal::DataMember("allow_time_overflow", &CastOptions::allow_time_overflow),
- arrow::internal::DataMember("allow_decimal_truncate",
- &CastOptions::allow_decimal_truncate),
- arrow::internal::DataMember("allow_float_truncate",
- &CastOptions::allow_float_truncate),
- arrow::internal::DataMember("allow_invalid_utf8", &CastOptions::allow_invalid_utf8));
-} // namespace
-
+static auto kCastOptionsType = GetFunctionOptionsType<CastOptions>(
+ arrow::internal::DataMember("to_type", &CastOptions::to_type),
+ arrow::internal::DataMember("allow_int_overflow", &CastOptions::allow_int_overflow),
+ arrow::internal::DataMember("allow_time_truncate", &CastOptions::allow_time_truncate),
+ arrow::internal::DataMember("allow_time_overflow", &CastOptions::allow_time_overflow),
+ arrow::internal::DataMember("allow_decimal_truncate",
+ &CastOptions::allow_decimal_truncate),
+ arrow::internal::DataMember("allow_float_truncate",
+ &CastOptions::allow_float_truncate),
+ arrow::internal::DataMember("allow_invalid_utf8", &CastOptions::allow_invalid_utf8));
+} // namespace
+
void RegisterScalarCast(FunctionRegistry* registry) {
DCHECK_OK(registry->AddFunction(std::make_shared<CastMetaFunction>()));
- DCHECK_OK(registry->AddFunctionOptionsType(kCastOptionsType));
+ DCHECK_OK(registry->AddFunctionOptionsType(kCastOptionsType));
}
} // namespace internal
-CastOptions::CastOptions(bool safe)
- : FunctionOptions(internal::kCastOptionsType),
- allow_int_overflow(!safe),
- allow_time_truncate(!safe),
- allow_time_overflow(!safe),
- allow_decimal_truncate(!safe),
- allow_float_truncate(!safe),
- allow_invalid_utf8(!safe) {}
+CastOptions::CastOptions(bool safe)
+ : FunctionOptions(internal::kCastOptionsType),
+ allow_int_overflow(!safe),
+ allow_time_truncate(!safe),
+ allow_time_overflow(!safe),
+ allow_decimal_truncate(!safe),
+ allow_float_truncate(!safe),
+ allow_invalid_utf8(!safe) {}
-constexpr char CastOptions::kTypeName[];
+constexpr char CastOptions::kTypeName[];
-CastFunction::CastFunction(std::string name, Type::type out_type_id)
- : ScalarFunction(std::move(name), Arity::Unary(), /*doc=*/nullptr),
- out_type_id_(out_type_id) {}
+CastFunction::CastFunction(std::string name, Type::type out_type_id)
+ : ScalarFunction(std::move(name), Arity::Unary(), /*doc=*/nullptr),
+ out_type_id_(out_type_id) {}
Status CastFunction::AddKernel(Type::type in_type_id, ScalarKernel kernel) {
// We use the same KernelInit for every cast
kernel.init = internal::CastState::Init;
RETURN_NOT_OK(ScalarFunction::AddKernel(kernel));
- in_type_ids_.push_back(in_type_id);
+ in_type_ids_.push_back(in_type_id);
return Status::OK();
}
@@ -176,9 +176,9 @@ Status CastFunction::AddKernel(Type::type in_type_id, std::vector<InputType> in_
return AddKernel(in_type_id, std::move(kernel));
}
-Result<const Kernel*> CastFunction::DispatchExact(
+Result<const Kernel*> CastFunction::DispatchExact(
const std::vector<ValueDescr>& values) const {
- RETURN_NOT_OK(CheckArity(values));
+ RETURN_NOT_OK(CheckArity(values));
std::vector<const ScalarKernel*> candidate_kernels;
for (const auto& kernel : kernels_) {
@@ -189,28 +189,28 @@ Result<const Kernel*> CastFunction::DispatchExact(
if (candidate_kernels.size() == 0) {
return Status::NotImplemented("Unsupported cast from ", values[0].type->ToString(),
- " to ", ToTypeName(out_type_id_), " using function ",
+ " to ", ToTypeName(out_type_id_), " using function ",
this->name());
- }
-
- if (candidate_kernels.size() == 1) {
+ }
+
+ if (candidate_kernels.size() == 1) {
// One match, return it
return candidate_kernels[0];
- }
-
-  // Now we are in a casting scenario where we may have both an EXACT_TYPE and
-  // a SAME_TYPE_ID. So we will see if there is an exact match among the
-  // candidate kernels and, if not, just return the first one
- for (auto kernel : candidate_kernels) {
- const InputType& arg0 = kernel->signature->in_types()[0];
- if (arg0.kind() == InputType::EXACT_TYPE) {
- // Bingo. Return it
- return kernel;
+ }
+
+  // Now we are in a casting scenario where we may have both an EXACT_TYPE and
+  // a SAME_TYPE_ID. So we will see if there is an exact match among the
+  // candidate kernels and, if not, just return the first one
+ for (auto kernel : candidate_kernels) {
+ const InputType& arg0 = kernel->signature->in_types()[0];
+ if (arg0.kind() == InputType::EXACT_TYPE) {
+ // Bingo. Return it
+ return kernel;
}
}
-
- // We didn't find an exact match. So just return some kernel that matches
- return candidate_kernels[0];
+
+ // We didn't find an exact match. So just return some kernel that matches
+ return candidate_kernels[0];
}
Result<Datum> Cast(const Datum& value, const CastOptions& options, ExecContext* ctx) {
@@ -237,37 +237,37 @@ Result<std::shared_ptr<CastFunction>> GetCastFunction(
bool CanCast(const DataType& from_type, const DataType& to_type) {
internal::EnsureInitCastTable();
- auto it = internal::g_cast_table.find(static_cast<int>(to_type.id()));
+ auto it = internal::g_cast_table.find(static_cast<int>(to_type.id()));
if (it == internal::g_cast_table.end()) {
return false;
}
-
- const CastFunction* function = it->second.get();
- DCHECK_EQ(function->out_type_id(), to_type.id());
-
- for (auto from_id : function->in_type_ids()) {
- // XXX should probably check the output type as well
- if (from_type.id() == from_id) return true;
- }
-
- return false;
+
+ const CastFunction* function = it->second.get();
+ DCHECK_EQ(function->out_type_id(), to_type.id());
+
+ for (auto from_id : function->in_type_ids()) {
+ // XXX should probably check the output type as well
+ if (from_type.id() == from_id) return true;
+ }
+
+ return false;
+}
+
+Result<std::vector<Datum>> Cast(std::vector<Datum> datums, std::vector<ValueDescr> descrs,
+ ExecContext* ctx) {
+ for (size_t i = 0; i != datums.size(); ++i) {
+ if (descrs[i] != datums[i].descr()) {
+ if (descrs[i].shape != datums[i].shape()) {
+ return Status::NotImplemented("casting between Datum shapes");
+ }
+
+ ARROW_ASSIGN_OR_RAISE(datums[i],
+ Cast(datums[i], CastOptions::Safe(descrs[i].type), ctx));
+ }
+ }
+
+ return datums;
}
-Result<std::vector<Datum>> Cast(std::vector<Datum> datums, std::vector<ValueDescr> descrs,
- ExecContext* ctx) {
- for (size_t i = 0; i != datums.size(); ++i) {
- if (descrs[i] != datums[i].descr()) {
- if (descrs[i].shape != datums[i].shape()) {
- return Status::NotImplemented("casting between Datum shapes");
- }
-
- ARROW_ASSIGN_OR_RAISE(datums[i],
- Cast(datums[i], CastOptions::Safe(descrs[i].type), ctx));
- }
- }
-
- return datums;
-}
-
} // namespace compute
} // namespace arrow
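
CanCast() is the cheap capability probe in front of this lookup machinery: it
only checks that a kernel is registered for the type pair, so a per-value cast
can still fail at execution time. A sketch:

    if (arrow::compute::CanCast(*arrow::utf8(), *arrow::int64())) {
      // a string -> int64 cast kernel exists; Cast() parses each value
    }
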
diff --git a/contrib/libs/apache/arrow/cpp/src/arrow/compute/cast.h b/contrib/libs/apache/arrow/cpp/src/arrow/compute/cast.h
index 5a2afd86845..131f57f892f 100644
--- a/contrib/libs/apache/arrow/cpp/src/arrow/compute/cast.h
+++ b/contrib/libs/apache/arrow/cpp/src/arrow/compute/cast.h
@@ -41,22 +41,22 @@ class ExecContext;
/// \addtogroup compute-concrete-options
/// @{
-class ARROW_EXPORT CastOptions : public FunctionOptions {
- public:
- explicit CastOptions(bool safe = true);
-
- constexpr static char const kTypeName[] = "CastOptions";
- static CastOptions Safe(std::shared_ptr<DataType> to_type = NULLPTR) {
- CastOptions safe(true);
- safe.to_type = std::move(to_type);
- return safe;
- }
-
- static CastOptions Unsafe(std::shared_ptr<DataType> to_type = NULLPTR) {
- CastOptions unsafe(false);
- unsafe.to_type = std::move(to_type);
- return unsafe;
- }
+class ARROW_EXPORT CastOptions : public FunctionOptions {
+ public:
+ explicit CastOptions(bool safe = true);
+
+ constexpr static char const kTypeName[] = "CastOptions";
+ static CastOptions Safe(std::shared_ptr<DataType> to_type = NULLPTR) {
+ CastOptions safe(true);
+ safe.to_type = std::move(to_type);
+ return safe;
+ }
+
+ static CastOptions Unsafe(std::shared_ptr<DataType> to_type = NULLPTR) {
+ CastOptions unsafe(false);
+ unsafe.to_type = std::move(to_type);
+ return unsafe;
+ }
// Type being casted to. May be passed separate to eager function
// compute::Cast
@@ -78,10 +78,10 @@ class ARROW_EXPORT CastOptions : public FunctionOptions {
// the same execution machinery
class CastFunction : public ScalarFunction {
public:
- CastFunction(std::string name, Type::type out_type_id);
+ CastFunction(std::string name, Type::type out_type_id);
- Type::type out_type_id() const { return out_type_id_; }
- const std::vector<Type::type>& in_type_ids() const { return in_type_ids_; }
+ Type::type out_type_id() const { return out_type_id_; }
+ const std::vector<Type::type>& in_type_ids() const { return in_type_ids_; }
Status AddKernel(Type::type in_type_id, std::vector<InputType> in_types,
OutputType out_type, ArrayKernelExec exec,
@@ -92,12 +92,12 @@ class CastFunction : public ScalarFunction {
// function to CastInit
Status AddKernel(Type::type in_type_id, ScalarKernel kernel);
- Result<const Kernel*> DispatchExact(
+ Result<const Kernel*> DispatchExact(
const std::vector<ValueDescr>& values) const override;
private:
- std::vector<Type::type> in_type_ids_;
- const Type::type out_type_id_;
+ std::vector<Type::type> in_type_ids_;
+ const Type::type out_type_id_;
};
ARROW_EXPORT
@@ -151,17 +151,17 @@ Result<Datum> Cast(const Datum& value, std::shared_ptr<DataType> to_type,
const CastOptions& options = CastOptions::Safe(),
ExecContext* ctx = NULLPTR);
-/// \brief Cast several values simultaneously. Safe cast options are used.
-/// \param[in] values datums to cast
-/// \param[in] descrs ValueDescrs to cast to
-/// \param[in] ctx the function execution context, optional
-/// \return the resulting datums
-///
-/// \since 4.0.0
-/// \note API not yet finalized
-ARROW_EXPORT
-Result<std::vector<Datum>> Cast(std::vector<Datum> values, std::vector<ValueDescr> descrs,
- ExecContext* ctx = NULLPTR);
-
+/// \brief Cast several values simultaneously. Safe cast options are used.
+/// \param[in] values datums to cast
+/// \param[in] descrs ValueDescrs to cast to
+/// \param[in] ctx the function execution context, optional
+/// \return the resulting datums
+///
+/// \since 4.0.0
+/// \note API not yet finalized
+ARROW_EXPORT
+Result<std::vector<Datum>> Cast(std::vector<Datum> values, std::vector<ValueDescr> descrs,
+ ExecContext* ctx = NULLPTR);
+
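
The scalar and vector forms compose the same way; a sketch with hypothetical
inputs `int_array`, `a`, and `b` (all std::shared_ptr<arrow::Array>), casting
to float64 under the safe defaults:

    ARROW_ASSIGN_OR_RAISE(arrow::Datum as_double,
        arrow::compute::Cast(int_array, arrow::float64()));
    // Vector form: one ValueDescr per input datum, Safe options throughout.
    ARROW_ASSIGN_OR_RAISE(auto casted, arrow::compute::Cast(
        {arrow::Datum(a), arrow::Datum(b)},
        {arrow::ValueDescr::Array(arrow::float64()),
         arrow::ValueDescr::Array(arrow::float64())}));
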
} // namespace compute
} // namespace arrow
diff --git a/contrib/libs/apache/arrow/cpp/src/arrow/compute/cast_internal.h b/contrib/libs/apache/arrow/cpp/src/arrow/compute/cast_internal.h
index 7e784a0b61a..0105d08a573 100644
--- a/contrib/libs/apache/arrow/cpp/src/arrow/compute/cast_internal.h
+++ b/contrib/libs/apache/arrow/cpp/src/arrow/compute/cast_internal.h
@@ -36,7 +36,7 @@ std::vector<std::shared_ptr<CastFunction>> GetNumericCasts();
std::vector<std::shared_ptr<CastFunction>> GetTemporalCasts();
std::vector<std::shared_ptr<CastFunction>> GetBinaryLikeCasts();
std::vector<std::shared_ptr<CastFunction>> GetNestedCasts();
-std::vector<std::shared_ptr<CastFunction>> GetDictionaryCasts();
+std::vector<std::shared_ptr<CastFunction>> GetDictionaryCasts();
} // namespace internal
} // namespace compute
diff --git a/contrib/libs/apache/arrow/cpp/src/arrow/compute/exec.cc b/contrib/libs/apache/arrow/cpp/src/arrow/compute/exec.cc
index 8998df465e5..63f8d39f551 100644
--- a/contrib/libs/apache/arrow/cpp/src/arrow/compute/exec.cc
+++ b/contrib/libs/apache/arrow/cpp/src/arrow/compute/exec.cc
@@ -36,8 +36,8 @@
#include "arrow/compute/registry.h"
#include "arrow/compute/util_internal.h"
#include "arrow/datum.h"
-#include "arrow/pretty_print.h"
-#include "arrow/record_batch.h"
+#include "arrow/pretty_print.h"
+#include "arrow/record_batch.h"
#include "arrow/scalar.h"
#include "arrow/status.h"
#include "arrow/type.h"
@@ -47,8 +47,8 @@
#include "arrow/util/checked_cast.h"
#include "arrow/util/cpu_info.h"
#include "arrow/util/logging.h"
-#include "arrow/util/make_unique.h"
-#include "arrow/util/vector.h"
+#include "arrow/util/make_unique.h"
+#include "arrow/util/vector.h"
namespace arrow {
@@ -59,104 +59,104 @@ using internal::CpuInfo;
namespace compute {
-ExecContext* default_exec_context() {
- static ExecContext default_ctx;
- return &default_ctx;
-}
-
-ExecBatch::ExecBatch(const RecordBatch& batch)
- : values(batch.num_columns()), length(batch.num_rows()) {
- auto columns = batch.column_data();
- std::move(columns.begin(), columns.end(), values.begin());
-}
-
-bool ExecBatch::Equals(const ExecBatch& other) const {
- return guarantee == other.guarantee && values == other.values;
-}
-
-void PrintTo(const ExecBatch& batch, std::ostream* os) {
- *os << "ExecBatch\n";
-
- static const std::string indent = " ";
-
- *os << indent << "# Rows: " << batch.length << "\n";
- if (batch.guarantee != literal(true)) {
- *os << indent << "Guarantee: " << batch.guarantee.ToString() << "\n";
- }
-
- int i = 0;
- for (const Datum& value : batch.values) {
- *os << indent << "" << i++ << ": ";
-
- if (value.is_scalar()) {
- *os << "Scalar[" << value.scalar()->ToString() << "]\n";
- continue;
- }
-
- auto array = value.make_array();
- PrettyPrintOptions options;
- options.skip_new_lines = true;
- *os << "Array";
- ARROW_CHECK_OK(PrettyPrint(*array, options, os));
- *os << "\n";
- }
-}
-
-ExecBatch ExecBatch::Slice(int64_t offset, int64_t length) const {
- ExecBatch out = *this;
- for (auto& value : out.values) {
- if (value.is_scalar()) continue;
- value = value.array()->Slice(offset, length);
- }
- out.length = length;
- return out;
-}
-
-Result<ExecBatch> ExecBatch::Make(std::vector<Datum> values) {
- if (values.empty()) {
- return Status::Invalid("Cannot infer ExecBatch length without at least one value");
- }
-
- int64_t length = -1;
- for (const auto& value : values) {
- if (value.is_scalar()) {
- continue;
- }
-
- if (length == -1) {
- length = value.length();
- continue;
- }
-
- if (length != value.length()) {
- return Status::Invalid(
- "Arrays used to construct an ExecBatch must have equal length");
- }
- }
-
- if (length == -1) {
- length = 1;
- }
-
- return ExecBatch(std::move(values), length);
-}
-
-Result<std::shared_ptr<RecordBatch>> ExecBatch::ToRecordBatch(
- std::shared_ptr<Schema> schema, MemoryPool* pool) const {
- ArrayVector columns(schema->num_fields());
-
- for (size_t i = 0; i < columns.size(); ++i) {
- const Datum& value = values[i];
- if (value.is_array()) {
- columns[i] = value.make_array();
- continue;
- }
- ARROW_ASSIGN_OR_RAISE(columns[i], MakeArrayFromScalar(*value.scalar(), length, pool));
- }
-
- return RecordBatch::Make(std::move(schema), length, std::move(columns));
-}
-
+ExecContext* default_exec_context() {
+ static ExecContext default_ctx;
+ return &default_ctx;
+}
+
+ExecBatch::ExecBatch(const RecordBatch& batch)
+ : values(batch.num_columns()), length(batch.num_rows()) {
+ auto columns = batch.column_data();
+ std::move(columns.begin(), columns.end(), values.begin());
+}
+
+bool ExecBatch::Equals(const ExecBatch& other) const {
+ return guarantee == other.guarantee && values == other.values;
+}
+
+void PrintTo(const ExecBatch& batch, std::ostream* os) {
+ *os << "ExecBatch\n";
+
+ static const std::string indent = " ";
+
+ *os << indent << "# Rows: " << batch.length << "\n";
+ if (batch.guarantee != literal(true)) {
+ *os << indent << "Guarantee: " << batch.guarantee.ToString() << "\n";
+ }
+
+ int i = 0;
+ for (const Datum& value : batch.values) {
+ *os << indent << "" << i++ << ": ";
+
+ if (value.is_scalar()) {
+ *os << "Scalar[" << value.scalar()->ToString() << "]\n";
+ continue;
+ }
+
+ auto array = value.make_array();
+ PrettyPrintOptions options;
+ options.skip_new_lines = true;
+ *os << "Array";
+ ARROW_CHECK_OK(PrettyPrint(*array, options, os));
+ *os << "\n";
+ }
+}
+
+ExecBatch ExecBatch::Slice(int64_t offset, int64_t length) const {
+ ExecBatch out = *this;
+ for (auto& value : out.values) {
+ if (value.is_scalar()) continue;
+ value = value.array()->Slice(offset, length);
+ }
+ out.length = length;
+ return out;
+}
+
+Result<ExecBatch> ExecBatch::Make(std::vector<Datum> values) {
+ if (values.empty()) {
+ return Status::Invalid("Cannot infer ExecBatch length without at least one value");
+ }
+
+ int64_t length = -1;
+ for (const auto& value : values) {
+ if (value.is_scalar()) {
+ continue;
+ }
+
+ if (length == -1) {
+ length = value.length();
+ continue;
+ }
+
+ if (length != value.length()) {
+ return Status::Invalid(
+ "Arrays used to construct an ExecBatch must have equal length");
+ }
+ }
+
+ if (length == -1) {
+ length = 1;
+ }
+
+ return ExecBatch(std::move(values), length);
+}
+
+Result<std::shared_ptr<RecordBatch>> ExecBatch::ToRecordBatch(
+ std::shared_ptr<Schema> schema, MemoryPool* pool) const {
+ ArrayVector columns(schema->num_fields());
+
+ for (size_t i = 0; i < columns.size(); ++i) {
+ const Datum& value = values[i];
+ if (value.is_array()) {
+ columns[i] = value.make_array();
+ continue;
+ }
+ ARROW_ASSIGN_OR_RAISE(columns[i], MakeArrayFromScalar(*value.scalar(), length, pool));
+ }
+
+ return RecordBatch::Make(std::move(schema), length, std::move(columns));
+}
+
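A usage sketch for the two helpers re-added above (function and variable names hypothetical): Make infers the batch length from the array arguments, treating scalars as broadcast (an all-scalar batch gets length 1), and ToRecordBatch materializes any scalar values to full-length arrays.

    #include "arrow/api.h"
    #include "arrow/compute/exec.h"

    arrow::Status RoundTrip(const std::shared_ptr<arrow::Array>& arr) {
      using arrow::compute::ExecBatch;
      auto seven = std::make_shared<arrow::Int64Scalar>(7);
      ARROW_ASSIGN_OR_RAISE(
          ExecBatch batch,
          ExecBatch::Make({arrow::Datum(arr), arrow::Datum(seven)}));
      // batch.length == arr->length(); the scalar is logically broadcast.
      auto schema = arrow::schema({arrow::field("a", arr->type()),
                                   arrow::field("b", arrow::int64())});
      ARROW_ASSIGN_OR_RAISE(auto rb, batch.ToRecordBatch(schema));
      return rb->ValidateFull();  // column "b" is now a length-n array of 7s
    }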
namespace {
Result<std::shared_ptr<Buffer>> AllocateDataBuffer(KernelContext* ctx, int64_t length,
@@ -164,57 +164,57 @@ Result<std::shared_ptr<Buffer>> AllocateDataBuffer(KernelContext* ctx, int64_t l
if (bit_width == 1) {
return ctx->AllocateBitmap(length);
} else {
- int64_t buffer_size = BitUtil::BytesForBits(length * bit_width);
+ int64_t buffer_size = BitUtil::BytesForBits(length * bit_width);
return ctx->Allocate(buffer_size);
}
}
-struct BufferPreallocation {
- explicit BufferPreallocation(int bit_width = -1, int added_length = 0)
- : bit_width(bit_width), added_length(added_length) {}
-
- int bit_width;
- int added_length;
-};
-
-void ComputeDataPreallocate(const DataType& type,
- std::vector<BufferPreallocation>* widths) {
- if (is_fixed_width(type.id()) && type.id() != Type::NA) {
- widths->emplace_back(checked_cast<const FixedWidthType&>(type).bit_width());
- return;
+struct BufferPreallocation {
+ explicit BufferPreallocation(int bit_width = -1, int added_length = 0)
+ : bit_width(bit_width), added_length(added_length) {}
+
+ int bit_width;
+ int added_length;
+};
+
+void ComputeDataPreallocate(const DataType& type,
+ std::vector<BufferPreallocation>* widths) {
+ if (is_fixed_width(type.id()) && type.id() != Type::NA) {
+ widths->emplace_back(checked_cast<const FixedWidthType&>(type).bit_width());
+ return;
+ }
+ // Preallocate binary and list offsets
+ switch (type.id()) {
+ case Type::BINARY:
+ case Type::STRING:
+ case Type::LIST:
+ case Type::MAP:
+ widths->emplace_back(32, /*added_length=*/1);
+ return;
+ case Type::LARGE_BINARY:
+ case Type::LARGE_STRING:
+ case Type::LARGE_LIST:
+ widths->emplace_back(64, /*added_length=*/1);
+ return;
+ default:
+ break;
}
- // Preallocate binary and list offsets
- switch (type.id()) {
- case Type::BINARY:
- case Type::STRING:
- case Type::LIST:
- case Type::MAP:
- widths->emplace_back(32, /*added_length=*/1);
- return;
- case Type::LARGE_BINARY:
- case Type::LARGE_STRING:
- case Type::LARGE_LIST:
- widths->emplace_back(64, /*added_length=*/1);
- return;
- default:
- break;
- }
}
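A quick note on the added_length of 1 above: n variable-length values need n + 1 offsets, because the final offset marks the end of the last value. The resulting buffer size for 32-bit offsets, as a sketch:

    #include <cstdint>

    // Bytes needed for the offsets buffer of n binary/string/list values.
    int64_t OffsetsBufferBytes32(int64_t num_values) {
      return (num_values + 1) * static_cast<int64_t>(sizeof(int32_t));
    }
    // e.g. 3 strings -> 4 offsets -> 16 bytes.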
} // namespace
namespace detail {
-Status CheckAllValues(const std::vector<Datum>& values) {
- for (const auto& value : values) {
- if (!value.is_value()) {
- return Status::Invalid("Tried executing function with non-value type: ",
- value.ToString());
- }
- }
- return Status::OK();
-}
-
+Status CheckAllValues(const std::vector<Datum>& values) {
+ for (const auto& value : values) {
+ if (!value.is_value()) {
+ return Status::Invalid("Tried executing function with non-value type: ",
+ value.ToString());
+ }
+ }
+ return Status::OK();
+}
+
ExecBatchIterator::ExecBatchIterator(std::vector<Datum> args, int64_t length,
int64_t max_chunksize)
: args_(std::move(args)),
@@ -311,35 +311,35 @@ bool ExecBatchIterator::Next(ExecBatch* batch) {
return true;
}
-namespace {
-
-struct NullGeneralization {
- enum type { PERHAPS_NULL, ALL_VALID, ALL_NULL };
-
- static type Get(const Datum& datum) {
- if (datum.type()->id() == Type::NA) {
- return ALL_NULL;
- }
-
- if (datum.is_scalar()) {
- return datum.scalar()->is_valid ? ALL_VALID : ALL_NULL;
- }
-
- const auto& arr = *datum.array();
-
+namespace {
+
+struct NullGeneralization {
+ enum type { PERHAPS_NULL, ALL_VALID, ALL_NULL };
+
+ static type Get(const Datum& datum) {
+ if (datum.type()->id() == Type::NA) {
+ return ALL_NULL;
+ }
+
+ if (datum.is_scalar()) {
+ return datum.scalar()->is_valid ? ALL_VALID : ALL_NULL;
+ }
+
+ const auto& arr = *datum.array();
+
// Do not count the bits if they haven't been counted already
- const int64_t known_null_count = arr.null_count.load();
- if ((known_null_count == 0) || (arr.buffers[0] == NULLPTR)) {
- return ALL_VALID;
- }
-
- if (known_null_count == arr.length) {
- return ALL_NULL;
- }
-
- return PERHAPS_NULL;
+ const int64_t known_null_count = arr.null_count.load();
+ if ((known_null_count == 0) || (arr.buffers[0] == NULLPTR)) {
+ return ALL_VALID;
+ }
+
+ if (known_null_count == arr.length) {
+ return ALL_NULL;
+ }
+
+ return PERHAPS_NULL;
}
-};
+};
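This three-way classification is what enables the executor's fast paths: all-valid inputs need no bitmap work at all, and a single all-null input short-circuits the whole batch. The same logic as a standalone sketch (hypothetical names); note that an unknown null count, stored as -1, falls through to the "perhaps" case.

    #include <cstdint>

    enum class NullState { kPerhapsNull, kAllValid, kAllNull };

    NullState Classify(int64_t known_null_count, int64_t length,
                       bool has_validity_buffer) {
      if (known_null_count == 0 || !has_validity_buffer) return NullState::kAllValid;
      if (known_null_count == length) return NullState::kAllNull;
      return NullState::kPerhapsNull;  // includes unknown (-1) null counts
    }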
// Null propagation implementation that deals both with preallocated bitmaps
// and maybe-to-be allocated bitmaps
@@ -356,17 +356,17 @@ class NullPropagator {
public:
NullPropagator(KernelContext* ctx, const ExecBatch& batch, ArrayData* output)
: ctx_(ctx), batch_(batch), output_(output) {
- for (const Datum& datum : batch_.values) {
- auto null_generalization = NullGeneralization::Get(datum);
-
- if (null_generalization == NullGeneralization::ALL_NULL) {
- is_all_null_ = true;
+ for (const Datum& datum : batch_.values) {
+ auto null_generalization = NullGeneralization::Get(datum);
+
+ if (null_generalization == NullGeneralization::ALL_NULL) {
+ is_all_null_ = true;
+ }
+
+ if (null_generalization != NullGeneralization::ALL_VALID &&
+ datum.kind() == Datum::ARRAY) {
+ arrays_with_nulls_.push_back(datum.array().get());
}
-
- if (null_generalization != NullGeneralization::ALL_VALID &&
- datum.kind() == Datum::ARRAY) {
- arrays_with_nulls_.push_back(datum.array().get());
- }
}
if (output->buffers[0] != nullptr) {
@@ -386,33 +386,33 @@ class NullPropagator {
return Status::OK();
}
- Status AllNullShortCircuit() {
- // OK, the output should be all null
- output_->null_count = output_->length;
+ Status AllNullShortCircuit() {
+ // OK, the output should be all null
+ output_->null_count = output_->length;
+
+ if (bitmap_preallocated_) {
+ BitUtil::SetBitsTo(bitmap_, output_->offset, output_->length, false);
+ return Status::OK();
+ }
- if (bitmap_preallocated_) {
- BitUtil::SetBitsTo(bitmap_, output_->offset, output_->length, false);
- return Status::OK();
- }
-
// Walk all the values with nulls instead of breaking on the first in case
// we find a bitmap that can be reused in the non-preallocated case
- for (const ArrayData* arr : arrays_with_nulls_) {
- if (arr->null_count.load() == arr->length && arr->buffers[0] != nullptr) {
- // Reuse this all null bitmap
- output_->buffers[0] = arr->buffers[0];
- return Status::OK();
+ for (const ArrayData* arr : arrays_with_nulls_) {
+ if (arr->null_count.load() == arr->length && arr->buffers[0] != nullptr) {
+ // Reuse this all null bitmap
+ output_->buffers[0] = arr->buffers[0];
+ return Status::OK();
}
}
- RETURN_NOT_OK(EnsureAllocated());
- BitUtil::SetBitsTo(bitmap_, output_->offset, output_->length, false);
- return Status::OK();
+ RETURN_NOT_OK(EnsureAllocated());
+ BitUtil::SetBitsTo(bitmap_, output_->offset, output_->length, false);
+ return Status::OK();
}
Status PropagateSingle() {
// One array
- const ArrayData& arr = *arrays_with_nulls_[0];
+ const ArrayData& arr = *arrays_with_nulls_[0];
const std::shared_ptr<Buffer>& arr_bitmap = arr.buffers[0];
// Reuse the null count if it's known
@@ -420,27 +420,27 @@ class NullPropagator {
if (bitmap_preallocated_) {
CopyBitmap(arr_bitmap->data(), arr.offset, arr.length, bitmap_, output_->offset);
- return Status::OK();
- }
-
- // Two cases when memory was not pre-allocated:
- //
- // * Offset is zero: we reuse the bitmap as is
- // * Offset is nonzero but a multiple of 8: we can slice the bitmap
- // * Offset is not a multiple of 8: we must allocate and use CopyBitmap
- //
- // Keep in mind that output_->offset is not permitted to be nonzero when
- // the bitmap is not preallocated, and that precondition is asserted
- // higher in the call stack.
- if (arr.offset == 0) {
- output_->buffers[0] = arr_bitmap;
- } else if (arr.offset % 8 == 0) {
- output_->buffers[0] =
- SliceBuffer(arr_bitmap, arr.offset / 8, BitUtil::BytesForBits(arr.length));
+ return Status::OK();
+ }
+
+  // Three cases when memory was not pre-allocated:
+ //
+ // * Offset is zero: we reuse the bitmap as is
+ // * Offset is nonzero but a multiple of 8: we can slice the bitmap
+ // * Offset is not a multiple of 8: we must allocate and use CopyBitmap
+ //
+ // Keep in mind that output_->offset is not permitted to be nonzero when
+ // the bitmap is not preallocated, and that precondition is asserted
+ // higher in the call stack.
+ if (arr.offset == 0) {
+ output_->buffers[0] = arr_bitmap;
+ } else if (arr.offset % 8 == 0) {
+ output_->buffers[0] =
+ SliceBuffer(arr_bitmap, arr.offset / 8, BitUtil::BytesForBits(arr.length));
} else {
- RETURN_NOT_OK(EnsureAllocated());
- CopyBitmap(arr_bitmap->data(), arr.offset, arr.length, bitmap_,
- /*dst_offset=*/0);
+ RETURN_NOT_OK(EnsureAllocated());
+ CopyBitmap(arr_bitmap->data(), arr.offset, arr.length, bitmap_,
+ /*dst_offset=*/0);
}
return Status::OK();
}
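The three branches above hinge on bit-level addressing: a validity bitmap can be shared zero-copy only when the slice starts on a byte boundary. The arithmetic, as a sketch using nothing beyond the C++ standard library:

    #include <cstdint>

    // Round a bit count up to whole bytes (what BitUtil::BytesForBits does).
    int64_t BytesForBits(int64_t bits) { return (bits + 7) / 8; }

    // offset 0  -> share the buffer as-is
    // offset 16 -> share a slice starting at byte 2 (16 / 8)
    // offset 3  -> not byte-aligned, so bits must be copied out
    bool CanZeroCopySlice(int64_t bit_offset) { return bit_offset % 8 == 0; }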
@@ -459,27 +459,27 @@ class NullPropagator {
output_->buffers[0]->mutable_data());
};
- DCHECK_GT(arrays_with_nulls_.size(), 1);
+ DCHECK_GT(arrays_with_nulls_.size(), 1);
// Seed the output bitmap with the & of the first two bitmaps
- Accumulate(*arrays_with_nulls_[0], *arrays_with_nulls_[1]);
+ Accumulate(*arrays_with_nulls_[0], *arrays_with_nulls_[1]);
// Accumulate the rest
- for (size_t i = 2; i < arrays_with_nulls_.size(); ++i) {
- Accumulate(*output_, *arrays_with_nulls_[i]);
+ for (size_t i = 2; i < arrays_with_nulls_.size(); ++i) {
+ Accumulate(*output_, *arrays_with_nulls_[i]);
}
return Status::OK();
}
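PropagateMultiple folds the inputs' bitmaps together pairwise; conceptually, NullHandling::INTERSECTION is a bitwise AND of validity bitmaps, since a row is valid only when it is valid in every input. A byte-aligned sketch (the real Accumulate handles arbitrary bit offsets):

    #include <cstddef>
    #include <cstdint>

    // out[i] = left[i] & right[i] over whole bytes; a set bit means "valid".
    void AndValidityBytes(const uint8_t* left, const uint8_t* right,
                          uint8_t* out, size_t nbytes) {
      for (size_t i = 0; i < nbytes; ++i) out[i] = left[i] & right[i];
    }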
Status Execute() {
- if (is_all_null_) {
- // An all-null value (scalar null or all-null array) gives us a short
- // circuit opportunity
- return AllNullShortCircuit();
+ if (is_all_null_) {
+ // An all-null value (scalar null or all-null array) gives us a short
+ // circuit opportunity
+ return AllNullShortCircuit();
}
// At this point, by construction we know that all of the values in
- // arrays_with_nulls_ are arrays that are not all null. So there are a
+ // arrays_with_nulls_ are arrays that are not all null. So there are a
// few cases:
//
// * No arrays. This is a no-op w/o preallocation but when the bitmap is
@@ -494,27 +494,27 @@ class NullPropagator {
output_->null_count = kUnknownNullCount;
- if (arrays_with_nulls_.empty()) {
+ if (arrays_with_nulls_.empty()) {
// No arrays with nulls case
output_->null_count = 0;
if (bitmap_preallocated_) {
BitUtil::SetBitsTo(bitmap_, output_->offset, output_->length, true);
}
return Status::OK();
- }
-
- if (arrays_with_nulls_.size() == 1) {
+ }
+
+ if (arrays_with_nulls_.size() == 1) {
return PropagateSingle();
}
-
- return PropagateMultiple();
+
+ return PropagateMultiple();
}
private:
KernelContext* ctx_;
const ExecBatch& batch_;
- std::vector<const ArrayData*> arrays_with_nulls_;
- bool is_all_null_ = false;
+ std::vector<const ArrayData*> arrays_with_nulls_;
+ bool is_all_null_ = false;
ArrayData* output_;
uint8_t* bitmap_;
bool bitmap_preallocated_ = false;
@@ -523,15 +523,15 @@ class NullPropagator {
std::shared_ptr<ChunkedArray> ToChunkedArray(const std::vector<Datum>& values,
const std::shared_ptr<DataType>& type) {
std::vector<std::shared_ptr<Array>> arrays;
- arrays.reserve(values.size());
- for (const Datum& val : values) {
- if (val.length() == 0) {
+ arrays.reserve(values.size());
+ for (const Datum& val : values) {
+ if (val.length() == 0) {
// Skip empty chunks
continue;
}
- arrays.emplace_back(val.make_array());
+ arrays.emplace_back(val.make_array());
}
- return std::make_shared<ChunkedArray>(std::move(arrays), type);
+ return std::make_shared<ChunkedArray>(std::move(arrays), type);
}
bool HaveChunkedArray(const std::vector<Datum>& values) {
@@ -543,25 +543,25 @@ bool HaveChunkedArray(const std::vector<Datum>& values) {
return false;
}
-template <typename KernelType>
-class KernelExecutorImpl : public KernelExecutor {
+template <typename KernelType>
+class KernelExecutorImpl : public KernelExecutor {
public:
- Status Init(KernelContext* kernel_ctx, KernelInitArgs args) override {
- kernel_ctx_ = kernel_ctx;
- kernel_ = static_cast<const KernelType*>(args.kernel);
+ Status Init(KernelContext* kernel_ctx, KernelInitArgs args) override {
+ kernel_ctx_ = kernel_ctx;
+ kernel_ = static_cast<const KernelType*>(args.kernel);
- // Resolve the output descriptor for this kernel
- ARROW_ASSIGN_OR_RAISE(
- output_descr_, kernel_->signature->out_type().Resolve(kernel_ctx_, args.inputs));
+ // Resolve the output descriptor for this kernel
+ ARROW_ASSIGN_OR_RAISE(
+ output_descr_, kernel_->signature->out_type().Resolve(kernel_ctx_, args.inputs));
return Status::OK();
}
- protected:
+ protected:
// This is overridden by the VectorExecutor
virtual Status SetupArgIteration(const std::vector<Datum>& args) {
- ARROW_ASSIGN_OR_RAISE(
- batch_iterator_, ExecBatchIterator::Make(args, exec_context()->exec_chunksize()));
+ ARROW_ASSIGN_OR_RAISE(
+ batch_iterator_, ExecBatchIterator::Make(args, exec_context()->exec_chunksize()));
return Status::OK();
}
@@ -570,29 +570,29 @@ class KernelExecutorImpl : public KernelExecutor {
out->buffers.resize(output_num_buffers_);
if (validity_preallocated_) {
- ARROW_ASSIGN_OR_RAISE(out->buffers[0], kernel_ctx_->AllocateBitmap(length));
+ ARROW_ASSIGN_OR_RAISE(out->buffers[0], kernel_ctx_->AllocateBitmap(length));
}
- if (kernel_->null_handling == NullHandling::OUTPUT_NOT_NULL) {
- out->null_count = 0;
+ if (kernel_->null_handling == NullHandling::OUTPUT_NOT_NULL) {
+ out->null_count = 0;
+ }
+ for (size_t i = 0; i < data_preallocated_.size(); ++i) {
+ const auto& prealloc = data_preallocated_[i];
+ if (prealloc.bit_width >= 0) {
+ ARROW_ASSIGN_OR_RAISE(
+ out->buffers[i + 1],
+ AllocateDataBuffer(kernel_ctx_, length + prealloc.added_length,
+ prealloc.bit_width));
+ }
}
- for (size_t i = 0; i < data_preallocated_.size(); ++i) {
- const auto& prealloc = data_preallocated_[i];
- if (prealloc.bit_width >= 0) {
- ARROW_ASSIGN_OR_RAISE(
- out->buffers[i + 1],
- AllocateDataBuffer(kernel_ctx_, length + prealloc.added_length,
- prealloc.bit_width));
- }
- }
return out;
}
- ExecContext* exec_context() { return kernel_ctx_->exec_context(); }
- KernelState* state() { return kernel_ctx_->state(); }
+ ExecContext* exec_context() { return kernel_ctx_->exec_context(); }
+ KernelState* state() { return kernel_ctx_->state(); }
// Not all of these members are used for every executor type
- KernelContext* kernel_ctx_;
+ KernelContext* kernel_ctx_;
const KernelType* kernel_;
std::unique_ptr<ExecBatchIterator> batch_iterator_;
ValueDescr output_descr_;
@@ -602,13 +602,13 @@ class KernelExecutorImpl : public KernelExecutor {
// If true, then memory is preallocated for the validity bitmap with the same
// strategy as the data buffer(s).
bool validity_preallocated_ = false;
-
- // The kernel writes into data buffers preallocated for these bit widths
- // (0 indicates no preallocation);
- std::vector<BufferPreallocation> data_preallocated_;
+
+ // The kernel writes into data buffers preallocated for these bit widths
+  // (a negative bit width indicates no preallocation).
+ std::vector<BufferPreallocation> data_preallocated_;
};
-class ScalarExecutor : public KernelExecutorImpl<ScalarKernel> {
+class ScalarExecutor : public KernelExecutorImpl<ScalarKernel> {
public:
Status Execute(const std::vector<Datum>& args, ExecListener* listener) override {
RETURN_NOT_OK(PrepareExecute(args));
@@ -646,9 +646,9 @@ class ScalarExecutor : public KernelExecutorImpl<ScalarKernel> {
} else {
// XXX: In the case where no outputs are emitted, is returning a 0-length
// array always the correct move?
- return MakeArrayOfNull(output_descr_.type, /*length=*/0,
- exec_context()->memory_pool())
- .ValueOrDie();
+ return MakeArrayOfNull(output_descr_.type, /*length=*/0,
+ exec_context()->memory_pool())
+ .ValueOrDie();
}
}
}
@@ -661,7 +661,7 @@ class ScalarExecutor : public KernelExecutorImpl<ScalarKernel> {
if (output_descr_.shape == ValueDescr::ARRAY) {
ArrayData* out_arr = out.mutable_array();
if (kernel_->null_handling == NullHandling::INTERSECTION) {
- RETURN_NOT_OK(PropagateNulls(kernel_ctx_, batch, out_arr));
+ RETURN_NOT_OK(PropagateNulls(kernel_ctx_, batch, out_arr));
} else if (kernel_->null_handling == NullHandling::OUTPUT_NOT_NULL) {
out_arr->null_count = 0;
}
@@ -676,7 +676,7 @@ class ScalarExecutor : public KernelExecutorImpl<ScalarKernel> {
}
}
- RETURN_NOT_OK(kernel_->exec(kernel_ctx_, batch, &out));
+ RETURN_NOT_OK(kernel_->exec(kernel_ctx_, batch, &out));
if (!preallocate_contiguous_) {
// If we are producing chunked output rather than one big array, then
// emit each chunk as soon as it's available
@@ -686,7 +686,7 @@ class ScalarExecutor : public KernelExecutorImpl<ScalarKernel> {
}
Status PrepareExecute(const std::vector<Datum>& args) {
- RETURN_NOT_OK(this->SetupArgIteration(args));
+ RETURN_NOT_OK(this->SetupArgIteration(args));
if (output_descr_.shape == ValueDescr::ARRAY) {
// If the executor is configured to produce a single large Array output for
@@ -749,26 +749,26 @@ class ScalarExecutor : public KernelExecutorImpl<ScalarKernel> {
// Decide if we need to preallocate memory for this kernel
validity_preallocated_ =
(kernel_->null_handling != NullHandling::COMPUTED_NO_PREALLOCATE &&
- kernel_->null_handling != NullHandling::OUTPUT_NOT_NULL &&
- output_descr_.type->id() != Type::NA);
- if (kernel_->mem_allocation == MemAllocation::PREALLOCATE) {
- ComputeDataPreallocate(*output_descr_.type, &data_preallocated_);
- }
-
- // Contiguous preallocation only possible on non-nested types if all
- // buffers are preallocated. Otherwise, we must go chunk-by-chunk.
+ kernel_->null_handling != NullHandling::OUTPUT_NOT_NULL &&
+ output_descr_.type->id() != Type::NA);
+ if (kernel_->mem_allocation == MemAllocation::PREALLOCATE) {
+ ComputeDataPreallocate(*output_descr_.type, &data_preallocated_);
+ }
+
+ // Contiguous preallocation only possible on non-nested types if all
+ // buffers are preallocated. Otherwise, we must go chunk-by-chunk.
//
- // Some kernels are also unable to write into sliced outputs, so we respect the
- // kernel's attributes.
+ // Some kernels are also unable to write into sliced outputs, so we respect the
+ // kernel's attributes.
preallocate_contiguous_ =
- (exec_context()->preallocate_contiguous() && kernel_->can_write_into_slices &&
- validity_preallocated_ && !is_nested(output_descr_.type->id()) &&
- !is_dictionary(output_descr_.type->id()) &&
- data_preallocated_.size() == static_cast<size_t>(output_num_buffers_ - 1) &&
- std::all_of(data_preallocated_.begin(), data_preallocated_.end(),
- [](const BufferPreallocation& prealloc) {
- return prealloc.bit_width >= 0;
- }));
+ (exec_context()->preallocate_contiguous() && kernel_->can_write_into_slices &&
+ validity_preallocated_ && !is_nested(output_descr_.type->id()) &&
+ !is_dictionary(output_descr_.type->id()) &&
+ data_preallocated_.size() == static_cast<size_t>(output_num_buffers_ - 1) &&
+ std::all_of(data_preallocated_.begin(), data_preallocated_.end(),
+ [](const BufferPreallocation& prealloc) {
+ return prealloc.bit_width >= 0;
+ }));
if (preallocate_contiguous_) {
ARROW_ASSIGN_OR_RAISE(preallocated_, PrepareOutput(total_length));
}
@@ -790,7 +790,7 @@ Status PackBatchNoChunks(const std::vector<Datum>& args, ExecBatch* out) {
switch (arg.kind()) {
case Datum::SCALAR:
case Datum::ARRAY:
- case Datum::CHUNKED_ARRAY:
+ case Datum::CHUNKED_ARRAY:
length = std::max(arg.length(), length);
break;
default:
@@ -803,7 +803,7 @@ Status PackBatchNoChunks(const std::vector<Datum>& args, ExecBatch* out) {
return Status::OK();
}
-class VectorExecutor : public KernelExecutorImpl<VectorKernel> {
+class VectorExecutor : public KernelExecutorImpl<VectorKernel> {
public:
Status Execute(const std::vector<Datum>& args, ExecListener* listener) override {
RETURN_NOT_OK(PrepareExecute(args));
@@ -823,15 +823,15 @@ class VectorExecutor : public KernelExecutorImpl<VectorKernel> {
const std::vector<Datum>& outputs) override {
// If execution yielded multiple chunks (because large arrays were split
// based on the ExecContext parameters), then the result is a ChunkedArray
- if (kernel_->output_chunked && (HaveChunkedArray(inputs) || outputs.size() > 1)) {
- return ToChunkedArray(outputs, output_descr_.type);
- } else if (outputs.size() == 1) {
- // Outputs have just one element
- return outputs[0];
+ if (kernel_->output_chunked && (HaveChunkedArray(inputs) || outputs.size() > 1)) {
+ return ToChunkedArray(outputs, output_descr_.type);
+ } else if (outputs.size() == 1) {
+ // Outputs have just one element
+ return outputs[0];
} else {
- // XXX: In the case where no outputs are omitted, is returning a 0-length
- // array always the correct move?
- return MakeArrayOfNull(output_descr_.type, /*length=*/0).ValueOrDie();
+      // XXX: In the case where no outputs are emitted, is returning a 0-length
+ // array always the correct move?
+ return MakeArrayOfNull(output_descr_.type, /*length=*/0).ValueOrDie();
}
}
@@ -851,9 +851,9 @@ class VectorExecutor : public KernelExecutorImpl<VectorKernel> {
if (kernel_->null_handling == NullHandling::INTERSECTION &&
output_descr_.shape == ValueDescr::ARRAY) {
- RETURN_NOT_OK(PropagateNulls(kernel_ctx_, batch, out.mutable_array()));
+ RETURN_NOT_OK(PropagateNulls(kernel_ctx_, batch, out.mutable_array()));
}
- RETURN_NOT_OK(kernel_->exec(kernel_ctx_, batch, &out));
+ RETURN_NOT_OK(kernel_->exec(kernel_ctx_, batch, &out));
if (!kernel_->finalize) {
// If there is no result finalizer (e.g. for hash-based functions), we can
// emit the processed batch right away rather than waiting
@@ -868,7 +868,7 @@ class VectorExecutor : public KernelExecutorImpl<VectorKernel> {
if (kernel_->finalize) {
// Intermediate results require post-processing after the execution is
// completed (possibly involving some accumulated state)
- RETURN_NOT_OK(kernel_->finalize(kernel_ctx_, &results_));
+ RETURN_NOT_OK(kernel_->finalize(kernel_ctx_, &results_));
for (const auto& result : results_) {
RETURN_NOT_OK(listener->OnResult(result));
}
@@ -878,39 +878,39 @@ class VectorExecutor : public KernelExecutorImpl<VectorKernel> {
Status SetupArgIteration(const std::vector<Datum>& args) override {
if (kernel_->can_execute_chunkwise) {
- ARROW_ASSIGN_OR_RAISE(batch_iterator_, ExecBatchIterator::Make(
- args, exec_context()->exec_chunksize()));
+ ARROW_ASSIGN_OR_RAISE(batch_iterator_, ExecBatchIterator::Make(
+ args, exec_context()->exec_chunksize()));
}
return Status::OK();
}
Status PrepareExecute(const std::vector<Datum>& args) {
- RETURN_NOT_OK(this->SetupArgIteration(args));
+ RETURN_NOT_OK(this->SetupArgIteration(args));
output_num_buffers_ = static_cast<int>(output_descr_.type->layout().buffers.size());
// Decide if we need to preallocate memory for this kernel
validity_preallocated_ =
(kernel_->null_handling != NullHandling::COMPUTED_NO_PREALLOCATE &&
kernel_->null_handling != NullHandling::OUTPUT_NOT_NULL);
- if (kernel_->mem_allocation == MemAllocation::PREALLOCATE) {
- ComputeDataPreallocate(*output_descr_.type, &data_preallocated_);
- }
+ if (kernel_->mem_allocation == MemAllocation::PREALLOCATE) {
+ ComputeDataPreallocate(*output_descr_.type, &data_preallocated_);
+ }
return Status::OK();
}
std::vector<Datum> results_;
};
-class ScalarAggExecutor : public KernelExecutorImpl<ScalarAggregateKernel> {
+class ScalarAggExecutor : public KernelExecutorImpl<ScalarAggregateKernel> {
public:
- Status Init(KernelContext* ctx, KernelInitArgs args) override {
- input_descrs_ = &args.inputs;
- options_ = args.options;
- return KernelExecutorImpl<ScalarAggregateKernel>::Init(ctx, args);
- }
+ Status Init(KernelContext* ctx, KernelInitArgs args) override {
+ input_descrs_ = &args.inputs;
+ options_ = args.options;
+ return KernelExecutorImpl<ScalarAggregateKernel>::Init(ctx, args);
+ }
Status Execute(const std::vector<Datum>& args, ExecListener* listener) override {
- RETURN_NOT_OK(this->SetupArgIteration(args));
+ RETURN_NOT_OK(this->SetupArgIteration(args));
ExecBatch batch;
while (batch_iterator_->Next(&batch)) {
@@ -921,7 +921,7 @@ class ScalarAggExecutor : public KernelExecutorImpl<ScalarAggregateKernel> {
}
Datum out;
- RETURN_NOT_OK(kernel_->finalize(kernel_ctx_, &out));
+ RETURN_NOT_OK(kernel_->finalize(kernel_ctx_, &out));
RETURN_NOT_OK(listener->OnResult(std::move(out)));
return Status::OK();
}
@@ -934,78 +934,78 @@ class ScalarAggExecutor : public KernelExecutorImpl<ScalarAggregateKernel> {
private:
Status Consume(const ExecBatch& batch) {
- // FIXME(ARROW-11840) don't merge *any* aggegates for every batch
- ARROW_ASSIGN_OR_RAISE(
- auto batch_state,
- kernel_->init(kernel_ctx_, {kernel_, *input_descrs_, options_}));
+    // FIXME(ARROW-11840) don't merge *any* aggregates for every batch
+ ARROW_ASSIGN_OR_RAISE(
+ auto batch_state,
+ kernel_->init(kernel_ctx_, {kernel_, *input_descrs_, options_}));
if (batch_state == nullptr) {
- return Status::Invalid("ScalarAggregation requires non-null kernel state");
+ return Status::Invalid("ScalarAggregation requires non-null kernel state");
}
- KernelContext batch_ctx(exec_context());
+ KernelContext batch_ctx(exec_context());
batch_ctx.SetState(batch_state.get());
- RETURN_NOT_OK(kernel_->consume(&batch_ctx, batch));
- RETURN_NOT_OK(kernel_->merge(kernel_ctx_, std::move(*batch_state), state()));
+ RETURN_NOT_OK(kernel_->consume(&batch_ctx, batch));
+ RETURN_NOT_OK(kernel_->merge(kernel_ctx_, std::move(*batch_state), state()));
return Status::OK();
}
-
- const std::vector<ValueDescr>* input_descrs_;
- const FunctionOptions* options_;
+
+ const std::vector<ValueDescr>* input_descrs_;
+ const FunctionOptions* options_;
};
template <typename ExecutorType,
typename FunctionType = typename ExecutorType::FunctionType>
-Result<std::unique_ptr<KernelExecutor>> MakeExecutor(ExecContext* ctx,
- const Function* func,
- const FunctionOptions* options) {
+Result<std::unique_ptr<KernelExecutor>> MakeExecutor(ExecContext* ctx,
+ const Function* func,
+ const FunctionOptions* options) {
DCHECK_EQ(ExecutorType::function_kind, func->kind());
auto typed_func = checked_cast<const FunctionType*>(func);
- return std::unique_ptr<KernelExecutor>(new ExecutorType(ctx, typed_func, options));
+ return std::unique_ptr<KernelExecutor>(new ExecutorType(ctx, typed_func, options));
}
-} // namespace
-
-Status PropagateNulls(KernelContext* ctx, const ExecBatch& batch, ArrayData* output) {
- DCHECK_NE(nullptr, output);
- DCHECK_GT(output->buffers.size(), 0);
-
- if (output->type->id() == Type::NA) {
- // Null output type is a no-op (rare when this would happen but we at least
- // will test for it)
- return Status::OK();
+} // namespace
+
+Status PropagateNulls(KernelContext* ctx, const ExecBatch& batch, ArrayData* output) {
+ DCHECK_NE(nullptr, output);
+ DCHECK_GT(output->buffers.size(), 0);
+
+ if (output->type->id() == Type::NA) {
+  // Null output type is a no-op (it would rarely happen, but we at least
+  // test for it)
+ return Status::OK();
+ }
+
+ // This function is ONLY able to write into output with non-zero offset
+ // when the bitmap is preallocated. This could be a DCHECK but returning
+ // error Status for now for emphasis
+ if (output->offset != 0 && output->buffers[0] == nullptr) {
+ return Status::Invalid(
+ "Can only propagate nulls into pre-allocated memory "
+ "when the output offset is non-zero");
}
-
- // This function is ONLY able to write into output with non-zero offset
- // when the bitmap is preallocated. This could be a DCHECK but returning
- // error Status for now for emphasis
- if (output->offset != 0 && output->buffers[0] == nullptr) {
- return Status::Invalid(
- "Can only propagate nulls into pre-allocated memory "
- "when the output offset is non-zero");
- }
- NullPropagator propagator(ctx, batch, output);
- return propagator.Execute();
+ NullPropagator propagator(ctx, batch, output);
+ return propagator.Execute();
+}
+
+std::unique_ptr<KernelExecutor> KernelExecutor::MakeScalar() {
+ return ::arrow::internal::make_unique<detail::ScalarExecutor>();
+}
+
+std::unique_ptr<KernelExecutor> KernelExecutor::MakeVector() {
+ return ::arrow::internal::make_unique<detail::VectorExecutor>();
+}
+
+std::unique_ptr<KernelExecutor> KernelExecutor::MakeScalarAggregate() {
+ return ::arrow::internal::make_unique<detail::ScalarAggExecutor>();
}
-std::unique_ptr<KernelExecutor> KernelExecutor::MakeScalar() {
- return ::arrow::internal::make_unique<detail::ScalarExecutor>();
-}
-
-std::unique_ptr<KernelExecutor> KernelExecutor::MakeVector() {
- return ::arrow::internal::make_unique<detail::VectorExecutor>();
-}
-
-std::unique_ptr<KernelExecutor> KernelExecutor::MakeScalarAggregate() {
- return ::arrow::internal::make_unique<detail::ScalarAggExecutor>();
-}
-
} // namespace detail
-ExecContext::ExecContext(MemoryPool* pool, ::arrow::internal::Executor* executor,
- FunctionRegistry* func_registry)
- : pool_(pool), executor_(executor) {
+ExecContext::ExecContext(MemoryPool* pool, ::arrow::internal::Executor* executor,
+ FunctionRegistry* func_registry)
+ : pool_(pool), executor_(executor) {
this->func_registry_ = func_registry == nullptr ? GetFunctionRegistry() : func_registry;
}
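A small usage sketch for the context plumbing above (the executor and registry take their defaults; as the constructor shows, a null registry falls back to GetFunctionRegistry):

    #include "arrow/compute/exec.h"
    #include "arrow/memory_pool.h"

    // An ExecContext with an explicit pool; executor/registry stay default.
    arrow::compute::ExecContext MakeContext() {
      return arrow::compute::ExecContext(arrow::default_memory_pool());
    }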
diff --git a/contrib/libs/apache/arrow/cpp/src/arrow/compute/exec.h b/contrib/libs/apache/arrow/cpp/src/arrow/compute/exec.h
index 90fb291dbb8..de1b695de48 100644
--- a/contrib/libs/apache/arrow/cpp/src/arrow/compute/exec.h
+++ b/contrib/libs/apache/arrow/cpp/src/arrow/compute/exec.h
@@ -28,13 +28,13 @@
#include <vector>
#include "arrow/array/data.h"
-#include "arrow/compute/exec/expression.h"
+#include "arrow/compute/exec/expression.h"
#include "arrow/datum.h"
#include "arrow/memory_pool.h"
#include "arrow/result.h"
#include "arrow/type_fwd.h"
#include "arrow/util/macros.h"
-#include "arrow/util/type_fwd.h"
+#include "arrow/util/type_fwd.h"
#include "arrow/util/visibility.h"
namespace arrow {
@@ -46,7 +46,7 @@ class CpuInfo;
namespace compute {
-class FunctionOptions;
+class FunctionOptions;
class FunctionRegistry;
// It seems like 64K might be a good default chunksize to use for execution
@@ -61,7 +61,7 @@ class ARROW_EXPORT ExecContext {
public:
// If no function registry passed, the default is used.
explicit ExecContext(MemoryPool* pool = default_memory_pool(),
- ::arrow::internal::Executor* executor = NULLPTR,
+ ::arrow::internal::Executor* executor = NULLPTR,
FunctionRegistry* func_registry = NULLPTR);
/// \brief The MemoryPool used for allocations, default is
@@ -70,9 +70,9 @@ class ARROW_EXPORT ExecContext {
::arrow::internal::CpuInfo* cpu_info() const;
- /// \brief An Executor which may be used to parallelize execution.
- ::arrow::internal::Executor* executor() const { return executor_; }
-
+ /// \brief An Executor which may be used to parallelize execution.
+ ::arrow::internal::Executor* executor() const { return executor_; }
+
/// \brief The FunctionRegistry for looking up functions by name and
/// selecting kernels for execution. Defaults to the library-global function
/// registry provided by GetFunctionRegistry.
@@ -119,15 +119,15 @@ class ARROW_EXPORT ExecContext {
private:
MemoryPool* pool_;
- ::arrow::internal::Executor* executor_;
+ ::arrow::internal::Executor* executor_;
FunctionRegistry* func_registry_;
int64_t exec_chunksize_ = std::numeric_limits<int64_t>::max();
bool preallocate_contiguous_ = true;
bool use_threads_ = true;
};
-ARROW_EXPORT ExecContext* default_exec_context();
-
+ARROW_EXPORT ExecContext* default_exec_context();
+
// TODO: Consider standardizing on uint16 selection vectors and only use them
// when we can ensure that each value is 64K length or smaller
@@ -173,18 +173,18 @@ class ARROW_EXPORT SelectionVector {
/// TODO: Datum uses arrow/util/variant.h which may be a bit heavier-weight
/// than is desirable for this class. Microbenchmarks would help determine for
/// sure. See ARROW-8928.
-struct ARROW_EXPORT ExecBatch {
- ExecBatch() = default;
+struct ARROW_EXPORT ExecBatch {
+ ExecBatch() = default;
ExecBatch(std::vector<Datum> values, int64_t length)
: values(std::move(values)), length(length) {}
- explicit ExecBatch(const RecordBatch& batch);
-
- static Result<ExecBatch> Make(std::vector<Datum> values);
-
- Result<std::shared_ptr<RecordBatch>> ToRecordBatch(
- std::shared_ptr<Schema> schema, MemoryPool* pool = default_memory_pool()) const;
-
+ explicit ExecBatch(const RecordBatch& batch);
+
+ static Result<ExecBatch> Make(std::vector<Datum> values);
+
+ Result<std::shared_ptr<RecordBatch>> ToRecordBatch(
+ std::shared_ptr<Schema> schema, MemoryPool* pool = default_memory_pool()) const;
+
/// The values representing positional arguments to be passed to a kernel's
/// exec function for processing.
std::vector<Datum> values;
@@ -196,9 +196,9 @@ struct ARROW_EXPORT ExecBatch {
/// ExecBatch::length is equal to the length of this array.
std::shared_ptr<SelectionVector> selection_vector;
- /// A predicate Expression guaranteed to evaluate to true for all rows in this batch.
- Expression guarantee = literal(true);
-
+ /// A predicate Expression guaranteed to evaluate to true for all rows in this batch.
+ Expression guarantee = literal(true);
+
/// The semantic length of the ExecBatch. When the values are all scalars,
/// the length should be set to 1, otherwise the length is taken from the
/// array values, except when there is a selection vector. When there is a
@@ -216,13 +216,13 @@ struct ARROW_EXPORT ExecBatch {
return values[i];
}
- bool Equals(const ExecBatch& other) const;
-
+ bool Equals(const ExecBatch& other) const;
+
/// \brief A convenience for the number of values / arguments.
int num_values() const { return static_cast<int>(values.size()); }
- ExecBatch Slice(int64_t offset, int64_t length) const;
-
+ ExecBatch Slice(int64_t offset, int64_t length) const;
+
/// \brief A convenience for returning the ValueDescr objects (types and
/// shapes) from the batch.
std::vector<ValueDescr> GetDescriptors() const {
@@ -232,13 +232,13 @@ struct ARROW_EXPORT ExecBatch {
}
return result;
}
-
- ARROW_EXPORT friend void PrintTo(const ExecBatch&, std::ostream*);
+
+ ARROW_EXPORT friend void PrintTo(const ExecBatch&, std::ostream*);
};
-inline bool operator==(const ExecBatch& l, const ExecBatch& r) { return l.Equals(r); }
-inline bool operator!=(const ExecBatch& l, const ExecBatch& r) { return !l.Equals(r); }
-
+inline bool operator==(const ExecBatch& l, const ExecBatch& r) { return l.Equals(r); }
+inline bool operator!=(const ExecBatch& l, const ExecBatch& r) { return !l.Equals(r); }
+
/// \defgroup compute-call-function One-shot calls to compute functions
///
/// @{
diff --git a/contrib/libs/apache/arrow/cpp/src/arrow/compute/exec/exec_plan.cc b/contrib/libs/apache/arrow/cpp/src/arrow/compute/exec/exec_plan.cc
index aec7805ceea..433e895c243 100644
--- a/contrib/libs/apache/arrow/cpp/src/arrow/compute/exec/exec_plan.cc
+++ b/contrib/libs/apache/arrow/cpp/src/arrow/compute/exec/exec_plan.cc
@@ -1,823 +1,823 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-#include "arrow/compute/exec/exec_plan.h"
-
-#include <mutex>
-#include <thread>
-#include <unordered_map>
-#include <unordered_set>
-
-#include "arrow/array/util.h"
-#include "arrow/compute/api_vector.h"
-#include "arrow/compute/exec.h"
-#include "arrow/compute/exec/expression.h"
-#include "arrow/compute/registry.h"
-#include "arrow/datum.h"
-#include "arrow/record_batch.h"
-#include "arrow/result.h"
-#include "arrow/util/async_generator.h"
-#include "arrow/util/checked_cast.h"
-#include "arrow/util/logging.h"
-#include "arrow/util/optional.h"
-
-namespace arrow {
-
-using internal::checked_cast;
-using internal::checked_pointer_cast;
-
-namespace compute {
-
-namespace {
-
-struct ExecPlanImpl : public ExecPlan {
- explicit ExecPlanImpl(ExecContext* exec_context) : ExecPlan(exec_context) {}
-
- ~ExecPlanImpl() override {
- if (started_ && !finished_.is_finished()) {
- ARROW_LOG(WARNING) << "Plan was destroyed before finishing";
- StopProducing();
- finished().Wait();
- }
- }
-
- ExecNode* AddNode(std::unique_ptr<ExecNode> node) {
- if (node->num_inputs() == 0) {
- sources_.push_back(node.get());
- }
- if (node->num_outputs() == 0) {
- sinks_.push_back(node.get());
- }
- nodes_.push_back(std::move(node));
- return nodes_.back().get();
- }
-
- Status Validate() const {
- if (nodes_.empty()) {
- return Status::Invalid("ExecPlan has no node");
- }
- for (const auto& node : nodes_) {
- RETURN_NOT_OK(node->Validate());
- }
- return Status::OK();
- }
-
- Status StartProducing() {
- if (started_) {
- return Status::Invalid("restarted ExecPlan");
- }
- started_ = true;
-
- // producers precede consumers
- sorted_nodes_ = TopoSort();
-
- std::vector<Future<>> futures;
-
- Status st = Status::OK();
-
- using rev_it = std::reverse_iterator<NodeVector::iterator>;
- for (rev_it it(sorted_nodes_.end()), end(sorted_nodes_.begin()); it != end; ++it) {
- auto node = *it;
-
- st = node->StartProducing();
- if (!st.ok()) {
- // Stop nodes that successfully started, in reverse order
- stopped_ = true;
- StopProducingImpl(it.base(), sorted_nodes_.end());
- break;
- }
-
- futures.push_back(node->finished());
- }
-
- finished_ = AllComplete(std::move(futures));
- return st;
- }
-
- void StopProducing() {
- DCHECK(started_) << "stopped an ExecPlan which never started";
- stopped_ = true;
-
- StopProducingImpl(sorted_nodes_.begin(), sorted_nodes_.end());
- }
-
- template <typename It>
- void StopProducingImpl(It begin, It end) {
- for (auto it = begin; it != end; ++it) {
- auto node = *it;
- node->StopProducing();
- }
- }
-
- NodeVector TopoSort() {
- struct Impl {
- const std::vector<std::unique_ptr<ExecNode>>& nodes;
- std::unordered_set<ExecNode*> visited;
- NodeVector sorted;
-
- explicit Impl(const std::vector<std::unique_ptr<ExecNode>>& nodes) : nodes(nodes) {
- visited.reserve(nodes.size());
- sorted.resize(nodes.size());
-
- for (const auto& node : nodes) {
- Visit(node.get());
- }
-
- DCHECK_EQ(visited.size(), nodes.size());
- }
-
- void Visit(ExecNode* node) {
- if (visited.count(node) != 0) return;
-
- for (auto input : node->inputs()) {
- // Ensure that producers are inserted before this consumer
- Visit(input);
- }
-
- sorted[visited.size()] = node;
- visited.insert(node);
- }
- };
-
- return std::move(Impl{nodes_}.sorted);
- }
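The TopoSort above is a depth-first post-order: each node is appended only after all of its inputs, which is exactly the producers-before-consumers order StartProducing needs (it then walks the result in reverse). The same idea in a standalone sketch over a hypothetical Node type:

    #include <unordered_set>
    #include <vector>

    struct Node { std::vector<Node*> inputs; };

    // Appends producers before consumers; assumes the graph is acyclic.
    void Visit(Node* n, std::unordered_set<Node*>* seen,
               std::vector<Node*>* sorted) {
      if (!seen->insert(n).second) return;  // already visited
      for (Node* in : n->inputs) Visit(in, seen, sorted);
      sorted->push_back(n);
    }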
-
- Future<> finished_ = Future<>::MakeFinished();
- bool started_ = false, stopped_ = false;
- std::vector<std::unique_ptr<ExecNode>> nodes_;
- NodeVector sources_, sinks_;
- NodeVector sorted_nodes_;
-};
-
-ExecPlanImpl* ToDerived(ExecPlan* ptr) { return checked_cast<ExecPlanImpl*>(ptr); }
-
-const ExecPlanImpl* ToDerived(const ExecPlan* ptr) {
- return checked_cast<const ExecPlanImpl*>(ptr);
-}
-
-util::optional<int> GetNodeIndex(const std::vector<ExecNode*>& nodes,
- const ExecNode* node) {
- for (int i = 0; i < static_cast<int>(nodes.size()); ++i) {
- if (nodes[i] == node) return i;
- }
- return util::nullopt;
-}
-
-} // namespace
-
-Result<std::shared_ptr<ExecPlan>> ExecPlan::Make(ExecContext* ctx) {
- return std::shared_ptr<ExecPlan>(new ExecPlanImpl{ctx});
-}
-
-ExecNode* ExecPlan::AddNode(std::unique_ptr<ExecNode> node) {
- return ToDerived(this)->AddNode(std::move(node));
-}
-
-const ExecPlan::NodeVector& ExecPlan::sources() const {
- return ToDerived(this)->sources_;
-}
-
-const ExecPlan::NodeVector& ExecPlan::sinks() const { return ToDerived(this)->sinks_; }
-
-Status ExecPlan::Validate() { return ToDerived(this)->Validate(); }
-
-Status ExecPlan::StartProducing() { return ToDerived(this)->StartProducing(); }
-
-void ExecPlan::StopProducing() { ToDerived(this)->StopProducing(); }
-
-Future<> ExecPlan::finished() { return ToDerived(this)->finished_; }
-
-ExecNode::ExecNode(ExecPlan* plan, std::string label, NodeVector inputs,
- std::vector<std::string> input_labels,
- std::shared_ptr<Schema> output_schema, int num_outputs)
- : plan_(plan),
- label_(std::move(label)),
- inputs_(std::move(inputs)),
- input_labels_(std::move(input_labels)),
- output_schema_(std::move(output_schema)),
- num_outputs_(num_outputs) {
- for (auto input : inputs_) {
- input->outputs_.push_back(this);
- }
-}
-
-Status ExecNode::Validate() const {
- if (inputs_.size() != input_labels_.size()) {
- return Status::Invalid("Invalid number of inputs for '", label(), "' (expected ",
- num_inputs(), ", actual ", input_labels_.size(), ")");
- }
-
- if (static_cast<int>(outputs_.size()) != num_outputs_) {
- return Status::Invalid("Invalid number of outputs for '", label(), "' (expected ",
- num_outputs(), ", actual ", outputs_.size(), ")");
- }
-
- for (auto out : outputs_) {
- auto input_index = GetNodeIndex(out->inputs(), this);
- if (!input_index) {
- return Status::Invalid("Node '", label(), "' outputs to node '", out->label(),
- "' but is not listed as an input.");
- }
- }
-
- return Status::OK();
-}
-
-struct SourceNode : ExecNode {
- SourceNode(ExecPlan* plan, std::string label, std::shared_ptr<Schema> output_schema,
- AsyncGenerator<util::optional<ExecBatch>> generator)
- : ExecNode(plan, std::move(label), {}, {}, std::move(output_schema),
- /*num_outputs=*/1),
- generator_(std::move(generator)) {}
-
- const char* kind_name() override { return "SourceNode"; }
-
- [[noreturn]] static void NoInputs() {
- DCHECK(false) << "no inputs; this should never be called";
- std::abort();
- }
- [[noreturn]] void InputReceived(ExecNode*, int, ExecBatch) override { NoInputs(); }
- [[noreturn]] void ErrorReceived(ExecNode*, Status) override { NoInputs(); }
- [[noreturn]] void InputFinished(ExecNode*, int) override { NoInputs(); }
-
- Status StartProducing() override {
- DCHECK(!stop_requested_) << "Restarted SourceNode";
-
- CallbackOptions options;
- if (auto executor = plan()->exec_context()->executor()) {
- // These options will transfer execution to the desired Executor if necessary.
- // This can happen for in-memory scans where batches didn't require
-      // any CPU work to decode. Otherwise, parsing etc. should already have
-      // placed us on the desired Executor and no queues will be pushed to.
- options.executor = executor;
- options.should_schedule = ShouldSchedule::IfDifferentExecutor;
- }
-
- finished_ = Loop([this, options] {
- std::unique_lock<std::mutex> lock(mutex_);
- int seq = batch_count_++;
- if (stop_requested_) {
- return Future<ControlFlow<int>>::MakeFinished(Break(seq));
- }
- lock.unlock();
-
- return generator_().Then(
- [=](const util::optional<ExecBatch>& batch) -> ControlFlow<int> {
- std::unique_lock<std::mutex> lock(mutex_);
- if (IsIterationEnd(batch) || stop_requested_) {
- stop_requested_ = true;
- return Break(seq);
- }
- lock.unlock();
-
- outputs_[0]->InputReceived(this, seq, *batch);
- return Continue();
- },
- [=](const Status& error) -> ControlFlow<int> {
- // NB: ErrorReceived is independent of InputFinished, but
- // ErrorReceived will usually prompt StopProducing which will
- // prompt InputFinished. ErrorReceived may still be called from a
- // node which was requested to stop (indeed, the request to stop
- // may prompt an error).
- std::unique_lock<std::mutex> lock(mutex_);
- stop_requested_ = true;
- lock.unlock();
- outputs_[0]->ErrorReceived(this, error);
- return Break(seq);
- },
- options);
- }).Then([&](int seq) { outputs_[0]->InputFinished(this, seq); });
-
- return Status::OK();
- }
-
- void PauseProducing(ExecNode* output) override {}
-
- void ResumeProducing(ExecNode* output) override {}
-
- void StopProducing(ExecNode* output) override {
- DCHECK_EQ(output, outputs_[0]);
- StopProducing();
- }
-
- void StopProducing() override {
- std::unique_lock<std::mutex> lock(mutex_);
- stop_requested_ = true;
- }
-
- Future<> finished() override { return finished_; }
-
- private:
- std::mutex mutex_;
- bool stop_requested_{false};
- int batch_count_{0};
- Future<> finished_ = Future<>::MakeFinished();
- AsyncGenerator<util::optional<ExecBatch>> generator_;
-};
-
-ExecNode* MakeSourceNode(ExecPlan* plan, std::string label,
- std::shared_ptr<Schema> output_schema,
- AsyncGenerator<util::optional<ExecBatch>> generator) {
- return plan->EmplaceNode<SourceNode>(plan, std::move(label), std::move(output_schema),
- std::move(generator));
-}
-
-struct FilterNode : ExecNode {
- FilterNode(ExecNode* input, std::string label, Expression filter)
- : ExecNode(input->plan(), std::move(label), {input}, {"target"},
- /*output_schema=*/input->output_schema(),
- /*num_outputs=*/1),
- filter_(std::move(filter)) {}
-
- const char* kind_name() override { return "FilterNode"; }
-
- Result<ExecBatch> DoFilter(const ExecBatch& target) {
- ARROW_ASSIGN_OR_RAISE(Expression simplified_filter,
- SimplifyWithGuarantee(filter_, target.guarantee));
-
- ARROW_ASSIGN_OR_RAISE(Datum mask, ExecuteScalarExpression(simplified_filter, target,
- plan()->exec_context()));
-
- if (mask.is_scalar()) {
- const auto& mask_scalar = mask.scalar_as<BooleanScalar>();
- if (mask_scalar.is_valid && mask_scalar.value) {
- return target;
- }
-
- return target.Slice(0, 0);
- }
-
- // if the values are all scalar then the mask must also be
- DCHECK(!std::all_of(target.values.begin(), target.values.end(),
- [](const Datum& value) { return value.is_scalar(); }));
-
- auto values = target.values;
- for (auto& value : values) {
- if (value.is_scalar()) continue;
- ARROW_ASSIGN_OR_RAISE(value, Filter(value, mask, FilterOptions::Defaults()));
- }
- return ExecBatch::Make(std::move(values));
- }
-
- void InputReceived(ExecNode* input, int seq, ExecBatch batch) override {
- DCHECK_EQ(input, inputs_[0]);
-
- auto maybe_filtered = DoFilter(std::move(batch));
- if (!maybe_filtered.ok()) {
- outputs_[0]->ErrorReceived(this, maybe_filtered.status());
- return;
- }
-
- maybe_filtered->guarantee = batch.guarantee;
- outputs_[0]->InputReceived(this, seq, maybe_filtered.MoveValueUnsafe());
- }
-
- void ErrorReceived(ExecNode* input, Status error) override {
- DCHECK_EQ(input, inputs_[0]);
- outputs_[0]->ErrorReceived(this, std::move(error));
- }
-
- void InputFinished(ExecNode* input, int seq) override {
- DCHECK_EQ(input, inputs_[0]);
- outputs_[0]->InputFinished(this, seq);
- }
-
- Status StartProducing() override { return Status::OK(); }
-
- void PauseProducing(ExecNode* output) override {}
-
- void ResumeProducing(ExecNode* output) override {}
-
- void StopProducing(ExecNode* output) override {
- DCHECK_EQ(output, outputs_[0]);
- StopProducing();
- }
-
- void StopProducing() override { inputs_[0]->StopProducing(this); }
-
- Future<> finished() override { return inputs_[0]->finished(); }
-
- private:
- Expression filter_;
-};
-
-Result<ExecNode*> MakeFilterNode(ExecNode* input, std::string label, Expression filter) {
- if (!filter.IsBound()) {
- ARROW_ASSIGN_OR_RAISE(filter, filter.Bind(*input->output_schema()));
- }
-
- if (filter.type()->id() != Type::BOOL) {
- return Status::TypeError("Filter expression must evaluate to bool, but ",
- filter.ToString(), " evaluates to ",
- filter.type()->ToString());
- }
-
- return input->plan()->EmplaceNode<FilterNode>(input, std::move(label),
- std::move(filter));
-}
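Taken together, the source/filter/sink factories in this file compose into a plan. A hedged sketch in the 4.0-era API shown in this diff, with the schema, batch generator, and predicate all hypothetical:

    #include "arrow/compute/api.h"
    #include "arrow/compute/exec/exec_plan.h"
    #include "arrow/compute/exec/expression.h"

    arrow::Status RunPlan(
        std::shared_ptr<arrow::Schema> schema,
        arrow::AsyncGenerator<arrow::util::optional<arrow::compute::ExecBatch>> gen) {
      using namespace arrow::compute;
      ARROW_ASSIGN_OR_RAISE(auto plan, ExecPlan::Make(default_exec_context()));
      ExecNode* source =
          MakeSourceNode(plan.get(), "source", schema, std::move(gen));
      ARROW_ASSIGN_OR_RAISE(
          ExecNode* filtered,
          MakeFilterNode(source, "filter",
                         call("greater", {field_ref("x"), literal(3)})));
      auto sink_gen = MakeSinkNode(filtered, "sink");
      RETURN_NOT_OK(plan->Validate());
      RETURN_NOT_OK(plan->StartProducing());
      // ... drain sink_gen, e.g. via MakeGeneratorReader ...
      return plan->finished().status();  // blocks until all nodes finish
    }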
-
-struct ProjectNode : ExecNode {
- ProjectNode(ExecNode* input, std::string label, std::shared_ptr<Schema> output_schema,
- std::vector<Expression> exprs)
- : ExecNode(input->plan(), std::move(label), {input}, {"target"},
- /*output_schema=*/std::move(output_schema),
- /*num_outputs=*/1),
- exprs_(std::move(exprs)) {}
-
- const char* kind_name() override { return "ProjectNode"; }
-
- Result<ExecBatch> DoProject(const ExecBatch& target) {
- std::vector<Datum> values{exprs_.size()};
- for (size_t i = 0; i < exprs_.size(); ++i) {
- ARROW_ASSIGN_OR_RAISE(Expression simplified_expr,
- SimplifyWithGuarantee(exprs_[i], target.guarantee));
-
- ARROW_ASSIGN_OR_RAISE(values[i], ExecuteScalarExpression(simplified_expr, target,
- plan()->exec_context()));
- }
- return ExecBatch{std::move(values), target.length};
- }
-
- void InputReceived(ExecNode* input, int seq, ExecBatch batch) override {
- DCHECK_EQ(input, inputs_[0]);
-
- auto maybe_projected = DoProject(std::move(batch));
- if (!maybe_projected.ok()) {
- outputs_[0]->ErrorReceived(this, maybe_projected.status());
- return;
- }
-
- maybe_projected->guarantee = batch.guarantee;
- outputs_[0]->InputReceived(this, seq, maybe_projected.MoveValueUnsafe());
- }
-
- void ErrorReceived(ExecNode* input, Status error) override {
- DCHECK_EQ(input, inputs_[0]);
- outputs_[0]->ErrorReceived(this, std::move(error));
- }
-
- void InputFinished(ExecNode* input, int seq) override {
- DCHECK_EQ(input, inputs_[0]);
- outputs_[0]->InputFinished(this, seq);
- }
-
- Status StartProducing() override { return Status::OK(); }
-
- void PauseProducing(ExecNode* output) override {}
-
- void ResumeProducing(ExecNode* output) override {}
-
- void StopProducing(ExecNode* output) override {
- DCHECK_EQ(output, outputs_[0]);
- StopProducing();
- }
-
- void StopProducing() override { inputs_[0]->StopProducing(this); }
-
- Future<> finished() override { return inputs_[0]->finished(); }
-
- private:
- std::vector<Expression> exprs_;
-};
-
-Result<ExecNode*> MakeProjectNode(ExecNode* input, std::string label,
- std::vector<Expression> exprs,
- std::vector<std::string> names) {
- FieldVector fields(exprs.size());
-
- if (names.size() == 0) {
- names.resize(exprs.size());
- for (size_t i = 0; i < exprs.size(); ++i) {
- names[i] = exprs[i].ToString();
- }
- }
-
- int i = 0;
- for (auto& expr : exprs) {
- if (!expr.IsBound()) {
- ARROW_ASSIGN_OR_RAISE(expr, expr.Bind(*input->output_schema()));
- }
- fields[i] = field(std::move(names[i]), expr.type());
- ++i;
- }
-
- return input->plan()->EmplaceNode<ProjectNode>(
- input, std::move(label), schema(std::move(fields)), std::move(exprs));
-}
-
-struct SinkNode : ExecNode {
- SinkNode(ExecNode* input, std::string label,
- AsyncGenerator<util::optional<ExecBatch>>* generator)
- : ExecNode(input->plan(), std::move(label), {input}, {"collected"}, {},
- /*num_outputs=*/0),
- producer_(MakeProducer(generator)) {}
-
- static PushGenerator<util::optional<ExecBatch>>::Producer MakeProducer(
- AsyncGenerator<util::optional<ExecBatch>>* out_gen) {
- PushGenerator<util::optional<ExecBatch>> gen;
- auto out = gen.producer();
- *out_gen = std::move(gen);
- return out;
- }
-
- const char* kind_name() override { return "SinkNode"; }
-
- Status StartProducing() override {
- finished_ = Future<>::Make();
- return Status::OK();
- }
-
- // sink nodes have no outputs from which to feel backpressure
- [[noreturn]] static void NoOutputs() {
- DCHECK(false) << "no outputs; this should never be called";
- std::abort();
- }
- [[noreturn]] void ResumeProducing(ExecNode* output) override { NoOutputs(); }
- [[noreturn]] void PauseProducing(ExecNode* output) override { NoOutputs(); }
- [[noreturn]] void StopProducing(ExecNode* output) override { NoOutputs(); }
-
- void StopProducing() override {
- Finish();
- inputs_[0]->StopProducing(this);
- }
-
- Future<> finished() override { return finished_; }
-
- void InputReceived(ExecNode* input, int seq_num, ExecBatch batch) override {
- DCHECK_EQ(input, inputs_[0]);
-
- std::unique_lock<std::mutex> lock(mutex_);
- if (finished_.is_finished()) return;
-
- ++num_received_;
- if (num_received_ == emit_stop_) {
- lock.unlock();
- producer_.Push(std::move(batch));
- Finish();
- return;
- }
-
- if (emit_stop_ != -1) {
- DCHECK_LE(seq_num, emit_stop_);
- }
-
- lock.unlock();
- producer_.Push(std::move(batch));
- }
-
- void ErrorReceived(ExecNode* input, Status error) override {
- DCHECK_EQ(input, inputs_[0]);
- producer_.Push(std::move(error));
- Finish();
- inputs_[0]->StopProducing(this);
- }
-
- void InputFinished(ExecNode* input, int seq_stop) override {
- std::unique_lock<std::mutex> lock(mutex_);
- emit_stop_ = seq_stop;
- if (num_received_ == emit_stop_) {
- lock.unlock();
- Finish();
- }
- }
-
- private:
- void Finish() {
- if (producer_.Close()) {
- finished_.MarkFinished();
- }
- }
-
- std::mutex mutex_;
-
- int num_received_ = 0;
- int emit_stop_ = -1;
- Future<> finished_ = Future<>::MakeFinished();
-
- PushGenerator<util::optional<ExecBatch>>::Producer producer_;
-};
-
-AsyncGenerator<util::optional<ExecBatch>> MakeSinkNode(ExecNode* input,
- std::string label) {
- AsyncGenerator<util::optional<ExecBatch>> out;
- (void)input->plan()->EmplaceNode<SinkNode>(input, std::move(label), &out);
- return out;
-}
-
-std::shared_ptr<RecordBatchReader> MakeGeneratorReader(
- std::shared_ptr<Schema> schema,
- std::function<Future<util::optional<ExecBatch>>()> gen, MemoryPool* pool) {
- struct Impl : RecordBatchReader {
- std::shared_ptr<Schema> schema() const override { return schema_; }
-
- Status ReadNext(std::shared_ptr<RecordBatch>* record_batch) override {
- ARROW_ASSIGN_OR_RAISE(auto batch, iterator_.Next());
- if (batch) {
- ARROW_ASSIGN_OR_RAISE(*record_batch, batch->ToRecordBatch(schema_, pool_));
- } else {
- *record_batch = IterationEnd<std::shared_ptr<RecordBatch>>();
- }
- return Status::OK();
- }
-
- MemoryPool* pool_;
- std::shared_ptr<Schema> schema_;
- Iterator<util::optional<ExecBatch>> iterator_;
- };
-
- auto out = std::make_shared<Impl>();
- out->pool_ = pool;
- out->schema_ = std::move(schema);
- out->iterator_ = MakeGeneratorIterator(std::move(gen));
- return out;
-}
-
-struct ScalarAggregateNode : ExecNode {
- ScalarAggregateNode(ExecNode* input, std::string label,
- std::shared_ptr<Schema> output_schema,
- std::vector<const ScalarAggregateKernel*> kernels,
- std::vector<std::vector<std::unique_ptr<KernelState>>> states)
- : ExecNode(input->plan(), std::move(label), {input}, {"target"},
- /*output_schema=*/std::move(output_schema),
- /*num_outputs=*/1),
- kernels_(std::move(kernels)),
- states_(std::move(states)) {}
-
- const char* kind_name() override { return "ScalarAggregateNode"; }
-
- Status DoConsume(const ExecBatch& batch, size_t thread_index) {
- for (size_t i = 0; i < kernels_.size(); ++i) {
- KernelContext batch_ctx{plan()->exec_context()};
- batch_ctx.SetState(states_[i][thread_index].get());
- ExecBatch single_column_batch{{batch.values[i]}, batch.length};
- RETURN_NOT_OK(kernels_[i]->consume(&batch_ctx, single_column_batch));
- }
- return Status::OK();
- }
-
- void InputReceived(ExecNode* input, int seq, ExecBatch batch) override {
- DCHECK_EQ(input, inputs_[0]);
-
- std::unique_lock<std::mutex> lock(mutex_);
- auto it =
- thread_indices_.emplace(std::this_thread::get_id(), thread_indices_.size()).first;
- auto thread_index = it->second;
-
- lock.unlock();
-
- Status st = DoConsume(std::move(batch), thread_index);
- if (!st.ok()) {
- outputs_[0]->ErrorReceived(this, std::move(st));
- return;
- }
-
- lock.lock();
- ++num_received_;
- st = MaybeFinish(&lock);
- if (!st.ok()) {
- outputs_[0]->ErrorReceived(this, std::move(st));
- }
- }
-
- void ErrorReceived(ExecNode* input, Status error) override {
- DCHECK_EQ(input, inputs_[0]);
- outputs_[0]->ErrorReceived(this, std::move(error));
- }
-
- void InputFinished(ExecNode* input, int seq) override {
- DCHECK_EQ(input, inputs_[0]);
- std::unique_lock<std::mutex> lock(mutex_);
- num_total_ = seq;
- Status st = MaybeFinish(&lock);
-
- if (!st.ok()) {
- outputs_[0]->ErrorReceived(this, std::move(st));
- }
- }
-
- Status StartProducing() override {
- finished_ = Future<>::Make();
- // Scalar aggregates will only output a single batch
- outputs_[0]->InputFinished(this, 1);
- return Status::OK();
- }
-
- void PauseProducing(ExecNode* output) override {}
-
- void ResumeProducing(ExecNode* output) override {}
-
- void StopProducing(ExecNode* output) override {
- DCHECK_EQ(output, outputs_[0]);
- StopProducing();
- }
-
- void StopProducing() override {
- inputs_[0]->StopProducing(this);
- finished_.MarkFinished();
- }
-
- Future<> finished() override { return finished_; }
-
- private:
- Status MaybeFinish(std::unique_lock<std::mutex>* lock) {
- if (num_received_ != num_total_) return Status::OK();
-
- if (states_.empty()) return Status::OK();
-
- ExecBatch batch{{}, 1};
- batch.values.resize(kernels_.size());
-
- for (size_t i = 0; i < kernels_.size(); ++i) {
- KernelContext ctx{plan()->exec_context()};
- ARROW_ASSIGN_OR_RAISE(auto merged, ScalarAggregateKernel::MergeAll(
- kernels_[i], &ctx, std::move(states_[i])));
- RETURN_NOT_OK(kernels_[i]->finalize(&ctx, &batch.values[i]));
- }
- states_.clear();
- lock->unlock();
-
- outputs_[0]->InputReceived(this, 0, batch);
-
- finished_.MarkFinished();
- return Status::OK();
- }
-
- Future<> finished_ = Future<>::MakeFinished();
- std::vector<const ScalarAggregateKernel*> kernels_;
- std::vector<std::vector<std::unique_ptr<KernelState>>> states_;
- std::unordered_map<std::thread::id, size_t> thread_indices_;
- std::mutex mutex_;
- int num_received_ = 0, num_total_ = -1;
-};
-
-Result<ExecNode*> MakeScalarAggregateNode(ExecNode* input, std::string label,
- std::vector<internal::Aggregate> aggregates) {
- if (input->output_schema()->num_fields() != static_cast<int>(aggregates.size())) {
- return Status::Invalid("Provided ", aggregates.size(),
- " aggregates, expected one for each field of ",
- input->output_schema()->ToString());
- }
-
- auto exec_ctx = input->plan()->exec_context();
-
- std::vector<const ScalarAggregateKernel*> kernels(aggregates.size());
- std::vector<std::vector<std::unique_ptr<KernelState>>> states(kernels.size());
- FieldVector fields(kernels.size());
-
- for (size_t i = 0; i < kernels.size(); ++i) {
- ARROW_ASSIGN_OR_RAISE(auto function,
- exec_ctx->func_registry()->GetFunction(aggregates[i].function));
-
- if (function->kind() != Function::SCALAR_AGGREGATE) {
- return Status::Invalid("Provided non ScalarAggregateFunction ",
- aggregates[i].function);
- }
-
- auto in_type = ValueDescr::Array(input->output_schema()->fields()[i]->type());
-
- ARROW_ASSIGN_OR_RAISE(const Kernel* kernel, function->DispatchExact({in_type}));
- kernels[i] = static_cast<const ScalarAggregateKernel*>(kernel);
-
- if (aggregates[i].options == nullptr) {
- aggregates[i].options = function->default_options();
- }
-
- KernelContext kernel_ctx{exec_ctx};
- states[i].resize(exec_ctx->executor() ? exec_ctx->executor()->GetCapacity() : 1);
- RETURN_NOT_OK(Kernel::InitAll(&kernel_ctx,
- KernelInitArgs{kernels[i],
- {
- in_type,
- },
- aggregates[i].options},
- &states[i]));
-
- // pick one to resolve the kernel signature
- kernel_ctx.SetState(states[i][0].get());
- ARROW_ASSIGN_OR_RAISE(
- auto descr, kernels[i]->signature->out_type().Resolve(&kernel_ctx, {in_type}));
-
- fields[i] = field(aggregates[i].function, std::move(descr.type));
- }
-
- return input->plan()->EmplaceNode<ScalarAggregateNode>(
- input, std::move(label), schema(std::move(fields)), std::move(kernels),
- std::move(states));
-}
-
-} // namespace compute
-} // namespace arrow
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "arrow/compute/exec/exec_plan.h"
+
+#include <mutex>
+#include <thread>
+#include <unordered_map>
+#include <unordered_set>
+
+#include "arrow/array/util.h"
+#include "arrow/compute/api_vector.h"
+#include "arrow/compute/exec.h"
+#include "arrow/compute/exec/expression.h"
+#include "arrow/compute/registry.h"
+#include "arrow/datum.h"
+#include "arrow/record_batch.h"
+#include "arrow/result.h"
+#include "arrow/util/async_generator.h"
+#include "arrow/util/checked_cast.h"
+#include "arrow/util/logging.h"
+#include "arrow/util/optional.h"
+
+namespace arrow {
+
+using internal::checked_cast;
+using internal::checked_pointer_cast;
+
+namespace compute {
+
+namespace {
+
+struct ExecPlanImpl : public ExecPlan {
+ explicit ExecPlanImpl(ExecContext* exec_context) : ExecPlan(exec_context) {}
+
+ ~ExecPlanImpl() override {
+ if (started_ && !finished_.is_finished()) {
+ ARROW_LOG(WARNING) << "Plan was destroyed before finishing";
+ StopProducing();
+ finished().Wait();
+ }
+ }
+
+ ExecNode* AddNode(std::unique_ptr<ExecNode> node) {
+ if (node->num_inputs() == 0) {
+ sources_.push_back(node.get());
+ }
+ if (node->num_outputs() == 0) {
+ sinks_.push_back(node.get());
+ }
+ nodes_.push_back(std::move(node));
+ return nodes_.back().get();
+ }
+
+ Status Validate() const {
+ if (nodes_.empty()) {
+ return Status::Invalid("ExecPlan has no node");
+ }
+ for (const auto& node : nodes_) {
+ RETURN_NOT_OK(node->Validate());
+ }
+ return Status::OK();
+ }
+
+ Status StartProducing() {
+ if (started_) {
+ return Status::Invalid("restarted ExecPlan");
+ }
+ started_ = true;
+
+ // producers precede consumers
+ sorted_nodes_ = TopoSort();
+
+ std::vector<Future<>> futures;
+
+ Status st = Status::OK();
+
+ using rev_it = std::reverse_iterator<NodeVector::iterator>;
+ for (rev_it it(sorted_nodes_.end()), end(sorted_nodes_.begin()); it != end; ++it) {
+ auto node = *it;
+
+ st = node->StartProducing();
+ if (!st.ok()) {
+ // Stop nodes that successfully started, in reverse order
+ stopped_ = true;
+ StopProducingImpl(it.base(), sorted_nodes_.end());
+ break;
+ }
+
+ futures.push_back(node->finished());
+ }
+
+ finished_ = AllComplete(std::move(futures));
+ return st;
+ }
+
+ void StopProducing() {
+ DCHECK(started_) << "stopped an ExecPlan which never started";
+ stopped_ = true;
+
+ StopProducingImpl(sorted_nodes_.begin(), sorted_nodes_.end());
+ }
+
+ template <typename It>
+ void StopProducingImpl(It begin, It end) {
+ for (auto it = begin; it != end; ++it) {
+ auto node = *it;
+ node->StopProducing();
+ }
+ }
+
+ NodeVector TopoSort() {
+ struct Impl {
+ const std::vector<std::unique_ptr<ExecNode>>& nodes;
+ std::unordered_set<ExecNode*> visited;
+ NodeVector sorted;
+
+ explicit Impl(const std::vector<std::unique_ptr<ExecNode>>& nodes) : nodes(nodes) {
+ visited.reserve(nodes.size());
+ sorted.resize(nodes.size());
+
+ for (const auto& node : nodes) {
+ Visit(node.get());
+ }
+
+ DCHECK_EQ(visited.size(), nodes.size());
+ }
+
+ void Visit(ExecNode* node) {
+ if (visited.count(node) != 0) return;
+
+ for (auto input : node->inputs()) {
+ // Ensure that producers are inserted before this consumer
+ Visit(input);
+ }
+
+ sorted[visited.size()] = node;
+ visited.insert(node);
+ }
+ };
+
+ return std::move(Impl{nodes_}.sorted);
+ }
+
+ Future<> finished_ = Future<>::MakeFinished();
+ bool started_ = false, stopped_ = false;
+ std::vector<std::unique_ptr<ExecNode>> nodes_;
+ NodeVector sources_, sinks_;
+ NodeVector sorted_nodes_;
+};
+
+ExecPlanImpl* ToDerived(ExecPlan* ptr) { return checked_cast<ExecPlanImpl*>(ptr); }
+
+const ExecPlanImpl* ToDerived(const ExecPlan* ptr) {
+ return checked_cast<const ExecPlanImpl*>(ptr);
+}
+
+util::optional<int> GetNodeIndex(const std::vector<ExecNode*>& nodes,
+ const ExecNode* node) {
+ for (int i = 0; i < static_cast<int>(nodes.size()); ++i) {
+ if (nodes[i] == node) return i;
+ }
+ return util::nullopt;
+}
+
+} // namespace
+
+Result<std::shared_ptr<ExecPlan>> ExecPlan::Make(ExecContext* ctx) {
+ return std::shared_ptr<ExecPlan>(new ExecPlanImpl{ctx});
+}
+
+ExecNode* ExecPlan::AddNode(std::unique_ptr<ExecNode> node) {
+ return ToDerived(this)->AddNode(std::move(node));
+}
+
+const ExecPlan::NodeVector& ExecPlan::sources() const {
+ return ToDerived(this)->sources_;
+}
+
+const ExecPlan::NodeVector& ExecPlan::sinks() const { return ToDerived(this)->sinks_; }
+
+Status ExecPlan::Validate() { return ToDerived(this)->Validate(); }
+
+Status ExecPlan::StartProducing() { return ToDerived(this)->StartProducing(); }
+
+void ExecPlan::StopProducing() { ToDerived(this)->StopProducing(); }
+
+Future<> ExecPlan::finished() { return ToDerived(this)->finished_; }
+
+ExecNode::ExecNode(ExecPlan* plan, std::string label, NodeVector inputs,
+ std::vector<std::string> input_labels,
+ std::shared_ptr<Schema> output_schema, int num_outputs)
+ : plan_(plan),
+ label_(std::move(label)),
+ inputs_(std::move(inputs)),
+ input_labels_(std::move(input_labels)),
+ output_schema_(std::move(output_schema)),
+ num_outputs_(num_outputs) {
+ for (auto input : inputs_) {
+ input->outputs_.push_back(this);
+ }
+}
+
+Status ExecNode::Validate() const {
+ if (inputs_.size() != input_labels_.size()) {
+ return Status::Invalid("Invalid number of inputs for '", label(), "' (expected ",
+ num_inputs(), ", actual ", input_labels_.size(), ")");
+ }
+
+ if (static_cast<int>(outputs_.size()) != num_outputs_) {
+ return Status::Invalid("Invalid number of outputs for '", label(), "' (expected ",
+ num_outputs(), ", actual ", outputs_.size(), ")");
+ }
+
+ for (auto out : outputs_) {
+ auto input_index = GetNodeIndex(out->inputs(), this);
+ if (!input_index) {
+ return Status::Invalid("Node '", label(), "' outputs to node '", out->label(),
+ "' but is not listed as an input.");
+ }
+ }
+
+ return Status::OK();
+}
+
+struct SourceNode : ExecNode {
+ SourceNode(ExecPlan* plan, std::string label, std::shared_ptr<Schema> output_schema,
+ AsyncGenerator<util::optional<ExecBatch>> generator)
+ : ExecNode(plan, std::move(label), {}, {}, std::move(output_schema),
+ /*num_outputs=*/1),
+ generator_(std::move(generator)) {}
+
+ const char* kind_name() override { return "SourceNode"; }
+
+ [[noreturn]] static void NoInputs() {
+ DCHECK(false) << "no inputs; this should never be called";
+ std::abort();
+ }
+ [[noreturn]] void InputReceived(ExecNode*, int, ExecBatch) override { NoInputs(); }
+ [[noreturn]] void ErrorReceived(ExecNode*, Status) override { NoInputs(); }
+ [[noreturn]] void InputFinished(ExecNode*, int) override { NoInputs(); }
+
+ Status StartProducing() override {
+ DCHECK(!stop_requested_) << "Restarted SourceNode";
+
+ CallbackOptions options;
+ if (auto executor = plan()->exec_context()->executor()) {
+ // These options will transfer execution to the desired Executor if necessary.
+ // This can happen for in-memory scans where batches didn't require
+      // any CPU work to decode. Otherwise, parsing etc. should already have
+      // placed us on the desired Executor and no queues will be pushed to.
+ options.executor = executor;
+ options.should_schedule = ShouldSchedule::IfDifferentExecutor;
+ }
+
+ finished_ = Loop([this, options] {
+ std::unique_lock<std::mutex> lock(mutex_);
+ int seq = batch_count_++;
+ if (stop_requested_) {
+ return Future<ControlFlow<int>>::MakeFinished(Break(seq));
+ }
+ lock.unlock();
+
+ return generator_().Then(
+ [=](const util::optional<ExecBatch>& batch) -> ControlFlow<int> {
+ std::unique_lock<std::mutex> lock(mutex_);
+ if (IsIterationEnd(batch) || stop_requested_) {
+ stop_requested_ = true;
+ return Break(seq);
+ }
+ lock.unlock();
+
+ outputs_[0]->InputReceived(this, seq, *batch);
+ return Continue();
+ },
+ [=](const Status& error) -> ControlFlow<int> {
+ // NB: ErrorReceived is independent of InputFinished, but
+ // ErrorReceived will usually prompt StopProducing which will
+ // prompt InputFinished. ErrorReceived may still be called from a
+ // node which was requested to stop (indeed, the request to stop
+ // may prompt an error).
+ std::unique_lock<std::mutex> lock(mutex_);
+ stop_requested_ = true;
+ lock.unlock();
+ outputs_[0]->ErrorReceived(this, error);
+ return Break(seq);
+ },
+ options);
+ }).Then([&](int seq) { outputs_[0]->InputFinished(this, seq); });
+
+ return Status::OK();
+ }
+
+ void PauseProducing(ExecNode* output) override {}
+
+ void ResumeProducing(ExecNode* output) override {}
+
+ void StopProducing(ExecNode* output) override {
+ DCHECK_EQ(output, outputs_[0]);
+ StopProducing();
+ }
+
+ void StopProducing() override {
+ std::unique_lock<std::mutex> lock(mutex_);
+ stop_requested_ = true;
+ }
+
+ Future<> finished() override { return finished_; }
+
+ private:
+ std::mutex mutex_;
+ bool stop_requested_{false};
+ int batch_count_{0};
+ Future<> finished_ = Future<>::MakeFinished();
+ AsyncGenerator<util::optional<ExecBatch>> generator_;
+};
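+
+// A compact illustration (a sketch, not referenced elsewhere in this file) of
+// the Loop/ControlFlow idiom used by SourceNode::StartProducing above: Loop()
+// re-invokes the callable until it resolves to Break(value), then completes
+// with that value.
+namespace example {
+inline Future<int> CountTo(int limit) {
+  auto i = std::make_shared<int>(0);
+  return Loop([i, limit]() -> Future<ControlFlow<int>> {
+    if (*i >= limit) {
+      // Stop iterating and resolve the outer Future<int> with *i.
+      return Future<ControlFlow<int>>::MakeFinished(Break(*i));
+    }
+    ++*i;
+    // Ask Loop to invoke the callable again.
+    return Future<ControlFlow<int>>::MakeFinished(Continue());
+  });
+}
+}  // namespace example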
+
+ExecNode* MakeSourceNode(ExecPlan* plan, std::string label,
+ std::shared_ptr<Schema> output_schema,
+ AsyncGenerator<util::optional<ExecBatch>> generator) {
+ return plan->EmplaceNode<SourceNode>(plan, std::move(label), std::move(output_schema),
+ std::move(generator));
+}
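+
+// Usage sketch, with hypothetical names: feed a source node from in-memory
+// batches. MakeVectorGenerator comes from arrow/util/async_generator.h; the
+// plan, schema and batches are assumed to be prepared by the caller.
+namespace example {
+inline ExecNode* SourceFromBatches(ExecPlan* plan, std::shared_ptr<Schema> schema,
+                                   std::vector<util::optional<ExecBatch>> batches) {
+  // The generator yields each batch in turn, then signals end-of-iteration.
+  auto gen = MakeVectorGenerator(std::move(batches));
+  return MakeSourceNode(plan, "example_source", std::move(schema), std::move(gen));
+}
+}  // namespace example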
+
+struct FilterNode : ExecNode {
+ FilterNode(ExecNode* input, std::string label, Expression filter)
+ : ExecNode(input->plan(), std::move(label), {input}, {"target"},
+ /*output_schema=*/input->output_schema(),
+ /*num_outputs=*/1),
+ filter_(std::move(filter)) {}
+
+ const char* kind_name() override { return "FilterNode"; }
+
+ Result<ExecBatch> DoFilter(const ExecBatch& target) {
+ ARROW_ASSIGN_OR_RAISE(Expression simplified_filter,
+ SimplifyWithGuarantee(filter_, target.guarantee));
+
+ ARROW_ASSIGN_OR_RAISE(Datum mask, ExecuteScalarExpression(simplified_filter, target,
+ plan()->exec_context()));
+
+ if (mask.is_scalar()) {
+ const auto& mask_scalar = mask.scalar_as<BooleanScalar>();
+ if (mask_scalar.is_valid && mask_scalar.value) {
+ return target;
+ }
+
+ return target.Slice(0, 0);
+ }
+
+ // if the values are all scalar then the mask must also be
+ DCHECK(!std::all_of(target.values.begin(), target.values.end(),
+ [](const Datum& value) { return value.is_scalar(); }));
+
+ auto values = target.values;
+ for (auto& value : values) {
+ if (value.is_scalar()) continue;
+ ARROW_ASSIGN_OR_RAISE(value, Filter(value, mask, FilterOptions::Defaults()));
+ }
+ return ExecBatch::Make(std::move(values));
+ }
+
+ void InputReceived(ExecNode* input, int seq, ExecBatch batch) override {
+ DCHECK_EQ(input, inputs_[0]);
+
+ auto maybe_filtered = DoFilter(std::move(batch));
+ if (!maybe_filtered.ok()) {
+ outputs_[0]->ErrorReceived(this, maybe_filtered.status());
+ return;
+ }
+
+ maybe_filtered->guarantee = batch.guarantee;
+ outputs_[0]->InputReceived(this, seq, maybe_filtered.MoveValueUnsafe());
+ }
+
+ void ErrorReceived(ExecNode* input, Status error) override {
+ DCHECK_EQ(input, inputs_[0]);
+ outputs_[0]->ErrorReceived(this, std::move(error));
+ }
+
+ void InputFinished(ExecNode* input, int seq) override {
+ DCHECK_EQ(input, inputs_[0]);
+ outputs_[0]->InputFinished(this, seq);
+ }
+
+ Status StartProducing() override { return Status::OK(); }
+
+ void PauseProducing(ExecNode* output) override {}
+
+ void ResumeProducing(ExecNode* output) override {}
+
+ void StopProducing(ExecNode* output) override {
+ DCHECK_EQ(output, outputs_[0]);
+ StopProducing();
+ }
+
+ void StopProducing() override { inputs_[0]->StopProducing(this); }
+
+ Future<> finished() override { return inputs_[0]->finished(); }
+
+ private:
+ Expression filter_;
+};
+
+Result<ExecNode*> MakeFilterNode(ExecNode* input, std::string label, Expression filter) {
+ if (!filter.IsBound()) {
+ ARROW_ASSIGN_OR_RAISE(filter, filter.Bind(*input->output_schema()));
+ }
+
+ if (filter.type()->id() != Type::BOOL) {
+ return Status::TypeError("Filter expression must evaluate to bool, but ",
+ filter.ToString(), " evaluates to ",
+ filter.type()->ToString());
+ }
+
+ return input->plan()->EmplaceNode<FilterNode>(input, std::move(label),
+ std::move(filter));
+}
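+
+// Usage sketch, with a hypothetical column "x": keep only rows where x > 3.
+// call(), field_ref() and literal() are declared in
+// arrow/compute/exec/expression.h; MakeFilterNode binds the predicate against
+// the input's schema if it is not already bound.
+namespace example {
+inline Result<ExecNode*> FilterGreaterThanThree(ExecNode* input) {
+  Expression predicate = call("greater", {field_ref("x"), literal(3)});
+  return MakeFilterNode(input, "example_filter", std::move(predicate));
+}
+}  // namespace example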
+
+struct ProjectNode : ExecNode {
+ ProjectNode(ExecNode* input, std::string label, std::shared_ptr<Schema> output_schema,
+ std::vector<Expression> exprs)
+ : ExecNode(input->plan(), std::move(label), {input}, {"target"},
+ /*output_schema=*/std::move(output_schema),
+ /*num_outputs=*/1),
+ exprs_(std::move(exprs)) {}
+
+ const char* kind_name() override { return "ProjectNode"; }
+
+ Result<ExecBatch> DoProject(const ExecBatch& target) {
+ std::vector<Datum> values{exprs_.size()};
+ for (size_t i = 0; i < exprs_.size(); ++i) {
+ ARROW_ASSIGN_OR_RAISE(Expression simplified_expr,
+ SimplifyWithGuarantee(exprs_[i], target.guarantee));
+
+ ARROW_ASSIGN_OR_RAISE(values[i], ExecuteScalarExpression(simplified_expr, target,
+ plan()->exec_context()));
+ }
+ return ExecBatch{std::move(values), target.length};
+ }
+
+ void InputReceived(ExecNode* input, int seq, ExecBatch batch) override {
+ DCHECK_EQ(input, inputs_[0]);
+
+ auto maybe_projected = DoProject(std::move(batch));
+ if (!maybe_projected.ok()) {
+ outputs_[0]->ErrorReceived(this, maybe_projected.status());
+ return;
+ }
+
+ maybe_projected->guarantee = batch.guarantee;
+ outputs_[0]->InputReceived(this, seq, maybe_projected.MoveValueUnsafe());
+ }
+
+ void ErrorReceived(ExecNode* input, Status error) override {
+ DCHECK_EQ(input, inputs_[0]);
+ outputs_[0]->ErrorReceived(this, std::move(error));
+ }
+
+ void InputFinished(ExecNode* input, int seq) override {
+ DCHECK_EQ(input, inputs_[0]);
+ outputs_[0]->InputFinished(this, seq);
+ }
+
+ Status StartProducing() override { return Status::OK(); }
+
+ void PauseProducing(ExecNode* output) override {}
+
+ void ResumeProducing(ExecNode* output) override {}
+
+ void StopProducing(ExecNode* output) override {
+ DCHECK_EQ(output, outputs_[0]);
+ StopProducing();
+ }
+
+ void StopProducing() override { inputs_[0]->StopProducing(this); }
+
+ Future<> finished() override { return inputs_[0]->finished(); }
+
+ private:
+ std::vector<Expression> exprs_;
+};
+
+Result<ExecNode*> MakeProjectNode(ExecNode* input, std::string label,
+ std::vector<Expression> exprs,
+ std::vector<std::string> names) {
+ FieldVector fields(exprs.size());
+
+ if (names.size() == 0) {
+ names.resize(exprs.size());
+ for (size_t i = 0; i < exprs.size(); ++i) {
+ names[i] = exprs[i].ToString();
+ }
+ }
+
+ int i = 0;
+ for (auto& expr : exprs) {
+ if (!expr.IsBound()) {
+ ARROW_ASSIGN_OR_RAISE(expr, expr.Bind(*input->output_schema()));
+ }
+ fields[i] = field(std::move(names[i]), expr.type());
+ ++i;
+ }
+
+ return input->plan()->EmplaceNode<ProjectNode>(
+ input, std::move(label), schema(std::move(fields)), std::move(exprs));
+}
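+
+// Usage sketch, with hypothetical column names: emit a single computed
+// column. If the names vector were omitted, the output field would be
+// labeled with the expression's ToString(), as implemented above.
+namespace example {
+inline Result<ExecNode*> ProjectXPlusOne(ExecNode* input) {
+  return MakeProjectNode(input, "example_project",
+                         {call("add", {field_ref("x"), literal(1)})}, {"x_plus_one"});
+}
+}  // namespace example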
+
+struct SinkNode : ExecNode {
+ SinkNode(ExecNode* input, std::string label,
+ AsyncGenerator<util::optional<ExecBatch>>* generator)
+ : ExecNode(input->plan(), std::move(label), {input}, {"collected"}, {},
+ /*num_outputs=*/0),
+ producer_(MakeProducer(generator)) {}
+
+ static PushGenerator<util::optional<ExecBatch>>::Producer MakeProducer(
+ AsyncGenerator<util::optional<ExecBatch>>* out_gen) {
+ PushGenerator<util::optional<ExecBatch>> gen;
+ auto out = gen.producer();
+ *out_gen = std::move(gen);
+ return out;
+ }
+
+ const char* kind_name() override { return "SinkNode"; }
+
+ Status StartProducing() override {
+ finished_ = Future<>::Make();
+ return Status::OK();
+ }
+
+ // sink nodes have no outputs from which to feel backpressure
+ [[noreturn]] static void NoOutputs() {
+ DCHECK(false) << "no outputs; this should never be called";
+ std::abort();
+ }
+ [[noreturn]] void ResumeProducing(ExecNode* output) override { NoOutputs(); }
+ [[noreturn]] void PauseProducing(ExecNode* output) override { NoOutputs(); }
+ [[noreturn]] void StopProducing(ExecNode* output) override { NoOutputs(); }
+
+ void StopProducing() override {
+ Finish();
+ inputs_[0]->StopProducing(this);
+ }
+
+ Future<> finished() override { return finished_; }
+
+ void InputReceived(ExecNode* input, int seq_num, ExecBatch batch) override {
+ DCHECK_EQ(input, inputs_[0]);
+
+ std::unique_lock<std::mutex> lock(mutex_);
+ if (finished_.is_finished()) return;
+
+ ++num_received_;
+ if (num_received_ == emit_stop_) {
+ lock.unlock();
+ producer_.Push(std::move(batch));
+ Finish();
+ return;
+ }
+
+ if (emit_stop_ != -1) {
+ DCHECK_LE(seq_num, emit_stop_);
+ }
+
+ lock.unlock();
+ producer_.Push(std::move(batch));
+ }
+
+ void ErrorReceived(ExecNode* input, Status error) override {
+ DCHECK_EQ(input, inputs_[0]);
+ producer_.Push(std::move(error));
+ Finish();
+ inputs_[0]->StopProducing(this);
+ }
+
+ void InputFinished(ExecNode* input, int seq_stop) override {
+ std::unique_lock<std::mutex> lock(mutex_);
+ emit_stop_ = seq_stop;
+ if (num_received_ == emit_stop_) {
+ lock.unlock();
+ Finish();
+ }
+ }
+
+ private:
+ void Finish() {
+ if (producer_.Close()) {
+ finished_.MarkFinished();
+ }
+ }
+
+ std::mutex mutex_;
+
+ int num_received_ = 0;
+ int emit_stop_ = -1;
+ Future<> finished_ = Future<>::MakeFinished();
+
+ PushGenerator<util::optional<ExecBatch>>::Producer producer_;
+};
+
+AsyncGenerator<util::optional<ExecBatch>> MakeSinkNode(ExecNode* input,
+ std::string label) {
+ AsyncGenerator<util::optional<ExecBatch>> out;
+ (void)input->plan()->EmplaceNode<SinkNode>(input, std::move(label), &out);
+ return out;
+}
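+
+// Usage sketch: drain a sink into a vector once the plan has started.
+// CollectAsyncGenerator comes from arrow/util/async_generator.h; note that
+// the collected batches carry no ordering guarantee.
+namespace example {
+inline Future<std::vector<util::optional<ExecBatch>>> CollectAll(ExecNode* input) {
+  auto gen = MakeSinkNode(input, "example_sink");
+  return CollectAsyncGenerator(std::move(gen));
+}
+}  // namespace example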
+
+std::shared_ptr<RecordBatchReader> MakeGeneratorReader(
+ std::shared_ptr<Schema> schema,
+ std::function<Future<util::optional<ExecBatch>>()> gen, MemoryPool* pool) {
+ struct Impl : RecordBatchReader {
+ std::shared_ptr<Schema> schema() const override { return schema_; }
+
+ Status ReadNext(std::shared_ptr<RecordBatch>* record_batch) override {
+ ARROW_ASSIGN_OR_RAISE(auto batch, iterator_.Next());
+ if (batch) {
+ ARROW_ASSIGN_OR_RAISE(*record_batch, batch->ToRecordBatch(schema_, pool_));
+ } else {
+ *record_batch = IterationEnd<std::shared_ptr<RecordBatch>>();
+ }
+ return Status::OK();
+ }
+
+ MemoryPool* pool_;
+ std::shared_ptr<Schema> schema_;
+ Iterator<util::optional<ExecBatch>> iterator_;
+ };
+
+ auto out = std::make_shared<Impl>();
+ out->pool_ = pool;
+ out->schema_ = std::move(schema);
+ out->iterator_ = MakeGeneratorIterator(std::move(gen));
+ return out;
+}
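+
+// Usage sketch: adapt a sink's generator to the synchronous
+// RecordBatchReader interface and drain it. The schema is assumed to match
+// the batches the sink will emit.
+namespace example {
+inline Status ReadAll(ExecNode* input, std::shared_ptr<Schema> schema,
+                      MemoryPool* pool) {
+  auto reader =
+      MakeGeneratorReader(std::move(schema), MakeSinkNode(input, "example_sink"), pool);
+  std::shared_ptr<RecordBatch> batch;
+  do {
+    RETURN_NOT_OK(reader->ReadNext(&batch));
+    // ... consume batch; a null batch signals the end of the stream.
+  } while (batch != nullptr);
+  return Status::OK();
+}
+}  // namespace example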
+
+struct ScalarAggregateNode : ExecNode {
+ ScalarAggregateNode(ExecNode* input, std::string label,
+ std::shared_ptr<Schema> output_schema,
+ std::vector<const ScalarAggregateKernel*> kernels,
+ std::vector<std::vector<std::unique_ptr<KernelState>>> states)
+ : ExecNode(input->plan(), std::move(label), {input}, {"target"},
+ /*output_schema=*/std::move(output_schema),
+ /*num_outputs=*/1),
+ kernels_(std::move(kernels)),
+ states_(std::move(states)) {}
+
+ const char* kind_name() override { return "ScalarAggregateNode"; }
+
+ Status DoConsume(const ExecBatch& batch, size_t thread_index) {
+ for (size_t i = 0; i < kernels_.size(); ++i) {
+ KernelContext batch_ctx{plan()->exec_context()};
+ batch_ctx.SetState(states_[i][thread_index].get());
+ ExecBatch single_column_batch{{batch.values[i]}, batch.length};
+ RETURN_NOT_OK(kernels_[i]->consume(&batch_ctx, single_column_batch));
+ }
+ return Status::OK();
+ }
+
+ void InputReceived(ExecNode* input, int seq, ExecBatch batch) override {
+ DCHECK_EQ(input, inputs_[0]);
+
+ std::unique_lock<std::mutex> lock(mutex_);
+ auto it =
+ thread_indices_.emplace(std::this_thread::get_id(), thread_indices_.size()).first;
+ auto thread_index = it->second;
+
+ lock.unlock();
+
+ Status st = DoConsume(std::move(batch), thread_index);
+ if (!st.ok()) {
+ outputs_[0]->ErrorReceived(this, std::move(st));
+ return;
+ }
+
+ lock.lock();
+ ++num_received_;
+ st = MaybeFinish(&lock);
+ if (!st.ok()) {
+ outputs_[0]->ErrorReceived(this, std::move(st));
+ }
+ }
+
+ void ErrorReceived(ExecNode* input, Status error) override {
+ DCHECK_EQ(input, inputs_[0]);
+ outputs_[0]->ErrorReceived(this, std::move(error));
+ }
+
+ void InputFinished(ExecNode* input, int seq) override {
+ DCHECK_EQ(input, inputs_[0]);
+ std::unique_lock<std::mutex> lock(mutex_);
+ num_total_ = seq;
+ Status st = MaybeFinish(&lock);
+
+ if (!st.ok()) {
+ outputs_[0]->ErrorReceived(this, std::move(st));
+ }
+ }
+
+ Status StartProducing() override {
+ finished_ = Future<>::Make();
+ // Scalar aggregates will only output a single batch
+ outputs_[0]->InputFinished(this, 1);
+ return Status::OK();
+ }
+
+ void PauseProducing(ExecNode* output) override {}
+
+ void ResumeProducing(ExecNode* output) override {}
+
+ void StopProducing(ExecNode* output) override {
+ DCHECK_EQ(output, outputs_[0]);
+ StopProducing();
+ }
+
+ void StopProducing() override {
+ inputs_[0]->StopProducing(this);
+ finished_.MarkFinished();
+ }
+
+ Future<> finished() override { return finished_; }
+
+ private:
+ Status MaybeFinish(std::unique_lock<std::mutex>* lock) {
+ if (num_received_ != num_total_) return Status::OK();
+
+ if (states_.empty()) return Status::OK();
+
+ ExecBatch batch{{}, 1};
+ batch.values.resize(kernels_.size());
+
+ for (size_t i = 0; i < kernels_.size(); ++i) {
+ KernelContext ctx{plan()->exec_context()};
+ ARROW_ASSIGN_OR_RAISE(auto merged, ScalarAggregateKernel::MergeAll(
+ kernels_[i], &ctx, std::move(states_[i])));
+ RETURN_NOT_OK(kernels_[i]->finalize(&ctx, &batch.values[i]));
+ }
+ states_.clear();
+ lock->unlock();
+
+ outputs_[0]->InputReceived(this, 0, batch);
+
+ finished_.MarkFinished();
+ return Status::OK();
+ }
+
+ Future<> finished_ = Future<>::MakeFinished();
+ std::vector<const ScalarAggregateKernel*> kernels_;
+ std::vector<std::vector<std::unique_ptr<KernelState>>> states_;
+ std::unordered_map<std::thread::id, size_t> thread_indices_;
+ std::mutex mutex_;
+ int num_received_ = 0, num_total_ = -1;
+};
+
+Result<ExecNode*> MakeScalarAggregateNode(ExecNode* input, std::string label,
+ std::vector<internal::Aggregate> aggregates) {
+ if (input->output_schema()->num_fields() != static_cast<int>(aggregates.size())) {
+ return Status::Invalid("Provided ", aggregates.size(),
+ " aggregates, expected one for each field of ",
+ input->output_schema()->ToString());
+ }
+
+ auto exec_ctx = input->plan()->exec_context();
+
+ std::vector<const ScalarAggregateKernel*> kernels(aggregates.size());
+ std::vector<std::vector<std::unique_ptr<KernelState>>> states(kernels.size());
+ FieldVector fields(kernels.size());
+
+ for (size_t i = 0; i < kernels.size(); ++i) {
+ ARROW_ASSIGN_OR_RAISE(auto function,
+ exec_ctx->func_registry()->GetFunction(aggregates[i].function));
+
+ if (function->kind() != Function::SCALAR_AGGREGATE) {
+ return Status::Invalid("Provided non ScalarAggregateFunction ",
+ aggregates[i].function);
+ }
+
+ auto in_type = ValueDescr::Array(input->output_schema()->fields()[i]->type());
+
+ ARROW_ASSIGN_OR_RAISE(const Kernel* kernel, function->DispatchExact({in_type}));
+ kernels[i] = static_cast<const ScalarAggregateKernel*>(kernel);
+
+ if (aggregates[i].options == nullptr) {
+ aggregates[i].options = function->default_options();
+ }
+
+ KernelContext kernel_ctx{exec_ctx};
+ states[i].resize(exec_ctx->executor() ? exec_ctx->executor()->GetCapacity() : 1);
+ RETURN_NOT_OK(Kernel::InitAll(&kernel_ctx,
+ KernelInitArgs{kernels[i],
+ {
+ in_type,
+ },
+ aggregates[i].options},
+ &states[i]));
+
+ // pick one to resolve the kernel signature
+ kernel_ctx.SetState(states[i][0].get());
+ ARROW_ASSIGN_OR_RAISE(
+ auto descr, kernels[i]->signature->out_type().Resolve(&kernel_ctx, {in_type}));
+
+ fields[i] = field(aggregates[i].function, std::move(descr.type));
+ }
+
+ return input->plan()->EmplaceNode<ScalarAggregateNode>(
+ input, std::move(label), schema(std::move(fields)), std::move(kernels),
+ std::move(states));
+}
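+
+// Usage sketch: sum every input column. internal::Aggregate (from
+// arrow/compute/api_aggregate.h) pairs a registered scalar aggregate
+// function name with optional FunctionOptions; passing nullptr selects the
+// function's defaults, as handled above. Exactly one aggregate per input
+// field is required.
+namespace example {
+inline Result<ExecNode*> SumAllColumns(ExecNode* input) {
+  std::vector<internal::Aggregate> aggregates(
+      input->output_schema()->num_fields(), internal::Aggregate{"sum", nullptr});
+  return MakeScalarAggregateNode(input, "example_agg", std::move(aggregates));
+}
+}  // namespace example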
+
+} // namespace compute
+} // namespace arrow
diff --git a/contrib/libs/apache/arrow/cpp/src/arrow/compute/exec/exec_plan.h b/contrib/libs/apache/arrow/cpp/src/arrow/compute/exec/exec_plan.h
index 0df78fecd7c..c36c174af05 100644
--- a/contrib/libs/apache/arrow/cpp/src/arrow/compute/exec/exec_plan.h
+++ b/contrib/libs/apache/arrow/cpp/src/arrow/compute/exec/exec_plan.h
@@ -1,287 +1,287 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-#pragma once
-
-#include <functional>
-#include <memory>
-#include <string>
-#include <vector>
-
-#include "arrow/compute/api_aggregate.h"
-#include "arrow/compute/exec.h"
-#include "arrow/compute/type_fwd.h"
-#include "arrow/type_fwd.h"
-#include "arrow/util/macros.h"
-#include "arrow/util/optional.h"
-#include "arrow/util/visibility.h"
-
-namespace arrow {
-namespace compute {
-
-class ARROW_EXPORT ExecPlan : public std::enable_shared_from_this<ExecPlan> {
- public:
- using NodeVector = std::vector<ExecNode*>;
-
- virtual ~ExecPlan() = default;
-
- ExecContext* exec_context() const { return exec_context_; }
-
- /// Make an empty exec plan
- static Result<std::shared_ptr<ExecPlan>> Make(ExecContext* = default_exec_context());
-
- ExecNode* AddNode(std::unique_ptr<ExecNode> node);
-
- template <typename Node, typename... Args>
- Node* EmplaceNode(Args&&... args) {
- std::unique_ptr<Node> node{new Node{std::forward<Args>(args)...}};
- auto out = node.get();
- AddNode(std::move(node));
- return out;
- }
-
- /// The initial inputs
- const NodeVector& sources() const;
-
- /// The final outputs
- const NodeVector& sinks() const;
-
- Status Validate();
-
- /// \brief Start producing on all nodes
- ///
- /// Nodes are started in reverse topological order, such that any node
- /// is started before all of its inputs.
- Status StartProducing();
-
- /// \brief Stop producing on all nodes
- ///
- /// Nodes are stopped in topological order, such that any node
- /// is stopped before all of its outputs.
- void StopProducing();
-
- /// \brief A future which will be marked finished when all nodes have stopped producing.
- Future<> finished();
-
- protected:
- ExecContext* exec_context_;
- explicit ExecPlan(ExecContext* exec_context) : exec_context_(exec_context) {}
-};
-
-class ARROW_EXPORT ExecNode {
- public:
- using NodeVector = std::vector<ExecNode*>;
-
- virtual ~ExecNode() = default;
-
- virtual const char* kind_name() = 0;
-
- // The number of inputs/outputs expected by this node
- int num_inputs() const { return static_cast<int>(inputs_.size()); }
- int num_outputs() const { return num_outputs_; }
-
- /// This node's predecessors in the exec plan
- const NodeVector& inputs() const { return inputs_; }
-
- /// \brief Labels identifying the function of each input.
- const std::vector<std::string>& input_labels() const { return input_labels_; }
-
- /// This node's successors in the exec plan
- const NodeVector& outputs() const { return outputs_; }
-
- /// The datatypes for batches produced by this node
- const std::shared_ptr<Schema>& output_schema() const { return output_schema_; }
-
- /// This node's exec plan
- ExecPlan* plan() { return plan_; }
-
- /// \brief An optional label, for display and debugging
- ///
- /// There is no guarantee that this value is non-empty or unique.
- const std::string& label() const { return label_; }
-
- Status Validate() const;
-
- /// Upstream API:
- /// These functions are called by input nodes that want to inform this node
-  /// about an updated condition (a new input batch, an error, an impending
- /// end of stream).
- ///
- /// Implementation rules:
- /// - these may be called anytime after StartProducing() has succeeded
- /// (and even during or after StopProducing())
- /// - these may be called concurrently
- /// - these are allowed to call back into PauseProducing(), ResumeProducing()
- /// and StopProducing()
-
- /// Transfer input batch to ExecNode
- virtual void InputReceived(ExecNode* input, int seq_num, ExecBatch batch) = 0;
-
- /// Signal error to ExecNode
- virtual void ErrorReceived(ExecNode* input, Status error) = 0;
-
- /// Mark the inputs finished after the given number of batches.
- ///
- /// This may be called before all inputs are received. This simply fixes
- /// the total number of incoming batches for an input, so that the ExecNode
- /// knows when it has received all input, regardless of order.
- virtual void InputFinished(ExecNode* input, int seq_stop) = 0;
-
- /// Lifecycle API:
- /// - start / stop to initiate and terminate production
- /// - pause / resume to apply backpressure
- ///
- /// Implementation rules:
- /// - StartProducing() should not recurse into the inputs, as it is
- /// handled by ExecPlan::StartProducing()
- /// - PauseProducing(), ResumeProducing(), StopProducing() may be called
- /// concurrently (but only after StartProducing() has returned successfully)
- /// - PauseProducing(), ResumeProducing(), StopProducing() may be called
- /// by the downstream nodes' InputReceived(), ErrorReceived(), InputFinished()
- /// methods
- /// - StopProducing() should recurse into the inputs
- /// - StopProducing() must be idempotent
-
- // XXX What happens if StartProducing() calls an output's InputReceived()
- // synchronously, and InputReceived() decides to call back into StopProducing()
- // (or PauseProducing()) because it received enough data?
- //
- // Right now, since synchronous calls happen in both directions (input to
- // output and then output to input), a node must be careful to be reentrant
- // against synchronous calls from its output, *and* also concurrent calls from
- // other threads. The most reliable solution is to update the internal state
- // first, and notify outputs only at the end.
- //
- // Alternate rules:
- // - StartProducing(), ResumeProducing() can call synchronously into
-  //     its outputs' consuming methods (InputReceived() etc.)
- // - InputReceived(), ErrorReceived(), InputFinished() can call asynchronously
- // into its inputs' PauseProducing(), StopProducing()
- //
- // Alternate API:
- // - InputReceived(), ErrorReceived(), InputFinished() return a ProductionHint
- // enum: either None (default), PauseProducing, ResumeProducing, StopProducing
- // - A method allows passing a ProductionHint asynchronously from an output node
- // (replacing PauseProducing(), ResumeProducing(), StopProducing())
-
- /// \brief Start producing
- ///
- /// This must only be called once. If this fails, then other lifecycle
- /// methods must not be called.
- ///
- /// This is typically called automatically by ExecPlan::StartProducing().
- virtual Status StartProducing() = 0;
-
- /// \brief Pause producing temporarily
- ///
- /// This call is a hint that an output node is currently not willing
- /// to receive data.
- ///
- /// This may be called any number of times after StartProducing() succeeds.
- /// However, the node is still free to produce data (which may be difficult
- /// to prevent anyway if data is produced using multiple threads).
- virtual void PauseProducing(ExecNode* output) = 0;
-
- /// \brief Resume producing after a temporary pause
- ///
- /// This call is a hint that an output node is willing to receive data again.
- ///
- /// This may be called any number of times after StartProducing() succeeds.
- /// This may also be called concurrently with PauseProducing(), which suggests
- /// the implementation may use an atomic counter.
- virtual void ResumeProducing(ExecNode* output) = 0;
-
- /// \brief Stop producing definitively to a single output
- ///
- /// This call is a hint that an output node has completed and is not willing
- /// to receive any further data.
- virtual void StopProducing(ExecNode* output) = 0;
-
- /// \brief Stop producing definitively to all outputs
- virtual void StopProducing() = 0;
-
- /// \brief A future which will be marked finished when this node has stopped producing.
- virtual Future<> finished() = 0;
-
- protected:
- ExecNode(ExecPlan* plan, std::string label, NodeVector inputs,
- std::vector<std::string> input_labels, std::shared_ptr<Schema> output_schema,
- int num_outputs);
-
- ExecPlan* plan_;
- std::string label_;
-
- NodeVector inputs_;
- std::vector<std::string> input_labels_;
-
- std::shared_ptr<Schema> output_schema_;
- int num_outputs_;
- NodeVector outputs_;
-};
-
-/// \brief Adapt an AsyncGenerator<ExecBatch> as a source node
-///
-/// plan->exec_context()->executor() is used to parallelize pushing to
-/// outputs, if provided.
-ARROW_EXPORT
-ExecNode* MakeSourceNode(ExecPlan* plan, std::string label,
- std::shared_ptr<Schema> output_schema,
- std::function<Future<util::optional<ExecBatch>>()>);
-
-/// \brief Add a sink node which forwards to an AsyncGenerator<ExecBatch>
-///
-/// Emitted batches will not be ordered.
-ARROW_EXPORT
-std::function<Future<util::optional<ExecBatch>>()> MakeSinkNode(ExecNode* input,
- std::string label);
-
-/// \brief Wrap an ExecBatch generator in a RecordBatchReader.
-///
-/// The RecordBatchReader does not impose any ordering on emitted batches.
-ARROW_EXPORT
-std::shared_ptr<RecordBatchReader> MakeGeneratorReader(
- std::shared_ptr<Schema>, std::function<Future<util::optional<ExecBatch>>()>,
- MemoryPool*);
-
-/// \brief Make a node which excludes some rows from batches passed through it
-///
-/// The filter Expression will be evaluated against each batch which is pushed to
-/// this node. Any rows for which the filter does not evaluate to `true` will be excluded
-/// from the batch emitted by this node.
-///
-/// If the filter is not already bound, it will be bound against the input's schema.
-ARROW_EXPORT
-Result<ExecNode*> MakeFilterNode(ExecNode* input, std::string label, Expression filter);
-
-/// \brief Make a node which executes expressions on input batches, producing new batches.
-///
-/// Each expression will be evaluated against each batch which is pushed to
-/// this node to produce a corresponding output column.
-///
-/// If exprs are not already bound, they will be bound against the input's schema.
-/// If names are not provided, the string representations of exprs will be used.
-ARROW_EXPORT
-Result<ExecNode*> MakeProjectNode(ExecNode* input, std::string label,
- std::vector<Expression> exprs,
- std::vector<std::string> names = {});
-
-ARROW_EXPORT
-Result<ExecNode*> MakeScalarAggregateNode(ExecNode* input, std::string label,
- std::vector<internal::Aggregate> aggregates);
-
-} // namespace compute
-} // namespace arrow
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+
+#include <functional>
+#include <memory>
+#include <string>
+#include <vector>
+
+#include "arrow/compute/api_aggregate.h"
+#include "arrow/compute/exec.h"
+#include "arrow/compute/type_fwd.h"
+#include "arrow/type_fwd.h"
+#include "arrow/util/macros.h"
+#include "arrow/util/optional.h"
+#include "arrow/util/visibility.h"
+
+namespace arrow {
+namespace compute {
+
+class ARROW_EXPORT ExecPlan : public std::enable_shared_from_this<ExecPlan> {
+ public:
+ using NodeVector = std::vector<ExecNode*>;
+
+ virtual ~ExecPlan() = default;
+
+ ExecContext* exec_context() const { return exec_context_; }
+
+ /// Make an empty exec plan
+ static Result<std::shared_ptr<ExecPlan>> Make(ExecContext* = default_exec_context());
+
+ ExecNode* AddNode(std::unique_ptr<ExecNode> node);
+
+ template <typename Node, typename... Args>
+ Node* EmplaceNode(Args&&... args) {
+ std::unique_ptr<Node> node{new Node{std::forward<Args>(args)...}};
+ auto out = node.get();
+ AddNode(std::move(node));
+ return out;
+ }
+
+ /// The initial inputs
+ const NodeVector& sources() const;
+
+ /// The final outputs
+ const NodeVector& sinks() const;
+
+ Status Validate();
+
+ /// \brief Start producing on all nodes
+ ///
+ /// Nodes are started in reverse topological order, such that any node
+ /// is started before all of its inputs.
+ Status StartProducing();
+
+ /// \brief Stop producing on all nodes
+ ///
+ /// Nodes are stopped in topological order, such that any node
+ /// is stopped before all of its outputs.
+ void StopProducing();
+
+ /// \brief A future which will be marked finished when all nodes have stopped producing.
+ Future<> finished();
+
+ protected:
+ ExecContext* exec_context_;
+ explicit ExecPlan(ExecContext* exec_context) : exec_context_(exec_context) {}
+};
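+
+// A typical ExecPlan lifecycle, as a sketch (error handling elided; `schema`
+// and `generator` are assumed to be prepared by the caller; the node
+// factories are declared at the bottom of this header):
+//
+//   ARROW_ASSIGN_OR_RAISE(auto plan, ExecPlan::Make());
+//   ExecNode* source = MakeSourceNode(plan.get(), "source", schema, generator);
+//   auto sink_gen = MakeSinkNode(source, "sink");
+//   RETURN_NOT_OK(plan->Validate());
+//   RETURN_NOT_OK(plan->StartProducing());
+//   // ... drain sink_gen ...
+//   plan->finished().Wait();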
+
+class ARROW_EXPORT ExecNode {
+ public:
+ using NodeVector = std::vector<ExecNode*>;
+
+ virtual ~ExecNode() = default;
+
+ virtual const char* kind_name() = 0;
+
+ // The number of inputs/outputs expected by this node
+ int num_inputs() const { return static_cast<int>(inputs_.size()); }
+ int num_outputs() const { return num_outputs_; }
+
+ /// This node's predecessors in the exec plan
+ const NodeVector& inputs() const { return inputs_; }
+
+ /// \brief Labels identifying the function of each input.
+ const std::vector<std::string>& input_labels() const { return input_labels_; }
+
+ /// This node's successors in the exec plan
+ const NodeVector& outputs() const { return outputs_; }
+
+ /// The datatypes for batches produced by this node
+ const std::shared_ptr<Schema>& output_schema() const { return output_schema_; }
+
+ /// This node's exec plan
+ ExecPlan* plan() { return plan_; }
+
+ /// \brief An optional label, for display and debugging
+ ///
+ /// There is no guarantee that this value is non-empty or unique.
+ const std::string& label() const { return label_; }
+
+ Status Validate() const;
+
+ /// Upstream API:
+ /// These functions are called by input nodes that want to inform this node
+  /// about an updated condition (a new input batch, an error, an impending
+ /// end of stream).
+ ///
+ /// Implementation rules:
+ /// - these may be called anytime after StartProducing() has succeeded
+ /// (and even during or after StopProducing())
+ /// - these may be called concurrently
+ /// - these are allowed to call back into PauseProducing(), ResumeProducing()
+ /// and StopProducing()
+
+ /// Transfer input batch to ExecNode
+ virtual void InputReceived(ExecNode* input, int seq_num, ExecBatch batch) = 0;
+
+ /// Signal error to ExecNode
+ virtual void ErrorReceived(ExecNode* input, Status error) = 0;
+
+ /// Mark the inputs finished after the given number of batches.
+ ///
+ /// This may be called before all inputs are received. This simply fixes
+ /// the total number of incoming batches for an input, so that the ExecNode
+ /// knows when it has received all input, regardless of order.
+ virtual void InputFinished(ExecNode* input, int seq_stop) = 0;
+
+ /// Lifecycle API:
+ /// - start / stop to initiate and terminate production
+ /// - pause / resume to apply backpressure
+ ///
+ /// Implementation rules:
+ /// - StartProducing() should not recurse into the inputs, as it is
+ /// handled by ExecPlan::StartProducing()
+ /// - PauseProducing(), ResumeProducing(), StopProducing() may be called
+ /// concurrently (but only after StartProducing() has returned successfully)
+ /// - PauseProducing(), ResumeProducing(), StopProducing() may be called
+ /// by the downstream nodes' InputReceived(), ErrorReceived(), InputFinished()
+ /// methods
+ /// - StopProducing() should recurse into the inputs
+ /// - StopProducing() must be idempotent
+
+ // XXX What happens if StartProducing() calls an output's InputReceived()
+ // synchronously, and InputReceived() decides to call back into StopProducing()
+ // (or PauseProducing()) because it received enough data?
+ //
+ // Right now, since synchronous calls happen in both directions (input to
+ // output and then output to input), a node must be careful to be reentrant
+ // against synchronous calls from its output, *and* also concurrent calls from
+ // other threads. The most reliable solution is to update the internal state
+ // first, and notify outputs only at the end.
+ //
+ // Alternate rules:
+ // - StartProducing(), ResumeProducing() can call synchronously into
+  //     its outputs' consuming methods (InputReceived() etc.)
+ // - InputReceived(), ErrorReceived(), InputFinished() can call asynchronously
+ // into its inputs' PauseProducing(), StopProducing()
+ //
+ // Alternate API:
+ // - InputReceived(), ErrorReceived(), InputFinished() return a ProductionHint
+ // enum: either None (default), PauseProducing, ResumeProducing, StopProducing
+ // - A method allows passing a ProductionHint asynchronously from an output node
+ // (replacing PauseProducing(), ResumeProducing(), StopProducing())
+
+ /// \brief Start producing
+ ///
+ /// This must only be called once. If this fails, then other lifecycle
+ /// methods must not be called.
+ ///
+ /// This is typically called automatically by ExecPlan::StartProducing().
+ virtual Status StartProducing() = 0;
+
+ /// \brief Pause producing temporarily
+ ///
+ /// This call is a hint that an output node is currently not willing
+ /// to receive data.
+ ///
+ /// This may be called any number of times after StartProducing() succeeds.
+ /// However, the node is still free to produce data (which may be difficult
+ /// to prevent anyway if data is produced using multiple threads).
+ virtual void PauseProducing(ExecNode* output) = 0;
+
+ /// \brief Resume producing after a temporary pause
+ ///
+ /// This call is a hint that an output node is willing to receive data again.
+ ///
+ /// This may be called any number of times after StartProducing() succeeds.
+ /// This may also be called concurrently with PauseProducing(), which suggests
+ /// the implementation may use an atomic counter.
+ virtual void ResumeProducing(ExecNode* output) = 0;
+
+ /// \brief Stop producing definitively to a single output
+ ///
+ /// This call is a hint that an output node has completed and is not willing
+ /// to receive any further data.
+ virtual void StopProducing(ExecNode* output) = 0;
+
+ /// \brief Stop producing definitively to all outputs
+ virtual void StopProducing() = 0;
+
+ /// \brief A future which will be marked finished when this node has stopped producing.
+ virtual Future<> finished() = 0;
+
+ protected:
+ ExecNode(ExecPlan* plan, std::string label, NodeVector inputs,
+ std::vector<std::string> input_labels, std::shared_ptr<Schema> output_schema,
+ int num_outputs);
+
+ ExecPlan* plan_;
+ std::string label_;
+
+ NodeVector inputs_;
+ std::vector<std::string> input_labels_;
+
+ std::shared_ptr<Schema> output_schema_;
+ int num_outputs_;
+ NodeVector outputs_;
+};
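+
+// Sketch of a minimal single-input, single-output pass-through node obeying
+// the rules above (constructor and kind_name() elided; compare FilterNode in
+// exec_plan.cc, which forwards in exactly this way):
+//
+//   struct PassThruNode : ExecNode {
+//     void InputReceived(ExecNode* input, int seq, ExecBatch batch) override {
+//       outputs_[0]->InputReceived(this, seq, std::move(batch));
+//     }
+//     void ErrorReceived(ExecNode* input, Status error) override {
+//       outputs_[0]->ErrorReceived(this, std::move(error));
+//     }
+//     void InputFinished(ExecNode* input, int seq_stop) override {
+//       outputs_[0]->InputFinished(this, seq_stop);
+//     }
+//     Status StartProducing() override { return Status::OK(); }
+//     void PauseProducing(ExecNode* output) override {}
+//     void ResumeProducing(ExecNode* output) override {}
+//     void StopProducing(ExecNode* output) override { StopProducing(); }
+//     void StopProducing() override { inputs_[0]->StopProducing(this); }
+//     Future<> finished() override { return inputs_[0]->finished(); }
+//   };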
+
+/// \brief Adapt an AsyncGenerator<ExecBatch> as a source node
+///
+/// plan->exec_context()->executor() is used to parallelize pushing to
+/// outputs, if provided.
+ARROW_EXPORT
+ExecNode* MakeSourceNode(ExecPlan* plan, std::string label,
+ std::shared_ptr<Schema> output_schema,
+ std::function<Future<util::optional<ExecBatch>>()>);
+
+/// \brief Add a sink node which forwards to an AsyncGenerator<ExecBatch>
+///
+/// Emitted batches will not be ordered.
+ARROW_EXPORT
+std::function<Future<util::optional<ExecBatch>>()> MakeSinkNode(ExecNode* input,
+ std::string label);
+
+/// \brief Wrap an ExecBatch generator in a RecordBatchReader.
+///
+/// The RecordBatchReader does not impose any ordering on emitted batches.
+ARROW_EXPORT
+std::shared_ptr<RecordBatchReader> MakeGeneratorReader(
+ std::shared_ptr<Schema>, std::function<Future<util::optional<ExecBatch>>()>,
+ MemoryPool*);
+
+/// \brief Make a node which excludes some rows from batches passed through it
+///
+/// The filter Expression will be evaluated against each batch which is pushed to
+/// this node. Any rows for which the filter does not evaluate to `true` will be excluded
+/// from the batch emitted by this node.
+///
+/// If the filter is not already bound, it will be bound against the input's schema.
+ARROW_EXPORT
+Result<ExecNode*> MakeFilterNode(ExecNode* input, std::string label, Expression filter);
+
+/// \brief Make a node which executes expressions on input batches, producing new batches.
+///
+/// Each expression will be evaluated against each batch which is pushed to
+/// this node to produce a corresponding output column.
+///
+/// If exprs are not already bound, they will be bound against the input's schema.
+/// If names are not provided, the string representations of exprs will be used.
+ARROW_EXPORT
+Result<ExecNode*> MakeProjectNode(ExecNode* input, std::string label,
+ std::vector<Expression> exprs,
+ std::vector<std::string> names = {});
+
+ARROW_EXPORT
+Result<ExecNode*> MakeScalarAggregateNode(ExecNode* input, std::string label,
+ std::vector<internal::Aggregate> aggregates);
+
+} // namespace compute
+} // namespace arrow
diff --git a/contrib/libs/apache/arrow/cpp/src/arrow/compute/exec/expression.cc b/contrib/libs/apache/arrow/cpp/src/arrow/compute/exec/expression.cc
index 44fb7cf1104..4aab64a46a4 100644
--- a/contrib/libs/apache/arrow/cpp/src/arrow/compute/exec/expression.cc
+++ b/contrib/libs/apache/arrow/cpp/src/arrow/compute/exec/expression.cc
@@ -1,1186 +1,1186 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-#include "arrow/compute/exec/expression.h"
-
-#include <unordered_map>
-#include <unordered_set>
-
-#include "arrow/chunked_array.h"
-#include "arrow/compute/api_vector.h"
-#include "arrow/compute/exec/expression_internal.h"
-#include "arrow/compute/exec_internal.h"
-#include "arrow/compute/function_internal.h"
-#include "arrow/io/memory.h"
-#include "arrow/ipc/reader.h"
-#include "arrow/ipc/writer.h"
-#include "arrow/util/hash_util.h"
-#include "arrow/util/key_value_metadata.h"
-#include "arrow/util/logging.h"
-#include "arrow/util/optional.h"
-#include "arrow/util/string.h"
-#include "arrow/util/value_parsing.h"
-
-namespace arrow {
-
-using internal::checked_cast;
-using internal::checked_pointer_cast;
-
-namespace compute {
-
-void Expression::Call::ComputeHash() {
- hash = std::hash<std::string>{}(function_name);
- for (const auto& arg : arguments) {
- arrow::internal::hash_combine(hash, arg.hash());
- }
-}
-
-Expression::Expression(Call call) {
- call.ComputeHash();
- impl_ = std::make_shared<Impl>(std::move(call));
-}
-
-Expression::Expression(Datum literal)
- : impl_(std::make_shared<Impl>(std::move(literal))) {}
-
-Expression::Expression(Parameter parameter)
- : impl_(std::make_shared<Impl>(std::move(parameter))) {}
-
-Expression literal(Datum lit) { return Expression(std::move(lit)); }
-
-Expression field_ref(FieldRef ref) {
- return Expression(Expression::Parameter{std::move(ref), ValueDescr{}, -1});
-}
-
-Expression call(std::string function, std::vector<Expression> arguments,
- std::shared_ptr<compute::FunctionOptions> options) {
- Expression::Call call;
- call.function_name = std::move(function);
- call.arguments = std::move(arguments);
- call.options = std::move(options);
- return Expression(std::move(call));
-}
-
-const Datum* Expression::literal() const { return util::get_if<Datum>(impl_.get()); }
-
-const Expression::Parameter* Expression::parameter() const {
- return util::get_if<Parameter>(impl_.get());
-}
-
-const FieldRef* Expression::field_ref() const {
- if (auto parameter = this->parameter()) {
- return &parameter->ref;
- }
- return nullptr;
-}
-
-const Expression::Call* Expression::call() const {
- return util::get_if<Call>(impl_.get());
-}
-
-ValueDescr Expression::descr() const {
- if (impl_ == nullptr) return {};
-
- if (auto lit = literal()) {
- return lit->descr();
- }
-
- if (auto parameter = this->parameter()) {
- return parameter->descr;
- }
-
- return CallNotNull(*this)->descr;
-}
-
-namespace {
-
-std::string PrintDatum(const Datum& datum) {
- if (datum.is_scalar()) {
- if (!datum.scalar()->is_valid) return "null";
-
- switch (datum.type()->id()) {
- case Type::STRING:
- case Type::LARGE_STRING:
- return '"' +
- Escape(util::string_view(*datum.scalar_as<BaseBinaryScalar>().value)) +
- '"';
-
- case Type::BINARY:
- case Type::FIXED_SIZE_BINARY:
- case Type::LARGE_BINARY:
- return '"' + datum.scalar_as<BaseBinaryScalar>().value->ToHexString() + '"';
-
- default:
- break;
- }
-
- return datum.scalar()->ToString();
- }
- return datum.ToString();
-}
-
-} // namespace
-
-std::string Expression::ToString() const {
- if (auto lit = literal()) {
- return PrintDatum(*lit);
- }
-
- if (auto ref = field_ref()) {
- if (auto name = ref->name()) {
- return *name;
- }
- if (auto path = ref->field_path()) {
- return path->ToString();
- }
- return ref->ToString();
- }
-
- auto call = CallNotNull(*this);
- auto binary = [&](std::string op) {
- return "(" + call->arguments[0].ToString() + " " + op + " " +
- call->arguments[1].ToString() + ")";
- };
-
- if (auto cmp = Comparison::Get(call->function_name)) {
- return binary(Comparison::GetOp(*cmp));
- }
-
- constexpr util::string_view kleene = "_kleene";
- if (util::string_view{call->function_name}.ends_with(kleene)) {
- auto op = call->function_name.substr(0, call->function_name.size() - kleene.size());
- return binary(std::move(op));
- }
-
- if (auto options = GetMakeStructOptions(*call)) {
- std::string out = "{";
- auto argument = call->arguments.begin();
- for (const auto& field_name : options->field_names) {
- out += field_name + "=" + argument++->ToString() + ", ";
- }
- out.resize(out.size() - 1);
- out.back() = '}';
- return out;
- }
-
- std::string out = call->function_name + "(";
- for (const auto& arg : call->arguments) {
- out += arg.ToString() + ", ";
- }
-
- if (call->options) {
- out += call->options->ToString();
- out.resize(out.size() + 1);
- } else {
- out.resize(out.size() - 1);
- }
- out.back() = ')';
- return out;
-}
-
-void PrintTo(const Expression& expr, std::ostream* os) {
- *os << expr.ToString();
- if (expr.IsBound()) {
- *os << "[bound]";
- }
-}
-
-bool Expression::Equals(const Expression& other) const {
- if (Identical(*this, other)) return true;
-
- if (impl_->index() != other.impl_->index()) {
- return false;
- }
-
- if (auto lit = literal()) {
- return lit->Equals(*other.literal());
- }
-
- if (auto ref = field_ref()) {
- return ref->Equals(*other.field_ref());
- }
-
- auto call = CallNotNull(*this);
- auto other_call = CallNotNull(other);
-
- if (call->function_name != other_call->function_name ||
- call->kernel != other_call->kernel) {
- return false;
- }
-
- for (size_t i = 0; i < call->arguments.size(); ++i) {
- if (!call->arguments[i].Equals(other_call->arguments[i])) {
- return false;
- }
- }
-
- if (call->options == other_call->options) return true;
- if (call->options && other_call->options) {
- return call->options->Equals(other_call->options);
- }
- return false;
-}
-
-bool Identical(const Expression& l, const Expression& r) { return l.impl_ == r.impl_; }
-
-size_t Expression::hash() const {
- if (auto lit = literal()) {
- if (lit->is_scalar()) {
- return lit->scalar()->hash();
- }
- return 0;
- }
-
- if (auto ref = field_ref()) {
- return ref->hash();
- }
-
- return CallNotNull(*this)->hash;
-}
-
-bool Expression::IsBound() const {
- if (type() == nullptr) return false;
-
- if (auto call = this->call()) {
- if (call->kernel == nullptr) return false;
-
- for (const Expression& arg : call->arguments) {
- if (!arg.IsBound()) return false;
- }
- }
-
- return true;
-}
-
-bool Expression::IsScalarExpression() const {
- if (auto lit = literal()) {
- return lit->is_scalar();
- }
-
- if (field_ref()) return true;
-
- auto call = CallNotNull(*this);
-
- for (const Expression& arg : call->arguments) {
- if (!arg.IsScalarExpression()) return false;
- }
-
- if (call->function) {
- return call->function->kind() == compute::Function::SCALAR;
- }
-
- // this expression is not bound; make a best guess based on
- // the default function registry
- if (auto function = compute::GetFunctionRegistry()
- ->GetFunction(call->function_name)
- .ValueOr(nullptr)) {
- return function->kind() == compute::Function::SCALAR;
- }
-
- // unknown function or other error; conservatively return false
- return false;
-}
-
-bool Expression::IsNullLiteral() const {
- if (auto lit = literal()) {
- if (lit->null_count() == lit->length()) {
- return true;
- }
- }
-
- return false;
-}
-
-bool Expression::IsSatisfiable() const {
- if (type() && type()->id() == Type::NA) {
- return false;
- }
-
- if (auto lit = literal()) {
- if (lit->null_count() == lit->length()) {
- return false;
- }
-
- if (lit->is_scalar() && lit->type()->id() == Type::BOOL) {
- return lit->scalar_as<BooleanScalar>().value;
- }
- }
-
- return true;
-}
-
-namespace {
-
-// Produce a bound Expression from unbound Call and bound arguments.
-Result<Expression> BindNonRecursive(Expression::Call call, bool insert_implicit_casts,
- compute::ExecContext* exec_context) {
- DCHECK(std::all_of(call.arguments.begin(), call.arguments.end(),
- [](const Expression& argument) { return argument.IsBound(); }));
-
- auto descrs = GetDescriptors(call.arguments);
- ARROW_ASSIGN_OR_RAISE(call.function, GetFunction(call, exec_context));
-
- if (!insert_implicit_casts) {
- ARROW_ASSIGN_OR_RAISE(call.kernel, call.function->DispatchExact(descrs));
- } else {
- ARROW_ASSIGN_OR_RAISE(call.kernel, call.function->DispatchBest(&descrs));
-
- for (size_t i = 0; i < descrs.size(); ++i) {
- if (descrs[i] == call.arguments[i].descr()) continue;
-
- if (descrs[i].shape != call.arguments[i].descr().shape) {
- return Status::NotImplemented(
- "Automatic broadcasting of scalars arguments to arrays in ",
- Expression(std::move(call)).ToString());
- }
-
- if (auto lit = call.arguments[i].literal()) {
- ARROW_ASSIGN_OR_RAISE(Datum new_lit, compute::Cast(*lit, descrs[i].type));
- call.arguments[i] = literal(std::move(new_lit));
- continue;
- }
-
- // construct an implicit cast Expression with which to replace this argument
- Expression::Call implicit_cast;
- implicit_cast.function_name = "cast";
- implicit_cast.arguments = {std::move(call.arguments[i])};
- implicit_cast.options = std::make_shared<compute::CastOptions>(
- compute::CastOptions::Safe(descrs[i].type));
-
- ARROW_ASSIGN_OR_RAISE(
- call.arguments[i],
- BindNonRecursive(std::move(implicit_cast),
- /*insert_implicit_casts=*/false, exec_context));
- }
- }
-
- compute::KernelContext kernel_context(exec_context);
- if (call.kernel->init) {
- ARROW_ASSIGN_OR_RAISE(
- call.kernel_state,
- call.kernel->init(&kernel_context, {call.kernel, descrs, call.options.get()}));
-
- kernel_context.SetState(call.kernel_state.get());
- }
-
- ARROW_ASSIGN_OR_RAISE(
- call.descr, call.kernel->signature->out_type().Resolve(&kernel_context, descrs));
-
- return Expression(std::move(call));
-}
-
-template <typename TypeOrSchema>
-Result<Expression> BindImpl(Expression expr, const TypeOrSchema& in,
- ValueDescr::Shape shape, compute::ExecContext* exec_context) {
- if (exec_context == nullptr) {
- compute::ExecContext exec_context;
- return BindImpl(std::move(expr), in, shape, &exec_context);
- }
-
- if (expr.literal()) return expr;
-
- if (auto ref = expr.field_ref()) {
- if (ref->IsNested()) {
- return Status::NotImplemented("nested field references");
- }
-
- ARROW_ASSIGN_OR_RAISE(auto path, ref->FindOne(in));
-
- auto bound = *expr.parameter();
- bound.index = path[0];
- ARROW_ASSIGN_OR_RAISE(auto field, path.Get(in));
- bound.descr.type = field->type();
- bound.descr.shape = shape;
- return Expression{std::move(bound)};
- }
-
- auto call = *CallNotNull(expr);
- for (auto& argument : call.arguments) {
- ARROW_ASSIGN_OR_RAISE(argument,
- BindImpl(std::move(argument), in, shape, exec_context));
- }
- return BindNonRecursive(std::move(call),
- /*insert_implicit_casts=*/true, exec_context);
-}
-
-} // namespace
-
-Result<Expression> Expression::Bind(const ValueDescr& in,
- compute::ExecContext* exec_context) const {
- return BindImpl(*this, *in.type, in.shape, exec_context);
-}
-
-Result<Expression> Expression::Bind(const Schema& in_schema,
- compute::ExecContext* exec_context) const {
- return BindImpl(*this, in_schema, ValueDescr::ARRAY, exec_context);
-}
-
-Result<ExecBatch> MakeExecBatch(const Schema& full_schema, const Datum& partial) {
- ExecBatch out;
-
- if (partial.kind() == Datum::RECORD_BATCH) {
- const auto& partial_batch = *partial.record_batch();
- out.length = partial_batch.num_rows();
-
- for (const auto& field : full_schema.fields()) {
- ARROW_ASSIGN_OR_RAISE(auto column,
- FieldRef(field->name()).GetOneOrNone(partial_batch));
-
- if (column) {
- if (!column->type()->Equals(field->type())) {
- // Referenced field was present but didn't have the expected type.
- // This *should* be handled by readers, and will just be an error in the future.
- ARROW_ASSIGN_OR_RAISE(
- auto converted,
- compute::Cast(column, field->type(), compute::CastOptions::Safe()));
- column = converted.make_array();
- }
- out.values.emplace_back(std::move(column));
- } else {
- out.values.emplace_back(MakeNullScalar(field->type()));
- }
- }
- return out;
- }
-
- // wasteful but useful for testing:
- if (partial.type()->id() == Type::STRUCT) {
- if (partial.is_array()) {
- ARROW_ASSIGN_OR_RAISE(auto partial_batch,
- RecordBatch::FromStructArray(partial.make_array()));
-
- return MakeExecBatch(full_schema, partial_batch);
- }
-
- if (partial.is_scalar()) {
- ARROW_ASSIGN_OR_RAISE(auto partial_array,
- MakeArrayFromScalar(*partial.scalar(), 1));
- ARROW_ASSIGN_OR_RAISE(auto out, MakeExecBatch(full_schema, partial_array));
-
- for (Datum& value : out.values) {
- if (value.is_scalar()) continue;
- ARROW_ASSIGN_OR_RAISE(value, value.make_array()->GetScalar(0));
- }
- return out;
- }
- }
-
- return Status::NotImplemented("MakeExecBatch from ", PrintDatum(partial));
-}
-
-Result<Datum> ExecuteScalarExpression(const Expression& expr, const Schema& full_schema,
- const Datum& partial_input,
- compute::ExecContext* exec_context) {
- ARROW_ASSIGN_OR_RAISE(auto input, MakeExecBatch(full_schema, partial_input));
- return ExecuteScalarExpression(expr, input, exec_context);
-}
-
-Result<Datum> ExecuteScalarExpression(const Expression& expr, const ExecBatch& input,
- compute::ExecContext* exec_context) {
- if (exec_context == nullptr) {
- compute::ExecContext exec_context;
- return ExecuteScalarExpression(expr, input, &exec_context);
- }
-
- if (!expr.IsBound()) {
- return Status::Invalid("Cannot Execute unbound expression.");
- }
-
- if (!expr.IsScalarExpression()) {
- return Status::Invalid(
- "ExecuteScalarExpression cannot Execute non-scalar expression ", expr.ToString());
- }
-
- if (auto lit = expr.literal()) return *lit;
-
- if (auto param = expr.parameter()) {
- if (param->descr.type->id() == Type::NA) {
- return MakeNullScalar(null());
- }
-
- const Datum& field = input[param->index];
- if (!field.type()->Equals(param->descr.type)) {
- return Status::Invalid("Referenced field ", expr.ToString(), " was ",
- field.type()->ToString(), " but should have been ",
- param->descr.type->ToString());
- }
-
- return field;
- }
-
- auto call = CallNotNull(expr);
-
- std::vector<Datum> arguments(call->arguments.size());
- for (size_t i = 0; i < arguments.size(); ++i) {
- ARROW_ASSIGN_OR_RAISE(
- arguments[i], ExecuteScalarExpression(call->arguments[i], input, exec_context));
- }
-
- auto executor = compute::detail::KernelExecutor::MakeScalar();
-
- compute::KernelContext kernel_context(exec_context);
- kernel_context.SetState(call->kernel_state.get());
-
- auto kernel = call->kernel;
- auto descrs = GetDescriptors(arguments);
- auto options = call->options.get();
- RETURN_NOT_OK(executor->Init(&kernel_context, {kernel, descrs, options}));
-
- auto listener = std::make_shared<compute::detail::DatumAccumulator>();
- RETURN_NOT_OK(executor->Execute(arguments, listener.get()));
- return executor->WrapResults(arguments, listener->values());
-}
-
-namespace {
-
-std::array<std::pair<const Expression&, const Expression&>, 2>
-ArgumentsAndFlippedArguments(const Expression::Call& call) {
- DCHECK_EQ(call.arguments.size(), 2);
- return {std::pair<const Expression&, const Expression&>{call.arguments[0],
- call.arguments[1]},
- std::pair<const Expression&, const Expression&>{call.arguments[1],
- call.arguments[0]}};
-}
-
-template <typename BinOp, typename It,
- typename Out = typename std::iterator_traits<It>::value_type>
-util::optional<Out> FoldLeft(It begin, It end, const BinOp& bin_op) {
- if (begin == end) return util::nullopt;
-
- Out folded = std::move(*begin++);
- while (begin != end) {
- folded = bin_op(std::move(folded), std::move(*begin++));
- }
- return folded;
-}
-
-util::optional<compute::NullHandling::type> GetNullHandling(
- const Expression::Call& call) {
- if (call.function && call.function->kind() == compute::Function::SCALAR) {
- return static_cast<const compute::ScalarKernel*>(call.kernel)->null_handling;
- }
- return util::nullopt;
-}
-
-} // namespace
-
-std::vector<FieldRef> FieldsInExpression(const Expression& expr) {
- if (expr.literal()) return {};
-
- if (auto ref = expr.field_ref()) {
- return {*ref};
- }
-
- std::vector<FieldRef> fields;
- for (const Expression& arg : CallNotNull(expr)->arguments) {
- auto argument_fields = FieldsInExpression(arg);
- std::move(argument_fields.begin(), argument_fields.end(), std::back_inserter(fields));
- }
- return fields;
-}
-
-bool ExpressionHasFieldRefs(const Expression& expr) {
- if (expr.literal()) return false;
-
- if (expr.field_ref()) return true;
-
- for (const Expression& arg : CallNotNull(expr)->arguments) {
- if (ExpressionHasFieldRefs(arg)) return true;
- }
- return false;
-}
-
-Result<Expression> FoldConstants(Expression expr) {
- return Modify(
- std::move(expr), [](Expression expr) { return expr; },
- [](Expression expr, ...) -> Result<Expression> {
- auto call = CallNotNull(expr);
- if (std::all_of(call->arguments.begin(), call->arguments.end(),
- [](const Expression& argument) { return argument.literal(); })) {
- // all arguments are literal; we can evaluate this subexpression *now*
- static const ExecBatch ignored_input = ExecBatch{};
- ARROW_ASSIGN_OR_RAISE(Datum constant,
- ExecuteScalarExpression(expr, ignored_input));
-
- return literal(std::move(constant));
- }
-
- // XXX the following should probably be in a registry of passes instead
- // of inline
-
- if (GetNullHandling(*call) == compute::NullHandling::INTERSECTION) {
- // kernels which always produce intersected validity can be resolved
- // to null *now* if any of their inputs is a null literal
- for (const auto& argument : call->arguments) {
- if (argument.IsNullLiteral()) {
- return argument;
- }
- }
- }
-
- if (call->function_name == "and_kleene") {
- for (auto args : ArgumentsAndFlippedArguments(*call)) {
- // true and x == x
- if (args.first == literal(true)) return args.second;
-
- // false and x == false
- if (args.first == literal(false)) return args.first;
-
- // x and x == x
- if (args.first == args.second) return args.first;
- }
- return expr;
- }
-
- if (call->function_name == "or_kleene") {
- for (auto args : ArgumentsAndFlippedArguments(*call)) {
- // false or x == x
- if (args.first == literal(false)) return args.second;
-
- // true or x == true
- if (args.first == literal(true)) return args.first;
-
- // x or x == x
- if (args.first == args.second) return args.first;
- }
- return expr;
- }
-
- return expr;
- });
-}
-
-namespace {
-
-std::vector<Expression> GuaranteeConjunctionMembers(
- const Expression& guaranteed_true_predicate) {
- auto guarantee = guaranteed_true_predicate.call();
- if (!guarantee || guarantee->function_name != "and_kleene") {
- return {guaranteed_true_predicate};
- }
- return FlattenedAssociativeChain(guaranteed_true_predicate).fringe;
-}
-
-// Conjunction members which are represented in known_values are erased from
-// conjunction_members
-Status ExtractKnownFieldValuesImpl(
- std::vector<Expression>* conjunction_members,
- std::unordered_map<FieldRef, Datum, FieldRef::Hash>* known_values) {
- auto unconsumed_end =
- std::partition(conjunction_members->begin(), conjunction_members->end(),
- [](const Expression& expr) {
-                         // search for equality conditions between a field and a literal
- auto call = expr.call();
- if (!call) return true;
-
- if (call->function_name == "equal") {
- auto ref = call->arguments[0].field_ref();
- auto lit = call->arguments[1].literal();
- return !(ref && lit);
- }
-
- if (call->function_name == "is_null") {
- auto ref = call->arguments[0].field_ref();
- return !ref;
- }
-
- return true;
- });
-
- for (auto it = unconsumed_end; it != conjunction_members->end(); ++it) {
- auto call = CallNotNull(*it);
-
- if (call->function_name == "equal") {
- auto ref = call->arguments[0].field_ref();
- auto lit = call->arguments[1].literal();
- known_values->emplace(*ref, *lit);
- } else if (call->function_name == "is_null") {
- auto ref = call->arguments[0].field_ref();
- known_values->emplace(*ref, Datum(std::make_shared<NullScalar>()));
- }
- }
-
- conjunction_members->erase(unconsumed_end, conjunction_members->end());
-
- return Status::OK();
-}
-
-} // namespace
-
-Result<KnownFieldValues> ExtractKnownFieldValues(
- const Expression& guaranteed_true_predicate) {
- auto conjunction_members = GuaranteeConjunctionMembers(guaranteed_true_predicate);
- KnownFieldValues known_values;
- RETURN_NOT_OK(ExtractKnownFieldValuesImpl(&conjunction_members, &known_values.map));
- return known_values;
-}
-
-Result<Expression> ReplaceFieldsWithKnownValues(const KnownFieldValues& known_values,
- Expression expr) {
- if (!expr.IsBound()) {
- return Status::Invalid(
- "ReplaceFieldsWithKnownValues called on an unbound Expression");
- }
-
- return Modify(
- std::move(expr),
- [&known_values](Expression expr) -> Result<Expression> {
- if (auto ref = expr.field_ref()) {
- auto it = known_values.map.find(*ref);
- if (it != known_values.map.end()) {
- Datum lit = it->second;
- if (lit.descr() == expr.descr()) return literal(std::move(lit));
- // type mismatch, try casting the known value to the correct type
-
- if (expr.type()->id() == Type::DICTIONARY &&
- lit.type()->id() != Type::DICTIONARY) {
- // the known value must be dictionary encoded
-
- const auto& dict_type = checked_cast<const DictionaryType&>(*expr.type());
- if (!lit.type()->Equals(dict_type.value_type())) {
- ARROW_ASSIGN_OR_RAISE(lit, compute::Cast(lit, dict_type.value_type()));
- }
-
- if (lit.is_scalar()) {
- ARROW_ASSIGN_OR_RAISE(auto dictionary,
- MakeArrayFromScalar(*lit.scalar(), 1));
-
- lit = Datum{DictionaryScalar::Make(MakeScalar<int32_t>(0),
- std::move(dictionary))};
- }
- }
-
- ARROW_ASSIGN_OR_RAISE(lit, compute::Cast(lit, expr.type()));
- return literal(std::move(lit));
- }
- }
- return expr;
- },
- [](Expression expr, ...) { return expr; });
-}
-
-namespace {
-
-bool IsBinaryAssociativeCommutative(const Expression::Call& call) {
- static std::unordered_set<std::string> binary_associative_commutative{
- "and", "or", "and_kleene", "or_kleene", "xor",
- "multiply", "add", "multiply_checked", "add_checked"};
-
- auto it = binary_associative_commutative.find(call.function_name);
- return it != binary_associative_commutative.end();
-}
-
-} // namespace
-
-Result<Expression> Canonicalize(Expression expr, compute::ExecContext* exec_context) {
- if (exec_context == nullptr) {
- compute::ExecContext exec_context;
- return Canonicalize(std::move(expr), &exec_context);
- }
-
- // If potentially reconstructing more deeply than a call's immediate arguments
- // (for example, when reorganizing an associative chain), add expressions to this set to
- // avoid unnecessary work
- struct {
- std::unordered_set<Expression, Expression::Hash> set_;
-
- bool operator()(const Expression& expr) const {
- return set_.find(expr) != set_.end();
- }
-
- void Add(std::vector<Expression> exprs) {
- std::move(exprs.begin(), exprs.end(), std::inserter(set_, set_.end()));
- }
- } AlreadyCanonicalized;
-
- return Modify(
- std::move(expr),
- [&AlreadyCanonicalized, exec_context](Expression expr) -> Result<Expression> {
- auto call = expr.call();
- if (!call) return expr;
-
- if (AlreadyCanonicalized(expr)) return expr;
-
- if (IsBinaryAssociativeCommutative(*call)) {
- struct {
- int Priority(const Expression& operand) const {
- // order literals first, starting with nulls
- if (operand.IsNullLiteral()) return 0;
- if (operand.literal()) return 1;
- return 2;
- }
- bool operator()(const Expression& l, const Expression& r) const {
- return Priority(l) < Priority(r);
- }
- } CanonicalOrdering;
-
- FlattenedAssociativeChain chain(expr);
- if (chain.was_left_folded &&
- std::is_sorted(chain.fringe.begin(), chain.fringe.end(),
- CanonicalOrdering)) {
- AlreadyCanonicalized.Add(std::move(chain.exprs));
- return expr;
- }
-
- std::stable_sort(chain.fringe.begin(), chain.fringe.end(), CanonicalOrdering);
-
- // fold the chain back up
- auto folded =
- FoldLeft(chain.fringe.begin(), chain.fringe.end(),
- [call, &AlreadyCanonicalized](Expression l, Expression r) {
- auto canonicalized_call = *call;
- canonicalized_call.arguments = {std::move(l), std::move(r)};
- Expression expr(std::move(canonicalized_call));
- AlreadyCanonicalized.Add({expr});
- return expr;
- });
- return std::move(*folded);
- }
-
- if (auto cmp = Comparison::Get(call->function_name)) {
- if (call->arguments[0].literal() && !call->arguments[1].literal()) {
- // ensure that literals are on comparisons' RHS
- auto flipped_call = *call;
-
- std::swap(flipped_call.arguments[0], flipped_call.arguments[1]);
- flipped_call.function_name =
- Comparison::GetName(Comparison::GetFlipped(*cmp));
-
- return BindNonRecursive(flipped_call,
- /*insert_implicit_casts=*/false, exec_context);
- }
- }
-
- return expr;
- },
- [](Expression expr, ...) { return expr; });
-}
-
-namespace {
-
-Result<Expression> DirectComparisonSimplification(Expression expr,
- const Expression::Call& guarantee) {
- return Modify(
- std::move(expr), [](Expression expr) { return expr; },
- [&guarantee](Expression expr, ...) -> Result<Expression> {
- auto call = expr.call();
- if (!call) return expr;
-
- // Ensure both calls are comparisons with equal LHS and scalar RHS
- auto cmp = Comparison::Get(expr);
- auto cmp_guarantee = Comparison::Get(guarantee.function_name);
-
- if (!cmp) return expr;
- if (!cmp_guarantee) return expr;
-
- const auto& lhs = Comparison::StripOrderPreservingCasts(call->arguments[0]);
- const auto& guarantee_lhs = guarantee.arguments[0];
- if (lhs != guarantee_lhs) return expr;
-
- auto rhs = call->arguments[1].literal();
- auto guarantee_rhs = guarantee.arguments[1].literal();
-
- if (!rhs) return expr;
- if (!rhs->is_scalar()) return expr;
-
- if (!guarantee_rhs) return expr;
- if (!guarantee_rhs->is_scalar()) return expr;
-
- ARROW_ASSIGN_OR_RAISE(auto cmp_rhs_guarantee_rhs,
- Comparison::Execute(*rhs, *guarantee_rhs));
- DCHECK_NE(cmp_rhs_guarantee_rhs, Comparison::NA);
-
- if (cmp_rhs_guarantee_rhs == Comparison::EQUAL) {
- // RHS of filter is equal to RHS of guarantee
-
- if ((*cmp & *cmp_guarantee) == *cmp_guarantee) {
- // guarantee is a subset of filter, so all data will be included
- // x > 1, x >= 1, x != 1 guaranteed by x > 1
- return literal(true);
- }
-
- if ((*cmp & *cmp_guarantee) == 0) {
- // guarantee disjoint with filter, so all data will be excluded
- // x > 1, x >= 1, x != 1 unsatisfiable if x == 1
- return literal(false);
- }
-
- return expr;
- }
-
- if (*cmp_guarantee & cmp_rhs_guarantee_rhs) {
- // x > 1, x >= 1, x != 1 cannot use guarantee x >= 3
- return expr;
- }
-
- if (*cmp & Comparison::GetFlipped(cmp_rhs_guarantee_rhs)) {
- // x > 1, x >= 1, x != 1 guaranteed by x >= 3
- return literal(true);
- } else {
- // x < 1, x <= 1, x == 1 unsatisfiable if x >= 3
- return literal(false);
- }
- });
-}
-
-} // namespace
-
-Result<Expression> SimplifyWithGuarantee(Expression expr,
- const Expression& guaranteed_true_predicate) {
- auto conjunction_members = GuaranteeConjunctionMembers(guaranteed_true_predicate);
-
- KnownFieldValues known_values;
- RETURN_NOT_OK(ExtractKnownFieldValuesImpl(&conjunction_members, &known_values.map));
-
- ARROW_ASSIGN_OR_RAISE(expr,
- ReplaceFieldsWithKnownValues(known_values, std::move(expr)));
-
- auto CanonicalizeAndFoldConstants = [&expr] {
- ARROW_ASSIGN_OR_RAISE(expr, Canonicalize(std::move(expr)));
- ARROW_ASSIGN_OR_RAISE(expr, FoldConstants(std::move(expr)));
- return Status::OK();
- };
- RETURN_NOT_OK(CanonicalizeAndFoldConstants());
-
- for (const auto& guarantee : conjunction_members) {
- if (Comparison::Get(guarantee) && guarantee.call()->arguments[1].literal()) {
- ARROW_ASSIGN_OR_RAISE(
- auto simplified, DirectComparisonSimplification(expr, *CallNotNull(guarantee)));
-
- if (Identical(simplified, expr)) continue;
-
- expr = std::move(simplified);
- RETURN_NOT_OK(CanonicalizeAndFoldConstants());
- }
- }
-
- return expr;
-}
-
-// Serialization is accomplished by converting expressions to KeyValueMetadata and storing
-// this in the schema of a RecordBatch. Embedded arrays and scalars are stored in its
-// columns. Finally, the RecordBatch is written to an IPC file.
-Result<std::shared_ptr<Buffer>> Serialize(const Expression& expr) {
- struct {
- std::shared_ptr<KeyValueMetadata> metadata_ = std::make_shared<KeyValueMetadata>();
- ArrayVector columns_;
-
- Result<std::string> AddScalar(const Scalar& scalar) {
- auto ret = columns_.size();
- ARROW_ASSIGN_OR_RAISE(auto array, MakeArrayFromScalar(scalar, 1));
- columns_.push_back(std::move(array));
- return std::to_string(ret);
- }
-
- Status Visit(const Expression& expr) {
- if (auto lit = expr.literal()) {
- if (!lit->is_scalar()) {
- return Status::NotImplemented("Serialization of non-scalar literals");
- }
- ARROW_ASSIGN_OR_RAISE(auto value, AddScalar(*lit->scalar()));
- metadata_->Append("literal", std::move(value));
- return Status::OK();
- }
-
- if (auto ref = expr.field_ref()) {
- if (!ref->name()) {
- return Status::NotImplemented("Serialization of non-name field_refs");
- }
- metadata_->Append("field_ref", *ref->name());
- return Status::OK();
- }
-
- auto call = CallNotNull(expr);
- metadata_->Append("call", call->function_name);
-
- for (const auto& argument : call->arguments) {
- RETURN_NOT_OK(Visit(argument));
- }
-
- if (call->options) {
- ARROW_ASSIGN_OR_RAISE(auto options_scalar,
- internal::FunctionOptionsToStructScalar(*call->options));
- ARROW_ASSIGN_OR_RAISE(auto value, AddScalar(*options_scalar));
- metadata_->Append("options", std::move(value));
- }
-
- metadata_->Append("end", call->function_name);
- return Status::OK();
- }
-
- Result<std::shared_ptr<RecordBatch>> operator()(const Expression& expr) {
- RETURN_NOT_OK(Visit(expr));
- FieldVector fields(columns_.size());
- for (size_t i = 0; i < fields.size(); ++i) {
- fields[i] = field("", columns_[i]->type());
- }
- return RecordBatch::Make(schema(std::move(fields), std::move(metadata_)), 1,
- std::move(columns_));
- }
- } ToRecordBatch;
-
- ARROW_ASSIGN_OR_RAISE(auto batch, ToRecordBatch(expr));
- ARROW_ASSIGN_OR_RAISE(auto stream, io::BufferOutputStream::Create());
- ARROW_ASSIGN_OR_RAISE(auto writer, ipc::MakeFileWriter(stream, batch->schema()));
- RETURN_NOT_OK(writer->WriteRecordBatch(*batch));
- RETURN_NOT_OK(writer->Close());
- return stream->Finish();
-}
-
-Result<Expression> Deserialize(std::shared_ptr<Buffer> buffer) {
- io::BufferReader stream(std::move(buffer));
- ARROW_ASSIGN_OR_RAISE(auto reader, ipc::RecordBatchFileReader::Open(&stream));
- ARROW_ASSIGN_OR_RAISE(auto batch, reader->ReadRecordBatch(0));
- if (batch->schema()->metadata() == nullptr) {
- return Status::Invalid("serialized Expression's batch repr had null metadata");
- }
- if (batch->num_rows() != 1) {
- return Status::Invalid(
- "serialized Expression's batch repr was not a single row - had ",
- batch->num_rows());
- }
-
- struct FromRecordBatch {
- const RecordBatch& batch_;
- int index_;
-
- const KeyValueMetadata& metadata() { return *batch_.schema()->metadata(); }
-
- Result<std::shared_ptr<Scalar>> GetScalar(const std::string& i) {
- int32_t column_index;
- if (!::arrow::internal::ParseValue<Int32Type>(i.data(), i.length(),
- &column_index)) {
- return Status::Invalid("Couldn't parse column_index");
- }
- if (column_index >= batch_.num_columns()) {
- return Status::Invalid("column_index out of bounds");
- }
- return batch_.column(column_index)->GetScalar(0);
- }
-
- Result<Expression> GetOne() {
- if (index_ >= metadata().size()) {
- return Status::Invalid("unterminated serialized Expression");
- }
-
- const std::string& key = metadata().key(index_);
- const std::string& value = metadata().value(index_);
- ++index_;
-
- if (key == "literal") {
- ARROW_ASSIGN_OR_RAISE(auto scalar, GetScalar(value));
- return literal(std::move(scalar));
- }
-
- if (key == "field_ref") {
- return field_ref(value);
- }
-
- if (key != "call") {
- return Status::Invalid("Unrecognized serialized Expression key ", key);
- }
-
- std::vector<Expression> arguments;
- while (metadata().key(index_) != "end") {
- if (metadata().key(index_) == "options") {
- ARROW_ASSIGN_OR_RAISE(auto options_scalar, GetScalar(metadata().value(index_)));
- std::shared_ptr<compute::FunctionOptions> options;
- if (options_scalar) {
- ARROW_ASSIGN_OR_RAISE(
- options, internal::FunctionOptionsFromStructScalar(
- checked_cast<const StructScalar&>(*options_scalar)));
- }
- auto expr = call(value, std::move(arguments), std::move(options));
- index_ += 2;
- return expr;
- }
-
- ARROW_ASSIGN_OR_RAISE(auto argument, GetOne());
- arguments.push_back(std::move(argument));
- }
-
- ++index_;
- return call(value, std::move(arguments));
- }
- };
-
- return FromRecordBatch{*batch, 0}.GetOne();
-}
-
-Expression project(std::vector<Expression> values, std::vector<std::string> names) {
- return call("make_struct", std::move(values),
- compute::MakeStructOptions{std::move(names)});
-}
-
-Expression equal(Expression lhs, Expression rhs) {
- return call("equal", {std::move(lhs), std::move(rhs)});
-}
-
-Expression not_equal(Expression lhs, Expression rhs) {
- return call("not_equal", {std::move(lhs), std::move(rhs)});
-}
-
-Expression less(Expression lhs, Expression rhs) {
- return call("less", {std::move(lhs), std::move(rhs)});
-}
-
-Expression less_equal(Expression lhs, Expression rhs) {
- return call("less_equal", {std::move(lhs), std::move(rhs)});
-}
-
-Expression greater(Expression lhs, Expression rhs) {
- return call("greater", {std::move(lhs), std::move(rhs)});
-}
-
-Expression greater_equal(Expression lhs, Expression rhs) {
- return call("greater_equal", {std::move(lhs), std::move(rhs)});
-}
-
-Expression is_null(Expression lhs) { return call("is_null", {std::move(lhs)}); }
-
-Expression is_valid(Expression lhs) { return call("is_valid", {std::move(lhs)}); }
-
-Expression and_(Expression lhs, Expression rhs) {
- return call("and_kleene", {std::move(lhs), std::move(rhs)});
-}
-
-Expression and_(const std::vector<Expression>& operands) {
- auto folded = FoldLeft<Expression(Expression, Expression)>(operands.begin(),
- operands.end(), and_);
- if (folded) {
- return std::move(*folded);
- }
- return literal(true);
-}
-
-Expression or_(Expression lhs, Expression rhs) {
- return call("or_kleene", {std::move(lhs), std::move(rhs)});
-}
-
-Expression or_(const std::vector<Expression>& operands) {
- auto folded =
- FoldLeft<Expression(Expression, Expression)>(operands.begin(), operands.end(), or_);
- if (folded) {
- return std::move(*folded);
- }
- return literal(false);
-}
-
-Expression not_(Expression operand) { return call("invert", {std::move(operand)}); }
-
-} // namespace compute
-} // namespace arrow
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "arrow/compute/exec/expression.h"
+
+#include <unordered_map>
+#include <unordered_set>
+
+#include "arrow/chunked_array.h"
+#include "arrow/compute/api_vector.h"
+#include "arrow/compute/exec/expression_internal.h"
+#include "arrow/compute/exec_internal.h"
+#include "arrow/compute/function_internal.h"
+#include "arrow/io/memory.h"
+#include "arrow/ipc/reader.h"
+#include "arrow/ipc/writer.h"
+#include "arrow/util/hash_util.h"
+#include "arrow/util/key_value_metadata.h"
+#include "arrow/util/logging.h"
+#include "arrow/util/optional.h"
+#include "arrow/util/string.h"
+#include "arrow/util/value_parsing.h"
+
+namespace arrow {
+
+using internal::checked_cast;
+using internal::checked_pointer_cast;
+
+namespace compute {
+
+void Expression::Call::ComputeHash() {
+ hash = std::hash<std::string>{}(function_name);
+ for (const auto& arg : arguments) {
+ arrow::internal::hash_combine(hash, arg.hash());
+ }
+}
+
+Expression::Expression(Call call) {
+ call.ComputeHash();
+ impl_ = std::make_shared<Impl>(std::move(call));
+}
+
+Expression::Expression(Datum literal)
+ : impl_(std::make_shared<Impl>(std::move(literal))) {}
+
+Expression::Expression(Parameter parameter)
+ : impl_(std::make_shared<Impl>(std::move(parameter))) {}
+
+Expression literal(Datum lit) { return Expression(std::move(lit)); }
+
+Expression field_ref(FieldRef ref) {
+ return Expression(Expression::Parameter{std::move(ref), ValueDescr{}, -1});
+}
+
+Expression call(std::string function, std::vector<Expression> arguments,
+ std::shared_ptr<compute::FunctionOptions> options) {
+ Expression::Call call;
+ call.function_name = std::move(function);
+ call.arguments = std::move(arguments);
+ call.options = std::move(options);
+ return Expression(std::move(call));
+}
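+
+// Usage sketch, for illustration only: the three factories above compose into
+// expression trees (the field name "a" is a placeholder):
+//
+//   Expression expr = call("add", {field_ref("a"), literal(3)});
+//   // expr represents add(a, 3) and stays unbound until Expression::Bind.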
+
+const Datum* Expression::literal() const { return util::get_if<Datum>(impl_.get()); }
+
+const Expression::Parameter* Expression::parameter() const {
+ return util::get_if<Parameter>(impl_.get());
+}
+
+const FieldRef* Expression::field_ref() const {
+ if (auto parameter = this->parameter()) {
+ return &parameter->ref;
+ }
+ return nullptr;
+}
+
+const Expression::Call* Expression::call() const {
+ return util::get_if<Call>(impl_.get());
+}
+
+ValueDescr Expression::descr() const {
+ if (impl_ == nullptr) return {};
+
+ if (auto lit = literal()) {
+ return lit->descr();
+ }
+
+ if (auto parameter = this->parameter()) {
+ return parameter->descr;
+ }
+
+ return CallNotNull(*this)->descr;
+}
+
+namespace {
+
+std::string PrintDatum(const Datum& datum) {
+ if (datum.is_scalar()) {
+ if (!datum.scalar()->is_valid) return "null";
+
+ switch (datum.type()->id()) {
+ case Type::STRING:
+ case Type::LARGE_STRING:
+ return '"' +
+ Escape(util::string_view(*datum.scalar_as<BaseBinaryScalar>().value)) +
+ '"';
+
+ case Type::BINARY:
+ case Type::FIXED_SIZE_BINARY:
+ case Type::LARGE_BINARY:
+ return '"' + datum.scalar_as<BaseBinaryScalar>().value->ToHexString() + '"';
+
+ default:
+ break;
+ }
+
+ return datum.scalar()->ToString();
+ }
+ return datum.ToString();
+}
+
+} // namespace
+
+std::string Expression::ToString() const {
+ if (auto lit = literal()) {
+ return PrintDatum(*lit);
+ }
+
+ if (auto ref = field_ref()) {
+ if (auto name = ref->name()) {
+ return *name;
+ }
+ if (auto path = ref->field_path()) {
+ return path->ToString();
+ }
+ return ref->ToString();
+ }
+
+ auto call = CallNotNull(*this);
+ auto binary = [&](std::string op) {
+ return "(" + call->arguments[0].ToString() + " " + op + " " +
+ call->arguments[1].ToString() + ")";
+ };
+
+ if (auto cmp = Comparison::Get(call->function_name)) {
+ return binary(Comparison::GetOp(*cmp));
+ }
+
+ constexpr util::string_view kleene = "_kleene";
+ if (util::string_view{call->function_name}.ends_with(kleene)) {
+ auto op = call->function_name.substr(0, call->function_name.size() - kleene.size());
+ return binary(std::move(op));
+ }
+
+ if (auto options = GetMakeStructOptions(*call)) {
+ std::string out = "{";
+ auto argument = call->arguments.begin();
+ for (const auto& field_name : options->field_names) {
+ out += field_name + "=" + argument++->ToString() + ", ";
+ }
+ out.resize(out.size() - 1);
+ out.back() = '}';
+ return out;
+ }
+
+ std::string out = call->function_name + "(";
+ for (const auto& arg : call->arguments) {
+ out += arg.ToString() + ", ";
+ }
+
+ if (call->options) {
+ out += call->options->ToString();
+ out.resize(out.size() + 1);
+ } else {
+ out.resize(out.size() - 1);
+ }
+ out.back() = ')';
+ return out;
+}
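+
+// Illustration of the formatting rules above: comparisons and "*_kleene"
+// calls print infix, make_struct prints braces, anything else prints as
+// name(arg, ...). With x and y standing for arbitrary expressions:
+//
+//   call("greater", {field_ref("a"), literal(3)}).ToString();  // "(a > 3)"
+//   call("and_kleene", {x, y}).ToString();                     // "(x and y)"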
+
+void PrintTo(const Expression& expr, std::ostream* os) {
+ *os << expr.ToString();
+ if (expr.IsBound()) {
+ *os << "[bound]";
+ }
+}
+
+bool Expression::Equals(const Expression& other) const {
+ if (Identical(*this, other)) return true;
+
+ if (impl_->index() != other.impl_->index()) {
+ return false;
+ }
+
+ if (auto lit = literal()) {
+ return lit->Equals(*other.literal());
+ }
+
+ if (auto ref = field_ref()) {
+ return ref->Equals(*other.field_ref());
+ }
+
+ auto call = CallNotNull(*this);
+ auto other_call = CallNotNull(other);
+
+ if (call->function_name != other_call->function_name ||
+ call->kernel != other_call->kernel) {
+ return false;
+ }
+
+ for (size_t i = 0; i < call->arguments.size(); ++i) {
+ if (!call->arguments[i].Equals(other_call->arguments[i])) {
+ return false;
+ }
+ }
+
+ if (call->options == other_call->options) return true;
+ if (call->options && other_call->options) {
+ return call->options->Equals(other_call->options);
+ }
+ return false;
+}
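+
+// Note the contrast with Identical below: Equals compares structure while
+// Identical compares the shared impl_ pointer. For illustration:
+//
+//   Expression a = field_ref("x"), b = field_ref("x");
+//   a.Equals(b);      // true  - same structure
+//   Identical(a, b);  // false - separately constructed
+//   Identical(a, a);  // true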
+
+bool Identical(const Expression& l, const Expression& r) { return l.impl_ == r.impl_; }
+
+size_t Expression::hash() const {
+ if (auto lit = literal()) {
+ if (lit->is_scalar()) {
+ return lit->scalar()->hash();
+ }
+ return 0;
+ }
+
+ if (auto ref = field_ref()) {
+ return ref->hash();
+ }
+
+ return CallNotNull(*this)->hash;
+}
+
+bool Expression::IsBound() const {
+ if (type() == nullptr) return false;
+
+ if (auto call = this->call()) {
+ if (call->kernel == nullptr) return false;
+
+ for (const Expression& arg : call->arguments) {
+ if (!arg.IsBound()) return false;
+ }
+ }
+
+ return true;
+}
+
+bool Expression::IsScalarExpression() const {
+ if (auto lit = literal()) {
+ return lit->is_scalar();
+ }
+
+ if (field_ref()) return true;
+
+ auto call = CallNotNull(*this);
+
+ for (const Expression& arg : call->arguments) {
+ if (!arg.IsScalarExpression()) return false;
+ }
+
+ if (call->function) {
+ return call->function->kind() == compute::Function::SCALAR;
+ }
+
+ // this expression is not bound; make a best guess based on
+ // the default function registry
+ if (auto function = compute::GetFunctionRegistry()
+ ->GetFunction(call->function_name)
+ .ValueOr(nullptr)) {
+ return function->kind() == compute::Function::SCALAR;
+ }
+
+ // unknown function or other error; conservatively return false
+ return false;
+}
+
+bool Expression::IsNullLiteral() const {
+ if (auto lit = literal()) {
+ if (lit->null_count() == lit->length()) {
+ return true;
+ }
+ }
+
+ return false;
+}
+
+bool Expression::IsSatisfiable() const {
+ if (type() && type()->id() == Type::NA) {
+ return false;
+ }
+
+ if (auto lit = literal()) {
+ if (lit->null_count() == lit->length()) {
+ return false;
+ }
+
+ if (lit->is_scalar() && lit->type()->id() == Type::BOOL) {
+ return lit->scalar_as<BooleanScalar>().value;
+ }
+ }
+
+ return true;
+}
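+
+// For illustration of the shortcuts above:
+//
+//   literal(false).IsSatisfiable();                      // false
+//   literal(MakeNullScalar(boolean())).IsSatisfiable();  // false (all null)
+//   field_ref("a").IsSatisfiable();                      // true (conservative)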
+
+namespace {
+
+// Produce a bound Expression from unbound Call and bound arguments.
+Result<Expression> BindNonRecursive(Expression::Call call, bool insert_implicit_casts,
+ compute::ExecContext* exec_context) {
+ DCHECK(std::all_of(call.arguments.begin(), call.arguments.end(),
+ [](const Expression& argument) { return argument.IsBound(); }));
+
+ auto descrs = GetDescriptors(call.arguments);
+ ARROW_ASSIGN_OR_RAISE(call.function, GetFunction(call, exec_context));
+
+ if (!insert_implicit_casts) {
+ ARROW_ASSIGN_OR_RAISE(call.kernel, call.function->DispatchExact(descrs));
+ } else {
+ ARROW_ASSIGN_OR_RAISE(call.kernel, call.function->DispatchBest(&descrs));
+
+ for (size_t i = 0; i < descrs.size(); ++i) {
+ if (descrs[i] == call.arguments[i].descr()) continue;
+
+ if (descrs[i].shape != call.arguments[i].descr().shape) {
+ return Status::NotImplemented(
+ "Automatic broadcasting of scalars arguments to arrays in ",
+ Expression(std::move(call)).ToString());
+ }
+
+ if (auto lit = call.arguments[i].literal()) {
+ ARROW_ASSIGN_OR_RAISE(Datum new_lit, compute::Cast(*lit, descrs[i].type));
+ call.arguments[i] = literal(std::move(new_lit));
+ continue;
+ }
+
+ // construct an implicit cast Expression with which to replace this argument
+ Expression::Call implicit_cast;
+ implicit_cast.function_name = "cast";
+ implicit_cast.arguments = {std::move(call.arguments[i])};
+ implicit_cast.options = std::make_shared<compute::CastOptions>(
+ compute::CastOptions::Safe(descrs[i].type));
+
+ ARROW_ASSIGN_OR_RAISE(
+ call.arguments[i],
+ BindNonRecursive(std::move(implicit_cast),
+ /*insert_implicit_casts=*/false, exec_context));
+ }
+ }
+
+ compute::KernelContext kernel_context(exec_context);
+ if (call.kernel->init) {
+ ARROW_ASSIGN_OR_RAISE(
+ call.kernel_state,
+ call.kernel->init(&kernel_context, {call.kernel, descrs, call.options.get()}));
+
+ kernel_context.SetState(call.kernel_state.get());
+ }
+
+ ARROW_ASSIGN_OR_RAISE(
+ call.descr, call.kernel->signature->out_type().Resolve(&kernel_context, descrs));
+
+ return Expression(std::move(call));
+}
+
+template <typename TypeOrSchema>
+Result<Expression> BindImpl(Expression expr, const TypeOrSchema& in,
+ ValueDescr::Shape shape, compute::ExecContext* exec_context) {
+ if (exec_context == nullptr) {
+ compute::ExecContext exec_context;
+ return BindImpl(std::move(expr), in, shape, &exec_context);
+ }
+
+ if (expr.literal()) return expr;
+
+ if (auto ref = expr.field_ref()) {
+ if (ref->IsNested()) {
+ return Status::NotImplemented("nested field references");
+ }
+
+ ARROW_ASSIGN_OR_RAISE(auto path, ref->FindOne(in));
+
+ auto bound = *expr.parameter();
+ bound.index = path[0];
+ ARROW_ASSIGN_OR_RAISE(auto field, path.Get(in));
+ bound.descr.type = field->type();
+ bound.descr.shape = shape;
+ return Expression{std::move(bound)};
+ }
+
+ auto call = *CallNotNull(expr);
+ for (auto& argument : call.arguments) {
+ ARROW_ASSIGN_OR_RAISE(argument,
+ BindImpl(std::move(argument), in, shape, exec_context));
+ }
+ return BindNonRecursive(std::move(call),
+ /*insert_implicit_casts=*/true, exec_context);
+}
+
+} // namespace
+
+Result<Expression> Expression::Bind(const ValueDescr& in,
+ compute::ExecContext* exec_context) const {
+ return BindImpl(*this, *in.type, in.shape, exec_context);
+}
+
+Result<Expression> Expression::Bind(const Schema& in_schema,
+ compute::ExecContext* exec_context) const {
+ return BindImpl(*this, in_schema, ValueDescr::ARRAY, exec_context);
+}
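+
+// Usage sketch, for illustration (schema and field name are placeholders):
+//
+//   auto s = schema({field("a", int32())});
+//   ARROW_ASSIGN_OR_RAISE(auto bound,
+//                         greater(field_ref("a"), literal(3)).Bind(*s));
+//   // bound.IsBound() is now true; BindImpl resolved "a" to index 0 and
+//   // BindNonRecursive inserted any implicit casts needed for dispatch.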
+
+Result<ExecBatch> MakeExecBatch(const Schema& full_schema, const Datum& partial) {
+ ExecBatch out;
+
+ if (partial.kind() == Datum::RECORD_BATCH) {
+ const auto& partial_batch = *partial.record_batch();
+ out.length = partial_batch.num_rows();
+
+ for (const auto& field : full_schema.fields()) {
+ ARROW_ASSIGN_OR_RAISE(auto column,
+ FieldRef(field->name()).GetOneOrNone(partial_batch));
+
+ if (column) {
+ if (!column->type()->Equals(field->type())) {
+ // Referenced field was present but didn't have the expected type.
+ // This *should* be handled by readers, and will just be an error in the future.
+ ARROW_ASSIGN_OR_RAISE(
+ auto converted,
+ compute::Cast(column, field->type(), compute::CastOptions::Safe()));
+ column = converted.make_array();
+ }
+ out.values.emplace_back(std::move(column));
+ } else {
+ out.values.emplace_back(MakeNullScalar(field->type()));
+ }
+ }
+ return out;
+ }
+
+ // wasteful but useful for testing:
+ if (partial.type()->id() == Type::STRUCT) {
+ if (partial.is_array()) {
+ ARROW_ASSIGN_OR_RAISE(auto partial_batch,
+ RecordBatch::FromStructArray(partial.make_array()));
+
+ return MakeExecBatch(full_schema, partial_batch);
+ }
+
+ if (partial.is_scalar()) {
+ ARROW_ASSIGN_OR_RAISE(auto partial_array,
+ MakeArrayFromScalar(*partial.scalar(), 1));
+ ARROW_ASSIGN_OR_RAISE(auto out, MakeExecBatch(full_schema, partial_array));
+
+ for (Datum& value : out.values) {
+ if (value.is_scalar()) continue;
+ ARROW_ASSIGN_OR_RAISE(value, value.make_array()->GetScalar(0));
+ }
+ return out;
+ }
+ }
+
+ return Status::NotImplemented("MakeExecBatch from ", PrintDatum(partial));
+}
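+
+// For illustration: with full_schema {a: int32, b: utf8} and a partial batch
+// containing only "a", the resulting ExecBatch holds {a's array,
+// MakeNullScalar(utf8())} and out.length == partial.num_rows().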
+
+Result<Datum> ExecuteScalarExpression(const Expression& expr, const Schema& full_schema,
+ const Datum& partial_input,
+ compute::ExecContext* exec_context) {
+ ARROW_ASSIGN_OR_RAISE(auto input, MakeExecBatch(full_schema, partial_input));
+ return ExecuteScalarExpression(expr, input, exec_context);
+}
+
+Result<Datum> ExecuteScalarExpression(const Expression& expr, const ExecBatch& input,
+ compute::ExecContext* exec_context) {
+ if (exec_context == nullptr) {
+ compute::ExecContext exec_context;
+ return ExecuteScalarExpression(expr, input, &exec_context);
+ }
+
+ if (!expr.IsBound()) {
+ return Status::Invalid("Cannot Execute unbound expression.");
+ }
+
+ if (!expr.IsScalarExpression()) {
+ return Status::Invalid(
+ "ExecuteScalarExpression cannot Execute non-scalar expression ", expr.ToString());
+ }
+
+ if (auto lit = expr.literal()) return *lit;
+
+ if (auto param = expr.parameter()) {
+ if (param->descr.type->id() == Type::NA) {
+ return MakeNullScalar(null());
+ }
+
+ const Datum& field = input[param->index];
+ if (!field.type()->Equals(param->descr.type)) {
+ return Status::Invalid("Referenced field ", expr.ToString(), " was ",
+ field.type()->ToString(), " but should have been ",
+ param->descr.type->ToString());
+ }
+
+ return field;
+ }
+
+ auto call = CallNotNull(expr);
+
+ std::vector<Datum> arguments(call->arguments.size());
+ for (size_t i = 0; i < arguments.size(); ++i) {
+ ARROW_ASSIGN_OR_RAISE(
+ arguments[i], ExecuteScalarExpression(call->arguments[i], input, exec_context));
+ }
+
+ auto executor = compute::detail::KernelExecutor::MakeScalar();
+
+ compute::KernelContext kernel_context(exec_context);
+ kernel_context.SetState(call->kernel_state.get());
+
+ auto kernel = call->kernel;
+ auto descrs = GetDescriptors(arguments);
+ auto options = call->options.get();
+ RETURN_NOT_OK(executor->Init(&kernel_context, {kernel, descrs, options}));
+
+ auto listener = std::make_shared<compute::detail::DatumAccumulator>();
+ RETURN_NOT_OK(executor->Execute(arguments, listener.get()));
+ return executor->WrapResults(arguments, listener->values());
+}
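+
+// End-to-end sketch, for illustration (s and batch are placeholders bound to
+// a schema {a: int32} and a conforming RecordBatch):
+//
+//   ARROW_ASSIGN_OR_RAISE(
+//       auto bound, call("add", {field_ref("a"), literal(1)}).Bind(*s));
+//   ARROW_ASSIGN_OR_RAISE(Datum out,
+//                         ExecuteScalarExpression(bound, *s, Datum(batch)));
+//   // out holds a + 1 evaluated for every row of batch.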
+
+namespace {
+
+std::array<std::pair<const Expression&, const Expression&>, 2>
+ArgumentsAndFlippedArguments(const Expression::Call& call) {
+ DCHECK_EQ(call.arguments.size(), 2);
+ return {std::pair<const Expression&, const Expression&>{call.arguments[0],
+ call.arguments[1]},
+ std::pair<const Expression&, const Expression&>{call.arguments[1],
+ call.arguments[0]}};
+}
+
+template <typename BinOp, typename It,
+ typename Out = typename std::iterator_traits<It>::value_type>
+util::optional<Out> FoldLeft(It begin, It end, const BinOp& bin_op) {
+ if (begin == end) return util::nullopt;
+
+ Out folded = std::move(*begin++);
+ while (begin != end) {
+ folded = bin_op(std::move(folded), std::move(*begin++));
+ }
+ return folded;
+}
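+
+// e.g. FoldLeft over {a, b, c} yields bin_op(bin_op(a, b), c), and
+// util::nullopt for an empty range.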
+
+util::optional<compute::NullHandling::type> GetNullHandling(
+ const Expression::Call& call) {
+ if (call.function && call.function->kind() == compute::Function::SCALAR) {
+ return static_cast<const compute::ScalarKernel*>(call.kernel)->null_handling;
+ }
+ return util::nullopt;
+}
+
+} // namespace
+
+std::vector<FieldRef> FieldsInExpression(const Expression& expr) {
+ if (expr.literal()) return {};
+
+ if (auto ref = expr.field_ref()) {
+ return {*ref};
+ }
+
+ std::vector<FieldRef> fields;
+ for (const Expression& arg : CallNotNull(expr)->arguments) {
+ auto argument_fields = FieldsInExpression(arg);
+ std::move(argument_fields.begin(), argument_fields.end(), std::back_inserter(fields));
+ }
+ return fields;
+}
+
+bool ExpressionHasFieldRefs(const Expression& expr) {
+ if (expr.literal()) return false;
+
+ if (expr.field_ref()) return true;
+
+ for (const Expression& arg : CallNotNull(expr)->arguments) {
+ if (ExpressionHasFieldRefs(arg)) return true;
+ }
+ return false;
+}
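+
+// For illustration of the two helpers above:
+//
+//   auto expr = and_(equal(field_ref("a"), literal(1)),
+//                    is_null(field_ref("b")));
+//   FieldsInExpression(expr);            // {FieldRef("a"), FieldRef("b")}
+//   ExpressionHasFieldRefs(expr);        // true
+//   ExpressionHasFieldRefs(literal(1));  // false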
+
+Result<Expression> FoldConstants(Expression expr) {
+ return Modify(
+ std::move(expr), [](Expression expr) { return expr; },
+ [](Expression expr, ...) -> Result<Expression> {
+ auto call = CallNotNull(expr);
+ if (std::all_of(call->arguments.begin(), call->arguments.end(),
+ [](const Expression& argument) { return argument.literal(); })) {
+ // all arguments are literal; we can evaluate this subexpression *now*
+ static const ExecBatch ignored_input = ExecBatch{};
+ ARROW_ASSIGN_OR_RAISE(Datum constant,
+ ExecuteScalarExpression(expr, ignored_input));
+
+ return literal(std::move(constant));
+ }
+
+ // XXX the following should probably be in a registry of passes instead
+ // of inline
+
+ if (GetNullHandling(*call) == compute::NullHandling::INTERSECTION) {
+ // kernels which always produce intersected validity can be resolved
+ // to null *now* if any of their inputs is a null literal
+ for (const auto& argument : call->arguments) {
+ if (argument.IsNullLiteral()) {
+ return argument;
+ }
+ }
+ }
+
+ if (call->function_name == "and_kleene") {
+ for (auto args : ArgumentsAndFlippedArguments(*call)) {
+ // true and x == x
+ if (args.first == literal(true)) return args.second;
+
+ // false and x == false
+ if (args.first == literal(false)) return args.first;
+
+ // x and x == x
+ if (args.first == args.second) return args.first;
+ }
+ return expr;
+ }
+
+ if (call->function_name == "or_kleene") {
+ for (auto args : ArgumentsAndFlippedArguments(*call)) {
+ // false or x == x
+ if (args.first == literal(false)) return args.second;
+
+ // true or x == true
+ if (args.first == literal(true)) return args.first;
+
+ // x or x == x
+ if (args.first == args.second) return args.first;
+ }
+ return expr;
+ }
+
+ return expr;
+ });
+}
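+
+// For illustration, on already-bound expressions (literal-only subtrees are
+// evaluated through ExecuteScalarExpression; b stands for any expression):
+//
+//   add(1, 2)            -> literal(3)
+//   and_kleene(true, b)  -> b
+//   or_kleene(true, b)   -> literal(true)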
+
+namespace {
+
+std::vector<Expression> GuaranteeConjunctionMembers(
+ const Expression& guaranteed_true_predicate) {
+ auto guarantee = guaranteed_true_predicate.call();
+ if (!guarantee || guarantee->function_name != "and_kleene") {
+ return {guaranteed_true_predicate};
+ }
+ return FlattenedAssociativeChain(guaranteed_true_predicate).fringe;
+}
+
+// Conjunction members which are represented in known_values are erased from
+// conjunction_members
+Status ExtractKnownFieldValuesImpl(
+ std::vector<Expression>* conjunction_members,
+ std::unordered_map<FieldRef, Datum, FieldRef::Hash>* known_values) {
+ auto unconsumed_end =
+ std::partition(conjunction_members->begin(), conjunction_members->end(),
+ [](const Expression& expr) {
+                         // search for equality conditions between a field and a literal
+ auto call = expr.call();
+ if (!call) return true;
+
+ if (call->function_name == "equal") {
+ auto ref = call->arguments[0].field_ref();
+ auto lit = call->arguments[1].literal();
+ return !(ref && lit);
+ }
+
+ if (call->function_name == "is_null") {
+ auto ref = call->arguments[0].field_ref();
+ return !ref;
+ }
+
+ return true;
+ });
+
+ for (auto it = unconsumed_end; it != conjunction_members->end(); ++it) {
+ auto call = CallNotNull(*it);
+
+ if (call->function_name == "equal") {
+ auto ref = call->arguments[0].field_ref();
+ auto lit = call->arguments[1].literal();
+ known_values->emplace(*ref, *lit);
+ } else if (call->function_name == "is_null") {
+ auto ref = call->arguments[0].field_ref();
+ known_values->emplace(*ref, Datum(std::make_shared<NullScalar>()));
+ }
+ }
+
+ conjunction_members->erase(unconsumed_end, conjunction_members->end());
+
+ return Status::OK();
+}
+
+} // namespace
+
+Result<KnownFieldValues> ExtractKnownFieldValues(
+ const Expression& guaranteed_true_predicate) {
+ auto conjunction_members = GuaranteeConjunctionMembers(guaranteed_true_predicate);
+ KnownFieldValues known_values;
+ RETURN_NOT_OK(ExtractKnownFieldValuesImpl(&conjunction_members, &known_values.map));
+ return known_values;
+}
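+
+// For illustration: given the guarantee (a == 3 and is_null(b)), the returned
+// map is {FieldRef("a"): Datum(3), FieldRef("b"): null scalar}.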
+
+Result<Expression> ReplaceFieldsWithKnownValues(const KnownFieldValues& known_values,
+ Expression expr) {
+ if (!expr.IsBound()) {
+ return Status::Invalid(
+ "ReplaceFieldsWithKnownValues called on an unbound Expression");
+ }
+
+ return Modify(
+ std::move(expr),
+ [&known_values](Expression expr) -> Result<Expression> {
+ if (auto ref = expr.field_ref()) {
+ auto it = known_values.map.find(*ref);
+ if (it != known_values.map.end()) {
+ Datum lit = it->second;
+ if (lit.descr() == expr.descr()) return literal(std::move(lit));
+ // type mismatch, try casting the known value to the correct type
+
+ if (expr.type()->id() == Type::DICTIONARY &&
+ lit.type()->id() != Type::DICTIONARY) {
+ // the known value must be dictionary encoded
+
+ const auto& dict_type = checked_cast<const DictionaryType&>(*expr.type());
+ if (!lit.type()->Equals(dict_type.value_type())) {
+ ARROW_ASSIGN_OR_RAISE(lit, compute::Cast(lit, dict_type.value_type()));
+ }
+
+ if (lit.is_scalar()) {
+ ARROW_ASSIGN_OR_RAISE(auto dictionary,
+ MakeArrayFromScalar(*lit.scalar(), 1));
+
+ lit = Datum{DictionaryScalar::Make(MakeScalar<int32_t>(0),
+ std::move(dictionary))};
+ }
+ }
+
+ ARROW_ASSIGN_OR_RAISE(lit, compute::Cast(lit, expr.type()));
+ return literal(std::move(lit));
+ }
+ }
+ return expr;
+ },
+ [](Expression expr, ...) { return expr; });
+}
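+
+// For illustration (expr must already be bound): with known values {a: 3},
+// the filter (a < 5) rewrites to (3 < 5), which FoldConstants can then
+// collapse to literal(true).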
+
+namespace {
+
+bool IsBinaryAssociativeCommutative(const Expression::Call& call) {
+ static std::unordered_set<std::string> binary_associative_commutative{
+ "and", "or", "and_kleene", "or_kleene", "xor",
+ "multiply", "add", "multiply_checked", "add_checked"};
+
+ auto it = binary_associative_commutative.find(call.function_name);
+ return it != binary_associative_commutative.end();
+}
+
+} // namespace
+
+Result<Expression> Canonicalize(Expression expr, compute::ExecContext* exec_context) {
+ if (exec_context == nullptr) {
+ compute::ExecContext exec_context;
+ return Canonicalize(std::move(expr), &exec_context);
+ }
+
+ // If potentially reconstructing more deeply than a call's immediate arguments
+ // (for example, when reorganizing an associative chain), add expressions to this set to
+ // avoid unnecessary work
+ struct {
+ std::unordered_set<Expression, Expression::Hash> set_;
+
+ bool operator()(const Expression& expr) const {
+ return set_.find(expr) != set_.end();
+ }
+
+ void Add(std::vector<Expression> exprs) {
+ std::move(exprs.begin(), exprs.end(), std::inserter(set_, set_.end()));
+ }
+ } AlreadyCanonicalized;
+
+ return Modify(
+ std::move(expr),
+ [&AlreadyCanonicalized, exec_context](Expression expr) -> Result<Expression> {
+ auto call = expr.call();
+ if (!call) return expr;
+
+ if (AlreadyCanonicalized(expr)) return expr;
+
+ if (IsBinaryAssociativeCommutative(*call)) {
+ struct {
+ int Priority(const Expression& operand) const {
+ // order literals first, starting with nulls
+ if (operand.IsNullLiteral()) return 0;
+ if (operand.literal()) return 1;
+ return 2;
+ }
+ bool operator()(const Expression& l, const Expression& r) const {
+ return Priority(l) < Priority(r);
+ }
+ } CanonicalOrdering;
+
+ FlattenedAssociativeChain chain(expr);
+ if (chain.was_left_folded &&
+ std::is_sorted(chain.fringe.begin(), chain.fringe.end(),
+ CanonicalOrdering)) {
+ AlreadyCanonicalized.Add(std::move(chain.exprs));
+ return expr;
+ }
+
+ std::stable_sort(chain.fringe.begin(), chain.fringe.end(), CanonicalOrdering);
+
+ // fold the chain back up
+ auto folded =
+ FoldLeft(chain.fringe.begin(), chain.fringe.end(),
+ [call, &AlreadyCanonicalized](Expression l, Expression r) {
+ auto canonicalized_call = *call;
+ canonicalized_call.arguments = {std::move(l), std::move(r)};
+ Expression expr(std::move(canonicalized_call));
+ AlreadyCanonicalized.Add({expr});
+ return expr;
+ });
+ return std::move(*folded);
+ }
+
+ if (auto cmp = Comparison::Get(call->function_name)) {
+ if (call->arguments[0].literal() && !call->arguments[1].literal()) {
+ // ensure that literals are on comparisons' RHS
+ auto flipped_call = *call;
+
+ std::swap(flipped_call.arguments[0], flipped_call.arguments[1]);
+ flipped_call.function_name =
+ Comparison::GetName(Comparison::GetFlipped(*cmp));
+
+ return BindNonRecursive(flipped_call,
+ /*insert_implicit_casts=*/false, exec_context);
+ }
+ }
+
+ return expr;
+ },
+ [](Expression expr, ...) { return expr; });
+}
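+
+// For illustration of the canonical forms produced above:
+//
+//   (3 < a)       -> (a > 3)       // literals move to comparisons' RHS
+//   (a and true)  -> (true and a)  // literals sort to the front of chains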
+
+namespace {
+
+Result<Expression> DirectComparisonSimplification(Expression expr,
+ const Expression::Call& guarantee) {
+ return Modify(
+ std::move(expr), [](Expression expr) { return expr; },
+ [&guarantee](Expression expr, ...) -> Result<Expression> {
+ auto call = expr.call();
+ if (!call) return expr;
+
+ // Ensure both calls are comparisons with equal LHS and scalar RHS
+ auto cmp = Comparison::Get(expr);
+ auto cmp_guarantee = Comparison::Get(guarantee.function_name);
+
+ if (!cmp) return expr;
+ if (!cmp_guarantee) return expr;
+
+ const auto& lhs = Comparison::StripOrderPreservingCasts(call->arguments[0]);
+ const auto& guarantee_lhs = guarantee.arguments[0];
+ if (lhs != guarantee_lhs) return expr;
+
+ auto rhs = call->arguments[1].literal();
+ auto guarantee_rhs = guarantee.arguments[1].literal();
+
+ if (!rhs) return expr;
+ if (!rhs->is_scalar()) return expr;
+
+ if (!guarantee_rhs) return expr;
+ if (!guarantee_rhs->is_scalar()) return expr;
+
+ ARROW_ASSIGN_OR_RAISE(auto cmp_rhs_guarantee_rhs,
+ Comparison::Execute(*rhs, *guarantee_rhs));
+ DCHECK_NE(cmp_rhs_guarantee_rhs, Comparison::NA);
+
+ if (cmp_rhs_guarantee_rhs == Comparison::EQUAL) {
+ // RHS of filter is equal to RHS of guarantee
+
+ if ((*cmp & *cmp_guarantee) == *cmp_guarantee) {
+ // guarantee is a subset of filter, so all data will be included
+ // x > 1, x >= 1, x != 1 guaranteed by x > 1
+ return literal(true);
+ }
+
+ if ((*cmp & *cmp_guarantee) == 0) {
+ // guarantee disjoint with filter, so all data will be excluded
+ // x > 1, x >= 1, x != 1 unsatisfiable if x == 1
+ return literal(false);
+ }
+
+ return expr;
+ }
+
+ if (*cmp_guarantee & cmp_rhs_guarantee_rhs) {
+ // x > 1, x >= 1, x != 1 cannot use guarantee x >= 3
+ return expr;
+ }
+
+ if (*cmp & Comparison::GetFlipped(cmp_rhs_guarantee_rhs)) {
+ // x > 1, x >= 1, x != 1 guaranteed by x >= 3
+ return literal(true);
+ } else {
+ // x < 1, x <= 1, x == 1 unsatisfiable if x >= 3
+ return literal(false);
+ }
+ });
+}
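+
+// Illustrative sketch of the cases above (assumed guarantees and filters):
+// under the guarantee x == 1, the filter x >= 1 simplifies to literal(true)
+// (the guarantee implies the filter) and x > 1 to literal(false) (disjoint);
+// under the guarantee x >= 3, the filter x > 1 simplifies to literal(true)
+// and x < 1 to literal(false).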
+
+} // namespace
+
+Result<Expression> SimplifyWithGuarantee(Expression expr,
+ const Expression& guaranteed_true_predicate) {
+ auto conjunction_members = GuaranteeConjunctionMembers(guaranteed_true_predicate);
+
+ KnownFieldValues known_values;
+ RETURN_NOT_OK(ExtractKnownFieldValuesImpl(&conjunction_members, &known_values.map));
+
+ ARROW_ASSIGN_OR_RAISE(expr,
+ ReplaceFieldsWithKnownValues(known_values, std::move(expr)));
+
+ auto CanonicalizeAndFoldConstants = [&expr] {
+ ARROW_ASSIGN_OR_RAISE(expr, Canonicalize(std::move(expr)));
+ ARROW_ASSIGN_OR_RAISE(expr, FoldConstants(std::move(expr)));
+ return Status::OK();
+ };
+ RETURN_NOT_OK(CanonicalizeAndFoldConstants());
+
+ for (const auto& guarantee : conjunction_members) {
+ if (Comparison::Get(guarantee) && guarantee.call()->arguments[1].literal()) {
+ ARROW_ASSIGN_OR_RAISE(
+ auto simplified, DirectComparisonSimplification(expr, *CallNotNull(guarantee)));
+
+ if (Identical(simplified, expr)) continue;
+
+ expr = std::move(simplified);
+ RETURN_NOT_OK(CanonicalizeAndFoldConstants());
+ }
+ }
+
+ return expr;
+}
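+
+// Illustrative usage sketch (assumed bound filter; error handling elided):
+//
+//   Expression filter = greater(field_ref("x"), literal(1));
+//   Expression guarantee = equal(field_ref("x"), literal(5));
+//   ARROW_ASSIGN_OR_RAISE(filter, SimplifyWithGuarantee(filter, guarantee));
+//   // filter is now literal(true): x == 5 implies x > 1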
+
+// Serialization is accomplished by converting an expression to KeyValueMetadata, which is
+// stored in the schema of a RecordBatch. Embedded arrays and scalars are stored in its
+// columns. Finally, the RecordBatch is written to an IPC file.
+Result<std::shared_ptr<Buffer>> Serialize(const Expression& expr) {
+ struct {
+ std::shared_ptr<KeyValueMetadata> metadata_ = std::make_shared<KeyValueMetadata>();
+ ArrayVector columns_;
+
+ Result<std::string> AddScalar(const Scalar& scalar) {
+ auto ret = columns_.size();
+ ARROW_ASSIGN_OR_RAISE(auto array, MakeArrayFromScalar(scalar, 1));
+ columns_.push_back(std::move(array));
+ return std::to_string(ret);
+ }
+
+ Status Visit(const Expression& expr) {
+ if (auto lit = expr.literal()) {
+ if (!lit->is_scalar()) {
+ return Status::NotImplemented("Serialization of non-scalar literals");
+ }
+ ARROW_ASSIGN_OR_RAISE(auto value, AddScalar(*lit->scalar()));
+ metadata_->Append("literal", std::move(value));
+ return Status::OK();
+ }
+
+ if (auto ref = expr.field_ref()) {
+ if (!ref->name()) {
+ return Status::NotImplemented("Serialization of non-name field_refs");
+ }
+ metadata_->Append("field_ref", *ref->name());
+ return Status::OK();
+ }
+
+ auto call = CallNotNull(expr);
+ metadata_->Append("call", call->function_name);
+
+ for (const auto& argument : call->arguments) {
+ RETURN_NOT_OK(Visit(argument));
+ }
+
+ if (call->options) {
+ ARROW_ASSIGN_OR_RAISE(auto options_scalar,
+ internal::FunctionOptionsToStructScalar(*call->options));
+ ARROW_ASSIGN_OR_RAISE(auto value, AddScalar(*options_scalar));
+ metadata_->Append("options", std::move(value));
+ }
+
+ metadata_->Append("end", call->function_name);
+ return Status::OK();
+ }
+
+ Result<std::shared_ptr<RecordBatch>> operator()(const Expression& expr) {
+ RETURN_NOT_OK(Visit(expr));
+ FieldVector fields(columns_.size());
+ for (size_t i = 0; i < fields.size(); ++i) {
+ fields[i] = field("", columns_[i]->type());
+ }
+ return RecordBatch::Make(schema(std::move(fields), std::move(metadata_)), 1,
+ std::move(columns_));
+ }
+ } ToRecordBatch;
+
+ ARROW_ASSIGN_OR_RAISE(auto batch, ToRecordBatch(expr));
+ ARROW_ASSIGN_OR_RAISE(auto stream, io::BufferOutputStream::Create());
+ ARROW_ASSIGN_OR_RAISE(auto writer, ipc::MakeFileWriter(stream, batch->schema()));
+ RETURN_NOT_OK(writer->WriteRecordBatch(*batch));
+ RETURN_NOT_OK(writer->Close());
+ return stream->Finish();
+}
+
+Result<Expression> Deserialize(std::shared_ptr<Buffer> buffer) {
+ io::BufferReader stream(std::move(buffer));
+ ARROW_ASSIGN_OR_RAISE(auto reader, ipc::RecordBatchFileReader::Open(&stream));
+ ARROW_ASSIGN_OR_RAISE(auto batch, reader->ReadRecordBatch(0));
+ if (batch->schema()->metadata() == nullptr) {
+ return Status::Invalid("serialized Expression's batch repr had null metadata");
+ }
+ if (batch->num_rows() != 1) {
+ return Status::Invalid(
+ "serialized Expression's batch repr was not a single row - had ",
+ batch->num_rows());
+ }
+
+ struct FromRecordBatch {
+ const RecordBatch& batch_;
+ int index_;
+
+ const KeyValueMetadata& metadata() { return *batch_.schema()->metadata(); }
+
+ Result<std::shared_ptr<Scalar>> GetScalar(const std::string& i) {
+ int32_t column_index;
+ if (!::arrow::internal::ParseValue<Int32Type>(i.data(), i.length(),
+ &column_index)) {
+ return Status::Invalid("Couldn't parse column_index");
+ }
+ if (column_index >= batch_.num_columns()) {
+ return Status::Invalid("column_index out of bounds");
+ }
+ return batch_.column(column_index)->GetScalar(0);
+ }
+
+ Result<Expression> GetOne() {
+ if (index_ >= metadata().size()) {
+ return Status::Invalid("unterminated serialized Expression");
+ }
+
+ const std::string& key = metadata().key(index_);
+ const std::string& value = metadata().value(index_);
+ ++index_;
+
+ if (key == "literal") {
+ ARROW_ASSIGN_OR_RAISE(auto scalar, GetScalar(value));
+ return literal(std::move(scalar));
+ }
+
+ if (key == "field_ref") {
+ return field_ref(value);
+ }
+
+ if (key != "call") {
+ return Status::Invalid("Unrecognized serialized Expression key ", key);
+ }
+
+ std::vector<Expression> arguments;
+ while (metadata().key(index_) != "end") {
+ if (metadata().key(index_) == "options") {
+ ARROW_ASSIGN_OR_RAISE(auto options_scalar, GetScalar(metadata().value(index_)));
+ std::shared_ptr<compute::FunctionOptions> options;
+ if (options_scalar) {
+ ARROW_ASSIGN_OR_RAISE(
+ options, internal::FunctionOptionsFromStructScalar(
+ checked_cast<const StructScalar&>(*options_scalar)));
+ }
+ auto expr = call(value, std::move(arguments), std::move(options));
+ index_ += 2;
+ return expr;
+ }
+
+ ARROW_ASSIGN_OR_RAISE(auto argument, GetOne());
+ arguments.push_back(std::move(argument));
+ }
+
+ ++index_;
+ return call(value, std::move(arguments));
+ }
+ };
+
+ return FromRecordBatch{*batch, 0}.GetOne();
+}
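+
+// Illustrative round-trip sketch (assumed expression; error handling elided):
+//
+//   Expression expr = greater(field_ref("x"), literal(1));
+//   ARROW_ASSIGN_OR_RAISE(auto buffer, Serialize(expr));
+//   ARROW_ASSIGN_OR_RAISE(auto roundtripped, Deserialize(buffer));
+//   // roundtripped.Equals(expr)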
+
+Expression project(std::vector<Expression> values, std::vector<std::string> names) {
+ return call("make_struct", std::move(values),
+ compute::MakeStructOptions{std::move(names)});
+}
+
+Expression equal(Expression lhs, Expression rhs) {
+ return call("equal", {std::move(lhs), std::move(rhs)});
+}
+
+Expression not_equal(Expression lhs, Expression rhs) {
+ return call("not_equal", {std::move(lhs), std::move(rhs)});
+}
+
+Expression less(Expression lhs, Expression rhs) {
+ return call("less", {std::move(lhs), std::move(rhs)});
+}
+
+Expression less_equal(Expression lhs, Expression rhs) {
+ return call("less_equal", {std::move(lhs), std::move(rhs)});
+}
+
+Expression greater(Expression lhs, Expression rhs) {
+ return call("greater", {std::move(lhs), std::move(rhs)});
+}
+
+Expression greater_equal(Expression lhs, Expression rhs) {
+ return call("greater_equal", {std::move(lhs), std::move(rhs)});
+}
+
+Expression is_null(Expression lhs) { return call("is_null", {std::move(lhs)}); }
+
+Expression is_valid(Expression lhs) { return call("is_valid", {std::move(lhs)}); }
+
+Expression and_(Expression lhs, Expression rhs) {
+ return call("and_kleene", {std::move(lhs), std::move(rhs)});
+}
+
+Expression and_(const std::vector<Expression>& operands) {
+ auto folded = FoldLeft<Expression(Expression, Expression)>(operands.begin(),
+ operands.end(), and_);
+ if (folded) {
+ return std::move(*folded);
+ }
+ return literal(true);
+}
+
+Expression or_(Expression lhs, Expression rhs) {
+ return call("or_kleene", {std::move(lhs), std::move(rhs)});
+}
+
+Expression or_(const std::vector<Expression>& operands) {
+ auto folded =
+ FoldLeft<Expression(Expression, Expression)>(operands.begin(), operands.end(), or_);
+ if (folded) {
+ return std::move(*folded);
+ }
+ return literal(false);
+}
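+
+// Note (illustrative): the vector overloads above left-fold their operands, so
+// and_({a, b, c}) builds and_(and_(a, b), c); an empty vector yields the fold
+// identity, literal(true) for and_ and literal(false) for or_.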
+
+Expression not_(Expression operand) { return call("invert", {std::move(operand)}); }
+
+} // namespace compute
+} // namespace arrow
diff --git a/contrib/libs/apache/arrow/cpp/src/arrow/compute/exec/expression.h b/contrib/libs/apache/arrow/cpp/src/arrow/compute/exec/expression.h
index 5ae95532c2b..3810accf70a 100644
--- a/contrib/libs/apache/arrow/cpp/src/arrow/compute/exec/expression.h
+++ b/contrib/libs/apache/arrow/cpp/src/arrow/compute/exec/expression.h
@@ -1,269 +1,269 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-// This API is EXPERIMENTAL.
-
-#pragma once
-
-#include <memory>
-#include <string>
-#include <utility>
-#include <vector>
-
-#include "arrow/compute/type_fwd.h"
-#include "arrow/datum.h"
-#include "arrow/type_fwd.h"
-#include "arrow/util/variant.h"
-
-namespace arrow {
-namespace compute {
-
-/// An unbound expression which maps a single Datum to another Datum.
-/// An expression is one of
-/// - A literal Datum.
-/// - A reference to a single (potentially nested) field of the input Datum.
-/// - A call to a compute function, with arguments specified by other Expressions.
-class ARROW_EXPORT Expression {
- public:
- struct Call {
- std::string function_name;
- std::vector<Expression> arguments;
- std::shared_ptr<FunctionOptions> options;
- // Cached hash value
- size_t hash;
-
- // post-Bind properties:
- std::shared_ptr<Function> function;
- const Kernel* kernel = NULLPTR;
- std::shared_ptr<KernelState> kernel_state;
- ValueDescr descr;
-
- void ComputeHash();
- };
-
- std::string ToString() const;
- bool Equals(const Expression& other) const;
- size_t hash() const;
- struct Hash {
- size_t operator()(const Expression& expr) const { return expr.hash(); }
- };
-
- /// Bind this expression to the given input type, looking up Kernels and field types.
- /// Some expression simplification may be performed and implicit casts will be inserted.
- /// Any state necessary for execution will be initialized and returned.
- Result<Expression> Bind(const ValueDescr& in, ExecContext* = NULLPTR) const;
- Result<Expression> Bind(const Schema& in_schema, ExecContext* = NULLPTR) const;
-
- // XXX someday
- // Clone all KernelState in this bound expression. If any function referenced by this
- // expression has mutable KernelState, it is not safe to execute or apply simplification
- // passes to it (or copies of it!) from multiple threads. Cloning state produces new
- // KernelStates where necessary to ensure that Expressions may be manipulated safely
- // on multiple threads.
- // Result<ExpressionState> CloneState() const;
- // Status SetState(ExpressionState);
-
- /// Return true if all of an expression's field references have an explicit ValueDescr
- /// and all of its functions' kernels have been looked up.
- bool IsBound() const;
-
- /// Return true if this expression is composed only of Scalar literals, field
- /// references, and calls to ScalarFunctions.
- bool IsScalarExpression() const;
-
- /// Return true if this expression is literal and entirely null.
- bool IsNullLiteral() const;
-
- /// Return true if this expression could evaluate to true.
- bool IsSatisfiable() const;
-
- // XXX someday
- // Result<PipelineGraph> GetPipelines();
-
- /// Access a Call or return nullptr if this expression is not a call
- const Call* call() const;
- /// Access a Datum or return nullptr if this expression is not a literal
- const Datum* literal() const;
- /// Access a FieldRef or return nullptr if this expression is not a field_ref
- const FieldRef* field_ref() const;
-
- /// The type and shape to which this expression will evaluate
- ValueDescr descr() const;
- std::shared_ptr<DataType> type() const { return descr().type; }
- // XXX someday
- // NullGeneralization::type nullable() const;
-
- struct Parameter {
- FieldRef ref;
-
- // post-bind properties
- ValueDescr descr;
- int index;
- };
- const Parameter* parameter() const;
-
- Expression() = default;
- explicit Expression(Call call);
- explicit Expression(Datum literal);
- explicit Expression(Parameter parameter);
-
- private:
- using Impl = util::Variant<Datum, Parameter, Call>;
- std::shared_ptr<Impl> impl_;
-
- ARROW_EXPORT friend bool Identical(const Expression& l, const Expression& r);
-
- ARROW_EXPORT friend void PrintTo(const Expression&, std::ostream*);
-};
-
-inline bool operator==(const Expression& l, const Expression& r) { return l.Equals(r); }
-inline bool operator!=(const Expression& l, const Expression& r) { return !l.Equals(r); }
-
-// Factories
-
-ARROW_EXPORT
-Expression literal(Datum lit);
-
-template <typename Arg>
-Expression literal(Arg&& arg) {
- return literal(Datum(std::forward<Arg>(arg)));
-}
-
-ARROW_EXPORT
-Expression field_ref(FieldRef ref);
-
-ARROW_EXPORT
-Expression call(std::string function, std::vector<Expression> arguments,
- std::shared_ptr<FunctionOptions> options = NULLPTR);
-
-template <typename Options, typename = typename std::enable_if<
- std::is_base_of<FunctionOptions, Options>::value>::type>
-Expression call(std::string function, std::vector<Expression> arguments,
- Options options) {
- return call(std::move(function), std::move(arguments),
- std::make_shared<Options>(std::move(options)));
-}
-
-/// Assemble a list of all fields referenced by an Expression at any depth.
-ARROW_EXPORT
-std::vector<FieldRef> FieldsInExpression(const Expression&);
-
-/// Check if the expression references any fields.
-ARROW_EXPORT
-bool ExpressionHasFieldRefs(const Expression&);
-
-/// Assemble a mapping from field references to known values.
-struct ARROW_EXPORT KnownFieldValues;
-ARROW_EXPORT
-Result<KnownFieldValues> ExtractKnownFieldValues(
- const Expression& guaranteed_true_predicate);
-
-/// \defgroup expression-passes Functions for modification of Expressions
-///
-/// @{
-///
-/// These transform bound expressions. Some transforms utilize a guarantee: an
-/// Expression which is guaranteed to evaluate to true. The guaranteed_true_predicate
-/// need not be bound, but canonicalization is currently deferred to producers of
-/// guarantees. For example, in order to be recognized as a guarantee on a field value,
-/// an Expression must be a call to "equal" with a field_ref LHS and a literal RHS.
-/// Flipping the arguments, using "is_in" with a single-element value_set, or other
-/// semantically identical Expressions will not be recognized.
-
-/// Weak canonicalization which establishes guarantees for subsequent passes. Even
-/// equivalent Expressions may result in different canonicalized expressions.
-/// TODO this could be a strong canonicalization
-ARROW_EXPORT
-Result<Expression> Canonicalize(Expression, ExecContext* = NULLPTR);
-
-/// Simplify Expressions based on literal arguments (for example, add(null, x) is always
-/// null, so the call is replaced with a null literal). Includes early evaluation of all
-/// calls whose arguments are entirely literal.
-ARROW_EXPORT
-Result<Expression> FoldConstants(Expression);
-
-/// Simplify an Expression by replacing references to fields with their known values.
-ARROW_EXPORT
-Result<Expression> ReplaceFieldsWithKnownValues(const KnownFieldValues& known_values,
- Expression);
-
-/// Simplify an expression by replacing subexpressions based on a guarantee:
-/// a boolean expression which is guaranteed to evaluate to `true`. For example, this is
-/// used to remove redundant function calls from a filter expression or to replace a
-/// reference to a constant-value field with a literal.
-ARROW_EXPORT
-Result<Expression> SimplifyWithGuarantee(Expression,
- const Expression& guaranteed_true_predicate);
-
-/// @}
-
-// Execution
-
-/// Create an ExecBatch suitable for passing to ExecuteScalarExpression() from a
-/// RecordBatch which may have missing or incorrectly ordered columns.
-/// Missing fields will be replaced with null scalars.
-ARROW_EXPORT Result<ExecBatch> MakeExecBatch(const Schema& full_schema,
- const Datum& partial);
-
-/// Execute a scalar expression against the provided state and input ExecBatch. This
-/// expression must be bound.
-ARROW_EXPORT
-Result<Datum> ExecuteScalarExpression(const Expression&, const ExecBatch& input,
- ExecContext* = NULLPTR);
-
-/// Convenience function for invoking against a RecordBatch
-ARROW_EXPORT
-Result<Datum> ExecuteScalarExpression(const Expression&, const Schema& full_schema,
- const Datum& partial_input, ExecContext* = NULLPTR);
-
-// Serialization
-
-ARROW_EXPORT
-Result<std::shared_ptr<Buffer>> Serialize(const Expression&);
-
-ARROW_EXPORT
-Result<Expression> Deserialize(std::shared_ptr<Buffer>);
-
-// Convenience aliases for factories
-
-ARROW_EXPORT Expression project(std::vector<Expression> values,
- std::vector<std::string> names);
-
-ARROW_EXPORT Expression equal(Expression lhs, Expression rhs);
-
-ARROW_EXPORT Expression not_equal(Expression lhs, Expression rhs);
-
-ARROW_EXPORT Expression less(Expression lhs, Expression rhs);
-
-ARROW_EXPORT Expression less_equal(Expression lhs, Expression rhs);
-
-ARROW_EXPORT Expression greater(Expression lhs, Expression rhs);
-
-ARROW_EXPORT Expression greater_equal(Expression lhs, Expression rhs);
-
-ARROW_EXPORT Expression is_null(Expression lhs);
-
-ARROW_EXPORT Expression is_valid(Expression lhs);
-
-ARROW_EXPORT Expression and_(Expression lhs, Expression rhs);
-ARROW_EXPORT Expression and_(const std::vector<Expression>&);
-ARROW_EXPORT Expression or_(Expression lhs, Expression rhs);
-ARROW_EXPORT Expression or_(const std::vector<Expression>&);
-ARROW_EXPORT Expression not_(Expression operand);
-
-} // namespace compute
-} // namespace arrow
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+// This API is EXPERIMENTAL.
+
+#pragma once
+
+#include <memory>
+#include <string>
+#include <utility>
+#include <vector>
+
+#include "arrow/compute/type_fwd.h"
+#include "arrow/datum.h"
+#include "arrow/type_fwd.h"
+#include "arrow/util/variant.h"
+
+namespace arrow {
+namespace compute {
+
+/// An unbound expression which maps a single Datum to another Datum.
+/// An expression is one of
+/// - A literal Datum.
+/// - A reference to a single (potentially nested) field of the input Datum.
+/// - A call to a compute function, with arguments specified by other Expressions.
+class ARROW_EXPORT Expression {
+ public:
+ struct Call {
+ std::string function_name;
+ std::vector<Expression> arguments;
+ std::shared_ptr<FunctionOptions> options;
+ // Cached hash value
+ size_t hash;
+
+ // post-Bind properties:
+ std::shared_ptr<Function> function;
+ const Kernel* kernel = NULLPTR;
+ std::shared_ptr<KernelState> kernel_state;
+ ValueDescr descr;
+
+ void ComputeHash();
+ };
+
+ std::string ToString() const;
+ bool Equals(const Expression& other) const;
+ size_t hash() const;
+ struct Hash {
+ size_t operator()(const Expression& expr) const { return expr.hash(); }
+ };
+
+ /// Bind this expression to the given input type, looking up Kernels and field types.
+ /// Some expression simplification may be performed and implicit casts will be inserted.
+ /// Any state necessary for execution will be initialized and returned.
+ Result<Expression> Bind(const ValueDescr& in, ExecContext* = NULLPTR) const;
+ Result<Expression> Bind(const Schema& in_schema, ExecContext* = NULLPTR) const;
+
+ // XXX someday
+ // Clone all KernelState in this bound expression. If any function referenced by this
+ // expression has mutable KernelState, it is not safe to execute or apply simplification
+ // passes to it (or copies of it!) from multiple threads. Cloning state produces new
+ // KernelStates where necessary to ensure that Expressions may be manipulated safely
+ // on multiple threads.
+ // Result<ExpressionState> CloneState() const;
+ // Status SetState(ExpressionState);
+
+ /// Return true if all of an expression's field references have an explicit ValueDescr
+ /// and all of its functions' kernels have been looked up.
+ bool IsBound() const;
+
+ /// Return true if this expression is composed only of Scalar literals, field
+ /// references, and calls to ScalarFunctions.
+ bool IsScalarExpression() const;
+
+ /// Return true if this expression is literal and entirely null.
+ bool IsNullLiteral() const;
+
+ /// Return true if this expression could evaluate to true.
+ bool IsSatisfiable() const;
+
+ // XXX someday
+ // Result<PipelineGraph> GetPipelines();
+
+ /// Access a Call or return nullptr if this expression is not a call
+ const Call* call() const;
+ /// Access a Datum or return nullptr if this expression is not a literal
+ const Datum* literal() const;
+ /// Access a FieldRef or return nullptr if this expression is not a field_ref
+ const FieldRef* field_ref() const;
+
+ /// The type and shape to which this expression will evaluate
+ ValueDescr descr() const;
+ std::shared_ptr<DataType> type() const { return descr().type; }
+ // XXX someday
+ // NullGeneralization::type nullable() const;
+
+ struct Parameter {
+ FieldRef ref;
+
+ // post-bind properties
+ ValueDescr descr;
+ int index;
+ };
+ const Parameter* parameter() const;
+
+ Expression() = default;
+ explicit Expression(Call call);
+ explicit Expression(Datum literal);
+ explicit Expression(Parameter parameter);
+
+ private:
+ using Impl = util::Variant<Datum, Parameter, Call>;
+ std::shared_ptr<Impl> impl_;
+
+ ARROW_EXPORT friend bool Identical(const Expression& l, const Expression& r);
+
+ ARROW_EXPORT friend void PrintTo(const Expression&, std::ostream*);
+};
+
+inline bool operator==(const Expression& l, const Expression& r) { return l.Equals(r); }
+inline bool operator!=(const Expression& l, const Expression& r) { return !l.Equals(r); }
+
+// Factories
+
+ARROW_EXPORT
+Expression literal(Datum lit);
+
+template <typename Arg>
+Expression literal(Arg&& arg) {
+ return literal(Datum(std::forward<Arg>(arg)));
+}
+
+ARROW_EXPORT
+Expression field_ref(FieldRef ref);
+
+ARROW_EXPORT
+Expression call(std::string function, std::vector<Expression> arguments,
+ std::shared_ptr<FunctionOptions> options = NULLPTR);
+
+template <typename Options, typename = typename std::enable_if<
+ std::is_base_of<FunctionOptions, Options>::value>::type>
+Expression call(std::string function, std::vector<Expression> arguments,
+ Options options) {
+ return call(std::move(function), std::move(arguments),
+ std::make_shared<Options>(std::move(options)));
+}
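+
+// Illustrative sketch: the overload above wraps a concrete FunctionOptions
+// subclass, e.g. (assuming the "make_struct" function, as used by project()):
+//
+//   auto expr = call("make_struct", {field_ref("a"), field_ref("b")},
+//                    compute::MakeStructOptions{{"a", "b"}});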
+
+/// Assemble a list of all fields referenced by an Expression at any depth.
+ARROW_EXPORT
+std::vector<FieldRef> FieldsInExpression(const Expression&);
+
+/// Check if the expression references any fields.
+ARROW_EXPORT
+bool ExpressionHasFieldRefs(const Expression&);
+
+/// Assemble a mapping from field references to known values.
+struct ARROW_EXPORT KnownFieldValues;
+ARROW_EXPORT
+Result<KnownFieldValues> ExtractKnownFieldValues(
+ const Expression& guaranteed_true_predicate);
+
+/// \defgroup expression-passes Functions for modification of Expressions
+///
+/// @{
+///
+/// These transform bound expressions. Some transforms utilize a guarantee: an
+/// Expression which is guaranteed to evaluate to true. The guaranteed_true_predicate
+/// need not be bound, but canonicalization is currently deferred to producers of
+/// guarantees. For example, in order to be recognized as a guarantee on a field value,
+/// an Expression must be a call to "equal" with a field_ref LHS and a literal RHS.
+/// Flipping the arguments, using "is_in" with a single-element value_set, or other
+/// semantically identical Expressions will not be recognized.
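+///
+/// For example (illustrative):
+///
+///   equal(field_ref("a"), literal(1))   // recognized as a guarantee on "a"
+///   equal(literal(1), field_ref("a"))   // flipped arguments: not recognized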
+
+/// Weak canonicalization which establishes guarantees for subsequent passes. Even
+/// equivalent Expressions may result in different canonicalized expressions.
+/// TODO this could be a strong canonicalization
+ARROW_EXPORT
+Result<Expression> Canonicalize(Expression, ExecContext* = NULLPTR);
+
+/// Simplify Expressions based on literal arguments (for example, add(null, x) is always
+/// null, so the call is replaced with a null literal). Includes early evaluation of all
+/// calls whose arguments are entirely literal.
+ARROW_EXPORT
+Result<Expression> FoldConstants(Expression);
+
+/// Simplify an Expression by replacing references to fields with their known values.
+ARROW_EXPORT
+Result<Expression> ReplaceFieldsWithKnownValues(const KnownFieldValues& known_values,
+ Expression);
+
+/// Simplify an expression by replacing subexpressions based on a guarantee:
+/// a boolean expression which is guaranteed to evaluate to `true`. For example, this is
+/// used to remove redundant function calls from a filter expression or to replace a
+/// reference to a constant-value field with a literal.
+ARROW_EXPORT
+Result<Expression> SimplifyWithGuarantee(Expression,
+ const Expression& guaranteed_true_predicate);
+
+/// @}
+
+// Execution
+
+/// Create an ExecBatch suitable for passing to ExecuteScalarExpression() from a
+/// RecordBatch which may have missing or incorrectly ordered columns.
+/// Missing fields will be replaced with null scalars.
+ARROW_EXPORT Result<ExecBatch> MakeExecBatch(const Schema& full_schema,
+ const Datum& partial);
+
+/// Execute a scalar expression against the provided state and input ExecBatch. This
+/// expression must be bound.
+ARROW_EXPORT
+Result<Datum> ExecuteScalarExpression(const Expression&, const ExecBatch& input,
+ ExecContext* = NULLPTR);
+
+/// Convenience function for invoking against a RecordBatch
+ARROW_EXPORT
+Result<Datum> ExecuteScalarExpression(const Expression&, const Schema& full_schema,
+ const Datum& partial_input, ExecContext* = NULLPTR);
+
+// Serialization
+
+ARROW_EXPORT
+Result<std::shared_ptr<Buffer>> Serialize(const Expression&);
+
+ARROW_EXPORT
+Result<Expression> Deserialize(std::shared_ptr<Buffer>);
+
+// Convenience aliases for factories
+
+ARROW_EXPORT Expression project(std::vector<Expression> values,
+ std::vector<std::string> names);
+
+ARROW_EXPORT Expression equal(Expression lhs, Expression rhs);
+
+ARROW_EXPORT Expression not_equal(Expression lhs, Expression rhs);
+
+ARROW_EXPORT Expression less(Expression lhs, Expression rhs);
+
+ARROW_EXPORT Expression less_equal(Expression lhs, Expression rhs);
+
+ARROW_EXPORT Expression greater(Expression lhs, Expression rhs);
+
+ARROW_EXPORT Expression greater_equal(Expression lhs, Expression rhs);
+
+ARROW_EXPORT Expression is_null(Expression lhs);
+
+ARROW_EXPORT Expression is_valid(Expression lhs);
+
+ARROW_EXPORT Expression and_(Expression lhs, Expression rhs);
+ARROW_EXPORT Expression and_(const std::vector<Expression>&);
+ARROW_EXPORT Expression or_(Expression lhs, Expression rhs);
+ARROW_EXPORT Expression or_(const std::vector<Expression>&);
+ARROW_EXPORT Expression not_(Expression operand);
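+
+// Illustrative composition sketch (assumed field names): these factories nest
+// to build predicates, e.g.
+//
+//   auto pred = and_(greater(field_ref("x"), literal(3)),
+//                    not_(is_null(field_ref("y"))));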
+
+} // namespace compute
+} // namespace arrow
diff --git a/contrib/libs/apache/arrow/cpp/src/arrow/compute/exec/expression_internal.h b/contrib/libs/apache/arrow/cpp/src/arrow/compute/exec/expression_internal.h
index abcb99bc576..dc38924d932 100644
--- a/contrib/libs/apache/arrow/cpp/src/arrow/compute/exec/expression_internal.h
+++ b/contrib/libs/apache/arrow/cpp/src/arrow/compute/exec/expression_internal.h
@@ -1,336 +1,336 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-#include "arrow/compute/exec/expression.h"
-
-#include <unordered_map>
-#include <unordered_set>
-#include <vector>
-
-#include "arrow/compute/api_scalar.h"
-#include "arrow/compute/cast.h"
-#include "arrow/compute/registry.h"
-#include "arrow/record_batch.h"
-#include "arrow/table.h"
-#include "arrow/util/logging.h"
-
-namespace arrow {
-
-using internal::checked_cast;
-
-namespace compute {
-
-struct KnownFieldValues {
- std::unordered_map<FieldRef, Datum, FieldRef::Hash> map;
-};
-
-inline const Expression::Call* CallNotNull(const Expression& expr) {
- auto call = expr.call();
- DCHECK_NE(call, nullptr);
- return call;
-}
-
-inline std::vector<ValueDescr> GetDescriptors(const std::vector<Expression>& exprs) {
- std::vector<ValueDescr> descrs(exprs.size());
- for (size_t i = 0; i < exprs.size(); ++i) {
- DCHECK(exprs[i].IsBound());
- descrs[i] = exprs[i].descr();
- }
- return descrs;
-}
-
-inline std::vector<ValueDescr> GetDescriptors(const std::vector<Datum>& values) {
- std::vector<ValueDescr> descrs(values.size());
- for (size_t i = 0; i < values.size(); ++i) {
- descrs[i] = values[i].descr();
- }
- return descrs;
-}
-
-struct Comparison {
- enum type {
- NA = 0,
- EQUAL = 1,
- LESS = 2,
- GREATER = 4,
- NOT_EQUAL = LESS | GREATER,
- LESS_EQUAL = LESS | EQUAL,
- GREATER_EQUAL = GREATER | EQUAL,
- };
-
- static const type* Get(const std::string& function) {
- static std::unordered_map<std::string, type> map{
- {"equal", EQUAL}, {"not_equal", NOT_EQUAL},
- {"less", LESS}, {"less_equal", LESS_EQUAL},
- {"greater", GREATER}, {"greater_equal", GREATER_EQUAL},
- };
-
- auto it = map.find(function);
- return it != map.end() ? &it->second : nullptr;
- }
-
- static const type* Get(const Expression& expr) {
- if (auto call = expr.call()) {
- return Comparison::Get(call->function_name);
- }
- return nullptr;
- }
-
- // Execute a simple Comparison between scalars
- static Result<type> Execute(Datum l, Datum r) {
- if (!l.is_scalar() || !r.is_scalar()) {
- return Status::Invalid("Cannot Execute Comparison on non-scalars");
- }
-
- std::vector<Datum> arguments{std::move(l), std::move(r)};
-
- ARROW_ASSIGN_OR_RAISE(auto equal, compute::CallFunction("equal", arguments));
-
- if (!equal.scalar()->is_valid) return NA;
- if (equal.scalar_as<BooleanScalar>().value) return EQUAL;
-
- ARROW_ASSIGN_OR_RAISE(auto less, compute::CallFunction("less", arguments));
-
- if (!less.scalar()->is_valid) return NA;
- return less.scalar_as<BooleanScalar>().value ? LESS : GREATER;
- }
-
- // Given an Expression wrapped in casts which preserve ordering
- // (for example, cast(field_ref("i16"), to_type=int32())), unwrap the inner Expression.
- // This is used to destructure implicitly cast field_refs during Expression
- // simplification.
- static const Expression& StripOrderPreservingCasts(const Expression& expr) {
- auto call = expr.call();
- if (!call) return expr;
- if (call->function_name != "cast") return expr;
-
- const Expression& from = call->arguments[0];
-
- auto from_id = from.type()->id();
- auto to_id = expr.type()->id();
-
- if (is_floating(to_id)) {
- if (is_integer(from_id) || is_floating(from_id)) {
- return StripOrderPreservingCasts(from);
- }
- return expr;
- }
-
- if (is_unsigned_integer(to_id)) {
- if (is_unsigned_integer(from_id) && bit_width(to_id) >= bit_width(from_id)) {
- return StripOrderPreservingCasts(from);
- }
- return expr;
- }
-
- if (is_signed_integer(to_id)) {
- if (is_integer(from_id) && bit_width(to_id) >= bit_width(from_id)) {
- return StripOrderPreservingCasts(from);
- }
- return expr;
- }
-
- return expr;
- }
-
- static type GetFlipped(type op) {
- switch (op) {
- case NA:
- return NA;
- case EQUAL:
- return EQUAL;
- case LESS:
- return GREATER;
- case GREATER:
- return LESS;
- case NOT_EQUAL:
- return NOT_EQUAL;
- case LESS_EQUAL:
- return GREATER_EQUAL;
- case GREATER_EQUAL:
- return LESS_EQUAL;
- }
- DCHECK(false);
- return NA;
- }
-
- static std::string GetName(type op) {
- switch (op) {
- case NA:
- break;
- case EQUAL:
- return "equal";
- case LESS:
- return "less";
- case GREATER:
- return "greater";
- case NOT_EQUAL:
- return "not_equal";
- case LESS_EQUAL:
- return "less_equal";
- case GREATER_EQUAL:
- return "greater_equal";
- }
- return "na";
- }
-
- static std::string GetOp(type op) {
- switch (op) {
- case NA:
- DCHECK(false) << "unreachable";
- break;
- case EQUAL:
- return "==";
- case LESS:
- return "<";
- case GREATER:
- return ">";
- case NOT_EQUAL:
- return "!=";
- case LESS_EQUAL:
- return "<=";
- case GREATER_EQUAL:
- return ">=";
- }
- DCHECK(false);
- return "";
- }
-};
-
-inline const compute::CastOptions* GetCastOptions(const Expression::Call& call) {
- if (call.function_name != "cast") return nullptr;
- return checked_cast<const compute::CastOptions*>(call.options.get());
-}
-
-inline bool IsSetLookup(const std::string& function) {
- return function == "is_in" || function == "index_in";
-}
-
-inline const compute::MakeStructOptions* GetMakeStructOptions(
- const Expression::Call& call) {
- if (call.function_name != "make_struct") return nullptr;
- return checked_cast<const compute::MakeStructOptions*>(call.options.get());
-}
-
-/// A helper for unboxing an Expression composed of associative function calls.
-/// Such expressions can frequently be rearranged to a semantically equivalent
-/// expression for more optimal execution or more straightforward manipulation.
-/// For example, (a + ((b + 3) + 4)) is equivalent to (((4 + 3) + a) + b) and the latter
-/// can be trivially constant-folded to ((7 + a) + b).
-struct FlattenedAssociativeChain {
- /// True if a chain was already a left fold.
- bool was_left_folded = true;
-
- /// All "branch" expressions in a flattened chain. For example given (a + ((b + 3) + 4))
- /// exprs would be [(a + ((b + 3) + 4)), ((b + 3) + 4), (b + 3)]
- std::vector<Expression> exprs;
-
- /// All "leaf" expressions in a flattened chain. For example given (a + ((b + 3) + 4))
- /// the fringe would be [a, b, 3, 4]
- std::vector<Expression> fringe;
-
- explicit FlattenedAssociativeChain(Expression expr) : exprs{std::move(expr)} {
- auto call = CallNotNull(exprs.back());
- fringe = call->arguments;
-
- auto it = fringe.begin();
-
- while (it != fringe.end()) {
- auto sub_call = it->call();
- if (!sub_call || sub_call->function_name != call->function_name) {
- ++it;
- continue;
- }
-
- if (it != fringe.begin()) {
- was_left_folded = false;
- }
-
- exprs.push_back(std::move(*it));
- it = fringe.erase(it);
-
- auto index = it - fringe.begin();
- fringe.insert(it, sub_call->arguments.begin(), sub_call->arguments.end());
- it = fringe.begin() + index;
- // NB: no increment so we hit sub_call's first argument next iteration
- }
-
- DCHECK(std::all_of(exprs.begin(), exprs.end(), [](const Expression& expr) {
- return CallNotNull(expr)->options == nullptr;
- }));
- }
-};
-
-inline Result<std::shared_ptr<compute::Function>> GetFunction(
- const Expression::Call& call, compute::ExecContext* exec_context) {
- if (call.function_name != "cast") {
- return exec_context->func_registry()->GetFunction(call.function_name);
- }
- // XXX this special case is strange; why not make "cast" a ScalarFunction?
- const auto& to_type = checked_cast<const compute::CastOptions&>(*call.options).to_type;
- return compute::GetCastFunction(to_type);
-}
-
-/// Modify an Expression with pre-order and post-order visitation.
-/// `pre` will be invoked on each Expression. `pre` will visit Calls before their
-/// arguments, `post_call` will visit Calls (and no other Expressions) after their
-/// arguments. Visitors should return an expression Identical to their input to indicate
-/// no change; this prevents unnecessary reconstruction in the common case where no
-/// modification is possible or necessary.
-///
-/// If an argument was modified, `post_call` visits a reconstructed Call with the modified
-/// arguments but also receives a pointer to the unmodified Expression as a second
-/// argument. If no arguments were modified the unmodified Expression* will be nullptr.
-template <typename PreVisit, typename PostVisitCall>
-Result<Expression> Modify(Expression expr, const PreVisit& pre,
- const PostVisitCall& post_call) {
- ARROW_ASSIGN_OR_RAISE(expr, Result<Expression>(pre(std::move(expr))));
-
- auto call = expr.call();
- if (!call) return expr;
-
- bool at_least_one_modified = false;
- std::vector<Expression> modified_arguments;
-
- for (size_t i = 0; i < call->arguments.size(); ++i) {
- ARROW_ASSIGN_OR_RAISE(auto modified_argument,
- Modify(call->arguments[i], pre, post_call));
-
- if (Identical(modified_argument, call->arguments[i])) {
- continue;
- }
-
- if (!at_least_one_modified) {
- modified_arguments = call->arguments;
- at_least_one_modified = true;
- }
-
- modified_arguments[i] = std::move(modified_argument);
- }
-
- if (at_least_one_modified) {
- // reconstruct the call expression with the modified arguments
- auto modified_call = *call;
- modified_call.arguments = std::move(modified_arguments);
- return post_call(Expression(std::move(modified_call)), &expr);
- }
-
- return post_call(std::move(expr), nullptr);
-}
-
-} // namespace compute
-} // namespace arrow
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "arrow/compute/exec/expression.h"
+
+#include <unordered_map>
+#include <unordered_set>
+#include <vector>
+
+#include "arrow/compute/api_scalar.h"
+#include "arrow/compute/cast.h"
+#include "arrow/compute/registry.h"
+#include "arrow/record_batch.h"
+#include "arrow/table.h"
+#include "arrow/util/logging.h"
+
+namespace arrow {
+
+using internal::checked_cast;
+
+namespace compute {
+
+struct KnownFieldValues {
+ std::unordered_map<FieldRef, Datum, FieldRef::Hash> map;
+};
+
+inline const Expression::Call* CallNotNull(const Expression& expr) {
+ auto call = expr.call();
+ DCHECK_NE(call, nullptr);
+ return call;
+}
+
+inline std::vector<ValueDescr> GetDescriptors(const std::vector<Expression>& exprs) {
+ std::vector<ValueDescr> descrs(exprs.size());
+ for (size_t i = 0; i < exprs.size(); ++i) {
+ DCHECK(exprs[i].IsBound());
+ descrs[i] = exprs[i].descr();
+ }
+ return descrs;
+}
+
+inline std::vector<ValueDescr> GetDescriptors(const std::vector<Datum>& values) {
+ std::vector<ValueDescr> descrs(values.size());
+ for (size_t i = 0; i < values.size(); ++i) {
+ descrs[i] = values[i].descr();
+ }
+ return descrs;
+}
+
+struct Comparison {
+ enum type {
+ NA = 0,
+ EQUAL = 1,
+ LESS = 2,
+ GREATER = 4,
+ NOT_EQUAL = LESS | GREATER,
+ LESS_EQUAL = LESS | EQUAL,
+ GREATER_EQUAL = GREATER | EQUAL,
+ };
+
+ static const type* Get(const std::string& function) {
+ static std::unordered_map<std::string, type> map{
+ {"equal", EQUAL}, {"not_equal", NOT_EQUAL},
+ {"less", LESS}, {"less_equal", LESS_EQUAL},
+ {"greater", GREATER}, {"greater_equal", GREATER_EQUAL},
+ };
+
+ auto it = map.find(function);
+ return it != map.end() ? &it->second : nullptr;
+ }
+
+ static const type* Get(const Expression& expr) {
+ if (auto call = expr.call()) {
+ return Comparison::Get(call->function_name);
+ }
+ return nullptr;
+ }
+
+ // Execute a simple Comparison between scalars
+ static Result<type> Execute(Datum l, Datum r) {
+ if (!l.is_scalar() || !r.is_scalar()) {
+ return Status::Invalid("Cannot Execute Comparison on non-scalars");
+ }
+
+ std::vector<Datum> arguments{std::move(l), std::move(r)};
+
+ ARROW_ASSIGN_OR_RAISE(auto equal, compute::CallFunction("equal", arguments));
+
+ if (!equal.scalar()->is_valid) return NA;
+ if (equal.scalar_as<BooleanScalar>().value) return EQUAL;
+
+ ARROW_ASSIGN_OR_RAISE(auto less, compute::CallFunction("less", arguments));
+
+ if (!less.scalar()->is_valid) return NA;
+ return less.scalar_as<BooleanScalar>().value ? LESS : GREATER;
+ }
+
+ // Given an Expression wrapped in casts which preserve ordering
+ // (for example, cast(field_ref("i16"), to_type=int32())), unwrap the inner Expression.
+ // This is used to destructure implicitly cast field_refs during Expression
+ // simplification.
+ static const Expression& StripOrderPreservingCasts(const Expression& expr) {
+ auto call = expr.call();
+ if (!call) return expr;
+ if (call->function_name != "cast") return expr;
+
+ const Expression& from = call->arguments[0];
+
+ auto from_id = from.type()->id();
+ auto to_id = expr.type()->id();
+
+ if (is_floating(to_id)) {
+ if (is_integer(from_id) || is_floating(from_id)) {
+ return StripOrderPreservingCasts(from);
+ }
+ return expr;
+ }
+
+ if (is_unsigned_integer(to_id)) {
+ if (is_unsigned_integer(from_id) && bit_width(to_id) >= bit_width(from_id)) {
+ return StripOrderPreservingCasts(from);
+ }
+ return expr;
+ }
+
+ if (is_signed_integer(to_id)) {
+ if (is_integer(from_id) && bit_width(to_id) >= bit_width(from_id)) {
+ return StripOrderPreservingCasts(from);
+ }
+ return expr;
+ }
+
+ return expr;
+ }
+
+ static type GetFlipped(type op) {
+ switch (op) {
+ case NA:
+ return NA;
+ case EQUAL:
+ return EQUAL;
+ case LESS:
+ return GREATER;
+ case GREATER:
+ return LESS;
+ case NOT_EQUAL:
+ return NOT_EQUAL;
+ case LESS_EQUAL:
+ return GREATER_EQUAL;
+ case GREATER_EQUAL:
+ return LESS_EQUAL;
+ }
+ DCHECK(false);
+ return NA;
+ }
+
+ static std::string GetName(type op) {
+ switch (op) {
+ case NA:
+ break;
+ case EQUAL:
+ return "equal";
+ case LESS:
+ return "less";
+ case GREATER:
+ return "greater";
+ case NOT_EQUAL:
+ return "not_equal";
+ case LESS_EQUAL:
+ return "less_equal";
+ case GREATER_EQUAL:
+ return "greater_equal";
+ }
+ return "na";
+ }
+
+ static std::string GetOp(type op) {
+ switch (op) {
+ case NA:
+ DCHECK(false) << "unreachable";
+ break;
+ case EQUAL:
+ return "==";
+ case LESS:
+ return "<";
+ case GREATER:
+ return ">";
+ case NOT_EQUAL:
+ return "!=";
+ case LESS_EQUAL:
+ return "<=";
+ case GREATER_EQUAL:
+ return ">=";
+ }
+ DCHECK(false);
+ return "";
+ }
+};
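+
+// Illustrative note: the enum above encodes compound comparisons as unions of
+// EQUAL/LESS/GREATER, so implication checks reduce to bitwise tests, e.g.
+//
+//   *Comparison::Get("less_equal") == (Comparison::LESS | Comparison::EQUAL)
+//   Comparison::GetFlipped(Comparison::LESS_EQUAL) == Comparison::GREATER_EQUAL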
+
+inline const compute::CastOptions* GetCastOptions(const Expression::Call& call) {
+ if (call.function_name != "cast") return nullptr;
+ return checked_cast<const compute::CastOptions*>(call.options.get());
+}
+
+inline bool IsSetLookup(const std::string& function) {
+ return function == "is_in" || function == "index_in";
+}
+
+inline const compute::MakeStructOptions* GetMakeStructOptions(
+ const Expression::Call& call) {
+ if (call.function_name != "make_struct") return nullptr;
+ return checked_cast<const compute::MakeStructOptions*>(call.options.get());
+}
+
+/// A helper for unboxing an Expression composed of associative function calls.
+/// Such expressions can frequently be rearranged to a semantically equivalent
+/// expression for more optimal execution or more straightforward manipulation.
+/// For example, (a + ((b + 3) + 4)) is equivalent to (((4 + 3) + a) + b) and the latter
+/// can be trivially constant-folded to ((7 + a) + b).
+struct FlattenedAssociativeChain {
+ /// True if a chain was already a left fold.
+ bool was_left_folded = true;
+
+ /// All "branch" expressions in a flattened chain. For example given (a + ((b + 3) + 4))
+ /// exprs would be [(a + ((b + 3) + 4)), ((b + 3) + 4), (b + 3)]
+ std::vector<Expression> exprs;
+
+ /// All "leaf" expressions in a flattened chain. For example given (a + ((b + 3) + 4))
+ /// the fringe would be [a, b, 3, 4]
+ std::vector<Expression> fringe;
+
+ explicit FlattenedAssociativeChain(Expression expr) : exprs{std::move(expr)} {
+ auto call = CallNotNull(exprs.back());
+ fringe = call->arguments;
+
+ auto it = fringe.begin();
+
+ while (it != fringe.end()) {
+ auto sub_call = it->call();
+ if (!sub_call || sub_call->function_name != call->function_name) {
+ ++it;
+ continue;
+ }
+
+ if (it != fringe.begin()) {
+ was_left_folded = false;
+ }
+
+ exprs.push_back(std::move(*it));
+ it = fringe.erase(it);
+
+ auto index = it - fringe.begin();
+ fringe.insert(it, sub_call->arguments.begin(), sub_call->arguments.end());
+ it = fringe.begin() + index;
+ // NB: no increment so we hit sub_call's first argument next iteration
+ }
+
+ DCHECK(std::all_of(exprs.begin(), exprs.end(), [](const Expression& expr) {
+ return CallNotNull(expr)->options == nullptr;
+ }));
+ }
+};
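+
+// Illustrative usage sketch (assumed expression `expr` = a + ((b + 3) + 4)):
+//
+//   FlattenedAssociativeChain chain(expr);
+//   // chain.was_left_folded == false
+//   // chain.fringe == {a, b, 3, 4}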
+
+inline Result<std::shared_ptr<compute::Function>> GetFunction(
+ const Expression::Call& call, compute::ExecContext* exec_context) {
+ if (call.function_name != "cast") {
+ return exec_context->func_registry()->GetFunction(call.function_name);
+ }
+ // XXX this special case is strange; why not make "cast" a ScalarFunction?
+ const auto& to_type = checked_cast<const compute::CastOptions&>(*call.options).to_type;
+ return compute::GetCastFunction(to_type);
+}
+
+/// Modify an Expression with pre-order and post-order visitation.
+/// `pre` will be invoked on each Expression. `pre` will visit Calls before their
+/// arguments, `post_call` will visit Calls (and no other Expressions) after their
+/// arguments. Visitors should return an expression Identical to their input to indicate
+/// no change; this prevents unnecessary reconstruction in the common case where no
+/// modification is possible or necessary.
+///
+/// If an argument was modified, `post_call` visits a reconstructed Call with the modified
+/// arguments but also receives a pointer to the unmodified Expression as a second
+/// argument. If no arguments were modified the unmodified Expression* will be nullptr.
+template <typename PreVisit, typename PostVisitCall>
+Result<Expression> Modify(Expression expr, const PreVisit& pre,
+ const PostVisitCall& post_call) {
+ ARROW_ASSIGN_OR_RAISE(expr, Result<Expression>(pre(std::move(expr))));
+
+ auto call = expr.call();
+ if (!call) return expr;
+
+ bool at_least_one_modified = false;
+ std::vector<Expression> modified_arguments;
+
+ for (size_t i = 0; i < call->arguments.size(); ++i) {
+ ARROW_ASSIGN_OR_RAISE(auto modified_argument,
+ Modify(call->arguments[i], pre, post_call));
+
+ if (Identical(modified_argument, call->arguments[i])) {
+ continue;
+ }
+
+ if (!at_least_one_modified) {
+ modified_arguments = call->arguments;
+ at_least_one_modified = true;
+ }
+
+ modified_arguments[i] = std::move(modified_argument);
+ }
+
+ if (at_least_one_modified) {
+ // reconstruct the call expression with the modified arguments
+ auto modified_call = *call;
+ modified_call.arguments = std::move(modified_arguments);
+ return post_call(Expression(std::move(modified_call)), &expr);
+ }
+
+ return post_call(std::move(expr), nullptr);
+}
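+
+// Illustrative sketch (assumed caller): a post-order rewrite can pass an
+// identity pre-visitor, as DirectComparisonSimplification does in
+// expression.cc:
+//
+//   Modify(
+//       std::move(expr), [](Expression e) { return e; },
+//       [](Expression e, ...) -> Result<Expression> { return e; });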
+
+} // namespace compute
+} // namespace arrow
diff --git a/contrib/libs/apache/arrow/cpp/src/arrow/compute/exec/key_compare.cc b/contrib/libs/apache/arrow/cpp/src/arrow/compute/exec/key_compare.cc
index 01de727978f..7a5b0be9990 100644
--- a/contrib/libs/apache/arrow/cpp/src/arrow/compute/exec/key_compare.cc
+++ b/contrib/libs/apache/arrow/cpp/src/arrow/compute/exec/key_compare.cc
@@ -1,268 +1,268 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-#include "arrow/compute/exec/key_compare.h"
-
-#include <algorithm>
-#include <cstdint>
-
-#include "arrow/compute/exec/util.h"
-#include "arrow/util/ubsan.h"
-
-namespace arrow {
-namespace compute {
-
-void KeyCompare::CompareRows(uint32_t num_rows_to_compare,
- const uint16_t* sel_left_maybe_null,
- const uint32_t* left_to_right_map,
- KeyEncoder::KeyEncoderContext* ctx, uint32_t* out_num_rows,
- uint16_t* out_sel_left_maybe_same,
- const KeyEncoder::KeyRowArray& rows_left,
- const KeyEncoder::KeyRowArray& rows_right) {
- ARROW_DCHECK(rows_left.metadata().is_compatible(rows_right.metadata()));
-
- if (num_rows_to_compare == 0) {
- *out_num_rows = 0;
- return;
- }
-
- // Allocate temporary byte and bit vectors
- auto bytevector_holder =
- util::TempVectorHolder<uint8_t>(ctx->stack, num_rows_to_compare);
- auto bitvector_holder =
- util::TempVectorHolder<uint8_t>(ctx->stack, num_rows_to_compare);
-
- uint8_t* match_bytevector = bytevector_holder.mutable_data();
- uint8_t* match_bitvector = bitvector_holder.mutable_data();
-
- // All comparison functions called here update the match byte vector
- // (ANDing it with their comparison result) instead of overwriting it.
- memset(match_bytevector, 0xff, num_rows_to_compare);
-
- if (rows_left.metadata().is_fixed_length) {
- CompareFixedLength(num_rows_to_compare, sel_left_maybe_null, left_to_right_map,
- match_bytevector, ctx, rows_left.metadata().fixed_length,
- rows_left.data(1), rows_right.data(1));
- } else {
- CompareVaryingLength(num_rows_to_compare, sel_left_maybe_null, left_to_right_map,
- match_bytevector, ctx, rows_left.data(2), rows_right.data(2),
- rows_left.offsets(), rows_right.offsets());
- }
-
- // CompareFixedLength can be used to compare nulls as well
- bool nulls_present = rows_left.has_any_nulls(ctx) || rows_right.has_any_nulls(ctx);
- if (nulls_present) {
- CompareFixedLength(num_rows_to_compare, sel_left_maybe_null, left_to_right_map,
- match_bytevector, ctx,
- rows_left.metadata().null_masks_bytes_per_row,
- rows_left.null_masks(), rows_right.null_masks());
- }
-
- util::BitUtil::bytes_to_bits(ctx->hardware_flags, num_rows_to_compare, match_bytevector,
- match_bitvector);
- if (sel_left_maybe_null) {
- int out_num_rows_int;
- util::BitUtil::bits_filter_indexes(0, ctx->hardware_flags, num_rows_to_compare,
- match_bitvector, sel_left_maybe_null,
- &out_num_rows_int, out_sel_left_maybe_same);
- *out_num_rows = out_num_rows_int;
- } else {
- int out_num_rows_int;
- util::BitUtil::bits_to_indexes(0, ctx->hardware_flags, num_rows_to_compare,
- match_bitvector, &out_num_rows_int,
- out_sel_left_maybe_same);
- *out_num_rows = out_num_rows_int;
- }
-}
-
-void KeyCompare::CompareFixedLength(uint32_t num_rows_to_compare,
- const uint16_t* sel_left_maybe_null,
- const uint32_t* left_to_right_map,
- uint8_t* match_bytevector,
- KeyEncoder::KeyEncoderContext* ctx,
- uint32_t fixed_length, const uint8_t* rows_left,
- const uint8_t* rows_right) {
- bool use_selection = (sel_left_maybe_null != nullptr);
-
- uint32_t num_rows_already_processed = 0;
-
-#if defined(ARROW_HAVE_AVX2)
- if (ctx->has_avx2() && !use_selection) {
- // Choose between up-to-8B length, up-to-16B length and any size versions
- if (fixed_length <= 8) {
- num_rows_already_processed = CompareFixedLength_UpTo8B_avx2(
- num_rows_to_compare, left_to_right_map, match_bytevector, fixed_length,
- rows_left, rows_right);
- } else if (fixed_length <= 16) {
- num_rows_already_processed = CompareFixedLength_UpTo16B_avx2(
- num_rows_to_compare, left_to_right_map, match_bytevector, fixed_length,
- rows_left, rows_right);
- } else {
- num_rows_already_processed =
- CompareFixedLength_avx2(num_rows_to_compare, left_to_right_map,
- match_bytevector, fixed_length, rows_left, rows_right);
- }
- }
-#endif
-
- typedef void (*CompareFixedLengthImp_t)(uint32_t, uint32_t, const uint16_t*,
- const uint32_t*, uint8_t*, uint32_t,
- const uint8_t*, const uint8_t*);
- static const CompareFixedLengthImp_t CompareFixedLengthImp_fn[] = {
- CompareFixedLengthImp<false, 1>, CompareFixedLengthImp<false, 2>,
- CompareFixedLengthImp<false, 0>, CompareFixedLengthImp<true, 1>,
- CompareFixedLengthImp<true, 2>, CompareFixedLengthImp<true, 0>};
- int dispatch_const = (use_selection ? 3 : 0) +
- ((fixed_length <= 8) ? 0 : ((fixed_length <= 16) ? 1 : 2));
- CompareFixedLengthImp_fn[dispatch_const](
- num_rows_already_processed, num_rows_to_compare, sel_left_maybe_null,
- left_to_right_map, match_bytevector, fixed_length, rows_left, rows_right);
-}
-
-template <bool use_selection, int num_64bit_words>
-void KeyCompare::CompareFixedLengthImp(uint32_t num_rows_already_processed,
- uint32_t num_rows,
- const uint16_t* sel_left_maybe_null,
- const uint32_t* left_to_right_map,
- uint8_t* match_bytevector, uint32_t length,
- const uint8_t* rows_left,
- const uint8_t* rows_right) {
- // Key length (for encoded key) has to be non-zero
- ARROW_DCHECK(length > 0);
-
- // Non-zero length guarantees no underflow
- int32_t num_loops_less_one = (static_cast<int32_t>(length) + 7) / 8 - 1;
-
- // Length remaining in last loop can only be zero for input length equal to zero
- uint32_t length_remaining_last_loop = length - num_loops_less_one * 8;
- uint64_t tail_mask = (~0ULL) >> (8 * (8 - length_remaining_last_loop));
-
- for (uint32_t id_input = num_rows_already_processed; id_input < num_rows; ++id_input) {
- uint32_t irow_left = use_selection ? sel_left_maybe_null[id_input] : id_input;
- uint32_t irow_right = left_to_right_map[irow_left];
- uint32_t begin_left = length * irow_left;
- uint32_t begin_right = length * irow_right;
- const uint64_t* key_left_ptr =
- reinterpret_cast<const uint64_t*>(rows_left + begin_left);
- const uint64_t* key_right_ptr =
- reinterpret_cast<const uint64_t*>(rows_right + begin_right);
- uint64_t result_or = 0ULL;
- int32_t istripe = 0;
-
- // Specializations for keys of up to 8 bytes and of between 9 and 16 bytes
- // avoid the inner loop over 64-bit words for short keys.
- //
- // Template argument 0 means arbitrarily many 64-bit words,
- // 1 means up to one word and 2 means up to two words.
- //
- if (num_64bit_words == 0) {
- for (; istripe < num_loops_less_one; ++istripe) {
- uint64_t key_left = util::SafeLoad(&key_left_ptr[istripe]);
- uint64_t key_right = util::SafeLoad(&key_right_ptr[istripe]);
- result_or |= (key_left ^ key_right);
- }
- } else if (num_64bit_words == 2) {
- uint64_t key_left = util::SafeLoad(&key_left_ptr[istripe]);
- uint64_t key_right = util::SafeLoad(&key_right_ptr[istripe]);
- result_or |= (key_left ^ key_right);
- ++istripe;
- }
-
- uint64_t key_left = util::SafeLoad(&key_left_ptr[istripe]);
- uint64_t key_right = util::SafeLoad(&key_right_ptr[istripe]);
- result_or |= (tail_mask & (key_left ^ key_right));
-
- int result = (result_or == 0 ? 0xff : 0);
- match_bytevector[id_input] &= result;
- }
-}
-
-void KeyCompare::CompareVaryingLength(uint32_t num_rows_to_compare,
- const uint16_t* sel_left_maybe_null,
- const uint32_t* left_to_right_map,
- uint8_t* match_bytevector,
- KeyEncoder::KeyEncoderContext* ctx,
- const uint8_t* rows_left, const uint8_t* rows_right,
- const uint32_t* offsets_left,
- const uint32_t* offsets_right) {
- bool use_selection = (sel_left_maybe_null != nullptr);
-
-#if defined(ARROW_HAVE_AVX2)
- if (ctx->has_avx2() && !use_selection) {
- CompareVaryingLength_avx2(num_rows_to_compare, left_to_right_map, match_bytevector,
- rows_left, rows_right, offsets_left, offsets_right);
- } else {
-#endif
- if (use_selection) {
- CompareVaryingLengthImp<true>(num_rows_to_compare, sel_left_maybe_null,
- left_to_right_map, match_bytevector, rows_left,
- rows_right, offsets_left, offsets_right);
- } else {
- CompareVaryingLengthImp<false>(num_rows_to_compare, sel_left_maybe_null,
- left_to_right_map, match_bytevector, rows_left,
- rows_right, offsets_left, offsets_right);
- }
-#if defined(ARROW_HAVE_AVX2)
- }
-#endif
-}
-
-template <bool use_selection>
-void KeyCompare::CompareVaryingLengthImp(
- uint32_t num_rows, const uint16_t* sel_left_maybe_null,
- const uint32_t* left_to_right_map, uint8_t* match_bytevector,
- const uint8_t* rows_left, const uint8_t* rows_right, const uint32_t* offsets_left,
- const uint32_t* offsets_right) {
- static const uint64_t tail_masks[] = {
- 0x0000000000000000ULL, 0x00000000000000ffULL, 0x000000000000ffffULL,
- 0x0000000000ffffffULL, 0x00000000ffffffffULL, 0x000000ffffffffffULL,
- 0x0000ffffffffffffULL, 0x00ffffffffffffffULL, 0xffffffffffffffffULL};
- for (uint32_t i = 0; i < num_rows; ++i) {
- uint32_t irow_left = use_selection ? sel_left_maybe_null[i] : i;
- uint32_t irow_right = left_to_right_map[irow_left];
- uint32_t begin_left = offsets_left[irow_left];
- uint32_t begin_right = offsets_right[irow_right];
- uint32_t length_left = offsets_left[irow_left + 1] - begin_left;
- uint32_t length_right = offsets_right[irow_right + 1] - begin_right;
- uint32_t length = std::min(length_left, length_right);
- const uint64_t* key_left_ptr =
- reinterpret_cast<const uint64_t*>(rows_left + begin_left);
- const uint64_t* key_right_ptr =
- reinterpret_cast<const uint64_t*>(rows_right + begin_right);
- uint64_t result_or = 0;
- int32_t istripe;
- // length can be zero
- for (istripe = 0; istripe < (static_cast<int32_t>(length) + 7) / 8 - 1; ++istripe) {
- uint64_t key_left = util::SafeLoad(&key_left_ptr[istripe]);
- uint64_t key_right = util::SafeLoad(&key_right_ptr[istripe]);
- result_or |= (key_left ^ key_right);
- }
-
- uint32_t length_remaining = length - static_cast<uint32_t>(istripe) * 8;
- uint64_t tail_mask = tail_masks[length_remaining];
-
- uint64_t key_left = util::SafeLoad(&key_left_ptr[istripe]);
- uint64_t key_right = util::SafeLoad(&key_right_ptr[istripe]);
- result_or |= (tail_mask & (key_left ^ key_right));
-
- int result = (result_or == 0 ? 0xff : 0);
- match_bytevector[i] &= result;
- }
-}
-
-} // namespace compute
-} // namespace arrow
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "arrow/compute/exec/key_compare.h"
+
+#include <algorithm>
+#include <cstdint>
+
+#include "arrow/compute/exec/util.h"
+#include "arrow/util/ubsan.h"
+
+namespace arrow {
+namespace compute {
+
+void KeyCompare::CompareRows(uint32_t num_rows_to_compare,
+ const uint16_t* sel_left_maybe_null,
+ const uint32_t* left_to_right_map,
+ KeyEncoder::KeyEncoderContext* ctx, uint32_t* out_num_rows,
+ uint16_t* out_sel_left_maybe_same,
+ const KeyEncoder::KeyRowArray& rows_left,
+ const KeyEncoder::KeyRowArray& rows_right) {
+ ARROW_DCHECK(rows_left.metadata().is_compatible(rows_right.metadata()));
+
+ if (num_rows_to_compare == 0) {
+ *out_num_rows = 0;
+ return;
+ }
+
+ // Allocate temporary byte and bit vectors
+ auto bytevector_holder =
+ util::TempVectorHolder<uint8_t>(ctx->stack, num_rows_to_compare);
+ auto bitvector_holder =
+ util::TempVectorHolder<uint8_t>(ctx->stack, num_rows_to_compare);
+
+ uint8_t* match_bytevector = bytevector_holder.mutable_data();
+ uint8_t* match_bitvector = bitvector_holder.mutable_data();
+
+ // All comparison functions called here will update match byte vector
+ // (AND it with comparison result) instead of overwriting it.
+ memset(match_bytevector, 0xff, num_rows_to_compare);
+
+ if (rows_left.metadata().is_fixed_length) {
+ CompareFixedLength(num_rows_to_compare, sel_left_maybe_null, left_to_right_map,
+ match_bytevector, ctx, rows_left.metadata().fixed_length,
+ rows_left.data(1), rows_right.data(1));
+ } else {
+ CompareVaryingLength(num_rows_to_compare, sel_left_maybe_null, left_to_right_map,
+ match_bytevector, ctx, rows_left.data(2), rows_right.data(2),
+ rows_left.offsets(), rows_right.offsets());
+ }
+
+ // CompareFixedLength can be used to compare nulls as well
+ bool nulls_present = rows_left.has_any_nulls(ctx) || rows_right.has_any_nulls(ctx);
+ if (nulls_present) {
+ CompareFixedLength(num_rows_to_compare, sel_left_maybe_null, left_to_right_map,
+ match_bytevector, ctx,
+ rows_left.metadata().null_masks_bytes_per_row,
+ rows_left.null_masks(), rows_right.null_masks());
+ }
+
+ util::BitUtil::bytes_to_bits(ctx->hardware_flags, num_rows_to_compare, match_bytevector,
+ match_bitvector);
+ if (sel_left_maybe_null) {
+ int out_num_rows_int;
+ util::BitUtil::bits_filter_indexes(0, ctx->hardware_flags, num_rows_to_compare,
+ match_bitvector, sel_left_maybe_null,
+ &out_num_rows_int, out_sel_left_maybe_same);
+ *out_num_rows = out_num_rows_int;
+ } else {
+ int out_num_rows_int;
+ util::BitUtil::bits_to_indexes(0, ctx->hardware_flags, num_rows_to_compare,
+ match_bitvector, &out_num_rows_int,
+ out_sel_left_maybe_same);
+ *out_num_rows = out_num_rows_int;
+ }
+}
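In plain scalar terms the pipeline above reduces to: every comparison stage ANDs its verdict into a byte vector (0xff while a row still matches), and the final pass emits the indexes of rows whose byte ended up zero. A minimal self-contained sketch of that contract, leaving out the util::BitUtil bit/byte conversions and the AVX2 paths (the helper name is illustrative, not part of the Arrow API):

```cpp
#include <cstdint>
#include <vector>

// Scalar model of the CompareRows output contract (sketch only).
std::vector<uint16_t> FailedRows(const std::vector<uint8_t>& match_bytevector,
                                 const uint16_t* sel_left_maybe_null) {
  std::vector<uint16_t> failed;
  for (size_t i = 0; i < match_bytevector.size(); ++i) {
    if (match_bytevector[i] == 0) {
      // With an input selection the output is a filtered image of it;
      // otherwise it is the plain row index.
      failed.push_back(sel_left_maybe_null ? sel_left_maybe_null[i]
                                           : static_cast<uint16_t>(i));
    }
  }
  return failed;
}
```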
+
+void KeyCompare::CompareFixedLength(uint32_t num_rows_to_compare,
+ const uint16_t* sel_left_maybe_null,
+ const uint32_t* left_to_right_map,
+ uint8_t* match_bytevector,
+ KeyEncoder::KeyEncoderContext* ctx,
+ uint32_t fixed_length, const uint8_t* rows_left,
+ const uint8_t* rows_right) {
+ bool use_selection = (sel_left_maybe_null != nullptr);
+
+ uint32_t num_rows_already_processed = 0;
+
+#if defined(ARROW_HAVE_AVX2)
+ if (ctx->has_avx2() && !use_selection) {
+ // Choose between up-to-8B length, up-to-16B length and any size versions
+ if (fixed_length <= 8) {
+ num_rows_already_processed = CompareFixedLength_UpTo8B_avx2(
+ num_rows_to_compare, left_to_right_map, match_bytevector, fixed_length,
+ rows_left, rows_right);
+ } else if (fixed_length <= 16) {
+ num_rows_already_processed = CompareFixedLength_UpTo16B_avx2(
+ num_rows_to_compare, left_to_right_map, match_bytevector, fixed_length,
+ rows_left, rows_right);
+ } else {
+ num_rows_already_processed =
+ CompareFixedLength_avx2(num_rows_to_compare, left_to_right_map,
+ match_bytevector, fixed_length, rows_left, rows_right);
+ }
+ }
+#endif
+
+ typedef void (*CompareFixedLengthImp_t)(uint32_t, uint32_t, const uint16_t*,
+ const uint32_t*, uint8_t*, uint32_t,
+ const uint8_t*, const uint8_t*);
+ static const CompareFixedLengthImp_t CompareFixedLengthImp_fn[] = {
+ CompareFixedLengthImp<false, 1>, CompareFixedLengthImp<false, 2>,
+ CompareFixedLengthImp<false, 0>, CompareFixedLengthImp<true, 1>,
+ CompareFixedLengthImp<true, 2>, CompareFixedLengthImp<true, 0>};
+ int dispatch_const = (use_selection ? 3 : 0) +
+ ((fixed_length <= 8) ? 0 : ((fixed_length <= 16) ? 1 : 2));
+ CompareFixedLengthImp_fn[dispatch_const](
+ num_rows_already_processed, num_rows_to_compare, sel_left_maybe_null,
+ left_to_right_map, match_bytevector, fixed_length, rows_left, rows_right);
+}
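The function-pointer table above is laid out as two groups of three: indexes 0..2 are the no-selection variants for the width classes {up to 8 bytes, up to 16 bytes, any}, and indexes 3..5 their selection counterparts, so the index is a 3-stride plus a width class. A small standalone check of that arithmetic (the function name is made up for illustration):

```cpp
#include <cassert>
#include <cstdint>

// Recomputes the dispatch index used above: rows 0..2 are the no-selection
// variants for width classes {<=8B, <=16B, any}, rows 3..5 the selection ones.
int DispatchIndex(bool use_selection, uint32_t fixed_length) {
  return (use_selection ? 3 : 0) +
         ((fixed_length <= 8) ? 0 : ((fixed_length <= 16) ? 1 : 2));
}

int main() {
  assert(DispatchIndex(false, 8) == 0);   // CompareFixedLengthImp<false, 1>
  assert(DispatchIndex(false, 16) == 1);  // CompareFixedLengthImp<false, 2>
  assert(DispatchIndex(false, 40) == 2);  // CompareFixedLengthImp<false, 0>
  assert(DispatchIndex(true, 40) == 5);   // CompareFixedLengthImp<true, 0>
  return 0;
}
```

Note that when the AVX2 path has already handled all rows, num_rows_already_processed equals num_rows_to_compare and the dispatched scalar call degenerates to an empty loop.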
+
+template <bool use_selection, int num_64bit_words>
+void KeyCompare::CompareFixedLengthImp(uint32_t num_rows_already_processed,
+ uint32_t num_rows,
+ const uint16_t* sel_left_maybe_null,
+ const uint32_t* left_to_right_map,
+ uint8_t* match_bytevector, uint32_t length,
+ const uint8_t* rows_left,
+ const uint8_t* rows_right) {
+ // Key length (for encoded key) has to be non-zero
+ ARROW_DCHECK(length > 0);
+
+ // Non-zero length guarantees no underflow
+ int32_t num_loops_less_one = (static_cast<int32_t>(length) + 7) / 8 - 1;
+
+ // The remaining length for the last loop can be zero only if the input length is zero
+ uint32_t length_remaining_last_loop = length - num_loops_less_one * 8;
+ uint64_t tail_mask = (~0ULL) >> (8 * (8 - length_remaining_last_loop));
+
+ for (uint32_t id_input = num_rows_already_processed; id_input < num_rows; ++id_input) {
+ uint32_t irow_left = use_selection ? sel_left_maybe_null[id_input] : id_input;
+ uint32_t irow_right = left_to_right_map[irow_left];
+ uint32_t begin_left = length * irow_left;
+ uint32_t begin_right = length * irow_right;
+ const uint64_t* key_left_ptr =
+ reinterpret_cast<const uint64_t*>(rows_left + begin_left);
+ const uint64_t* key_right_ptr =
+ reinterpret_cast<const uint64_t*>(rows_right + begin_right);
+ uint64_t result_or = 0ULL;
+ int32_t istripe = 0;
+
+ // Specializations for keys up to 8 bytes and for keys between 9 and 16 bytes
+ // avoid the inner loop over 64-bit words needed for longer values.
+ //
+ // Template argument 0 means arbitrarily many 64-bit words,
+ // 1 means up to 1 and 2 means up to 2.
+ //
+ if (num_64bit_words == 0) {
+ for (; istripe < num_loops_less_one; ++istripe) {
+ uint64_t key_left = util::SafeLoad(&key_left_ptr[istripe]);
+ uint64_t key_right = util::SafeLoad(&key_right_ptr[istripe]);
+ result_or |= (key_left ^ key_right);
+ }
+ } else if (num_64bit_words == 2) {
+ uint64_t key_left = util::SafeLoad(&key_left_ptr[istripe]);
+ uint64_t key_right = util::SafeLoad(&key_right_ptr[istripe]);
+ result_or |= (key_left ^ key_right);
+ ++istripe;
+ }
+
+ uint64_t key_left = util::SafeLoad(&key_left_ptr[istripe]);
+ uint64_t key_right = util::SafeLoad(&key_right_ptr[istripe]);
+ result_or |= (tail_mask & (key_left ^ key_right));
+
+ int result = (result_or == 0 ? 0xff : 0);
+ match_bytevector[id_input] &= result;
+ }
+}
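Worked numbers for the tail-mask setup above, taking an 11-byte key: one full 8-byte stripe is XOR-compared in the loop, and the final stripe keeps only the 3 remaining low-order bytes of the little-endian load:

```cpp
#include <cassert>
#include <cstdint>

int main() {
  // Worked example of the tail mask above for an 11-byte key.
  uint32_t length = 11;
  int32_t num_loops_less_one = (static_cast<int32_t>(length) + 7) / 8 - 1;  // == 1
  uint32_t remaining = length - num_loops_less_one * 8;                     // == 3
  uint64_t tail_mask = (~0ULL) >> (8 * (8 - remaining));
  assert(tail_mask == 0x0000000000FFFFFFULL);  // keep only the 3 tail bytes
  return 0;
}
```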
+
+void KeyCompare::CompareVaryingLength(uint32_t num_rows_to_compare,
+ const uint16_t* sel_left_maybe_null,
+ const uint32_t* left_to_right_map,
+ uint8_t* match_bytevector,
+ KeyEncoder::KeyEncoderContext* ctx,
+ const uint8_t* rows_left, const uint8_t* rows_right,
+ const uint32_t* offsets_left,
+ const uint32_t* offsets_right) {
+ bool use_selection = (sel_left_maybe_null != nullptr);
+
+#if defined(ARROW_HAVE_AVX2)
+ if (ctx->has_avx2() && !use_selection) {
+ CompareVaryingLength_avx2(num_rows_to_compare, left_to_right_map, match_bytevector,
+ rows_left, rows_right, offsets_left, offsets_right);
+ } else {
+#endif
+ if (use_selection) {
+ CompareVaryingLengthImp<true>(num_rows_to_compare, sel_left_maybe_null,
+ left_to_right_map, match_bytevector, rows_left,
+ rows_right, offsets_left, offsets_right);
+ } else {
+ CompareVaryingLengthImp<false>(num_rows_to_compare, sel_left_maybe_null,
+ left_to_right_map, match_bytevector, rows_left,
+ rows_right, offsets_left, offsets_right);
+ }
+#if defined(ARROW_HAVE_AVX2)
+ }
+#endif
+}
+
+template <bool use_selection>
+void KeyCompare::CompareVaryingLengthImp(
+ uint32_t num_rows, const uint16_t* sel_left_maybe_null,
+ const uint32_t* left_to_right_map, uint8_t* match_bytevector,
+ const uint8_t* rows_left, const uint8_t* rows_right, const uint32_t* offsets_left,
+ const uint32_t* offsets_right) {
+ static const uint64_t tail_masks[] = {
+ 0x0000000000000000ULL, 0x00000000000000ffULL, 0x000000000000ffffULL,
+ 0x0000000000ffffffULL, 0x00000000ffffffffULL, 0x000000ffffffffffULL,
+ 0x0000ffffffffffffULL, 0x00ffffffffffffffULL, 0xffffffffffffffffULL};
+ for (uint32_t i = 0; i < num_rows; ++i) {
+ uint32_t irow_left = use_selection ? sel_left_maybe_null[i] : i;
+ uint32_t irow_right = left_to_right_map[irow_left];
+ uint32_t begin_left = offsets_left[irow_left];
+ uint32_t begin_right = offsets_right[irow_right];
+ uint32_t length_left = offsets_left[irow_left + 1] - begin_left;
+ uint32_t length_right = offsets_right[irow_right + 1] - begin_right;
+ uint32_t length = std::min(length_left, length_right);
+ const uint64_t* key_left_ptr =
+ reinterpret_cast<const uint64_t*>(rows_left + begin_left);
+ const uint64_t* key_right_ptr =
+ reinterpret_cast<const uint64_t*>(rows_right + begin_right);
+ uint64_t result_or = 0;
+ int32_t istripe;
+ // length can be zero
+ for (istripe = 0; istripe < (static_cast<int32_t>(length) + 7) / 8 - 1; ++istripe) {
+ uint64_t key_left = util::SafeLoad(&key_left_ptr[istripe]);
+ uint64_t key_right = util::SafeLoad(&key_right_ptr[istripe]);
+ result_or |= (key_left ^ key_right);
+ }
+
+ uint32_t length_remaining = length - static_cast<uint32_t>(istripe) * 8;
+ uint64_t tail_mask = tail_masks[length_remaining];
+
+ uint64_t key_left = util::SafeLoad(&key_left_ptr[istripe]);
+ uint64_t key_right = util::SafeLoad(&key_right_ptr[istripe]);
+ result_or |= (tail_mask & (key_left ^ key_right));
+
+ int result = (result_or == 0 ? 0xff : 0);
+ match_bytevector[i] &= result;
+ }
+}
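The varying-length path uses a lookup table instead of the shift expression from the fixed-length path for a concrete reason: here the remaining length can legitimately be zero (keys may be empty, as the comment above notes), and the shift form would then shift a 64-bit value by 64 bits, which is undefined behaviour in C++. A quick standalone check that entry n of such a table is exactly the mask keeping the n lowest bytes:

```cpp
#include <cassert>
#include <cstdint>

int main() {
  // tail_masks[n] keeps the n lowest bytes; n == 0 masks everything out.
  // The branches below avoid the undefined shifts by 64 bits that a direct
  // shift formula would hit at n == 0 and n == 8.
  for (uint32_t n = 0; n <= 8; ++n) {
    uint64_t via_shift = (n == 0) ? 0 : (~0ULL) >> (8 * (8 - n));
    uint64_t expected = (n == 8) ? ~0ULL : ((1ULL << (8 * n)) - 1);
    assert(via_shift == expected);
  }
  return 0;
}
```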
+
+} // namespace compute
+} // namespace arrow
diff --git a/contrib/libs/apache/arrow/cpp/src/arrow/compute/exec/key_compare.h b/contrib/libs/apache/arrow/cpp/src/arrow/compute/exec/key_compare.h
index 397a729dac6..1dffabb884b 100644
--- a/contrib/libs/apache/arrow/cpp/src/arrow/compute/exec/key_compare.h
+++ b/contrib/libs/apache/arrow/cpp/src/arrow/compute/exec/key_compare.h
@@ -1,101 +1,101 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-#pragma once
-
-#include <cstdint>
-
-#include "arrow/compute/exec/key_encode.h"
-#include "arrow/compute/exec/util.h"
-#include "arrow/memory_pool.h"
-#include "arrow/result.h"
-#include "arrow/status.h"
-
-namespace arrow {
-namespace compute {
-
-class KeyCompare {
- public:
- // Returns a single 16-bit selection vector of rows that failed comparison.
- // If there is an input selection on the left, the resulting selection is a
- // filtered image of the input selection.
- static void CompareRows(uint32_t num_rows_to_compare,
- const uint16_t* sel_left_maybe_null,
- const uint32_t* left_to_right_map,
- KeyEncoder::KeyEncoderContext* ctx, uint32_t* out_num_rows,
- uint16_t* out_sel_left_maybe_same,
- const KeyEncoder::KeyRowArray& rows_left,
- const KeyEncoder::KeyRowArray& rows_right);
-
- private:
- static void CompareFixedLength(uint32_t num_rows_to_compare,
- const uint16_t* sel_left_maybe_null,
- const uint32_t* left_to_right_map,
- uint8_t* match_bytevector,
- KeyEncoder::KeyEncoderContext* ctx,
- uint32_t fixed_length, const uint8_t* rows_left,
- const uint8_t* rows_right);
- static void CompareVaryingLength(uint32_t num_rows_to_compare,
- const uint16_t* sel_left_maybe_null,
- const uint32_t* left_to_right_map,
- uint8_t* match_bytevector,
- KeyEncoder::KeyEncoderContext* ctx,
- const uint8_t* rows_left, const uint8_t* rows_right,
- const uint32_t* offsets_left,
- const uint32_t* offsets_right);
-
- // Second template argument is 0, 1 or 2.
- // 0 means arbitrarily many 64-bit words, 1 means up to 1 and 2 means up to 2.
- template <bool use_selection, int num_64bit_words>
- static void CompareFixedLengthImp(uint32_t num_rows_already_processed,
- uint32_t num_rows,
- const uint16_t* sel_left_maybe_null,
- const uint32_t* left_to_right_map,
- uint8_t* match_bytevector, uint32_t length,
- const uint8_t* rows_left, const uint8_t* rows_right);
- template <bool use_selection>
- static void CompareVaryingLengthImp(uint32_t num_rows,
- const uint16_t* sel_left_maybe_null,
- const uint32_t* left_to_right_map,
- uint8_t* match_bytevector, const uint8_t* rows_left,
- const uint8_t* rows_right,
- const uint32_t* offsets_left,
- const uint32_t* offsets_right);
-
-#if defined(ARROW_HAVE_AVX2)
-
- static uint32_t CompareFixedLength_UpTo8B_avx2(
- uint32_t num_rows, const uint32_t* left_to_right_map, uint8_t* match_bytevector,
- uint32_t length, const uint8_t* rows_left, const uint8_t* rows_right);
- static uint32_t CompareFixedLength_UpTo16B_avx2(
- uint32_t num_rows, const uint32_t* left_to_right_map, uint8_t* match_bytevector,
- uint32_t length, const uint8_t* rows_left, const uint8_t* rows_right);
- static uint32_t CompareFixedLength_avx2(uint32_t num_rows,
- const uint32_t* left_to_right_map,
- uint8_t* match_bytevector, uint32_t length,
- const uint8_t* rows_left,
- const uint8_t* rows_right);
- static void CompareVaryingLength_avx2(
- uint32_t num_rows, const uint32_t* left_to_right_map, uint8_t* match_bytevector,
- const uint8_t* rows_left, const uint8_t* rows_right, const uint32_t* offsets_left,
- const uint32_t* offsets_right);
-
-#endif
-};
-
-} // namespace compute
-} // namespace arrow
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+
+#include <cstdint>
+
+#include "arrow/compute/exec/key_encode.h"
+#include "arrow/compute/exec/util.h"
+#include "arrow/memory_pool.h"
+#include "arrow/result.h"
+#include "arrow/status.h"
+
+namespace arrow {
+namespace compute {
+
+class KeyCompare {
+ public:
+ // Returns a single 16-bit selection vector of rows that failed comparison.
+ // If there is an input selection on the left, the resulting selection is a
+ // filtered image of the input selection.
+ static void CompareRows(uint32_t num_rows_to_compare,
+ const uint16_t* sel_left_maybe_null,
+ const uint32_t* left_to_right_map,
+ KeyEncoder::KeyEncoderContext* ctx, uint32_t* out_num_rows,
+ uint16_t* out_sel_left_maybe_same,
+ const KeyEncoder::KeyRowArray& rows_left,
+ const KeyEncoder::KeyRowArray& rows_right);
+
+ private:
+ static void CompareFixedLength(uint32_t num_rows_to_compare,
+ const uint16_t* sel_left_maybe_null,
+ const uint32_t* left_to_right_map,
+ uint8_t* match_bytevector,
+ KeyEncoder::KeyEncoderContext* ctx,
+ uint32_t fixed_length, const uint8_t* rows_left,
+ const uint8_t* rows_right);
+ static void CompareVaryingLength(uint32_t num_rows_to_compare,
+ const uint16_t* sel_left_maybe_null,
+ const uint32_t* left_to_right_map,
+ uint8_t* match_bytevector,
+ KeyEncoder::KeyEncoderContext* ctx,
+ const uint8_t* rows_left, const uint8_t* rows_right,
+ const uint32_t* offsets_left,
+ const uint32_t* offsets_right);
+
+ // Second template argument is 0, 1 or 2.
+ // 0 means arbitrarily many 64-bit words, 1 means up to 1 and 2 means up to 2.
+ template <bool use_selection, int num_64bit_words>
+ static void CompareFixedLengthImp(uint32_t num_rows_already_processed,
+ uint32_t num_rows,
+ const uint16_t* sel_left_maybe_null,
+ const uint32_t* left_to_right_map,
+ uint8_t* match_bytevector, uint32_t length,
+ const uint8_t* rows_left, const uint8_t* rows_right);
+ template <bool use_selection>
+ static void CompareVaryingLengthImp(uint32_t num_rows,
+ const uint16_t* sel_left_maybe_null,
+ const uint32_t* left_to_right_map,
+ uint8_t* match_bytevector, const uint8_t* rows_left,
+ const uint8_t* rows_right,
+ const uint32_t* offsets_left,
+ const uint32_t* offsets_right);
+
+#if defined(ARROW_HAVE_AVX2)
+
+ static uint32_t CompareFixedLength_UpTo8B_avx2(
+ uint32_t num_rows, const uint32_t* left_to_right_map, uint8_t* match_bytevector,
+ uint32_t length, const uint8_t* rows_left, const uint8_t* rows_right);
+ static uint32_t CompareFixedLength_UpTo16B_avx2(
+ uint32_t num_rows, const uint32_t* left_to_right_map, uint8_t* match_bytevector,
+ uint32_t length, const uint8_t* rows_left, const uint8_t* rows_right);
+ static uint32_t CompareFixedLength_avx2(uint32_t num_rows,
+ const uint32_t* left_to_right_map,
+ uint8_t* match_bytevector, uint32_t length,
+ const uint8_t* rows_left,
+ const uint8_t* rows_right);
+ static void CompareVaryingLength_avx2(
+ uint32_t num_rows, const uint32_t* left_to_right_map, uint8_t* match_bytevector,
+ const uint8_t* rows_left, const uint8_t* rows_right, const uint32_t* offsets_left,
+ const uint32_t* offsets_right);
+
+#endif
+};
+
+} // namespace compute
+} // namespace arrow
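Stripped of null masks, temp-vector reuse and the SIMD specializations, the public contract declared above amounts to: walk the left rows (or a selection of them), look up the paired right row through left_to_right_map, and report the left rows whose encoded bytes differ. A simplified self-contained model under those assumptions, for fixed-length rows only (this is not the Arrow API, just a sketch of the semantics):

```cpp
#include <cstdint>
#include <cstring>
#include <vector>

// Toy model of the KeyCompare contract for fixed-length rows (sketch; the
// real class works on KeyEncoder::KeyRowArray buffers and adds null masks,
// AVX2 paths and temp-vector reuse).
std::vector<uint16_t> CompareRowsModel(uint32_t fixed_length,
                                       const std::vector<uint8_t>& rows_left,
                                       const std::vector<uint8_t>& rows_right,
                                       const std::vector<uint32_t>& left_to_right_map) {
  std::vector<uint16_t> differing;
  for (size_t i = 0; i < left_to_right_map.size(); ++i) {
    const uint8_t* l = rows_left.data() + i * fixed_length;
    const uint8_t* r = rows_right.data() + left_to_right_map[i] * fixed_length;
    if (std::memcmp(l, r, fixed_length) != 0) {
      differing.push_back(static_cast<uint16_t>(i));  // row i failed comparison
    }
  }
  return differing;
}
```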
diff --git a/contrib/libs/apache/arrow/cpp/src/arrow/compute/exec/key_encode.cc b/contrib/libs/apache/arrow/cpp/src/arrow/compute/exec/key_encode.cc
index f0498b509a1..de79558f2c2 100644
--- a/contrib/libs/apache/arrow/cpp/src/arrow/compute/exec/key_encode.cc
+++ b/contrib/libs/apache/arrow/cpp/src/arrow/compute/exec/key_encode.cc
@@ -1,1649 +1,1649 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-#include "arrow/compute/exec/key_encode.h"
-
-#include <memory.h>
-
-#include <algorithm>
-
-#include "arrow/compute/exec/util.h"
-#include "arrow/util/bit_util.h"
-#include "arrow/util/ubsan.h"
-
-namespace arrow {
-namespace compute {
-
-KeyEncoder::KeyRowArray::KeyRowArray()
- : pool_(nullptr), rows_capacity_(0), bytes_capacity_(0) {}
-
-Status KeyEncoder::KeyRowArray::Init(MemoryPool* pool, const KeyRowMetadata& metadata) {
- pool_ = pool;
- metadata_ = metadata;
-
- DCHECK(!null_masks_ && !offsets_ && !rows_);
-
- constexpr int64_t rows_capacity = 8;
- constexpr int64_t bytes_capacity = 1024;
-
- // Null masks
- ARROW_ASSIGN_OR_RAISE(auto null_masks,
- AllocateResizableBuffer(size_null_masks(rows_capacity), pool_));
- null_masks_ = std::move(null_masks);
- memset(null_masks_->mutable_data(), 0, size_null_masks(rows_capacity));
-
- // Offsets and rows
- if (!metadata.is_fixed_length) {
- ARROW_ASSIGN_OR_RAISE(auto offsets,
- AllocateResizableBuffer(size_offsets(rows_capacity), pool_));
- offsets_ = std::move(offsets);
- memset(offsets_->mutable_data(), 0, size_offsets(rows_capacity));
- reinterpret_cast<uint32_t*>(offsets_->mutable_data())[0] = 0;
-
- ARROW_ASSIGN_OR_RAISE(
- auto rows,
- AllocateResizableBuffer(size_rows_varying_length(bytes_capacity), pool_));
- rows_ = std::move(rows);
- memset(rows_->mutable_data(), 0, size_rows_varying_length(bytes_capacity));
- bytes_capacity_ = size_rows_varying_length(bytes_capacity) - padding_for_vectors;
- } else {
- ARROW_ASSIGN_OR_RAISE(
- auto rows, AllocateResizableBuffer(size_rows_fixed_length(rows_capacity), pool_));
- rows_ = std::move(rows);
- memset(rows_->mutable_data(), 0, size_rows_fixed_length(rows_capacity));
- bytes_capacity_ = size_rows_fixed_length(rows_capacity) - padding_for_vectors;
- }
-
- update_buffer_pointers();
-
- rows_capacity_ = rows_capacity;
-
- num_rows_ = 0;
- num_rows_for_has_any_nulls_ = 0;
- has_any_nulls_ = false;
-
- return Status::OK();
-}
-
-void KeyEncoder::KeyRowArray::Clean() {
- num_rows_ = 0;
- num_rows_for_has_any_nulls_ = 0;
- has_any_nulls_ = false;
-
- if (!metadata_.is_fixed_length) {
- reinterpret_cast<uint32_t*>(offsets_->mutable_data())[0] = 0;
- }
-}
-
-int64_t KeyEncoder::KeyRowArray::size_null_masks(int64_t num_rows) {
- return num_rows * metadata_.null_masks_bytes_per_row + padding_for_vectors;
-}
-
-int64_t KeyEncoder::KeyRowArray::size_offsets(int64_t num_rows) {
- return (num_rows + 1) * sizeof(uint32_t) + padding_for_vectors;
-}
-
-int64_t KeyEncoder::KeyRowArray::size_rows_fixed_length(int64_t num_rows) {
- return num_rows * metadata_.fixed_length + padding_for_vectors;
-}
-
-int64_t KeyEncoder::KeyRowArray::size_rows_varying_length(int64_t num_bytes) {
- return num_bytes + padding_for_vectors;
-}
-
-void KeyEncoder::KeyRowArray::update_buffer_pointers() {
- buffers_[0] = mutable_buffers_[0] = null_masks_->mutable_data();
- if (metadata_.is_fixed_length) {
- buffers_[1] = mutable_buffers_[1] = rows_->mutable_data();
- buffers_[2] = mutable_buffers_[2] = nullptr;
- } else {
- buffers_[1] = mutable_buffers_[1] = offsets_->mutable_data();
- buffers_[2] = mutable_buffers_[2] = rows_->mutable_data();
- }
-}
-
-Status KeyEncoder::KeyRowArray::ResizeFixedLengthBuffers(int64_t num_extra_rows) {
- if (rows_capacity_ >= num_rows_ + num_extra_rows) {
- return Status::OK();
- }
-
- int64_t rows_capacity_new = std::max(static_cast<int64_t>(1), 2 * rows_capacity_);
- while (rows_capacity_new < num_rows_ + num_extra_rows) {
- rows_capacity_new *= 2;
- }
-
- // Null masks
- RETURN_NOT_OK(null_masks_->Resize(size_null_masks(rows_capacity_new), false));
- memset(null_masks_->mutable_data() + size_null_masks(rows_capacity_), 0,
- size_null_masks(rows_capacity_new) - size_null_masks(rows_capacity_));
-
- // Either offsets or rows
- if (!metadata_.is_fixed_length) {
- RETURN_NOT_OK(offsets_->Resize(size_offsets(rows_capacity_new), false));
- memset(offsets_->mutable_data() + size_offsets(rows_capacity_), 0,
- size_offsets(rows_capacity_new) - size_offsets(rows_capacity_));
- } else {
- RETURN_NOT_OK(rows_->Resize(size_rows_fixed_length(rows_capacity_new), false));
- memset(rows_->mutable_data() + size_rows_fixed_length(rows_capacity_), 0,
- size_rows_fixed_length(rows_capacity_new) -
- size_rows_fixed_length(rows_capacity_));
- bytes_capacity_ = size_rows_fixed_length(rows_capacity_new) - padding_for_vectors;
- }
-
- update_buffer_pointers();
-
- rows_capacity_ = rows_capacity_new;
-
- return Status::OK();
-}
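The resize policy above is plain geometric growth: double the capacity until the requested row count fits, which keeps the amortized cost of repeated appends constant. A sketch of the policy with worked examples (the helper name is illustrative):

```cpp
#include <cassert>
#include <cstdint>

// Growth policy matching the loop above: double until the requirement fits,
// starting from 1 when the current capacity is zero.
int64_t GrownCapacity(int64_t current, int64_t required) {
  int64_t next = current > 0 ? 2 * current : 1;
  while (next < required) next *= 2;
  return next;
}

int main() {
  assert(GrownCapacity(8, 100) == 128);  // 8 -> 16 -> 32 -> 64 -> 128
  assert(GrownCapacity(0, 3) == 4);      // 0 -> 1 -> 2 -> 4
  return 0;
}
```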
-
-Status KeyEncoder::KeyRowArray::ResizeOptionalVaryingLengthBuffer(
- int64_t num_extra_bytes) {
- int64_t num_bytes = offsets()[num_rows_];
- if (bytes_capacity_ >= num_bytes + num_extra_bytes || metadata_.is_fixed_length) {
- return Status::OK();
- }
-
- int64_t bytes_capacity_new = std::max(static_cast<int64_t>(1), 2 * bytes_capacity_);
- while (bytes_capacity_new < num_bytes + num_extra_bytes) {
- bytes_capacity_new *= 2;
- }
-
- RETURN_NOT_OK(rows_->Resize(size_rows_varying_length(bytes_capacity_new), false));
- memset(rows_->mutable_data() + size_rows_varying_length(bytes_capacity_), 0,
- size_rows_varying_length(bytes_capacity_new) -
- size_rows_varying_length(bytes_capacity_));
-
- update_buffer_pointers();
-
- bytes_capacity_ = bytes_capacity_new;
-
- return Status::OK();
-}
-
-Status KeyEncoder::KeyRowArray::AppendSelectionFrom(const KeyRowArray& from,
- uint32_t num_rows_to_append,
- const uint16_t* source_row_ids) {
- DCHECK(metadata_.is_compatible(from.metadata()));
-
- RETURN_NOT_OK(ResizeFixedLengthBuffers(num_rows_to_append));
-
- if (!metadata_.is_fixed_length) {
- // Varying-length rows
- auto from_offsets = reinterpret_cast<const uint32_t*>(from.offsets_->data());
- auto to_offsets = reinterpret_cast<uint32_t*>(offsets_->mutable_data());
- uint32_t total_length = to_offsets[num_rows_];
- uint32_t total_length_to_append = 0;
- for (uint32_t i = 0; i < num_rows_to_append; ++i) {
- uint16_t row_id = source_row_ids[i];
- uint32_t length = from_offsets[row_id + 1] - from_offsets[row_id];
- total_length_to_append += length;
- to_offsets[num_rows_ + i + 1] = total_length + total_length_to_append;
- }
-
- RETURN_NOT_OK(ResizeOptionalVaryingLengthBuffer(total_length_to_append));
-
- const uint8_t* src = from.rows_->data();
- uint8_t* dst = rows_->mutable_data() + total_length;
- for (uint32_t i = 0; i < num_rows_to_append; ++i) {
- uint16_t row_id = source_row_ids[i];
- uint32_t length = from_offsets[row_id + 1] - from_offsets[row_id];
- auto src64 = reinterpret_cast<const uint64_t*>(src + from_offsets[row_id]);
- auto dst64 = reinterpret_cast<uint64_t*>(dst);
- for (uint32_t j = 0; j < (length + 7) / 8; ++j) {
- dst64[j] = src64[j];
- }
- dst += length;
- }
- } else {
- // Fixed-length rows
- const uint8_t* src = from.rows_->data();
- uint8_t* dst = rows_->mutable_data() + num_rows_ * metadata_.fixed_length;
- for (uint32_t i = 0; i < num_rows_to_append; ++i) {
- uint16_t row_id = source_row_ids[i];
- uint32_t length = metadata_.fixed_length;
- auto src64 = reinterpret_cast<const uint64_t*>(src + length * row_id);
- auto dst64 = reinterpret_cast<uint64_t*>(dst);
- for (uint32_t j = 0; j < (length + 7) / 8; ++j) {
- dst64[j] = src64[j];
- }
- dst += length;
- }
- }
-
- // Null masks
- uint32_t byte_length = metadata_.null_masks_bytes_per_row;
- uint64_t dst_byte_offset = num_rows_ * byte_length;
- const uint8_t* src_base = from.null_masks_->data();
- uint8_t* dst_base = null_masks_->mutable_data();
- for (uint32_t i = 0; i < num_rows_to_append; ++i) {
- uint32_t row_id = source_row_ids[i];
- int64_t src_byte_offset = row_id * byte_length;
- const uint8_t* src = src_base + src_byte_offset;
- uint8_t* dst = dst_base + dst_byte_offset;
- for (uint32_t ibyte = 0; ibyte < byte_length; ++ibyte) {
- dst[ibyte] = src[ibyte];
- }
- dst_byte_offset += byte_length;
- }
-
- num_rows_ += num_rows_to_append;
-
- return Status::OK();
-}
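Both copy loops above move whole 64-bit words, (length + 7) / 8 of them per row, so they may read and write up to 7 bytes past the logical end of a row. This appears to be exactly what the padding_for_vectors slack reserved by the size_* helpers is for. A small check of the rounding:

```cpp
#include <cassert>
#include <cstdint>

int main() {
  // A row of `length` bytes is copied as (length + 7) / 8 whole words,
  // overshooting the logical end by at most 7 bytes; buffer padding keeps
  // that overshoot in bounds.
  for (uint32_t length = 1; length <= 24; ++length) {
    uint32_t words = (length + 7) / 8;
    uint32_t bytes_touched = words * 8;
    assert(bytes_touched >= length && bytes_touched - length < 8);
  }
  return 0;
}
```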
-
-Status KeyEncoder::KeyRowArray::AppendEmpty(uint32_t num_rows_to_append,
- uint32_t num_extra_bytes_to_append) {
- RETURN_NOT_OK(ResizeFixedLengthBuffers(num_rows_to_append));
- RETURN_NOT_OK(ResizeOptionalVaryingLengthBuffer(num_extra_bytes_to_append));
- num_rows_ += num_rows_to_append;
- if (metadata_.row_alignment > 1 || metadata_.string_alignment > 1) {
- memset(rows_->mutable_data(), 0, bytes_capacity_);
- }
- return Status::OK();
-}
-
-bool KeyEncoder::KeyRowArray::has_any_nulls(const KeyEncoderContext* ctx) const {
- if (has_any_nulls_) {
- return true;
- }
- if (num_rows_for_has_any_nulls_ < num_rows_) {
- auto size_per_row = metadata().null_masks_bytes_per_row;
- has_any_nulls_ = !util::BitUtil::are_all_bytes_zero(
- ctx->hardware_flags, null_masks() + size_per_row * num_rows_for_has_any_nulls_,
- static_cast<uint32_t>(size_per_row * (num_rows_ - num_rows_for_has_any_nulls_)));
- num_rows_for_has_any_nulls_ = num_rows_;
- }
- return has_any_nulls_;
-}
-
-KeyEncoder::KeyColumnArray::KeyColumnArray(const KeyColumnMetadata& metadata,
- const KeyColumnArray& left,
- const KeyColumnArray& right,
- int buffer_id_to_replace) {
- metadata_ = metadata;
- length_ = left.length();
- for (int i = 0; i < max_buffers_; ++i) {
- buffers_[i] = left.buffers_[i];
- mutable_buffers_[i] = left.mutable_buffers_[i];
- }
- buffers_[buffer_id_to_replace] = right.buffers_[buffer_id_to_replace];
- mutable_buffers_[buffer_id_to_replace] = right.mutable_buffers_[buffer_id_to_replace];
- bit_offset_[0] = left.bit_offset_[0];
- bit_offset_[1] = left.bit_offset_[1];
- if (buffer_id_to_replace < max_buffers_ - 1) {
- bit_offset_[buffer_id_to_replace] = right.bit_offset_[buffer_id_to_replace];
- }
-}
-
-KeyEncoder::KeyColumnArray::KeyColumnArray(const KeyColumnMetadata& metadata,
- int64_t length, const uint8_t* buffer0,
- const uint8_t* buffer1, const uint8_t* buffer2,
- int bit_offset0, int bit_offset1) {
- metadata_ = metadata;
- length_ = length;
- buffers_[0] = buffer0;
- buffers_[1] = buffer1;
- buffers_[2] = buffer2;
- mutable_buffers_[0] = mutable_buffers_[1] = mutable_buffers_[2] = nullptr;
- bit_offset_[0] = bit_offset0;
- bit_offset_[1] = bit_offset1;
-}
-
-KeyEncoder::KeyColumnArray::KeyColumnArray(const KeyColumnMetadata& metadata,
- int64_t length, uint8_t* buffer0,
- uint8_t* buffer1, uint8_t* buffer2,
- int bit_offset0, int bit_offset1) {
- metadata_ = metadata;
- length_ = length;
- buffers_[0] = mutable_buffers_[0] = buffer0;
- buffers_[1] = mutable_buffers_[1] = buffer1;
- buffers_[2] = mutable_buffers_[2] = buffer2;
- bit_offset_[0] = bit_offset0;
- bit_offset_[1] = bit_offset1;
-}
-
-KeyEncoder::KeyColumnArray::KeyColumnArray(const KeyColumnArray& from, int64_t start,
- int64_t length) {
- metadata_ = from.metadata_;
- length_ = length;
- uint32_t fixed_size =
- !metadata_.is_fixed_length ? sizeof(uint32_t) : metadata_.fixed_length;
-
- buffers_[0] =
- from.buffers_[0] ? from.buffers_[0] + (from.bit_offset_[0] + start) / 8 : nullptr;
- mutable_buffers_[0] = from.mutable_buffers_[0]
- ? from.mutable_buffers_[0] + (from.bit_offset_[0] + start) / 8
- : nullptr;
- bit_offset_[0] = (from.bit_offset_[0] + start) % 8;
-
- if (fixed_size == 0) {
- buffers_[1] =
- from.buffers_[1] ? from.buffers_[1] + (from.bit_offset_[1] + start) / 8 : nullptr;
- mutable_buffers_[1] = from.mutable_buffers_[1] ? from.mutable_buffers_[1] +
- (from.bit_offset_[1] + start) / 8
- : nullptr;
- bit_offset_[1] = (from.bit_offset_[1] + start) % 8;
- } else {
- buffers_[1] = from.buffers_[1] ? from.buffers_[1] + start * fixed_size : nullptr;
- mutable_buffers_[1] = from.mutable_buffers_[1]
- ? from.mutable_buffers_[1] + start * fixed_size
- : nullptr;
- bit_offset_[1] = 0;
- }
-
- buffers_[2] = from.buffers_[2];
- mutable_buffers_[2] = from.mutable_buffers_[2];
-}
-
-KeyEncoder::KeyColumnArray KeyEncoder::TransformBoolean::ArrayReplace(
- const KeyColumnArray& column, const KeyColumnArray& temp) {
- // Make sure that the temp buffer is large enough
- DCHECK(temp.length() >= column.length() && temp.metadata().is_fixed_length &&
- temp.metadata().fixed_length >= sizeof(uint8_t));
- KeyColumnMetadata metadata;
- metadata.is_fixed_length = true;
- metadata.fixed_length = sizeof(uint8_t);
- constexpr int buffer_index = 1;
- KeyColumnArray result = KeyColumnArray(metadata, column, temp, buffer_index);
- return result;
-}
-
-void KeyEncoder::TransformBoolean::PreEncode(const KeyColumnArray& input,
- KeyColumnArray* output,
- KeyEncoderContext* ctx) {
- // Make sure that metadata and lengths are compatible.
- DCHECK(output->metadata().is_fixed_length == input.metadata().is_fixed_length);
- DCHECK(output->metadata().fixed_length == 1 && input.metadata().fixed_length == 0);
- DCHECK(output->length() == input.length());
- constexpr int buffer_index = 1;
- DCHECK(input.data(buffer_index) != nullptr);
- DCHECK(output->mutable_data(buffer_index) != nullptr);
- util::BitUtil::bits_to_bytes(
- ctx->hardware_flags, static_cast<int>(input.length()), input.data(buffer_index),
- output->mutable_data(buffer_index), input.bit_offset(buffer_index));
-}
-
-void KeyEncoder::TransformBoolean::PostDecode(const KeyColumnArray& input,
- KeyColumnArray* output,
- KeyEncoderContext* ctx) {
- // Make sure that metadata and lengths are compatible.
- DCHECK(output->metadata().is_fixed_length == input.metadata().is_fixed_length);
- DCHECK(output->metadata().fixed_length == 0 && input.metadata().fixed_length == 1);
- DCHECK(output->length() == input.length());
- constexpr int buffer_index = 1;
- DCHECK(input.data(buffer_index) != nullptr);
- DCHECK(output->mutable_data(buffer_index) != nullptr);
-
- util::BitUtil::bytes_to_bits(
- ctx->hardware_flags, static_cast<int>(input.length()), input.data(buffer_index),
- output->mutable_data(buffer_index), output->bit_offset(buffer_index));
-}
-
-bool KeyEncoder::EncoderInteger::IsBoolean(const KeyColumnMetadata& metadata) {
- return metadata.is_fixed_length && metadata.fixed_length == 0;
-}
-
-bool KeyEncoder::EncoderInteger::UsesTransform(const KeyColumnArray& column) {
- return IsBoolean(column.metadata());
-}
-
-KeyEncoder::KeyColumnArray KeyEncoder::EncoderInteger::ArrayReplace(
- const KeyColumnArray& column, const KeyColumnArray& temp) {
- if (IsBoolean(column.metadata())) {
- return TransformBoolean::ArrayReplace(column, temp);
- }
- return column;
-}
-
-void KeyEncoder::EncoderInteger::PreEncode(const KeyColumnArray& input,
- KeyColumnArray* output,
- KeyEncoderContext* ctx) {
- if (IsBoolean(input.metadata())) {
- TransformBoolean::PreEncode(input, output, ctx);
- }
-}
-
-void KeyEncoder::EncoderInteger::PostDecode(const KeyColumnArray& input,
- KeyColumnArray* output,
- KeyEncoderContext* ctx) {
- if (IsBoolean(output->metadata())) {
- TransformBoolean::PostDecode(input, output, ctx);
- }
-}
-
-void KeyEncoder::EncoderInteger::Encode(uint32_t offset_within_row, KeyRowArray* rows,
- const KeyColumnArray& col, KeyEncoderContext* ctx,
- KeyColumnArray* temp) {
- KeyColumnArray col_prep;
- if (UsesTransform(col)) {
- col_prep = ArrayReplace(col, *temp);
- PreEncode(col, &col_prep, ctx);
- } else {
- col_prep = col;
- }
-
- const auto num_rows = static_cast<uint32_t>(col.length());
-
- // When we have a single fixed-length column we can just do one memcpy
- if (rows->metadata().is_fixed_length &&
- rows->metadata().fixed_length == col.metadata().fixed_length) {
- DCHECK_EQ(offset_within_row, 0);
- uint32_t row_size = col.metadata().fixed_length;
- memcpy(rows->mutable_data(1), col.data(1), num_rows * row_size);
- } else if (rows->metadata().is_fixed_length) {
- uint32_t row_size = rows->metadata().fixed_length;
- uint8_t* row_base = rows->mutable_data(1) + offset_within_row;
- const uint8_t* col_base = col_prep.data(1);
- switch (col_prep.metadata().fixed_length) {
- case 1:
- for (uint32_t i = 0; i < num_rows; ++i) {
- row_base[i * row_size] = col_base[i];
- }
- break;
- case 2:
- for (uint32_t i = 0; i < num_rows; ++i) {
- *reinterpret_cast<uint16_t*>(row_base + i * row_size) =
- reinterpret_cast<const uint16_t*>(col_base)[i];
- }
- break;
- case 4:
- for (uint32_t i = 0; i < num_rows; ++i) {
- *reinterpret_cast<uint32_t*>(row_base + i * row_size) =
- reinterpret_cast<const uint32_t*>(col_base)[i];
- }
- break;
- case 8:
- for (uint32_t i = 0; i < num_rows; ++i) {
- *reinterpret_cast<uint64_t*>(row_base + i * row_size) =
- reinterpret_cast<const uint64_t*>(col_base)[i];
- }
- break;
- default:
- DCHECK(false);
- }
- } else {
- const uint32_t* row_offsets = rows->offsets();
- uint8_t* row_base = rows->mutable_data(2) + offset_within_row;
- const uint8_t* col_base = col_prep.data(1);
- switch (col_prep.metadata().fixed_length) {
- case 1:
- for (uint32_t i = 0; i < num_rows; ++i) {
- row_base[row_offsets[i]] = col_base[i];
- }
- break;
- case 2:
- for (uint32_t i = 0; i < num_rows; ++i) {
- *reinterpret_cast<uint16_t*>(row_base + row_offsets[i]) =
- reinterpret_cast<const uint16_t*>(col_base)[i];
- }
- break;
- case 4:
- for (uint32_t i = 0; i < num_rows; ++i) {
- *reinterpret_cast<uint32_t*>(row_base + row_offsets[i]) =
- reinterpret_cast<const uint32_t*>(col_base)[i];
- }
- break;
- case 8:
- for (uint32_t i = 0; i < num_rows; ++i) {
- *reinterpret_cast<uint64_t*>(row_base + row_offsets[i]) =
- reinterpret_cast<const uint64_t*>(col_base)[i];
- }
- break;
- default:
- DCHECK(false);
- }
- }
-}
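Each case of the switch above scatters one column value per row at a fixed byte offset; only the store width changes between cases. A condensed sketch of the 4-byte branch for fixed-length rows (names are illustrative; memcpy replaces the reinterpret_cast store so the sketch carries no alignment assumptions):

```cpp
#include <cstdint>
#include <cstring>
#include <vector>

// Sketch of the 4-byte branch above: scatter a uint32_t column into
// fixed-length rows at a given byte offset within each row.
void ScatterU32(const std::vector<uint32_t>& column, uint32_t row_size,
                uint32_t offset_within_row, std::vector<uint8_t>* rows) {
  rows->resize(column.size() * row_size);
  for (size_t i = 0; i < column.size(); ++i) {
    std::memcpy(rows->data() + i * row_size + offset_within_row, &column[i],
                sizeof(uint32_t));
  }
}
```

For varying-length rows the only difference is that the per-row base address comes from the offsets array rather than from i * row_size.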
-
-void KeyEncoder::EncoderInteger::Decode(uint32_t start_row, uint32_t num_rows,
- uint32_t offset_within_row,
- const KeyRowArray& rows, KeyColumnArray* col,
- KeyEncoderContext* ctx, KeyColumnArray* temp) {
- KeyColumnArray col_prep;
- if (UsesTransform(*col)) {
- col_prep = ArrayReplace(*col, *temp);
- } else {
- col_prep = *col;
- }
-
- // When we have a single fixed-length column we can just do one memcpy
- if (rows.metadata().is_fixed_length &&
- col_prep.metadata().fixed_length == rows.metadata().fixed_length) {
- DCHECK_EQ(offset_within_row, 0);
- uint32_t row_size = rows.metadata().fixed_length;
- memcpy(col_prep.mutable_data(1), rows.data(1) + start_row * row_size,
- num_rows * row_size);
- } else if (rows.metadata().is_fixed_length) {
- uint32_t row_size = rows.metadata().fixed_length;
- const uint8_t* row_base = rows.data(1) + start_row * row_size;
- row_base += offset_within_row;
- uint8_t* col_base = col_prep.mutable_data(1);
- switch (col_prep.metadata().fixed_length) {
- case 1:
- for (uint32_t i = 0; i < num_rows; ++i) {
- col_base[i] = row_base[i * row_size];
- }
- break;
- case 2:
- for (uint32_t i = 0; i < num_rows; ++i) {
- reinterpret_cast<uint16_t*>(col_base)[i] =
- *reinterpret_cast<const uint16_t*>(row_base + i * row_size);
- }
- break;
- case 4:
- for (uint32_t i = 0; i < num_rows; ++i) {
- reinterpret_cast<uint32_t*>(col_base)[i] =
- *reinterpret_cast<const uint32_t*>(row_base + i * row_size);
- }
- break;
- case 8:
- for (uint32_t i = 0; i < num_rows; ++i) {
- reinterpret_cast<uint64_t*>(col_base)[i] =
- *reinterpret_cast<const uint64_t*>(row_base + i * row_size);
- }
- break;
- default:
- DCHECK(false);
- }
- } else {
- const uint32_t* row_offsets = rows.offsets() + start_row;
- const uint8_t* row_base = rows.data(2);
- row_base += offset_within_row;
- uint8_t* col_base = col_prep.mutable_data(1);
- switch (col_prep.metadata().fixed_length) {
- case 1:
- for (uint32_t i = 0; i < num_rows; ++i) {
- col_base[i] = row_base[row_offsets[i]];
- }
- break;
- case 2:
- for (uint32_t i = 0; i < num_rows; ++i) {
- reinterpret_cast<uint16_t*>(col_base)[i] =
- *reinterpret_cast<const uint16_t*>(row_base + row_offsets[i]);
- }
- break;
- case 4:
- for (uint32_t i = 0; i < num_rows; ++i) {
- reinterpret_cast<uint32_t*>(col_base)[i] =
- *reinterpret_cast<const uint32_t*>(row_base + row_offsets[i]);
- }
- break;
- case 8:
- for (uint32_t i = 0; i < num_rows; ++i) {
- reinterpret_cast<uint64_t*>(col_base)[i] =
- *reinterpret_cast<const uint64_t*>(row_base + row_offsets[i]);
- }
- break;
- default:
- DCHECK(false);
- }
- }
-
- if (UsesTransform(*col)) {
- PostDecode(col_prep, col, ctx);
- }
-}
-
-bool KeyEncoder::EncoderBinary::IsInteger(const KeyColumnMetadata& metadata) {
- bool is_fixed_length = metadata.is_fixed_length;
- auto size = metadata.fixed_length;
- return is_fixed_length &&
- (size == 0 || size == 1 || size == 2 || size == 4 || size == 8);
-}
-
-void KeyEncoder::EncoderBinary::Encode(uint32_t offset_within_row, KeyRowArray* rows,
- const KeyColumnArray& col, KeyEncoderContext* ctx,
- KeyColumnArray* temp) {
- if (IsInteger(col.metadata())) {
- EncoderInteger::Encode(offset_within_row, rows, col, ctx, temp);
- } else {
- KeyColumnArray col_prep;
- if (EncoderInteger::UsesTransform(col)) {
- col_prep = EncoderInteger::ArrayReplace(col, *temp);
- EncoderInteger::PreEncode(col, &col_prep, ctx);
- } else {
- col_prep = col;
- }
-
- bool is_row_fixed_length = rows->metadata().is_fixed_length;
-
-#if defined(ARROW_HAVE_AVX2)
- if (ctx->has_avx2()) {
- EncodeHelper_avx2(is_row_fixed_length, offset_within_row, rows, col);
- } else {
-#endif
- if (is_row_fixed_length) {
- EncodeImp<true>(offset_within_row, rows, col);
- } else {
- EncodeImp<false>(offset_within_row, rows, col);
- }
-#if defined(ARROW_HAVE_AVX2)
- }
-#endif
- }
-
- DCHECK(temp->metadata().is_fixed_length);
- DCHECK(temp->length() * temp->metadata().fixed_length >=
- col.length() * static_cast<int64_t>(sizeof(uint16_t)));
-
- KeyColumnArray temp16bit(KeyColumnMetadata(true, sizeof(uint16_t)), col.length(),
- nullptr, temp->mutable_data(1), nullptr);
- ColumnMemsetNulls(offset_within_row, rows, col, ctx, &temp16bit, 0xae);
-}
-
-void KeyEncoder::EncoderBinary::Decode(uint32_t start_row, uint32_t num_rows,
- uint32_t offset_within_row,
- const KeyRowArray& rows, KeyColumnArray* col,
- KeyEncoderContext* ctx, KeyColumnArray* temp) {
- if (IsInteger(col->metadata())) {
- EncoderInteger::Decode(start_row, num_rows, offset_within_row, rows, col, ctx, temp);
- } else {
- KeyColumnArray col_prep;
- if (EncoderInteger::UsesTransform(*col)) {
- col_prep = EncoderInteger::ArrayReplace(*col, *temp);
- } else {
- col_prep = *col;
- }
-
- bool is_row_fixed_length = rows.metadata().is_fixed_length;
-
-#if defined(ARROW_HAVE_AVX2)
- if (ctx->has_avx2()) {
- DecodeHelper_avx2(is_row_fixed_length, start_row, num_rows, offset_within_row, rows,
- col);
- } else {
-#endif
- if (is_row_fixed_length) {
- DecodeImp<true>(start_row, num_rows, offset_within_row, rows, col);
- } else {
- DecodeImp<false>(start_row, num_rows, offset_within_row, rows, col);
- }
-#if defined(ARROW_HAVE_AVX2)
- }
-#endif
-
- if (EncoderInteger::UsesTransform(*col)) {
- EncoderInteger::PostDecode(col_prep, col, ctx);
- }
- }
-}
-
-template <bool is_row_fixed_length>
-void KeyEncoder::EncoderBinary::EncodeImp(uint32_t offset_within_row, KeyRowArray* rows,
- const KeyColumnArray& col) {
- EncodeDecodeHelper<is_row_fixed_length, true>(
- 0, static_cast<uint32_t>(col.length()), offset_within_row, rows, rows, &col,
- nullptr, [](uint8_t* dst, const uint8_t* src, int64_t length) {
- auto dst64 = reinterpret_cast<uint64_t*>(dst);
- auto src64 = reinterpret_cast<const uint64_t*>(src);
- uint32_t istripe;
- for (istripe = 0; istripe < length / 8; ++istripe) {
- dst64[istripe] = util::SafeLoad(src64 + istripe);
- }
- if ((length % 8) > 0) {
- uint64_t mask_last = ~0ULL >> (8 * (8 * (istripe + 1) - length));
- dst64[istripe] = (dst64[istripe] & ~mask_last) |
- (util::SafeLoad(src64 + istripe) & mask_last);
- }
- });
-}
-
-template <bool is_row_fixed_length>
-void KeyEncoder::EncoderBinary::DecodeImp(uint32_t start_row, uint32_t num_rows,
- uint32_t offset_within_row,
- const KeyRowArray& rows, KeyColumnArray* col) {
- EncodeDecodeHelper<is_row_fixed_length, false>(
- start_row, num_rows, offset_within_row, &rows, nullptr, col, col,
- [](uint8_t* dst, const uint8_t* src, int64_t length) {
- for (uint32_t istripe = 0; istripe < (length + 7) / 8; ++istripe) {
- auto dst64 = reinterpret_cast<uint64_t*>(dst);
- auto src64 = reinterpret_cast<const uint64_t*>(src);
- util::SafeStore(dst64 + istripe, src64[istripe]);
- }
- });
-}
-
-void KeyEncoder::EncoderBinary::ColumnMemsetNulls(
- uint32_t offset_within_row, KeyRowArray* rows, const KeyColumnArray& col,
- KeyEncoderContext* ctx, KeyColumnArray* temp_vector_16bit, uint8_t byte_value) {
- using ColumnMemsetNullsImp_t = void (*)(uint32_t, KeyRowArray*, const KeyColumnArray&,
- KeyEncoderContext*, KeyColumnArray*, uint8_t);
- static const ColumnMemsetNullsImp_t ColumnMemsetNullsImp_fn[] = {
- ColumnMemsetNullsImp<false, 1>, ColumnMemsetNullsImp<false, 2>,
- ColumnMemsetNullsImp<false, 4>, ColumnMemsetNullsImp<false, 8>,
- ColumnMemsetNullsImp<false, 16>, ColumnMemsetNullsImp<true, 1>,
- ColumnMemsetNullsImp<true, 2>, ColumnMemsetNullsImp<true, 4>,
- ColumnMemsetNullsImp<true, 8>, ColumnMemsetNullsImp<true, 16>};
- uint32_t col_width = col.metadata().fixed_length;
- int dispatch_const =
- (rows->metadata().is_fixed_length ? 5 : 0) +
- (col_width == 1 ? 0
- : col_width == 2 ? 1 : col_width == 4 ? 2 : col_width == 8 ? 3 : 4);
- ColumnMemsetNullsImp_fn[dispatch_const](offset_within_row, rows, col, ctx,
- temp_vector_16bit, byte_value);
-}
-
-template <bool is_row_fixed_length, uint32_t col_width>
-void KeyEncoder::EncoderBinary::ColumnMemsetNullsImp(
- uint32_t offset_within_row, KeyRowArray* rows, const KeyColumnArray& col,
- KeyEncoderContext* ctx, KeyColumnArray* temp_vector_16bit, uint8_t byte_value) {
- // Nothing to do when there are no nulls
- if (!col.data(0)) {
- return;
- }
-
- const auto num_rows = static_cast<uint32_t>(col.length());
-
- // Temp vector needs space for the required number of rows
- DCHECK(temp_vector_16bit->length() >= num_rows);
- DCHECK(temp_vector_16bit->metadata().is_fixed_length &&
- temp_vector_16bit->metadata().fixed_length == sizeof(uint16_t));
- auto temp_vector = reinterpret_cast<uint16_t*>(temp_vector_16bit->mutable_data(1));
-
- // Convert the null bit vector into an index vector of null positions
- int num_selected;
- util::BitUtil::bits_to_indexes(0, ctx->hardware_flags, static_cast<int>(col.length()),
- col.data(0), &num_selected, temp_vector,
- col.bit_offset(0));
-
- for (int i = 0; i < num_selected; ++i) {
- uint32_t row_id = temp_vector[i];
-
- // Target binary field pointer
- uint8_t* dst;
- if (is_row_fixed_length) {
- dst = rows->mutable_data(1) + rows->metadata().fixed_length * row_id;
- } else {
- dst = rows->mutable_data(2) + rows->offsets()[row_id];
- }
- dst += offset_within_row;
-
- if (col_width == 1) {
- *dst = byte_value;
- } else if (col_width == 2) {
- *reinterpret_cast<uint16_t*>(dst) =
- (static_cast<uint16_t>(byte_value) * static_cast<uint16_t>(0x0101));
- } else if (col_width == 4) {
- *reinterpret_cast<uint32_t*>(dst) =
- (static_cast<uint32_t>(byte_value) * static_cast<uint32_t>(0x01010101));
- } else if (col_width == 8) {
- *reinterpret_cast<uint64_t*>(dst) =
- (static_cast<uint64_t>(byte_value) * 0x0101010101010101ULL);
- } else {
- uint64_t value = (static_cast<uint64_t>(byte_value) * 0x0101010101010101ULL);
- uint32_t col_width_actual = col.metadata().fixed_length;
- uint32_t j;
- for (j = 0; j < col_width_actual / 8; ++j) {
- reinterpret_cast<uint64_t*>(dst)[j] = value;
- }
- int tail = col_width_actual % 8;
- if (tail) {
- uint64_t mask = ~0ULL >> (8 * (8 - tail));
- reinterpret_cast<uint64_t*>(dst)[j] =
- (reinterpret_cast<const uint64_t*>(dst)[j] & ~mask) | (value & mask);
- }
- }
- }
-}
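The 2-, 4- and 8-byte branches above fill the field by multiplying the byte with a 0x01...01 constant, which replicates it into every byte lane of the word; this is how the null-fill byte 0xae passed in by EncoderBinary::Encode gets splatted. A one-line check:

```cpp
#include <cassert>
#include <cstdint>

int main() {
  // Multiplying a byte by 0x0101010101010101 replicates it into all 8 bytes.
  uint8_t byte_value = 0xae;
  uint64_t splat = static_cast<uint64_t>(byte_value) * 0x0101010101010101ULL;
  assert(splat == 0xaeaeaeaeaeaeaeaeULL);
  return 0;
}
```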
-
-void KeyEncoder::EncoderBinaryPair::Encode(uint32_t offset_within_row, KeyRowArray* rows,
- const KeyColumnArray& col1,
- const KeyColumnArray& col2,
- KeyEncoderContext* ctx, KeyColumnArray* temp1,
- KeyColumnArray* temp2) {
- DCHECK(CanProcessPair(col1.metadata(), col2.metadata()));
-
- KeyColumnArray col_prep[2];
- if (EncoderInteger::UsesTransform(col1)) {
- col_prep[0] = EncoderInteger::ArrayReplace(col1, *temp1);
- EncoderInteger::PreEncode(col1, &(col_prep[0]), ctx);
- } else {
- col_prep[0] = col1;
- }
- if (EncoderInteger::UsesTransform(col2)) {
- col_prep[1] = EncoderInteger::ArrayReplace(col2, *temp2);
- EncoderInteger::PreEncode(col2, &(col_prep[1]), ctx);
- } else {
- col_prep[1] = col2;
- }
-
- uint32_t col_width1 = col_prep[0].metadata().fixed_length;
- uint32_t col_width2 = col_prep[1].metadata().fixed_length;
- int log_col_width1 =
- col_width1 == 8 ? 3 : col_width1 == 4 ? 2 : col_width1 == 2 ? 1 : 0;
- int log_col_width2 =
- col_width2 == 8 ? 3 : col_width2 == 4 ? 2 : col_width2 == 2 ? 1 : 0;
-
- bool is_row_fixed_length = rows->metadata().is_fixed_length;
-
- const auto num_rows = static_cast<uint32_t>(col1.length());
- uint32_t num_processed = 0;
-#if defined(ARROW_HAVE_AVX2)
- if (ctx->has_avx2() && col_width1 == col_width2) {
- num_processed = EncodeHelper_avx2(is_row_fixed_length, col_width1, offset_within_row,
- rows, col_prep[0], col_prep[1]);
- }
-#endif
- if (num_processed < num_rows) {
- using EncodeImp_t = void (*)(uint32_t, uint32_t, KeyRowArray*, const KeyColumnArray&,
- const KeyColumnArray&);
- static const EncodeImp_t EncodeImp_fn[] = {
- EncodeImp<false, uint8_t, uint8_t>, EncodeImp<false, uint16_t, uint8_t>,
- EncodeImp<false, uint32_t, uint8_t>, EncodeImp<false, uint64_t, uint8_t>,
- EncodeImp<false, uint8_t, uint16_t>, EncodeImp<false, uint16_t, uint16_t>,
- EncodeImp<false, uint32_t, uint16_t>, EncodeImp<false, uint64_t, uint16_t>,
- EncodeImp<false, uint8_t, uint32_t>, EncodeImp<false, uint16_t, uint32_t>,
- EncodeImp<false, uint32_t, uint32_t>, EncodeImp<false, uint64_t, uint32_t>,
- EncodeImp<false, uint8_t, uint64_t>, EncodeImp<false, uint16_t, uint64_t>,
- EncodeImp<false, uint32_t, uint64_t>, EncodeImp<false, uint64_t, uint64_t>,
- EncodeImp<true, uint8_t, uint8_t>, EncodeImp<true, uint16_t, uint8_t>,
- EncodeImp<true, uint32_t, uint8_t>, EncodeImp<true, uint64_t, uint8_t>,
- EncodeImp<true, uint8_t, uint16_t>, EncodeImp<true, uint16_t, uint16_t>,
- EncodeImp<true, uint32_t, uint16_t>, EncodeImp<true, uint64_t, uint16_t>,
- EncodeImp<true, uint8_t, uint32_t>, EncodeImp<true, uint16_t, uint32_t>,
- EncodeImp<true, uint32_t, uint32_t>, EncodeImp<true, uint64_t, uint32_t>,
- EncodeImp<true, uint8_t, uint64_t>, EncodeImp<true, uint16_t, uint64_t>,
- EncodeImp<true, uint32_t, uint64_t>, EncodeImp<true, uint64_t, uint64_t>};
- int dispatch_const = (log_col_width2 << 2) | log_col_width1;
- dispatch_const += (is_row_fixed_length ? 16 : 0);
- EncodeImp_fn[dispatch_const](num_processed, offset_within_row, rows, col_prep[0],
- col_prep[1]);
- }
-}
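The 32-entry EncodeImp table is addressed by packing the log2 of each column width into a 2-bit field, (log_col_width2 << 2) | log_col_width1, and adding 16 when the rows are fixed-length. A standalone recomputation of two of those indexes (the function name is made up for illustration):

```cpp
#include <cassert>
#include <cstdint>

// Recomputes the EncodeImp dispatch index: two 2-bit width codes packed as
// (log_w2 << 2) | log_w1, plus 16 when rows are fixed-length.
int PairDispatchIndex(uint32_t w1, uint32_t w2, bool rows_fixed) {
  auto log2w = [](uint32_t w) { return w == 8 ? 3 : w == 4 ? 2 : w == 2 ? 1 : 0; };
  return (log2w(w2) << 2) | log2w(w1) | (rows_fixed ? 16 : 0);
}

int main() {
  assert(PairDispatchIndex(1, 1, false) == 0);  // EncodeImp<false, uint8_t, uint8_t>
  assert(PairDispatchIndex(8, 4, true) == 27);  // EncodeImp<true, uint64_t, uint32_t>
  return 0;
}
```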
-
-template <bool is_row_fixed_length, typename col1_type, typename col2_type>
-void KeyEncoder::EncoderBinaryPair::EncodeImp(uint32_t num_rows_to_skip,
- uint32_t offset_within_row,
- KeyRowArray* rows,
- const KeyColumnArray& col1,
- const KeyColumnArray& col2) {
- const uint8_t* src_A = col1.data(1);
- const uint8_t* src_B = col2.data(1);
-
- const auto num_rows = static_cast<uint32_t>(col1.length());
-
- uint32_t fixed_length = rows->metadata().fixed_length;
- const uint32_t* offsets;
- uint8_t* dst_base;
- if (is_row_fixed_length) {
- dst_base = rows->mutable_data(1) + offset_within_row;
- offsets = nullptr;
- } else {
- dst_base = rows->mutable_data(2) + offset_within_row;
- offsets = rows->offsets();
- }
-
- using col1_type_const = typename std::add_const<col1_type>::type;
- using col2_type_const = typename std::add_const<col2_type>::type;
-
- if (is_row_fixed_length) {
- uint8_t* dst = dst_base + num_rows_to_skip * fixed_length;
- for (uint32_t i = num_rows_to_skip; i < num_rows; ++i) {
- *reinterpret_cast<col1_type*>(dst) = reinterpret_cast<col1_type_const*>(src_A)[i];
- *reinterpret_cast<col2_type*>(dst + sizeof(col1_type)) =
- reinterpret_cast<col2_type_const*>(src_B)[i];
- dst += fixed_length;
- }
- } else {
- for (uint32_t i = num_rows_to_skip; i < num_rows; ++i) {
- uint8_t* dst = dst_base + offsets[i];
- *reinterpret_cast<col1_type*>(dst) = reinterpret_cast<col1_type_const*>(src_A)[i];
- *reinterpret_cast<col2_type*>(dst + sizeof(col1_type)) =
- reinterpret_cast<col2_type_const*>(src_B)[i];
- }
- }
-}
-
-void KeyEncoder::EncoderBinaryPair::Decode(uint32_t start_row, uint32_t num_rows,
- uint32_t offset_within_row,
- const KeyRowArray& rows, KeyColumnArray* col1,
- KeyColumnArray* col2, KeyEncoderContext* ctx,
- KeyColumnArray* temp1, KeyColumnArray* temp2) {
- DCHECK(CanProcessPair(col1->metadata(), col2->metadata()));
-
- KeyColumnArray col_prep[2];
- if (EncoderInteger::UsesTransform(*col1)) {
- col_prep[0] = EncoderInteger::ArrayReplace(*col1, *temp1);
- } else {
- col_prep[0] = *col1;
- }
- if (EncoderInteger::UsesTransform(*col2)) {
- col_prep[1] = EncoderInteger::ArrayReplace(*col2, *temp2);
- } else {
- col_prep[1] = *col2;
- }
-
- uint32_t col_width1 = col_prep[0].metadata().fixed_length;
- uint32_t col_width2 = col_prep[1].metadata().fixed_length;
- int log_col_width1 =
- col_width1 == 8 ? 3 : col_width1 == 4 ? 2 : col_width1 == 2 ? 1 : 0;
- int log_col_width2 =
- col_width2 == 8 ? 3 : col_width2 == 4 ? 2 : col_width2 == 2 ? 1 : 0;
-
- bool is_row_fixed_length = rows.metadata().is_fixed_length;
-
- uint32_t num_processed = 0;
-#if defined(ARROW_HAVE_AVX2)
- if (ctx->has_avx2() && col_width1 == col_width2) {
- num_processed =
- DecodeHelper_avx2(is_row_fixed_length, col_width1, start_row, num_rows,
- offset_within_row, rows, &col_prep[0], &col_prep[1]);
- }
-#endif
- if (num_processed < num_rows) {
- using DecodeImp_t = void (*)(uint32_t, uint32_t, uint32_t, uint32_t,
- const KeyRowArray&, KeyColumnArray*, KeyColumnArray*);
- static const DecodeImp_t DecodeImp_fn[] = {
- DecodeImp<false, uint8_t, uint8_t>, DecodeImp<false, uint16_t, uint8_t>,
- DecodeImp<false, uint32_t, uint8_t>, DecodeImp<false, uint64_t, uint8_t>,
- DecodeImp<false, uint8_t, uint16_t>, DecodeImp<false, uint16_t, uint16_t>,
- DecodeImp<false, uint32_t, uint16_t>, DecodeImp<false, uint64_t, uint16_t>,
- DecodeImp<false, uint8_t, uint32_t>, DecodeImp<false, uint16_t, uint32_t>,
- DecodeImp<false, uint32_t, uint32_t>, DecodeImp<false, uint64_t, uint32_t>,
- DecodeImp<false, uint8_t, uint64_t>, DecodeImp<false, uint16_t, uint64_t>,
- DecodeImp<false, uint32_t, uint64_t>, DecodeImp<false, uint64_t, uint64_t>,
- DecodeImp<true, uint8_t, uint8_t>, DecodeImp<true, uint16_t, uint8_t>,
- DecodeImp<true, uint32_t, uint8_t>, DecodeImp<true, uint64_t, uint8_t>,
- DecodeImp<true, uint8_t, uint16_t>, DecodeImp<true, uint16_t, uint16_t>,
- DecodeImp<true, uint32_t, uint16_t>, DecodeImp<true, uint64_t, uint16_t>,
- DecodeImp<true, uint8_t, uint32_t>, DecodeImp<true, uint16_t, uint32_t>,
- DecodeImp<true, uint32_t, uint32_t>, DecodeImp<true, uint64_t, uint32_t>,
- DecodeImp<true, uint8_t, uint64_t>, DecodeImp<true, uint16_t, uint64_t>,
- DecodeImp<true, uint32_t, uint64_t>, DecodeImp<true, uint64_t, uint64_t>};
- int dispatch_const =
- (log_col_width2 << 2) | log_col_width1 | (is_row_fixed_length ? 16 : 0);
- DecodeImp_fn[dispatch_const](num_processed, start_row, num_rows, offset_within_row,
- rows, &(col_prep[0]), &(col_prep[1]));
- }
-
- if (EncoderInteger::UsesTransform(*col1)) {
- EncoderInteger::PostDecode(col_prep[0], col1, ctx);
- }
- if (EncoderInteger::UsesTransform(*col2)) {
- EncoderInteger::PostDecode(col_prep[1], col2, ctx);
- }
-}
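-
-// The DecodeImp_fn table above is indexed with
-// (is_row_fixed_length ? 16 : 0) | (log_col_width2 << 2) | log_col_width1.
-// For example, a fixed-length row whose pair is a 4-byte column followed by an
-// 8-byte column selects entry 16 + (3 << 2) + 2 = 30, which is
-// DecodeImp<true, uint32_t, uint64_t>.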
-
-template <bool is_row_fixed_length, typename col1_type, typename col2_type>
-void KeyEncoder::EncoderBinaryPair::DecodeImp(uint32_t num_rows_to_skip,
- uint32_t start_row, uint32_t num_rows,
- uint32_t offset_within_row,
- const KeyRowArray& rows,
- KeyColumnArray* col1,
- KeyColumnArray* col2) {
- DCHECK(rows.length() >= start_row + num_rows);
- DCHECK(col1->length() == num_rows && col2->length() == num_rows);
-
- uint8_t* dst_A = col1->mutable_data(1);
- uint8_t* dst_B = col2->mutable_data(1);
-
- uint32_t fixed_length = rows.metadata().fixed_length;
- const uint32_t* offsets;
- const uint8_t* src_base;
- if (is_row_fixed_length) {
- src_base = rows.data(1) + fixed_length * start_row + offset_within_row;
- offsets = nullptr;
- } else {
- src_base = rows.data(2) + offset_within_row;
- offsets = rows.offsets() + start_row;
- }
-
- using col1_type_const = typename std::add_const<col1_type>::type;
- using col2_type_const = typename std::add_const<col2_type>::type;
-
- if (is_row_fixed_length) {
- const uint8_t* src = src_base + num_rows_to_skip * fixed_length;
- for (uint32_t i = num_rows_to_skip; i < num_rows; ++i) {
- reinterpret_cast<col1_type*>(dst_A)[i] = *reinterpret_cast<col1_type_const*>(src);
- reinterpret_cast<col2_type*>(dst_B)[i] =
- *reinterpret_cast<col2_type_const*>(src + sizeof(col1_type));
- src += fixed_length;
- }
- } else {
- for (uint32_t i = num_rows_to_skip; i < num_rows; ++i) {
- const uint8_t* src = src_base + offsets[i];
- reinterpret_cast<col1_type*>(dst_A)[i] = *reinterpret_cast<col1_type_const*>(src);
- reinterpret_cast<col2_type*>(dst_B)[i] =
- *reinterpret_cast<col2_type_const*>(src + sizeof(col1_type));
- }
- }
-}
-
-void KeyEncoder::EncoderOffsets::Encode(KeyRowArray* rows,
- const std::vector<KeyColumnArray>& varbinary_cols,
- KeyEncoderContext* ctx) {
- DCHECK(!varbinary_cols.empty());
-
- // Rows and columns must all be varying-length
- DCHECK(!rows->metadata().is_fixed_length);
- for (const auto& col : varbinary_cols) {
- DCHECK(!col.metadata().is_fixed_length);
- }
-
- const auto num_rows = static_cast<uint32_t>(varbinary_cols[0].length());
-
- uint32_t num_processed = 0;
-#if defined(ARROW_HAVE_AVX2)
- // Whether any column's non-nulls bit vector starts at a non-zero bit offset
- bool has_bit_offset = false;
-
- // The number of values in each column must exactly match the number of rows
- DCHECK(rows->length() == num_rows);
- for (const auto& col : varbinary_cols) {
- DCHECK(col.length() == num_rows);
- if (col.bit_offset(0) != 0) {
- has_bit_offset = true;
- }
- }
-
- if (ctx->has_avx2() && !has_bit_offset) {
- // Create a temp vector sized based on the number of columns
- auto temp_buffer_holder = util::TempVectorHolder<uint32_t>(
- ctx->stack, static_cast<uint32_t>(varbinary_cols.size()) * 8);
- auto temp_buffer_32B_per_col = KeyColumnArray(
- KeyColumnMetadata(true, sizeof(uint32_t)), varbinary_cols.size() * 8, nullptr,
- reinterpret_cast<uint8_t*>(temp_buffer_holder.mutable_data()), nullptr);
-
- num_processed = EncodeImp_avx2(rows, varbinary_cols, &temp_buffer_32B_per_col);
- }
-#endif
- if (num_processed < num_rows) {
- EncodeImp(num_processed, rows, varbinary_cols);
- }
-}
-
-void KeyEncoder::EncoderOffsets::EncodeImp(
- uint32_t num_rows_already_processed, KeyRowArray* rows,
- const std::vector<KeyColumnArray>& varbinary_cols) {
- DCHECK_GT(varbinary_cols.size(), 0);
-
- int row_alignment = rows->metadata().row_alignment;
- int string_alignment = rows->metadata().string_alignment;
-
- uint32_t* row_offsets = rows->mutable_offsets();
- uint8_t* row_values = rows->mutable_data(2);
- const auto num_rows = static_cast<uint32_t>(varbinary_cols[0].length());
-
- if (num_rows_already_processed == 0) {
- row_offsets[0] = 0;
- }
-
- uint32_t row_offset = row_offsets[num_rows_already_processed];
- for (uint32_t i = num_rows_already_processed; i < num_rows; ++i) {
- uint32_t* varbinary_end =
- rows->metadata().varbinary_end_array(row_values + row_offset);
-
- // Zero out lengths for nulls.
- // Add lengths of all columns to get row size.
- // Store varbinary field ends while summing their lengths.
-
- uint32_t offset_within_row = rows->metadata().fixed_length;
-
- for (size_t col = 0; col < varbinary_cols.size(); ++col) {
- const uint32_t* col_offsets = varbinary_cols[col].offsets();
- uint32_t col_length = col_offsets[i + 1] - col_offsets[i];
-
- const int bit_offset = varbinary_cols[col].bit_offset(0);
-
- const uint8_t* non_nulls = varbinary_cols[col].data(0);
- if (non_nulls && BitUtil::GetBit(non_nulls, bit_offset + i) == 0) {
- col_length = 0;
- }
-
- offset_within_row +=
- KeyRowMetadata::padding_for_alignment(offset_within_row, string_alignment);
- offset_within_row += col_length;
-
- varbinary_end[col] = offset_within_row;
- }
-
- offset_within_row +=
- KeyRowMetadata::padding_for_alignment(offset_within_row, row_alignment);
- row_offset += offset_within_row;
- row_offsets[i + 1] = row_offset;
- }
-}
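-
-// Typical layout produced for a varying-length row:
-//
-// [ fixed-length fields | varbinary_end[] | pad | varbinary 0 | pad | varbinary 1 | ... | row pad ]
-//
-// The varbinary_end[] array is counted as part of metadata().fixed_length, and
-// varbinary_end[k] holds the offset within the row just past the k-th varbinary
-// value, so EncodeImp above and Decode below recover lengths as differences.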
-
-void KeyEncoder::EncoderOffsets::Decode(
- uint32_t start_row, uint32_t num_rows, const KeyRowArray& rows,
- std::vector<KeyColumnArray>* varbinary_cols,
- const std::vector<uint32_t>& varbinary_cols_base_offset, KeyEncoderContext* ctx) {
- DCHECK(!varbinary_cols->empty());
- DCHECK(varbinary_cols->size() == varbinary_cols_base_offset.size());
-
- DCHECK(!rows.metadata().is_fixed_length);
- DCHECK(rows.length() >= start_row + num_rows);
- for (const auto& col : *varbinary_cols) {
- // Rows and columns must all be varying-length
- DCHECK(!col.metadata().is_fixed_length);
- // Each column must have space for exactly the selected subset of rows
- DCHECK(col.length() == num_rows);
- }
-
- // Offsets of varbinary column values within each encoded row are stored
- // inside that row as an array of 32-bit integers.
- // The array immediately follows the fixed-length column data.
- // There is one element per varying-length column.
- // The Nth element is the offset within the row just past the end of the
- // Nth varbinary value, so individual lengths are recovered as differences.
-
- const uint32_t* row_offsets = rows.offsets() + start_row;
-
- // Set the base offset for each column
- for (size_t col = 0; col < varbinary_cols->size(); ++col) {
- uint32_t* col_offsets = (*varbinary_cols)[col].mutable_offsets();
- col_offsets[0] = varbinary_cols_base_offset[col];
- }
-
- int string_alignment = rows.metadata().string_alignment;
-
- for (uint32_t i = 0; i < num_rows; ++i) {
- // Find the beginning of the cumulative lengths array for the next row
- const uint8_t* row = rows.data(2) + row_offsets[i];
- const uint32_t* varbinary_ends = rows.metadata().varbinary_end_array(row);
-
- // Update the offset of each column
- uint32_t offset_within_row = rows.metadata().fixed_length;
- for (size_t col = 0; col < varbinary_cols->size(); ++col) {
- offset_within_row +=
- KeyRowMetadata::padding_for_alignment(offset_within_row, string_alignment);
- uint32_t length = varbinary_ends[col] - offset_within_row;
- offset_within_row = varbinary_ends[col];
- uint32_t* col_offsets = (*varbinary_cols)[col].mutable_offsets();
- col_offsets[i + 1] = col_offsets[i] + length;
- }
- }
-}
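-
-// Worked example for the offsets: let metadata().fixed_length = 8 (just the
-// two-element varbinary_end array), string_alignment = 4, and one row holding
-// varbinary values of 5 and 3 bytes. Encode stores varbinary_end = {13, 19}:
-// 8 + 5 = 13, padded to 16, then 16 + 3 = 19. Decode recovers the lengths as
-// 13 - 8 = 5 and 19 - 16 = 3.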
-
-void KeyEncoder::EncoderVarBinary::Encode(uint32_t varbinary_col_id, KeyRowArray* rows,
- const KeyColumnArray& col,
- KeyEncoderContext* ctx) {
-#if defined(ARROW_HAVE_AVX2)
- if (ctx->has_avx2()) {
- EncodeHelper_avx2(varbinary_col_id, rows, col);
- } else {
-#endif
- if (varbinary_col_id == 0) {
- EncodeImp<true>(varbinary_col_id, rows, col);
- } else {
- EncodeImp<false>(varbinary_col_id, rows, col);
- }
-#if defined(ARROW_HAVE_AVX2)
- }
-#endif
-}
-
-void KeyEncoder::EncoderVarBinary::Decode(uint32_t start_row, uint32_t num_rows,
- uint32_t varbinary_col_id,
- const KeyRowArray& rows, KeyColumnArray* col,
- KeyEncoderContext* ctx) {
- // The output column's varbinary buffer needs an extra 32B at the end for the
- // AVX2 version and an extra 8B otherwise.
-#if defined(ARROW_HAVE_AVX2)
- if (ctx->has_avx2()) {
- DecodeHelper_avx2(start_row, num_rows, varbinary_col_id, rows, col);
- } else {
-#endif
- if (varbinary_col_id == 0) {
- DecodeImp<true>(start_row, num_rows, varbinary_col_id, rows, col);
- } else {
- DecodeImp<false>(start_row, num_rows, varbinary_col_id, rows, col);
- }
-#if defined(ARROW_HAVE_AVX2)
- }
-#endif
-}
-
-template <bool first_varbinary_col>
-void KeyEncoder::EncoderVarBinary::EncodeImp(uint32_t varbinary_col_id, KeyRowArray* rows,
- const KeyColumnArray& col) {
- EncodeDecodeHelper<first_varbinary_col, true>(
- 0, static_cast<uint32_t>(col.length()), varbinary_col_id, rows, rows, &col, nullptr,
- [](uint8_t* dst, const uint8_t* src, int64_t length) {
- auto dst64 = reinterpret_cast<uint64_t*>(dst);
- auto src64 = reinterpret_cast<const uint64_t*>(src);
- uint32_t istripe;
- for (istripe = 0; istripe < length / 8; ++istripe) {
- dst64[istripe] = util::SafeLoad(src64 + istripe);
- }
- if ((length % 8) > 0) {
- uint64_t mask_last = ~0ULL >> (8 * (8 * (istripe + 1) - length));
- dst64[istripe] = (dst64[istripe] & ~mask_last) |
- (util::SafeLoad(src64 + istripe) & mask_last);
- }
- });
-}
-
-template <bool first_varbinary_col>
-void KeyEncoder::EncoderVarBinary::DecodeImp(uint32_t start_row, uint32_t num_rows,
- uint32_t varbinary_col_id,
- const KeyRowArray& rows,
- KeyColumnArray* col) {
- EncodeDecodeHelper<first_varbinary_col, false>(
- start_row, num_rows, varbinary_col_id, &rows, nullptr, col, col,
- [](uint8_t* dst, const uint8_t* src, int64_t length) {
- for (uint32_t istripe = 0; istripe < (length + 7) / 8; ++istripe) {
- auto dst64 = reinterpret_cast<uint64_t*>(dst);
- auto src64 = reinterpret_cast<const uint64_t*>(src);
- util::SafeStore(dst64 + istripe, src64[istripe]);
- }
- });
-}
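-
-// Both helpers above copy in 8-byte stripes. EncodeImp masks the final stripe so
-// that destination bytes past the value keep their old contents: for length = 11
-// the tail mask is ~0ULL >> (8 * (8 * 2 - 11)), i.e. the low 3 bytes. DecodeImp
-// instead may overwrite up to 7 bytes past the value, which is why the output
-// buffer needs the extra slack described in Decode above.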
-
-void KeyEncoder::EncoderNulls::Encode(KeyRowArray* rows,
- const std::vector<KeyColumnArray>& cols,
- KeyEncoderContext* ctx,
- KeyColumnArray* temp_vector_16bit) {
- DCHECK_GT(cols.size(), 0);
- const auto num_rows = static_cast<uint32_t>(rows->length());
-
- // All input columns should have the same number of rows.
- // They may or may not have non-nulls bit-vectors allocated.
- for (const auto& col : cols) {
- DCHECK(col.length() == num_rows);
- }
-
- // Temp vector needs space for the required number of rows
- DCHECK(temp_vector_16bit->length() >= num_rows);
- DCHECK(temp_vector_16bit->metadata().is_fixed_length &&
- temp_vector_16bit->metadata().fixed_length == sizeof(uint16_t));
-
- uint8_t* null_masks = rows->null_masks();
- uint32_t null_masks_bytes_per_row = rows->metadata().null_masks_bytes_per_row;
- memset(null_masks, 0, null_masks_bytes_per_row * num_rows);
- for (size_t col = 0; col < cols.size(); ++col) {
- const uint8_t* non_nulls = cols[col].data(0);
- if (!non_nulls) {
- continue;
- }
- int bit_offset = cols[col].bit_offset(0);
- DCHECK_LT(bit_offset, 8);
- int num_selected;
- util::BitUtil::bits_to_indexes(
- 0, ctx->hardware_flags, num_rows, non_nulls, &num_selected,
- reinterpret_cast<uint16_t*>(temp_vector_16bit->mutable_data(1)), bit_offset);
- for (int i = 0; i < num_selected; ++i) {
- uint16_t row_id = reinterpret_cast<const uint16_t*>(temp_vector_16bit->data(1))[i];
- int64_t null_masks_bit_id = row_id * null_masks_bytes_per_row * 8 + col;
- BitUtil::SetBit(null_masks, null_masks_bit_id);
- }
- }
-}
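-
-// Null-mask layout: each row owns null_masks_bytes_per_row bytes, and the bit
-// for column c of row r is bit (r * null_masks_bytes_per_row * 8 + c). A set
-// bit means null, the opposite convention of the columns' non-nulls bit vectors.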
-
-void KeyEncoder::EncoderNulls::Decode(uint32_t start_row, uint32_t num_rows,
- const KeyRowArray& rows,
- std::vector<KeyColumnArray>* cols) {
- // Every output column needs space for exactly the required number of rows.
- // It also needs its non-nulls bit-vector allocated and mutable.
- DCHECK_GT(cols->size(), 0);
- for (auto& col : *cols) {
- DCHECK(col.length() == num_rows);
- DCHECK(col.mutable_data(0));
- }
-
- const uint8_t* null_masks = rows.null_masks();
- uint32_t null_masks_bytes_per_row = rows.metadata().null_masks_bytes_per_row;
- for (size_t col = 0; col < cols->size(); ++col) {
- uint8_t* non_nulls = (*cols)[col].mutable_data(0);
- const int bit_offset = (*cols)[col].bit_offset(0);
- DCHECK_LT(bit_offset, 8);
- non_nulls[0] |= 0xff << (bit_offset);
- if (bit_offset + num_rows > 8) {
- int bits_in_first_byte = 8 - bit_offset;
- memset(non_nulls + 1, 0xff, BitUtil::BytesForBits(num_rows - bits_in_first_byte));
- }
- for (uint32_t row = 0; row < num_rows; ++row) {
- uint32_t null_masks_bit_id =
- (start_row + row) * null_masks_bytes_per_row * 8 + static_cast<uint32_t>(col);
- bool is_set = BitUtil::GetBit(null_masks, null_masks_bit_id);
- if (is_set) {
- BitUtil::ClearBit(non_nulls, bit_offset + row);
- }
- }
- }
-}
-
-uint32_t KeyEncoder::KeyRowMetadata::num_varbinary_cols() const {
- uint32_t result = 0;
- for (auto column_metadata : column_metadatas) {
- if (!column_metadata.is_fixed_length) {
- ++result;
- }
- }
- return result;
-}
-
-bool KeyEncoder::KeyRowMetadata::is_compatible(const KeyRowMetadata& other) const {
- if (other.num_cols() != num_cols()) {
- return false;
- }
- if (row_alignment != other.row_alignment ||
- string_alignment != other.string_alignment) {
- return false;
- }
- for (size_t i = 0; i < column_metadatas.size(); ++i) {
- if (column_metadatas[i].is_fixed_length !=
- other.column_metadatas[i].is_fixed_length) {
- return false;
- }
- if (column_metadatas[i].fixed_length != other.column_metadatas[i].fixed_length) {
- return false;
- }
- }
- return true;
-}
-
-void KeyEncoder::KeyRowMetadata::FromColumnMetadataVector(
- const std::vector<KeyColumnMetadata>& cols, int in_row_alignment,
- int in_string_alignment) {
- column_metadatas.resize(cols.size());
- for (size_t i = 0; i < cols.size(); ++i) {
- column_metadatas[i] = cols[i];
- }
-
- const auto num_cols = static_cast<uint32_t>(cols.size());
-
- // Sort columns.
- // Columns are sorted based on the size in bytes of their fixed-length part.
- // For the varying-length column, the fixed-length part is the 32-bit field storing
- // cumulative length of varying-length fields.
- // The rules are:
- // a) A boolean column, marked with fixed-length 0, is considered to have a
- // fixed-length part of 1 byte.
- // b) Columns whose fixed-length part is a power of 2 or a multiple of the row
- // alignment precede other columns; among themselves they are ordered by the
- // size of the fixed-length part, widest first.
- // c) Fixed-length columns precede varying-length columns when both have a
- // fixed-length part of the same size.
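- // For example, fixed-length columns of widths (1, 8, 3, 4) bytes are ordered
- // as (8, 4, 1, 3): power-of-two widths come first, widest to narrowest, and
- // the 3-byte column keeps its original relative position at the end.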
- column_order.resize(num_cols);
- for (uint32_t i = 0; i < num_cols; ++i) {
- column_order[i] = i;
- }
- std::sort(
- column_order.begin(), column_order.end(), [&cols](uint32_t left, uint32_t right) {
- bool is_left_pow2 =
- !cols[left].is_fixed_length || ARROW_POPCOUNT64(cols[left].fixed_length) <= 1;
- bool is_right_pow2 = !cols[right].is_fixed_length ||
- ARROW_POPCOUNT64(cols[right].fixed_length) <= 1;
- bool is_left_fixedlen = cols[left].is_fixed_length;
- bool is_right_fixedlen = cols[right].is_fixed_length;
- uint32_t width_left =
- cols[left].is_fixed_length ? cols[left].fixed_length : sizeof(uint32_t);
- uint32_t width_right =
- cols[right].is_fixed_length ? cols[right].fixed_length : sizeof(uint32_t);
- if (is_left_pow2 != is_right_pow2) {
- return is_left_pow2;
- }
- if (!is_left_pow2) {
- return left < right;
- }
- if (width_left != width_right) {
- return width_left > width_right;
- }
- if (is_left_fixedlen != is_right_fixedlen) {
- return is_left_fixedlen;
- }
- return left < right;
- });
-
- row_alignment = in_row_alignment;
- string_alignment = in_string_alignment;
- varbinary_end_array_offset = 0;
-
- column_offsets.resize(num_cols);
- uint32_t num_varbinary_cols = 0;
- uint32_t offset_within_row = 0;
- for (uint32_t i = 0; i < num_cols; ++i) {
- const KeyColumnMetadata& col = cols[column_order[i]];
- offset_within_row +=
- KeyRowMetadata::padding_for_alignment(offset_within_row, string_alignment, col);
- column_offsets[i] = offset_within_row;
- if (!col.is_fixed_length) {
- if (num_varbinary_cols == 0) {
- varbinary_end_array_offset = offset_within_row;
- }
- DCHECK(column_offsets[i] - varbinary_end_array_offset ==
- num_varbinary_cols * sizeof(uint32_t));
- ++num_varbinary_cols;
- offset_within_row += sizeof(uint32_t);
- } else {
- // A boolean column is a bit-vector, which is indicated by a fixed length
- // of zero in the column metadata.
- // It is stored as one byte per value in the output row.
- if (col.fixed_length == 0) {
- offset_within_row += 1;
- } else {
- offset_within_row += col.fixed_length;
- }
- }
- }
-
- is_fixed_length = (num_varbinary_cols == 0);
- fixed_length =
- offset_within_row +
- KeyRowMetadata::padding_for_alignment(
- offset_within_row, num_varbinary_cols == 0 ? row_alignment : string_alignment);
-
- // We set the number of bytes per row used to store the null masks of the
- // individual key columns to a power of two. This is not required; it could
- // also be the minimal number of bytes that holds one bit per column (a worked
- // example follows this function).
- null_masks_bytes_per_row = 1;
- while (static_cast<uint32_t>(null_masks_bytes_per_row * 8) < num_cols) {
- null_masks_bytes_per_row *= 2;
- }
-}
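-
-// Example for the null-mask sizing above: 20 key columns need
-// ceil(20 / 8) = 3 bytes of null-mask bits, which the loop rounds up to the
-// next power of two, giving null_masks_bytes_per_row = 4.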
-
-void KeyEncoder::Init(const std::vector<KeyColumnMetadata>& cols, KeyEncoderContext* ctx,
- int row_alignment, int string_alignment) {
- ctx_ = ctx;
- row_metadata_.FromColumnMetadataVector(cols, row_alignment, string_alignment);
- uint32_t num_cols = row_metadata_.num_cols();
- uint32_t num_varbinary_cols = row_metadata_.num_varbinary_cols();
- batch_all_cols_.resize(num_cols);
- batch_varbinary_cols_.resize(num_varbinary_cols);
- batch_varbinary_cols_base_offsets_.resize(num_varbinary_cols);
-}
-
-void KeyEncoder::PrepareKeyColumnArrays(int64_t start_row, int64_t num_rows,
- const std::vector<KeyColumnArray>& cols_in) {
- const auto num_cols = static_cast<uint32_t>(cols_in.size());
- DCHECK(batch_all_cols_.size() == num_cols);
-
- uint32_t num_varbinary_visited = 0;
- for (uint32_t i = 0; i < num_cols; ++i) {
- const KeyColumnArray& col = cols_in[row_metadata_.column_order[i]];
- KeyColumnArray col_window(col, start_row, num_rows);
- batch_all_cols_[i] = col_window;
- if (!col.metadata().is_fixed_length) {
- DCHECK(num_varbinary_visited < batch_varbinary_cols_.size());
- // If the start row is zero, then the base offset of the varbinary column is also zero.
- if (start_row == 0) {
- batch_varbinary_cols_base_offsets_[num_varbinary_visited] = 0;
- } else {
- batch_varbinary_cols_base_offsets_[num_varbinary_visited] =
- col.offsets()[start_row];
- }
- batch_varbinary_cols_[num_varbinary_visited++] = col_window;
- }
- }
-}
-
-Status KeyEncoder::PrepareOutputForEncode(int64_t start_row, int64_t num_rows,
- KeyRowArray* rows,
- const std::vector<KeyColumnArray>& all_cols) {
- int64_t num_bytes_required = 0;
-
- int64_t fixed_part = row_metadata_.fixed_length * num_rows;
- int64_t var_part = 0;
- for (const auto& col : all_cols) {
- if (!col.metadata().is_fixed_length) {
- DCHECK(col.length() >= start_row + num_rows);
- const uint32_t* offsets = col.offsets();
- var_part += offsets[start_row + num_rows] - offsets[start_row];
- // Include maximum padding that can be added to align the start of varbinary fields.
- var_part += num_rows * row_metadata_.string_alignment;
- }
- }
- // Include maximum padding that can be added to align the start of the rows.
- if (!row_metadata_.is_fixed_length) {
- fixed_part += row_metadata_.row_alignment * num_rows;
- }
- num_bytes_required = fixed_part + var_part;
-
- rows->Clean();
- RETURN_NOT_OK(rows->AppendEmpty(static_cast<uint32_t>(num_rows),
- static_cast<uint32_t>(num_bytes_required)));
-
- return Status::OK();
-}
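-
-// The byte count computed above is a deliberate upper bound: every row is
-// charged the maximum possible row-alignment padding and every varbinary value
-// the maximum possible string-alignment padding, so the single AppendEmpty
-// reservation is always sufficient.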
-
-void KeyEncoder::Encode(int64_t start_row, int64_t num_rows, KeyRowArray* rows,
- const std::vector<KeyColumnArray>& cols) {
- // Prepare column array vectors
- PrepareKeyColumnArrays(start_row, num_rows, cols);
-
- // Create two temp vectors with 16-bit elements
- auto temp_buffer_holder_A =
- util::TempVectorHolder<uint16_t>(ctx_->stack, static_cast<uint32_t>(num_rows));
- auto temp_buffer_A = KeyColumnArray(
- KeyColumnMetadata(true, sizeof(uint16_t)), num_rows, nullptr,
- reinterpret_cast<uint8_t*>(temp_buffer_holder_A.mutable_data()), nullptr);
- auto temp_buffer_holder_B =
- util::TempVectorHolder<uint16_t>(ctx_->stack, static_cast<uint32_t>(num_rows));
- auto temp_buffer_B = KeyColumnArray(
- KeyColumnMetadata(true, sizeof(uint16_t)), num_rows, nullptr,
- reinterpret_cast<uint8_t*>(temp_buffer_holder_B.mutable_data()), nullptr);
-
- bool is_row_fixed_length = row_metadata_.is_fixed_length;
- if (!is_row_fixed_length) {
- // This call will generate and fill in data for both:
- // - offsets to the entire encoded arrays
- // - offsets for individual varbinary fields within each row
- EncoderOffsets::Encode(rows, batch_varbinary_cols_, ctx_);
-
- for (size_t i = 0; i < batch_varbinary_cols_.size(); ++i) {
- // Memcpy each varbinary field into the position in the output row buffer
- // precomputed in the previous step.
- EncoderVarBinary::Encode(static_cast<uint32_t>(i), rows, batch_varbinary_cols_[i],
- ctx_);
- }
- }
-
- // Process fixed length columns
- const auto num_cols = static_cast<uint32_t>(batch_all_cols_.size());
- for (uint32_t i = 0; i < num_cols;) {
- if (!batch_all_cols_[i].metadata().is_fixed_length) {
- i += 1;
- continue;
- }
- bool can_process_pair =
- (i + 1 < num_cols) && batch_all_cols_[i + 1].metadata().is_fixed_length &&
- EncoderBinaryPair::CanProcessPair(batch_all_cols_[i].metadata(),
- batch_all_cols_[i + 1].metadata());
- if (!can_process_pair) {
- EncoderBinary::Encode(row_metadata_.column_offsets[i], rows, batch_all_cols_[i],
- ctx_, &temp_buffer_A);
- i += 1;
- } else {
- EncoderBinaryPair::Encode(row_metadata_.column_offsets[i], rows, batch_all_cols_[i],
- batch_all_cols_[i + 1], ctx_, &temp_buffer_A,
- &temp_buffer_B);
- i += 2;
- }
- }
-
- // Process nulls
- EncoderNulls::Encode(rows, batch_all_cols_, ctx_, &temp_buffer_A);
-}
-
-void KeyEncoder::DecodeFixedLengthBuffers(int64_t start_row_input,
- int64_t start_row_output, int64_t num_rows,
- const KeyRowArray& rows,
- std::vector<KeyColumnArray>* cols) {
- // Prepare column array vectors
- PrepareKeyColumnArrays(start_row_output, num_rows, *cols);
-
- // Create two temp vectors with 16-bit elements
- auto temp_buffer_holder_A =
- util::TempVectorHolder<uint16_t>(ctx_->stack, static_cast<uint32_t>(num_rows));
- auto temp_buffer_A = KeyColumnArray(
- KeyColumnMetadata(true, sizeof(uint16_t)), num_rows, nullptr,
- reinterpret_cast<uint8_t*>(temp_buffer_holder_A.mutable_data()), nullptr);
- auto temp_buffer_holder_B =
- util::TempVectorHolder<uint16_t>(ctx_->stack, static_cast<uint32_t>(num_rows));
- auto temp_buffer_B = KeyColumnArray(
- KeyColumnMetadata(true, sizeof(uint16_t)), num_rows, nullptr,
- reinterpret_cast<uint8_t*>(temp_buffer_holder_B.mutable_data()), nullptr);
-
- bool is_row_fixed_length = row_metadata_.is_fixed_length;
- if (!is_row_fixed_length) {
- EncoderOffsets::Decode(static_cast<uint32_t>(start_row_input),
- static_cast<uint32_t>(num_rows), rows, &batch_varbinary_cols_,
- batch_varbinary_cols_base_offsets_, ctx_);
- }
-
- // Process fixed length columns
- const auto num_cols = static_cast<uint32_t>(batch_all_cols_.size());
- for (uint32_t i = 0; i < num_cols;) {
- if (!batch_all_cols_[i].metadata().is_fixed_length) {
- i += 1;
- continue;
- }
- bool can_process_pair =
- (i + 1 < num_cols) && batch_all_cols_[i + 1].metadata().is_fixed_length &&
- EncoderBinaryPair::CanProcessPair(batch_all_cols_[i].metadata(),
- batch_all_cols_[i + 1].metadata());
- if (!can_process_pair) {
- EncoderBinary::Decode(static_cast<uint32_t>(start_row_input),
- static_cast<uint32_t>(num_rows),
- row_metadata_.column_offsets[i], rows, &batch_all_cols_[i],
- ctx_, &temp_buffer_A);
- i += 1;
- } else {
- EncoderBinaryPair::Decode(
- static_cast<uint32_t>(start_row_input), static_cast<uint32_t>(num_rows),
- row_metadata_.column_offsets[i], rows, &batch_all_cols_[i],
- &batch_all_cols_[i + 1], ctx_, &temp_buffer_A, &temp_buffer_B);
- i += 2;
- }
- }
-
- // Process nulls
- EncoderNulls::Decode(static_cast<uint32_t>(start_row_input),
- static_cast<uint32_t>(num_rows), rows, &batch_all_cols_);
-}
-
-void KeyEncoder::DecodeVaryingLengthBuffers(int64_t start_row_input,
- int64_t start_row_output, int64_t num_rows,
- const KeyRowArray& rows,
- std::vector<KeyColumnArray>* cols) {
- // Prepare column array vectors
- PrepareKeyColumnArrays(start_row_output, num_rows, *cols);
-
- bool is_row_fixed_length = row_metadata_.is_fixed_length;
- if (!is_row_fixed_length) {
- for (size_t i = 0; i < batch_varbinary_cols_.size(); ++i) {
- // Memcpy each varbinary field out of the row buffer into the output column,
- // using the column offsets precomputed during fixed-length decoding.
- EncoderVarBinary::Decode(static_cast<uint32_t>(start_row_input),
- static_cast<uint32_t>(num_rows), static_cast<uint32_t>(i),
- rows, &batch_varbinary_cols_[i], ctx_);
- }
- }
-}
-
-} // namespace compute
-} // namespace arrow
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "arrow/compute/exec/key_encode.h"
+
+#include <cstring>
+
+#include <algorithm>
+
+#include "arrow/compute/exec/util.h"
+#include "arrow/util/bit_util.h"
+#include "arrow/util/ubsan.h"
+
+namespace arrow {
+namespace compute {
+
+KeyEncoder::KeyRowArray::KeyRowArray()
+ : pool_(nullptr), rows_capacity_(0), bytes_capacity_(0) {}
+
+Status KeyEncoder::KeyRowArray::Init(MemoryPool* pool, const KeyRowMetadata& metadata) {
+ pool_ = pool;
+ metadata_ = metadata;
+
+ DCHECK(!null_masks_ && !offsets_ && !rows_);
+
+ constexpr int64_t rows_capacity = 8;
+ constexpr int64_t bytes_capacity = 1024;
+
+ // Null masks
+ ARROW_ASSIGN_OR_RAISE(auto null_masks,
+ AllocateResizableBuffer(size_null_masks(rows_capacity), pool_));
+ null_masks_ = std::move(null_masks);
+ memset(null_masks_->mutable_data(), 0, size_null_masks(rows_capacity));
+
+ // Offsets and rows
+ if (!metadata.is_fixed_length) {
+ ARROW_ASSIGN_OR_RAISE(auto offsets,
+ AllocateResizableBuffer(size_offsets(rows_capacity), pool_));
+ offsets_ = std::move(offsets);
+ memset(offsets_->mutable_data(), 0, size_offsets(rows_capacity));
+ reinterpret_cast<uint32_t*>(offsets_->mutable_data())[0] = 0;
+
+ ARROW_ASSIGN_OR_RAISE(
+ auto rows,
+ AllocateResizableBuffer(size_rows_varying_length(bytes_capacity), pool_));
+ rows_ = std::move(rows);
+ memset(rows_->mutable_data(), 0, size_rows_varying_length(bytes_capacity));
+ bytes_capacity_ = size_rows_varying_length(bytes_capacity) - padding_for_vectors;
+ } else {
+ ARROW_ASSIGN_OR_RAISE(
+ auto rows, AllocateResizableBuffer(size_rows_fixed_length(rows_capacity), pool_));
+ rows_ = std::move(rows);
+ memset(rows_->mutable_data(), 0, size_rows_fixed_length(rows_capacity));
+ bytes_capacity_ = size_rows_fixed_length(rows_capacity) - padding_for_vectors;
+ }
+
+ update_buffer_pointers();
+
+ rows_capacity_ = rows_capacity;
+
+ num_rows_ = 0;
+ num_rows_for_has_any_nulls_ = 0;
+ has_any_nulls_ = false;
+
+ return Status::OK();
+}
+
+void KeyEncoder::KeyRowArray::Clean() {
+ num_rows_ = 0;
+ num_rows_for_has_any_nulls_ = 0;
+ has_any_nulls_ = false;
+
+ if (!metadata_.is_fixed_length) {
+ reinterpret_cast<uint32_t*>(offsets_->mutable_data())[0] = 0;
+ }
+}
+
+int64_t KeyEncoder::KeyRowArray::size_null_masks(int64_t num_rows) {
+ return num_rows * metadata_.null_masks_bytes_per_row + padding_for_vectors;
+}
+
+int64_t KeyEncoder::KeyRowArray::size_offsets(int64_t num_rows) {
+ return (num_rows + 1) * sizeof(uint32_t) + padding_for_vectors;
+}
+
+int64_t KeyEncoder::KeyRowArray::size_rows_fixed_length(int64_t num_rows) {
+ return num_rows * metadata_.fixed_length + padding_for_vectors;
+}
+
+int64_t KeyEncoder::KeyRowArray::size_rows_varying_length(int64_t num_bytes) {
+ return num_bytes + padding_for_vectors;
+}
+
+void KeyEncoder::KeyRowArray::update_buffer_pointers() {
+ buffers_[0] = mutable_buffers_[0] = null_masks_->mutable_data();
+ if (metadata_.is_fixed_length) {
+ buffers_[1] = mutable_buffers_[1] = rows_->mutable_data();
+ buffers_[2] = mutable_buffers_[2] = nullptr;
+ } else {
+ buffers_[1] = mutable_buffers_[1] = offsets_->mutable_data();
+ buffers_[2] = mutable_buffers_[2] = rows_->mutable_data();
+ }
+}
+
+Status KeyEncoder::KeyRowArray::ResizeFixedLengthBuffers(int64_t num_extra_rows) {
+ if (rows_capacity_ >= num_rows_ + num_extra_rows) {
+ return Status::OK();
+ }
+
+ int64_t rows_capacity_new = std::max(static_cast<int64_t>(1), 2 * rows_capacity_);
+ while (rows_capacity_new < num_rows_ + num_extra_rows) {
+ rows_capacity_new *= 2;
+ }
+
+ // Null masks
+ RETURN_NOT_OK(null_masks_->Resize(size_null_masks(rows_capacity_new), false));
+ memset(null_masks_->mutable_data() + size_null_masks(rows_capacity_), 0,
+ size_null_masks(rows_capacity_new) - size_null_masks(rows_capacity_));
+
+ // Either offsets or rows
+ if (!metadata_.is_fixed_length) {
+ RETURN_NOT_OK(offsets_->Resize(size_offsets(rows_capacity_new), false));
+ memset(offsets_->mutable_data() + size_offsets(rows_capacity_), 0,
+ size_offsets(rows_capacity_new) - size_offsets(rows_capacity_));
+ } else {
+ RETURN_NOT_OK(rows_->Resize(size_rows_fixed_length(rows_capacity_new), false));
+ memset(rows_->mutable_data() + size_rows_fixed_length(rows_capacity_), 0,
+ size_rows_fixed_length(rows_capacity_new) -
+ size_rows_fixed_length(rows_capacity_));
+ bytes_capacity_ = size_rows_fixed_length(rows_capacity_new) - padding_for_vectors;
+ }
+
+ update_buffer_pointers();
+
+ rows_capacity_ = rows_capacity_new;
+
+ return Status::OK();
+}
+
+Status KeyEncoder::KeyRowArray::ResizeOptionalVaryingLengthBuffer(
+ int64_t num_extra_bytes) {
+ int64_t num_bytes = offsets()[num_rows_];
+ if (bytes_capacity_ >= num_bytes + num_extra_bytes || metadata_.is_fixed_length) {
+ return Status::OK();
+ }
+
+ int64_t bytes_capacity_new = std::max(static_cast<int64_t>(1), 2 * bytes_capacity_);
+ while (bytes_capacity_new < num_bytes + num_extra_bytes) {
+ bytes_capacity_new *= 2;
+ }
+
+ RETURN_NOT_OK(rows_->Resize(size_rows_varying_length(bytes_capacity_new), false));
+ memset(rows_->mutable_data() + size_rows_varying_length(bytes_capacity_), 0,
+ size_rows_varying_length(bytes_capacity_new) -
+ size_rows_varying_length(bytes_capacity_));
+
+ update_buffer_pointers();
+
+ bytes_capacity_ = bytes_capacity_new;
+
+ return Status::OK();
+}
+
+Status KeyEncoder::KeyRowArray::AppendSelectionFrom(const KeyRowArray& from,
+ uint32_t num_rows_to_append,
+ const uint16_t* source_row_ids) {
+ DCHECK(metadata_.is_compatible(from.metadata()));
+
+ RETURN_NOT_OK(ResizeFixedLengthBuffers(num_rows_to_append));
+
+ if (!metadata_.is_fixed_length) {
+ // Varying-length rows
+ auto from_offsets = reinterpret_cast<const uint32_t*>(from.offsets_->data());
+ auto to_offsets = reinterpret_cast<uint32_t*>(offsets_->mutable_data());
+ uint32_t total_length = to_offsets[num_rows_];
+ uint32_t total_length_to_append = 0;
+ for (uint32_t i = 0; i < num_rows_to_append; ++i) {
+ uint16_t row_id = source_row_ids[i];
+ uint32_t length = from_offsets[row_id + 1] - from_offsets[row_id];
+ total_length_to_append += length;
+ to_offsets[num_rows_ + i + 1] = total_length + total_length_to_append;
+ }
+
+ RETURN_NOT_OK(ResizeOptionalVaryingLengthBuffer(total_length_to_append));
+
+ const uint8_t* src = from.rows_->data();
+ uint8_t* dst = rows_->mutable_data() + total_length;
+ for (uint32_t i = 0; i < num_rows_to_append; ++i) {
+ uint16_t row_id = source_row_ids[i];
+ uint32_t length = from_offsets[row_id + 1] - from_offsets[row_id];
+ auto src64 = reinterpret_cast<const uint64_t*>(src + from_offsets[row_id]);
+ auto dst64 = reinterpret_cast<uint64_t*>(dst);
+ for (uint32_t j = 0; j < (length + 7) / 8; ++j) {
+ dst64[j] = src64[j];
+ }
+ dst += length;
+ }
+ } else {
+ // Fixed-length rows
+ const uint8_t* src = from.rows_->data();
+ uint8_t* dst = rows_->mutable_data() + num_rows_ * metadata_.fixed_length;
+ for (uint32_t i = 0; i < num_rows_to_append; ++i) {
+ uint16_t row_id = source_row_ids[i];
+ uint32_t length = metadata_.fixed_length;
+ auto src64 = reinterpret_cast<const uint64_t*>(src + length * row_id);
+ auto dst64 = reinterpret_cast<uint64_t*>(dst);
+ for (uint32_t j = 0; j < (length + 7) / 8; ++j) {
+ dst64[j] = src64[j];
+ }
+ dst += length;
+ }
+ }
+
+ // Null masks
+ uint32_t byte_length = metadata_.null_masks_bytes_per_row;
+ uint64_t dst_byte_offset = num_rows_ * byte_length;
+ const uint8_t* src_base = from.null_masks_->data();
+ uint8_t* dst_base = null_masks_->mutable_data();
+ for (uint32_t i = 0; i < num_rows_to_append; ++i) {
+ uint32_t row_id = source_row_ids[i];
+ int64_t src_byte_offset = row_id * byte_length;
+ const uint8_t* src = src_base + src_byte_offset;
+ uint8_t* dst = dst_base + dst_byte_offset;
+ for (uint32_t ibyte = 0; ibyte < byte_length; ++ibyte) {
+ dst[ibyte] = src[ibyte];
+ }
+ dst_byte_offset += byte_length;
+ }
+
+ num_rows_ += num_rows_to_append;
+
+ return Status::OK();
+}
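+
+// Note that the 8-byte striped copies above may write up to 7 bytes past the
+// last appended row. This is safe because every buffer is allocated with
+// padding_for_vectors extra bytes at the end (see the size_* helpers above).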
+
+Status KeyEncoder::KeyRowArray::AppendEmpty(uint32_t num_rows_to_append,
+ uint32_t num_extra_bytes_to_append) {
+ RETURN_NOT_OK(ResizeFixedLengthBuffers(num_rows_to_append));
+ RETURN_NOT_OK(ResizeOptionalVaryingLengthBuffer(num_extra_bytes_to_append));
+ num_rows_ += num_rows_to_append;
+ if (metadata_.row_alignment > 1 || metadata_.string_alignment > 1) {
+ memset(rows_->mutable_data(), 0, bytes_capacity_);
+ }
+ return Status::OK();
+}
+
+bool KeyEncoder::KeyRowArray::has_any_nulls(const KeyEncoderContext* ctx) const {
+ if (has_any_nulls_) {
+ return true;
+ }
+ if (num_rows_for_has_any_nulls_ < num_rows_) {
+ auto size_per_row = metadata().null_masks_bytes_per_row;
+ has_any_nulls_ = !util::BitUtil::are_all_bytes_zero(
+ ctx->hardware_flags, null_masks() + size_per_row * num_rows_for_has_any_nulls_,
+ static_cast<uint32_t>(size_per_row * (num_rows_ - num_rows_for_has_any_nulls_)));
+ num_rows_for_has_any_nulls_ = num_rows_;
+ }
+ return has_any_nulls_;
+}
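+
+// has_any_nulls() is incremental: it scans only the null-mask bytes of rows
+// appended since the previous call and caches both the scan position and the
+// answer, so repeated calls on an unchanged array are O(1).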
+
+KeyEncoder::KeyColumnArray::KeyColumnArray(const KeyColumnMetadata& metadata,
+ const KeyColumnArray& left,
+ const KeyColumnArray& right,
+ int buffer_id_to_replace) {
+ metadata_ = metadata;
+ length_ = left.length();
+ for (int i = 0; i < max_buffers_; ++i) {
+ buffers_[i] = left.buffers_[i];
+ mutable_buffers_[i] = left.mutable_buffers_[i];
+ }
+ buffers_[buffer_id_to_replace] = right.buffers_[buffer_id_to_replace];
+ mutable_buffers_[buffer_id_to_replace] = right.mutable_buffers_[buffer_id_to_replace];
+ bit_offset_[0] = left.bit_offset_[0];
+ bit_offset_[1] = left.bit_offset_[1];
+ if (buffer_id_to_replace < max_buffers_ - 1) {
+ bit_offset_[buffer_id_to_replace] = right.bit_offset_[buffer_id_to_replace];
+ }
+}
+
+KeyEncoder::KeyColumnArray::KeyColumnArray(const KeyColumnMetadata& metadata,
+ int64_t length, const uint8_t* buffer0,
+ const uint8_t* buffer1, const uint8_t* buffer2,
+ int bit_offset0, int bit_offset1) {
+ metadata_ = metadata;
+ length_ = length;
+ buffers_[0] = buffer0;
+ buffers_[1] = buffer1;
+ buffers_[2] = buffer2;
+ mutable_buffers_[0] = mutable_buffers_[1] = mutable_buffers_[2] = nullptr;
+ bit_offset_[0] = bit_offset0;
+ bit_offset_[1] = bit_offset1;
+}
+
+KeyEncoder::KeyColumnArray::KeyColumnArray(const KeyColumnMetadata& metadata,
+ int64_t length, uint8_t* buffer0,
+ uint8_t* buffer1, uint8_t* buffer2,
+ int bit_offset0, int bit_offset1) {
+ metadata_ = metadata;
+ length_ = length;
+ buffers_[0] = mutable_buffers_[0] = buffer0;
+ buffers_[1] = mutable_buffers_[1] = buffer1;
+ buffers_[2] = mutable_buffers_[2] = buffer2;
+ bit_offset_[0] = bit_offset0;
+ bit_offset_[1] = bit_offset1;
+}
+
+KeyEncoder::KeyColumnArray::KeyColumnArray(const KeyColumnArray& from, int64_t start,
+ int64_t length) {
+ metadata_ = from.metadata_;
+ length_ = length;
+ uint32_t fixed_size =
+ !metadata_.is_fixed_length ? sizeof(uint32_t) : metadata_.fixed_length;
+
+ buffers_[0] =
+ from.buffers_[0] ? from.buffers_[0] + (from.bit_offset_[0] + start) / 8 : nullptr;
+ mutable_buffers_[0] = from.mutable_buffers_[0]
+ ? from.mutable_buffers_[0] + (from.bit_offset_[0] + start) / 8
+ : nullptr;
+ bit_offset_[0] = (from.bit_offset_[0] + start) % 8;
+
+ if (fixed_size == 0) {
+ buffers_[1] =
+ from.buffers_[1] ? from.buffers_[1] + (from.bit_offset_[1] + start) / 8 : nullptr;
+ mutable_buffers_[1] = from.mutable_buffers_[1] ? from.mutable_buffers_[1] +
+ (from.bit_offset_[1] + start) / 8
+ : nullptr;
+ bit_offset_[1] = (from.bit_offset_[1] + start) % 8;
+ } else {
+ buffers_[1] = from.buffers_[1] ? from.buffers_[1] + start * fixed_size : nullptr;
+ mutable_buffers_[1] = from.mutable_buffers_[1]
+ ? from.mutable_buffers_[1] + start * fixed_size
+ : nullptr;
+ bit_offset_[1] = 0;
+ }
+
+ buffers_[2] = from.buffers_[2];
+ mutable_buffers_[2] = from.mutable_buffers_[2];
+}
+
+KeyEncoder::KeyColumnArray KeyEncoder::TransformBoolean::ArrayReplace(
+ const KeyColumnArray& column, const KeyColumnArray& temp) {
+ // Make sure that the temp buffer is large enough
+ DCHECK(temp.length() >= column.length() && temp.metadata().is_fixed_length &&
+ temp.metadata().fixed_length >= sizeof(uint8_t));
+ KeyColumnMetadata metadata;
+ metadata.is_fixed_length = true;
+ metadata.fixed_length = sizeof(uint8_t);
+ constexpr int buffer_index = 1;
+ KeyColumnArray result = KeyColumnArray(metadata, column, temp, buffer_index);
+ return result;
+}
+
+void KeyEncoder::TransformBoolean::PreEncode(const KeyColumnArray& input,
+ KeyColumnArray* output,
+ KeyEncoderContext* ctx) {
+ // Make sure that metadata and lengths are compatible.
+ DCHECK(output->metadata().is_fixed_length == input.metadata().is_fixed_length);
+ DCHECK(output->metadata().fixed_length == 1 && input.metadata().fixed_length == 0);
+ DCHECK(output->length() == input.length());
+ constexpr int buffer_index = 1;
+ DCHECK(input.data(buffer_index) != nullptr);
+ DCHECK(output->mutable_data(buffer_index) != nullptr);
+ util::BitUtil::bits_to_bytes(
+ ctx->hardware_flags, static_cast<int>(input.length()), input.data(buffer_index),
+ output->mutable_data(buffer_index), input.bit_offset(buffer_index));
+}
+
+void KeyEncoder::TransformBoolean::PostDecode(const KeyColumnArray& input,
+ KeyColumnArray* output,
+ KeyEncoderContext* ctx) {
+ // Make sure that metadata and lengths are compatible.
+ DCHECK(output->metadata().is_fixed_length == input.metadata().is_fixed_length);
+ DCHECK(output->metadata().fixed_length == 0 && input.metadata().fixed_length == 1);
+ DCHECK(output->length() == input.length());
+ constexpr int buffer_index = 1;
+ DCHECK(input.data(buffer_index) != nullptr);
+ DCHECK(output->mutable_data(buffer_index) != nullptr);
+
+ util::BitUtil::bytes_to_bits(
+ ctx->hardware_flags, static_cast<int>(input.length()), input.data(buffer_index),
+ output->mutable_data(buffer_index), output->bit_offset(buffer_index));
+}
+
+bool KeyEncoder::EncoderInteger::IsBoolean(const KeyColumnMetadata& metadata) {
+ return metadata.is_fixed_length && metadata.fixed_length == 0;
+}
+
+bool KeyEncoder::EncoderInteger::UsesTransform(const KeyColumnArray& column) {
+ return IsBoolean(column.metadata());
+}
+
+KeyEncoder::KeyColumnArray KeyEncoder::EncoderInteger::ArrayReplace(
+ const KeyColumnArray& column, const KeyColumnArray& temp) {
+ if (IsBoolean(column.metadata())) {
+ return TransformBoolean::ArrayReplace(column, temp);
+ }
+ return column;
+}
+
+void KeyEncoder::EncoderInteger::PreEncode(const KeyColumnArray& input,
+ KeyColumnArray* output,
+ KeyEncoderContext* ctx) {
+ if (IsBoolean(input.metadata())) {
+ TransformBoolean::PreEncode(input, output, ctx);
+ }
+}
+
+void KeyEncoder::EncoderInteger::PostDecode(const KeyColumnArray& input,
+ KeyColumnArray* output,
+ KeyEncoderContext* ctx) {
+ if (IsBoolean(output->metadata())) {
+ TransformBoolean::PostDecode(input, output, ctx);
+ }
+}
+
+void KeyEncoder::EncoderInteger::Encode(uint32_t offset_within_row, KeyRowArray* rows,
+ const KeyColumnArray& col, KeyEncoderContext* ctx,
+ KeyColumnArray* temp) {
+ KeyColumnArray col_prep;
+ if (UsesTransform(col)) {
+ col_prep = ArrayReplace(col, *temp);
+ PreEncode(col, &col_prep, ctx);
+ } else {
+ col_prep = col;
+ }
+
+ const auto num_rows = static_cast<uint32_t>(col.length());
+
+ // When the row consists of a single fixed-length column we can simply memcpy
+ if (rows->metadata().is_fixed_length &&
+ rows->metadata().fixed_length == col.metadata().fixed_length) {
+ DCHECK_EQ(offset_within_row, 0);
+ uint32_t row_size = col.metadata().fixed_length;
+ memcpy(rows->mutable_data(1), col.data(1), num_rows * row_size);
+ } else if (rows->metadata().is_fixed_length) {
+ uint32_t row_size = rows->metadata().fixed_length;
+ uint8_t* row_base = rows->mutable_data(1) + offset_within_row;
+ const uint8_t* col_base = col_prep.data(1);
+ switch (col_prep.metadata().fixed_length) {
+ case 1:
+ for (uint32_t i = 0; i < num_rows; ++i) {
+ row_base[i * row_size] = col_base[i];
+ }
+ break;
+ case 2:
+ for (uint32_t i = 0; i < num_rows; ++i) {
+ *reinterpret_cast<uint16_t*>(row_base + i * row_size) =
+ reinterpret_cast<const uint16_t*>(col_base)[i];
+ }
+ break;
+ case 4:
+ for (uint32_t i = 0; i < num_rows; ++i) {
+ *reinterpret_cast<uint32_t*>(row_base + i * row_size) =
+ reinterpret_cast<const uint32_t*>(col_base)[i];
+ }
+ break;
+ case 8:
+ for (uint32_t i = 0; i < num_rows; ++i) {
+ *reinterpret_cast<uint64_t*>(row_base + i * row_size) =
+ reinterpret_cast<const uint64_t*>(col_base)[i];
+ }
+ break;
+ default:
+ DCHECK(false);
+ }
+ } else {
+ const uint32_t* row_offsets = rows->offsets();
+ uint8_t* row_base = rows->mutable_data(2) + offset_within_row;
+ const uint8_t* col_base = col_prep.data(1);
+ switch (col_prep.metadata().fixed_length) {
+ case 1:
+ for (uint32_t i = 0; i < num_rows; ++i) {
+ row_base[row_offsets[i]] = col_base[i];
+ }
+ break;
+ case 2:
+ for (uint32_t i = 0; i < num_rows; ++i) {
+ *reinterpret_cast<uint16_t*>(row_base + row_offsets[i]) =
+ reinterpret_cast<const uint16_t*>(col_base)[i];
+ }
+ break;
+ case 4:
+ for (uint32_t i = 0; i < num_rows; ++i) {
+ *reinterpret_cast<uint32_t*>(row_base + row_offsets[i]) =
+ reinterpret_cast<const uint32_t*>(col_base)[i];
+ }
+ break;
+ case 8:
+ for (uint32_t i = 0; i < num_rows; ++i) {
+ *reinterpret_cast<uint64_t*>(row_base + row_offsets[i]) =
+ reinterpret_cast<const uint64_t*>(col_base)[i];
+ }
+ break;
+ default:
+ DCHECK(false);
+ }
+ }
+}
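+
+// The switch statements above only need cases 1, 2, 4 and 8: boolean columns
+// (fixed_length 0) are widened to one byte per value by the TransformBoolean
+// pre-encoding step, and EncoderBinary routes all other widths away from this
+// encoder (see IsInteger), so the DCHECK(false) defaults are unreachable.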
+
+void KeyEncoder::EncoderInteger::Decode(uint32_t start_row, uint32_t num_rows,
+ uint32_t offset_within_row,
+ const KeyRowArray& rows, KeyColumnArray* col,
+ KeyEncoderContext* ctx, KeyColumnArray* temp) {
+ KeyColumnArray col_prep;
+ if (UsesTransform(*col)) {
+ col_prep = ArrayReplace(*col, *temp);
+ } else {
+ col_prep = *col;
+ }
+
+ // When the row consists of a single fixed-length column we can simply memcpy
+ if (rows.metadata().is_fixed_length &&
+ col_prep.metadata().fixed_length == rows.metadata().fixed_length) {
+ DCHECK_EQ(offset_within_row, 0);
+ uint32_t row_size = rows.metadata().fixed_length;
+ memcpy(col_prep.mutable_data(1), rows.data(1) + start_row * row_size,
+ num_rows * row_size);
+ } else if (rows.metadata().is_fixed_length) {
+ uint32_t row_size = rows.metadata().fixed_length;
+ const uint8_t* row_base = rows.data(1) + start_row * row_size;
+ row_base += offset_within_row;
+ uint8_t* col_base = col_prep.mutable_data(1);
+ switch (col_prep.metadata().fixed_length) {
+ case 1:
+ for (uint32_t i = 0; i < num_rows; ++i) {
+ col_base[i] = row_base[i * row_size];
+ }
+ break;
+ case 2:
+ for (uint32_t i = 0; i < num_rows; ++i) {
+ reinterpret_cast<uint16_t*>(col_base)[i] =
+ *reinterpret_cast<const uint16_t*>(row_base + i * row_size);
+ }
+ break;
+ case 4:
+ for (uint32_t i = 0; i < num_rows; ++i) {
+ reinterpret_cast<uint32_t*>(col_base)[i] =
+ *reinterpret_cast<const uint32_t*>(row_base + i * row_size);
+ }
+ break;
+ case 8:
+ for (uint32_t i = 0; i < num_rows; ++i) {
+ reinterpret_cast<uint64_t*>(col_base)[i] =
+ *reinterpret_cast<const uint64_t*>(row_base + i * row_size);
+ }
+ break;
+ default:
+ DCHECK(false);
+ }
+ } else {
+ const uint32_t* row_offsets = rows.offsets() + start_row;
+ const uint8_t* row_base = rows.data(2);
+ row_base += offset_within_row;
+ uint8_t* col_base = col_prep.mutable_data(1);
+ switch (col_prep.metadata().fixed_length) {
+ case 1:
+ for (uint32_t i = 0; i < num_rows; ++i) {
+ col_base[i] = row_base[row_offsets[i]];
+ }
+ break;
+ case 2:
+ for (uint32_t i = 0; i < num_rows; ++i) {
+ reinterpret_cast<uint16_t*>(col_base)[i] =
+ *reinterpret_cast<const uint16_t*>(row_base + row_offsets[i]);
+ }
+ break;
+ case 4:
+ for (uint32_t i = 0; i < num_rows; ++i) {
+ reinterpret_cast<uint32_t*>(col_base)[i] =
+ *reinterpret_cast<const uint32_t*>(row_base + row_offsets[i]);
+ }
+ break;
+ case 8:
+ for (uint32_t i = 0; i < num_rows; ++i) {
+ reinterpret_cast<uint64_t*>(col_base)[i] =
+ *reinterpret_cast<const uint64_t*>(row_base + row_offsets[i]);
+ }
+ break;
+ default:
+ DCHECK(false);
+ }
+ }
+
+ if (UsesTransform(*col)) {
+ PostDecode(col_prep, col, ctx);
+ }
+}
+
+bool KeyEncoder::EncoderBinary::IsInteger(const KeyColumnMetadata& metadata) {
+ bool is_fixed_length = metadata.is_fixed_length;
+ auto size = metadata.fixed_length;
+ return is_fixed_length &&
+ (size == 0 || size == 1 || size == 2 || size == 4 || size == 8);
+}
+
+void KeyEncoder::EncoderBinary::Encode(uint32_t offset_within_row, KeyRowArray* rows,
+ const KeyColumnArray& col, KeyEncoderContext* ctx,
+ KeyColumnArray* temp) {
+ if (IsInteger(col.metadata())) {
+ EncoderInteger::Encode(offset_within_row, rows, col, ctx, temp);
+ } else {
+ KeyColumnArray col_prep;
+ if (EncoderInteger::UsesTransform(col)) {
+ col_prep = EncoderInteger::ArrayReplace(col, *temp);
+ EncoderInteger::PreEncode(col, &col_prep, ctx);
+ } else {
+ col_prep = col;
+ }
+
+ bool is_row_fixed_length = rows->metadata().is_fixed_length;
+
+#if defined(ARROW_HAVE_AVX2)
+ if (ctx->has_avx2()) {
+ EncodeHelper_avx2(is_row_fixed_length, offset_within_row, rows, col);
+ } else {
+#endif
+ if (is_row_fixed_length) {
+ EncodeImp<true>(offset_within_row, rows, col);
+ } else {
+ EncodeImp<false>(offset_within_row, rows, col);
+ }
+#if defined(ARROW_HAVE_AVX2)
+ }
+#endif
+ }
+
+ DCHECK(temp->metadata().is_fixed_length);
+ DCHECK(temp->length() * temp->metadata().fixed_length >=
+ col.length() * static_cast<int64_t>(sizeof(uint16_t)));
+
+ KeyColumnArray temp16bit(KeyColumnMetadata(true, sizeof(uint16_t)), col.length(),
+ nullptr, temp->mutable_data(1), nullptr);
+ ColumnMemsetNulls(offset_within_row, rows, col, ctx, &temp16bit, 0xae);
+}
+
+void KeyEncoder::EncoderBinary::Decode(uint32_t start_row, uint32_t num_rows,
+ uint32_t offset_within_row,
+ const KeyRowArray& rows, KeyColumnArray* col,
+ KeyEncoderContext* ctx, KeyColumnArray* temp) {
+ if (IsInteger(col->metadata())) {
+ EncoderInteger::Decode(start_row, num_rows, offset_within_row, rows, col, ctx, temp);
+ } else {
+ KeyColumnArray col_prep;
+ if (EncoderInteger::UsesTransform(*col)) {
+ col_prep = EncoderInteger::ArrayReplace(*col, *temp);
+ } else {
+ col_prep = *col;
+ }
+
+ bool is_row_fixed_length = rows.metadata().is_fixed_length;
+
+#if defined(ARROW_HAVE_AVX2)
+ if (ctx->has_avx2()) {
+ DecodeHelper_avx2(is_row_fixed_length, start_row, num_rows, offset_within_row, rows,
+ col);
+ } else {
+#endif
+ if (is_row_fixed_length) {
+ DecodeImp<true>(start_row, num_rows, offset_within_row, rows, col);
+ } else {
+ DecodeImp<false>(start_row, num_rows, offset_within_row, rows, col);
+ }
+#if defined(ARROW_HAVE_AVX2)
+ }
+#endif
+
+ if (EncoderInteger::UsesTransform(*col)) {
+ EncoderInteger::PostDecode(col_prep, col, ctx);
+ }
+ }
+}
+
+template <bool is_row_fixed_length>
+void KeyEncoder::EncoderBinary::EncodeImp(uint32_t offset_within_row, KeyRowArray* rows,
+ const KeyColumnArray& col) {
+ EncodeDecodeHelper<is_row_fixed_length, true>(
+ 0, static_cast<uint32_t>(col.length()), offset_within_row, rows, rows, &col,
+ nullptr, [](uint8_t* dst, const uint8_t* src, int64_t length) {
+ auto dst64 = reinterpret_cast<uint64_t*>(dst);
+ auto src64 = reinterpret_cast<const uint64_t*>(src);
+ uint32_t istripe;
+ for (istripe = 0; istripe < length / 8; ++istripe) {
+ dst64[istripe] = util::SafeLoad(src64 + istripe);
+ }
+ if ((length % 8) > 0) {
+ uint64_t mask_last = ~0ULL >> (8 * (8 * (istripe + 1) - length));
+ dst64[istripe] = (dst64[istripe] & ~mask_last) |
+ (util::SafeLoad(src64 + istripe) & mask_last);
+ }
+ });
+}
+
+template <bool is_row_fixed_length>
+void KeyEncoder::EncoderBinary::DecodeImp(uint32_t start_row, uint32_t num_rows,
+ uint32_t offset_within_row,
+ const KeyRowArray& rows, KeyColumnArray* col) {
+ EncodeDecodeHelper<is_row_fixed_length, false>(
+ start_row, num_rows, offset_within_row, &rows, nullptr, col, col,
+ [](uint8_t* dst, const uint8_t* src, int64_t length) {
+ for (uint32_t istripe = 0; istripe < (length + 7) / 8; ++istripe) {
+ auto dst64 = reinterpret_cast<uint64_t*>(dst);
+ auto src64 = reinterpret_cast<const uint64_t*>(src);
+ util::SafeStore(dst64 + istripe, src64[istripe]);
+ }
+ });
+}
+
+void KeyEncoder::EncoderBinary::ColumnMemsetNulls(
+ uint32_t offset_within_row, KeyRowArray* rows, const KeyColumnArray& col,
+ KeyEncoderContext* ctx, KeyColumnArray* temp_vector_16bit, uint8_t byte_value) {
+ using ColumnMemsetNullsImp_t = void (*)(uint32_t, KeyRowArray*, const KeyColumnArray&,
+ KeyEncoderContext*, KeyColumnArray*, uint8_t);
+ static const ColumnMemsetNullsImp_t ColumnMemsetNullsImp_fn[] = {
+ ColumnMemsetNullsImp<false, 1>, ColumnMemsetNullsImp<false, 2>,
+ ColumnMemsetNullsImp<false, 4>, ColumnMemsetNullsImp<false, 8>,
+ ColumnMemsetNullsImp<false, 16>, ColumnMemsetNullsImp<true, 1>,
+ ColumnMemsetNullsImp<true, 2>, ColumnMemsetNullsImp<true, 4>,
+ ColumnMemsetNullsImp<true, 8>, ColumnMemsetNullsImp<true, 16>};
+ uint32_t col_width = col.metadata().fixed_length;
+ int dispatch_const =
+ (rows->metadata().is_fixed_length ? 5 : 0) +
+ (col_width == 1 ? 0
+ : col_width == 2 ? 1 : col_width == 4 ? 2 : col_width == 8 ? 3 : 4);
+ ColumnMemsetNullsImp_fn[dispatch_const](offset_within_row, rows, col, ctx,
+ temp_vector_16bit, byte_value);
+}
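+
+// The dispatch index above combines row layout and column width: fixed-length
+// rows add 5, and widths {1, 2, 4, 8, other} map to {0, 1, 2, 3, 4}. For
+// example, a 4-byte column in a varying-length row selects index 2, i.e.
+// ColumnMemsetNullsImp<false, 4>.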
+
+template <bool is_row_fixed_length, uint32_t col_width>
+void KeyEncoder::EncoderBinary::ColumnMemsetNullsImp(
+ uint32_t offset_within_row, KeyRowArray* rows, const KeyColumnArray& col,
+ KeyEncoderContext* ctx, KeyColumnArray* temp_vector_16bit, uint8_t byte_value) {
+ // Nothing to do when there are no nulls
+ if (!col.data(0)) {
+ return;
+ }
+
+ const auto num_rows = static_cast<uint32_t>(col.length());
+
+ // Temp vector needs space for the required number of rows
+ DCHECK(temp_vector_16bit->length() >= num_rows);
+ DCHECK(temp_vector_16bit->metadata().is_fixed_length &&
+ temp_vector_16bit->metadata().fixed_length == sizeof(uint16_t));
+ auto temp_vector = reinterpret_cast<uint16_t*>(temp_vector_16bit->mutable_data(1));
+
+ // Convert the non-nulls bit vector into a vector of indexes of null positions
+ int num_selected;
+ util::BitUtil::bits_to_indexes(0, ctx->hardware_flags, static_cast<int>(col.length()),
+ col.data(0), &num_selected, temp_vector,
+ col.bit_offset(0));
+
+ for (int i = 0; i < num_selected; ++i) {
+ uint32_t row_id = temp_vector[i];
+
+ // Target binary field pointer
+ uint8_t* dst;
+ if (is_row_fixed_length) {
+ dst = rows->mutable_data(1) + rows->metadata().fixed_length * row_id;
+ } else {
+ dst = rows->mutable_data(2) + rows->offsets()[row_id];
+ }
+ dst += offset_within_row;
+
+ if (col_width == 1) {
+ *dst = byte_value;
+ } else if (col_width == 2) {
+ *reinterpret_cast<uint16_t*>(dst) =
+ (static_cast<uint16_t>(byte_value) * static_cast<uint16_t>(0x0101));
+ } else if (col_width == 4) {
+ *reinterpret_cast<uint32_t*>(dst) =
+ (static_cast<uint32_t>(byte_value) * static_cast<uint32_t>(0x01010101));
+ } else if (col_width == 8) {
+ *reinterpret_cast<uint64_t*>(dst) =
+ (static_cast<uint64_t>(byte_value) * 0x0101010101010101ULL);
+ } else {
+ uint64_t value = (static_cast<uint64_t>(byte_value) * 0x0101010101010101ULL);
+ uint32_t col_width_actual = col.metadata().fixed_length;
+ uint32_t j;
+ for (j = 0; j < col_width_actual / 8; ++j) {
+ reinterpret_cast<uint64_t*>(dst)[j] = value;
+ }
+ int tail = col_width_actual % 8;
+ if (tail) {
+ uint64_t mask = ~0ULL >> (8 * (8 - tail));
+ reinterpret_cast<uint64_t*>(dst)[j] =
+ (reinterpret_cast<const uint64_t*>(dst)[j] & ~mask) | (value & mask);
+ }
+ }
+ }
+}
+
+void KeyEncoder::EncoderBinaryPair::Encode(uint32_t offset_within_row, KeyRowArray* rows,
+ const KeyColumnArray& col1,
+ const KeyColumnArray& col2,
+ KeyEncoderContext* ctx, KeyColumnArray* temp1,
+ KeyColumnArray* temp2) {
+ DCHECK(CanProcessPair(col1.metadata(), col2.metadata()));
+
+ KeyColumnArray col_prep[2];
+ if (EncoderInteger::UsesTransform(col1)) {
+ col_prep[0] = EncoderInteger::ArrayReplace(col1, *temp1);
+ EncoderInteger::PreEncode(col1, &(col_prep[0]), ctx);
+ } else {
+ col_prep[0] = col1;
+ }
+ if (EncoderInteger::UsesTransform(col2)) {
+ col_prep[1] = EncoderInteger::ArrayReplace(col2, *temp2);
+ EncoderInteger::PreEncode(col2, &(col_prep[1]), ctx);
+ } else {
+ col_prep[1] = col2;
+ }
+
+ uint32_t col_width1 = col_prep[0].metadata().fixed_length;
+ uint32_t col_width2 = col_prep[1].metadata().fixed_length;
+ int log_col_width1 =
+ col_width1 == 8 ? 3 : col_width1 == 4 ? 2 : col_width1 == 2 ? 1 : 0;
+ int log_col_width2 =
+ col_width2 == 8 ? 3 : col_width2 == 4 ? 2 : col_width2 == 2 ? 1 : 0;
+
+ bool is_row_fixed_length = rows->metadata().is_fixed_length;
+
+ const auto num_rows = static_cast<uint32_t>(col1.length());
+ uint32_t num_processed = 0;
+#if defined(ARROW_HAVE_AVX2)
+ if (ctx->has_avx2() && col_width1 == col_width2) {
+ num_processed = EncodeHelper_avx2(is_row_fixed_length, col_width1, offset_within_row,
+ rows, col_prep[0], col_prep[1]);
+ }
+#endif
+ if (num_processed < num_rows) {
+ using EncodeImp_t = void (*)(uint32_t, uint32_t, KeyRowArray*, const KeyColumnArray&,
+ const KeyColumnArray&);
+ static const EncodeImp_t EncodeImp_fn[] = {
+ EncodeImp<false, uint8_t, uint8_t>, EncodeImp<false, uint16_t, uint8_t>,
+ EncodeImp<false, uint32_t, uint8_t>, EncodeImp<false, uint64_t, uint8_t>,
+ EncodeImp<false, uint8_t, uint16_t>, EncodeImp<false, uint16_t, uint16_t>,
+ EncodeImp<false, uint32_t, uint16_t>, EncodeImp<false, uint64_t, uint16_t>,
+ EncodeImp<false, uint8_t, uint32_t>, EncodeImp<false, uint16_t, uint32_t>,
+ EncodeImp<false, uint32_t, uint32_t>, EncodeImp<false, uint64_t, uint32_t>,
+ EncodeImp<false, uint8_t, uint64_t>, EncodeImp<false, uint16_t, uint64_t>,
+ EncodeImp<false, uint32_t, uint64_t>, EncodeImp<false, uint64_t, uint64_t>,
+ EncodeImp<true, uint8_t, uint8_t>, EncodeImp<true, uint16_t, uint8_t>,
+ EncodeImp<true, uint32_t, uint8_t>, EncodeImp<true, uint64_t, uint8_t>,
+ EncodeImp<true, uint8_t, uint16_t>, EncodeImp<true, uint16_t, uint16_t>,
+ EncodeImp<true, uint32_t, uint16_t>, EncodeImp<true, uint64_t, uint16_t>,
+ EncodeImp<true, uint8_t, uint32_t>, EncodeImp<true, uint16_t, uint32_t>,
+ EncodeImp<true, uint32_t, uint32_t>, EncodeImp<true, uint64_t, uint32_t>,
+ EncodeImp<true, uint8_t, uint64_t>, EncodeImp<true, uint16_t, uint64_t>,
+ EncodeImp<true, uint32_t, uint64_t>, EncodeImp<true, uint64_t, uint64_t>};
+ int dispatch_const = (log_col_width2 << 2) | log_col_width1;
+ dispatch_const += (is_row_fixed_length ? 16 : 0);
+ EncodeImp_fn[dispatch_const](num_processed, offset_within_row, rows, col_prep[0],
+ col_prep[1]);
+ }
+}
+
+template <bool is_row_fixed_length, typename col1_type, typename col2_type>
+void KeyEncoder::EncoderBinaryPair::EncodeImp(uint32_t num_rows_to_skip,
+ uint32_t offset_within_row,
+ KeyRowArray* rows,
+ const KeyColumnArray& col1,
+ const KeyColumnArray& col2) {
+ const uint8_t* src_A = col1.data(1);
+ const uint8_t* src_B = col2.data(1);
+
+ const auto num_rows = static_cast<uint32_t>(col1.length());
+
+ uint32_t fixed_length = rows->metadata().fixed_length;
+ const uint32_t* offsets;
+ uint8_t* dst_base;
+ if (is_row_fixed_length) {
+ dst_base = rows->mutable_data(1) + offset_within_row;
+ offsets = nullptr;
+ } else {
+ dst_base = rows->mutable_data(2) + offset_within_row;
+ offsets = rows->offsets();
+ }
+
+ using col1_type_const = typename std::add_const<col1_type>::type;
+ using col2_type_const = typename std::add_const<col2_type>::type;
+
+ if (is_row_fixed_length) {
+ uint8_t* dst = dst_base + num_rows_to_skip * fixed_length;
+ for (uint32_t i = num_rows_to_skip; i < num_rows; ++i) {
+ *reinterpret_cast<col1_type*>(dst) = reinterpret_cast<col1_type_const*>(src_A)[i];
+ *reinterpret_cast<col2_type*>(dst + sizeof(col1_type)) =
+ reinterpret_cast<col2_type_const*>(src_B)[i];
+ dst += fixed_length;
+ }
+ } else {
+ for (uint32_t i = num_rows_to_skip; i < num_rows; ++i) {
+ uint8_t* dst = dst_base + offsets[i];
+ *reinterpret_cast<col1_type*>(dst) = reinterpret_cast<col1_type_const*>(src_A)[i];
+ *reinterpret_cast<col2_type*>(dst + sizeof(col1_type)) =
+ reinterpret_cast<col2_type_const*>(src_B)[i];
+ }
+ }
+}
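+
+// Editorial note (not part of the original sources): EncodeImp stores the two
+// values of a pair back to back in every row, col1 at offset_within_row and
+// col2 immediately after it at offset_within_row + sizeof(col1_type).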
+
+void KeyEncoder::EncoderBinaryPair::Decode(uint32_t start_row, uint32_t num_rows,
+ uint32_t offset_within_row,
+ const KeyRowArray& rows, KeyColumnArray* col1,
+ KeyColumnArray* col2, KeyEncoderContext* ctx,
+ KeyColumnArray* temp1, KeyColumnArray* temp2) {
+ DCHECK(CanProcessPair(col1->metadata(), col2->metadata()));
+
+ KeyColumnArray col_prep[2];
+ if (EncoderInteger::UsesTransform(*col1)) {
+ col_prep[0] = EncoderInteger::ArrayReplace(*col1, *temp1);
+ } else {
+ col_prep[0] = *col1;
+ }
+ if (EncoderInteger::UsesTransform(*col2)) {
+ col_prep[1] = EncoderInteger::ArrayReplace(*col2, *temp2);
+ } else {
+ col_prep[1] = *col2;
+ }
+
+ uint32_t col_width1 = col_prep[0].metadata().fixed_length;
+ uint32_t col_width2 = col_prep[1].metadata().fixed_length;
+ int log_col_width1 =
+ col_width1 == 8 ? 3 : col_width1 == 4 ? 2 : col_width1 == 2 ? 1 : 0;
+ int log_col_width2 =
+ col_width2 == 8 ? 3 : col_width2 == 4 ? 2 : col_width2 == 2 ? 1 : 0;
+
+ bool is_row_fixed_length = rows.metadata().is_fixed_length;
+
+ uint32_t num_processed = 0;
+#if defined(ARROW_HAVE_AVX2)
+ if (ctx->has_avx2() && col_width1 == col_width2) {
+ num_processed =
+ DecodeHelper_avx2(is_row_fixed_length, col_width1, start_row, num_rows,
+ offset_within_row, rows, &col_prep[0], &col_prep[1]);
+ }
+#endif
+ if (num_processed < num_rows) {
+ using DecodeImp_t = void (*)(uint32_t, uint32_t, uint32_t, uint32_t,
+ const KeyRowArray&, KeyColumnArray*, KeyColumnArray*);
+ static const DecodeImp_t DecodeImp_fn[] = {
+ DecodeImp<false, uint8_t, uint8_t>, DecodeImp<false, uint16_t, uint8_t>,
+ DecodeImp<false, uint32_t, uint8_t>, DecodeImp<false, uint64_t, uint8_t>,
+ DecodeImp<false, uint8_t, uint16_t>, DecodeImp<false, uint16_t, uint16_t>,
+ DecodeImp<false, uint32_t, uint16_t>, DecodeImp<false, uint64_t, uint16_t>,
+ DecodeImp<false, uint8_t, uint32_t>, DecodeImp<false, uint16_t, uint32_t>,
+ DecodeImp<false, uint32_t, uint32_t>, DecodeImp<false, uint64_t, uint32_t>,
+ DecodeImp<false, uint8_t, uint64_t>, DecodeImp<false, uint16_t, uint64_t>,
+ DecodeImp<false, uint32_t, uint64_t>, DecodeImp<false, uint64_t, uint64_t>,
+ DecodeImp<true, uint8_t, uint8_t>, DecodeImp<true, uint16_t, uint8_t>,
+ DecodeImp<true, uint32_t, uint8_t>, DecodeImp<true, uint64_t, uint8_t>,
+ DecodeImp<true, uint8_t, uint16_t>, DecodeImp<true, uint16_t, uint16_t>,
+ DecodeImp<true, uint32_t, uint16_t>, DecodeImp<true, uint64_t, uint16_t>,
+ DecodeImp<true, uint8_t, uint32_t>, DecodeImp<true, uint16_t, uint32_t>,
+ DecodeImp<true, uint32_t, uint32_t>, DecodeImp<true, uint64_t, uint32_t>,
+ DecodeImp<true, uint8_t, uint64_t>, DecodeImp<true, uint16_t, uint64_t>,
+ DecodeImp<true, uint32_t, uint64_t>, DecodeImp<true, uint64_t, uint64_t>};
+ int dispatch_const =
+ (log_col_width2 << 2) | log_col_width1 | (is_row_fixed_length ? 16 : 0);
+ DecodeImp_fn[dispatch_const](num_processed, start_row, num_rows, offset_within_row,
+ rows, &(col_prep[0]), &(col_prep[1]));
+ }
+
+ if (EncoderInteger::UsesTransform(*col1)) {
+ EncoderInteger::PostDecode(col_prep[0], col1, ctx);
+ }
+ if (EncoderInteger::UsesTransform(*col2)) {
+ EncoderInteger::PostDecode(col_prep[1], col2, ctx);
+ }
+}
+
+template <bool is_row_fixed_length, typename col1_type, typename col2_type>
+void KeyEncoder::EncoderBinaryPair::DecodeImp(uint32_t num_rows_to_skip,
+ uint32_t start_row, uint32_t num_rows,
+ uint32_t offset_within_row,
+ const KeyRowArray& rows,
+ KeyColumnArray* col1,
+ KeyColumnArray* col2) {
+ DCHECK(rows.length() >= start_row + num_rows);
+ DCHECK(col1->length() == num_rows && col2->length() == num_rows);
+
+ uint8_t* dst_A = col1->mutable_data(1);
+ uint8_t* dst_B = col2->mutable_data(1);
+
+ uint32_t fixed_length = rows.metadata().fixed_length;
+ const uint32_t* offsets;
+ const uint8_t* src_base;
+ if (is_row_fixed_length) {
+ src_base = rows.data(1) + fixed_length * start_row + offset_within_row;
+ offsets = nullptr;
+ } else {
+ src_base = rows.data(2) + offset_within_row;
+ offsets = rows.offsets() + start_row;
+ }
+
+ using col1_type_const = typename std::add_const<col1_type>::type;
+ using col2_type_const = typename std::add_const<col2_type>::type;
+
+ if (is_row_fixed_length) {
+ const uint8_t* src = src_base + num_rows_to_skip * fixed_length;
+ for (uint32_t i = num_rows_to_skip; i < num_rows; ++i) {
+ reinterpret_cast<col1_type*>(dst_A)[i] = *reinterpret_cast<col1_type_const*>(src);
+ reinterpret_cast<col2_type*>(dst_B)[i] =
+ *reinterpret_cast<col2_type_const*>(src + sizeof(col1_type));
+ src += fixed_length;
+ }
+ } else {
+ for (uint32_t i = num_rows_to_skip; i < num_rows; ++i) {
+ const uint8_t* src = src_base + offsets[i];
+ reinterpret_cast<col1_type*>(dst_A)[i] = *reinterpret_cast<col1_type_const*>(src);
+ reinterpret_cast<col2_type*>(dst_B)[i] =
+ *reinterpret_cast<col2_type_const*>(src + sizeof(col1_type));
+ }
+ }
+}
+
+void KeyEncoder::EncoderOffsets::Encode(KeyRowArray* rows,
+ const std::vector<KeyColumnArray>& varbinary_cols,
+ KeyEncoderContext* ctx) {
+ DCHECK(!varbinary_cols.empty());
+
+ // Rows and columns must all be varying-length
+ DCHECK(!rows->metadata().is_fixed_length);
+ for (const auto& col : varbinary_cols) {
+ DCHECK(!col.metadata().is_fixed_length);
+ }
+
+ const auto num_rows = static_cast<uint32_t>(varbinary_cols[0].length());
+
+ uint32_t num_processed = 0;
+#if defined(ARROW_HAVE_AVX2)
+  // Whether any of the columns has a non-zero starting bit offset for its
+  // non-nulls bit vector
+ bool has_bit_offset = false;
+
+  // The rows and every column must describe exactly the same number of entries
+ DCHECK(rows->length() == num_rows);
+ for (const auto& col : varbinary_cols) {
+ DCHECK(col.length() == num_rows);
+ if (col.bit_offset(0) != 0) {
+ has_bit_offset = true;
+ }
+ }
+
+ if (ctx->has_avx2() && !has_bit_offset) {
+ // Create a temp vector sized based on the number of columns
+ auto temp_buffer_holder = util::TempVectorHolder<uint32_t>(
+ ctx->stack, static_cast<uint32_t>(varbinary_cols.size()) * 8);
+ auto temp_buffer_32B_per_col = KeyColumnArray(
+ KeyColumnMetadata(true, sizeof(uint32_t)), varbinary_cols.size() * 8, nullptr,
+ reinterpret_cast<uint8_t*>(temp_buffer_holder.mutable_data()), nullptr);
+
+ num_processed = EncodeImp_avx2(rows, varbinary_cols, &temp_buffer_32B_per_col);
+ }
+#endif
+ if (num_processed < num_rows) {
+ EncodeImp(num_processed, rows, varbinary_cols);
+ }
+}
+
+void KeyEncoder::EncoderOffsets::EncodeImp(
+ uint32_t num_rows_already_processed, KeyRowArray* rows,
+ const std::vector<KeyColumnArray>& varbinary_cols) {
+ DCHECK_GT(varbinary_cols.size(), 0);
+
+ int row_alignment = rows->metadata().row_alignment;
+ int string_alignment = rows->metadata().string_alignment;
+
+ uint32_t* row_offsets = rows->mutable_offsets();
+ uint8_t* row_values = rows->mutable_data(2);
+ const auto num_rows = static_cast<uint32_t>(varbinary_cols[0].length());
+
+ if (num_rows_already_processed == 0) {
+ row_offsets[0] = 0;
+ }
+
+ uint32_t row_offset = row_offsets[num_rows_already_processed];
+ for (uint32_t i = num_rows_already_processed; i < num_rows; ++i) {
+ uint32_t* varbinary_end =
+ rows->metadata().varbinary_end_array(row_values + row_offset);
+
+ // Zero out lengths for nulls.
+ // Add lengths of all columns to get row size.
+ // Store varbinary field ends while summing their lengths.
+
+ uint32_t offset_within_row = rows->metadata().fixed_length;
+
+ for (size_t col = 0; col < varbinary_cols.size(); ++col) {
+ const uint32_t* col_offsets = varbinary_cols[col].offsets();
+ uint32_t col_length = col_offsets[i + 1] - col_offsets[i];
+
+ const int bit_offset = varbinary_cols[col].bit_offset(0);
+
+ const uint8_t* non_nulls = varbinary_cols[col].data(0);
+ if (non_nulls && BitUtil::GetBit(non_nulls, bit_offset + i) == 0) {
+ col_length = 0;
+ }
+
+ offset_within_row +=
+ KeyRowMetadata::padding_for_alignment(offset_within_row, string_alignment);
+ offset_within_row += col_length;
+
+ varbinary_end[col] = offset_within_row;
+ }
+
+ offset_within_row +=
+ KeyRowMetadata::padding_for_alignment(offset_within_row, row_alignment);
+ row_offset += offset_within_row;
+ row_offsets[i + 1] = row_offset;
+ }
+}
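+
+namespace {
+// Editorial sketch (not part of the original sources): the alignment
+// arithmetic behind padding_for_alignment() as used above. For a power-of-two
+// alignment A, (-offset) & (A - 1) is the distance from offset to the next
+// multiple of A.
+constexpr uint32_t PaddingFor(uint32_t offset, uint32_t alignment) {
+  return static_cast<uint32_t>(-static_cast<int32_t>(offset)) & (alignment - 1);
+}
+static_assert(PaddingFor(13, 8) == 3, "13 is padded up to 16");
+static_assert(PaddingFor(16, 8) == 0, "an aligned offset needs no padding");
+}  // namespace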
+
+void KeyEncoder::EncoderOffsets::Decode(
+ uint32_t start_row, uint32_t num_rows, const KeyRowArray& rows,
+ std::vector<KeyColumnArray>* varbinary_cols,
+ const std::vector<uint32_t>& varbinary_cols_base_offset, KeyEncoderContext* ctx) {
+ DCHECK(!varbinary_cols->empty());
+ DCHECK(varbinary_cols->size() == varbinary_cols_base_offset.size());
+
+ DCHECK(!rows.metadata().is_fixed_length);
+ DCHECK(rows.length() >= start_row + num_rows);
+ for (const auto& col : *varbinary_cols) {
+ // Rows and columns must all be varying-length
+ DCHECK(!col.metadata().is_fixed_length);
+    // Each column must have space for exactly the selected subset of rows
+ DCHECK(col.length() == num_rows);
+ }
+
+  // Offsets of varbinary column data within each encoded row are stored
+  // in that same row as an array of 32-bit integers.
+  // The array immediately follows the data of the fixed-length columns.
+  // There is one element for each varying-length column.
+  // The Nth element is the end position of the Nth varbinary field within
+  // the row, i.e. the offset of the first byte past its last byte.
+
+ const uint32_t* row_offsets = rows.offsets() + start_row;
+
+ // Set the base offset for each column
+ for (size_t col = 0; col < varbinary_cols->size(); ++col) {
+ uint32_t* col_offsets = (*varbinary_cols)[col].mutable_offsets();
+ col_offsets[0] = varbinary_cols_base_offset[col];
+ }
+
+ int string_alignment = rows.metadata().string_alignment;
+
+ for (uint32_t i = 0; i < num_rows; ++i) {
+    // Find the beginning of the cumulative lengths array for the next row
+ const uint8_t* row = rows.data(2) + row_offsets[i];
+ const uint32_t* varbinary_ends = rows.metadata().varbinary_end_array(row);
+
+ // Update the offset of each column
+ uint32_t offset_within_row = rows.metadata().fixed_length;
+ for (size_t col = 0; col < varbinary_cols->size(); ++col) {
+ offset_within_row +=
+ KeyRowMetadata::padding_for_alignment(offset_within_row, string_alignment);
+ uint32_t length = varbinary_ends[col] - offset_within_row;
+ offset_within_row = varbinary_ends[col];
+ uint32_t* col_offsets = (*varbinary_cols)[col].mutable_offsets();
+ col_offsets[i + 1] = col_offsets[i] + length;
+ }
+ }
+}
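+
+// Editorial example (not part of the original sources): with fixed_length ==
+// 16, string_alignment == 4 and varbinary_ends == {21, 28} for some row, the
+// loop above recovers field lengths 21 - 16 = 5 and 28 - 24 = 4 (the second
+// field starts at 21 rounded up to 24) and accumulates them into the
+// per-column offsets arrays.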
+
+void KeyEncoder::EncoderVarBinary::Encode(uint32_t varbinary_col_id, KeyRowArray* rows,
+ const KeyColumnArray& col,
+ KeyEncoderContext* ctx) {
+#if defined(ARROW_HAVE_AVX2)
+ if (ctx->has_avx2()) {
+ EncodeHelper_avx2(varbinary_col_id, rows, col);
+ } else {
+#endif
+ if (varbinary_col_id == 0) {
+ EncodeImp<true>(varbinary_col_id, rows, col);
+ } else {
+ EncodeImp<false>(varbinary_col_id, rows, col);
+ }
+#if defined(ARROW_HAVE_AVX2)
+ }
+#endif
+}
+
+void KeyEncoder::EncoderVarBinary::Decode(uint32_t start_row, uint32_t num_rows,
+ uint32_t varbinary_col_id,
+ const KeyRowArray& rows, KeyColumnArray* col,
+ KeyEncoderContext* ctx) {
+  // The output column's varbinary buffer needs an extra 32 bytes at the end
+  // in the AVX2 version and 8 bytes otherwise.
+#if defined(ARROW_HAVE_AVX2)
+ if (ctx->has_avx2()) {
+ DecodeHelper_avx2(start_row, num_rows, varbinary_col_id, rows, col);
+ } else {
+#endif
+ if (varbinary_col_id == 0) {
+ DecodeImp<true>(start_row, num_rows, varbinary_col_id, rows, col);
+ } else {
+ DecodeImp<false>(start_row, num_rows, varbinary_col_id, rows, col);
+ }
+#if defined(ARROW_HAVE_AVX2)
+ }
+#endif
+}
+
+template <bool first_varbinary_col>
+void KeyEncoder::EncoderVarBinary::EncodeImp(uint32_t varbinary_col_id, KeyRowArray* rows,
+ const KeyColumnArray& col) {
+ EncodeDecodeHelper<first_varbinary_col, true>(
+ 0, static_cast<uint32_t>(col.length()), varbinary_col_id, rows, rows, &col, nullptr,
+ [](uint8_t* dst, const uint8_t* src, int64_t length) {
+ auto dst64 = reinterpret_cast<uint64_t*>(dst);
+ auto src64 = reinterpret_cast<const uint64_t*>(src);
+ uint32_t istripe;
+ for (istripe = 0; istripe < length / 8; ++istripe) {
+ dst64[istripe] = util::SafeLoad(src64 + istripe);
+ }
+ if ((length % 8) > 0) {
+ uint64_t mask_last = ~0ULL >> (8 * (8 * (istripe + 1) - length));
+ dst64[istripe] = (dst64[istripe] & ~mask_last) |
+ (util::SafeLoad(src64 + istripe) & mask_last);
+ }
+ });
+}
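+
+// Editorial example (not part of the original sources): for length == 11 the
+// loop above copies one full 8-byte stripe; then istripe == 1 and
+// mask_last == ~0ULL >> (8 * (8 * 2 - 11)) keeps only the low 3 bytes of the
+// second stripe, so bytes 8..10 of dst are overwritten while its trailing
+// 5 bytes are preserved.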
+
+template <bool first_varbinary_col>
+void KeyEncoder::EncoderVarBinary::DecodeImp(uint32_t start_row, uint32_t num_rows,
+ uint32_t varbinary_col_id,
+ const KeyRowArray& rows,
+ KeyColumnArray* col) {
+ EncodeDecodeHelper<first_varbinary_col, false>(
+ start_row, num_rows, varbinary_col_id, &rows, nullptr, col, col,
+ [](uint8_t* dst, const uint8_t* src, int64_t length) {
+ for (uint32_t istripe = 0; istripe < (length + 7) / 8; ++istripe) {
+ auto dst64 = reinterpret_cast<uint64_t*>(dst);
+ auto src64 = reinterpret_cast<const uint64_t*>(src);
+ util::SafeStore(dst64 + istripe, src64[istripe]);
+ }
+ });
+}
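+
+// Editorial note (not part of the original sources): unlike the encoding
+// path, this copy rounds the length up to whole 8-byte stripes and may write
+// past the logical end of the field, which is why Decode() above requires the
+// output varbinary buffer to carry 8 extra bytes (32 in the AVX2 path).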
+
+void KeyEncoder::EncoderNulls::Encode(KeyRowArray* rows,
+ const std::vector<KeyColumnArray>& cols,
+ KeyEncoderContext* ctx,
+ KeyColumnArray* temp_vector_16bit) {
+ DCHECK_GT(cols.size(), 0);
+ const auto num_rows = static_cast<uint32_t>(rows->length());
+
+ // All input columns should have the same number of rows.
+ // They may or may not have non-nulls bit-vectors allocated.
+ for (const auto& col : cols) {
+ DCHECK(col.length() == num_rows);
+ }
+
+ // Temp vector needs space for the required number of rows
+ DCHECK(temp_vector_16bit->length() >= num_rows);
+ DCHECK(temp_vector_16bit->metadata().is_fixed_length &&
+ temp_vector_16bit->metadata().fixed_length == sizeof(uint16_t));
+
+ uint8_t* null_masks = rows->null_masks();
+ uint32_t null_masks_bytes_per_row = rows->metadata().null_masks_bytes_per_row;
+ memset(null_masks, 0, null_masks_bytes_per_row * num_rows);
+ for (size_t col = 0; col < cols.size(); ++col) {
+ const uint8_t* non_nulls = cols[col].data(0);
+ if (!non_nulls) {
+ continue;
+ }
+ int bit_offset = cols[col].bit_offset(0);
+ DCHECK_LT(bit_offset, 8);
+ int num_selected;
+ util::BitUtil::bits_to_indexes(
+ 0, ctx->hardware_flags, num_rows, non_nulls, &num_selected,
+ reinterpret_cast<uint16_t*>(temp_vector_16bit->mutable_data(1)), bit_offset);
+ for (int i = 0; i < num_selected; ++i) {
+ uint16_t row_id = reinterpret_cast<const uint16_t*>(temp_vector_16bit->data(1))[i];
+ int64_t null_masks_bit_id = row_id * null_masks_bytes_per_row * 8 + col;
+ BitUtil::SetBit(null_masks, null_masks_bit_id);
+ }
+ }
+}
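+
+// Editorial note (not part of the original sources): the row-wise null masks
+// pack one bit per key column, so for row r and column c the bit position is
+//   r * null_masks_bytes_per_row * 8 + c
+// and a set bit means "column c is null in row r". bits_to_indexes() with a
+// first argument of 0 collects the row ids whose validity bit is zero, i.e.
+// the null entries of the column.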
+
+void KeyEncoder::EncoderNulls::Decode(uint32_t start_row, uint32_t num_rows,
+ const KeyRowArray& rows,
+ std::vector<KeyColumnArray>* cols) {
+  // Every output column needs to have space for exactly the required number
+  // of rows. It also needs its non-nulls bit-vector allocated and mutable.
+ DCHECK_GT(cols->size(), 0);
+ for (auto& col : *cols) {
+ DCHECK(col.length() == num_rows);
+ DCHECK(col.mutable_data(0));
+ }
+
+ const uint8_t* null_masks = rows.null_masks();
+ uint32_t null_masks_bytes_per_row = rows.metadata().null_masks_bytes_per_row;
+ for (size_t col = 0; col < cols->size(); ++col) {
+ uint8_t* non_nulls = (*cols)[col].mutable_data(0);
+ const int bit_offset = (*cols)[col].bit_offset(0);
+ DCHECK_LT(bit_offset, 8);
+ non_nulls[0] |= 0xff << (bit_offset);
+ if (bit_offset + num_rows > 8) {
+ int bits_in_first_byte = 8 - bit_offset;
+ memset(non_nulls + 1, 0xff, BitUtil::BytesForBits(num_rows - bits_in_first_byte));
+ }
+ for (uint32_t row = 0; row < num_rows; ++row) {
+ uint32_t null_masks_bit_id =
+ (start_row + row) * null_masks_bytes_per_row * 8 + static_cast<uint32_t>(col);
+ bool is_set = BitUtil::GetBit(null_masks, null_masks_bit_id);
+ if (is_set) {
+ BitUtil::ClearBit(non_nulls, bit_offset + row);
+ }
+ }
+ }
+}
+
+uint32_t KeyEncoder::KeyRowMetadata::num_varbinary_cols() const {
+ uint32_t result = 0;
+ for (auto column_metadata : column_metadatas) {
+ if (!column_metadata.is_fixed_length) {
+ ++result;
+ }
+ }
+ return result;
+}
+
+bool KeyEncoder::KeyRowMetadata::is_compatible(const KeyRowMetadata& other) const {
+ if (other.num_cols() != num_cols()) {
+ return false;
+ }
+ if (row_alignment != other.row_alignment ||
+ string_alignment != other.string_alignment) {
+ return false;
+ }
+ for (size_t i = 0; i < column_metadatas.size(); ++i) {
+ if (column_metadatas[i].is_fixed_length !=
+ other.column_metadatas[i].is_fixed_length) {
+ return false;
+ }
+ if (column_metadatas[i].fixed_length != other.column_metadatas[i].fixed_length) {
+ return false;
+ }
+ }
+ return true;
+}
+
+void KeyEncoder::KeyRowMetadata::FromColumnMetadataVector(
+ const std::vector<KeyColumnMetadata>& cols, int in_row_alignment,
+ int in_string_alignment) {
+ column_metadatas.resize(cols.size());
+ for (size_t i = 0; i < cols.size(); ++i) {
+ column_metadatas[i] = cols[i];
+ }
+
+ const auto num_cols = static_cast<uint32_t>(cols.size());
+
+  // Sort columns.
+  // Columns are sorted based on the size in bytes of their fixed-length part.
+  // For a varying-length column, the fixed-length part is the 32-bit field
+  // storing the cumulative length of varying-length fields.
+  // The rules are:
+  // a) A boolean column, marked with fixed-length 0, is considered to have a
+  //    fixed-length part of 1 byte.
+  // b) Columns with a fixed-length part that is a power of 2 or a multiple of
+  //    row alignment precede other columns. They are sorted among themselves
+  //    based on the size of their fixed-length part.
+  // c) Fixed-length columns precede varying-length columns when both have
+  //    fixed-length parts of the same size.
+ column_order.resize(num_cols);
+ for (uint32_t i = 0; i < num_cols; ++i) {
+ column_order[i] = i;
+ }
+ std::sort(
+ column_order.begin(), column_order.end(), [&cols](uint32_t left, uint32_t right) {
+ bool is_left_pow2 =
+ !cols[left].is_fixed_length || ARROW_POPCOUNT64(cols[left].fixed_length) <= 1;
+ bool is_right_pow2 = !cols[right].is_fixed_length ||
+ ARROW_POPCOUNT64(cols[right].fixed_length) <= 1;
+ bool is_left_fixedlen = cols[left].is_fixed_length;
+ bool is_right_fixedlen = cols[right].is_fixed_length;
+ uint32_t width_left =
+ cols[left].is_fixed_length ? cols[left].fixed_length : sizeof(uint32_t);
+ uint32_t width_right =
+ cols[right].is_fixed_length ? cols[right].fixed_length : sizeof(uint32_t);
+ if (is_left_pow2 != is_right_pow2) {
+ return is_left_pow2;
+ }
+ if (!is_left_pow2) {
+ return left < right;
+ }
+ if (width_left != width_right) {
+ return width_left > width_right;
+ }
+ if (is_left_fixedlen != is_right_fixedlen) {
+ return is_left_fixedlen;
+ }
+ return left < right;
+ });
+
+ row_alignment = in_row_alignment;
+ string_alignment = in_string_alignment;
+ varbinary_end_array_offset = 0;
+
+ column_offsets.resize(num_cols);
+ uint32_t num_varbinary_cols = 0;
+ uint32_t offset_within_row = 0;
+ for (uint32_t i = 0; i < num_cols; ++i) {
+ const KeyColumnMetadata& col = cols[column_order[i]];
+ offset_within_row +=
+ KeyRowMetadata::padding_for_alignment(offset_within_row, string_alignment, col);
+ column_offsets[i] = offset_within_row;
+ if (!col.is_fixed_length) {
+ if (num_varbinary_cols == 0) {
+ varbinary_end_array_offset = offset_within_row;
+ }
+ DCHECK(column_offsets[i] - varbinary_end_array_offset ==
+ num_varbinary_cols * sizeof(uint32_t));
+ ++num_varbinary_cols;
+ offset_within_row += sizeof(uint32_t);
+ } else {
+      // A boolean column is a bit-vector, which is indicated by setting
+      // the fixed length in its column metadata to zero.
+      // It is stored as one byte per value in the output row.
+ if (col.fixed_length == 0) {
+ offset_within_row += 1;
+ } else {
+ offset_within_row += col.fixed_length;
+ }
+ }
+ }
+
+ is_fixed_length = (num_varbinary_cols == 0);
+ fixed_length =
+ offset_within_row +
+ KeyRowMetadata::padding_for_alignment(
+ offset_within_row, num_varbinary_cols == 0 ? row_alignment : string_alignment);
+
+ // We set the number of bytes per row storing null masks of individual key columns
+  // to be a power of two. This is not required; it could also be set to the minimal
+ // number of bytes required for a given number of bits (one bit per column).
+ null_masks_bytes_per_row = 1;
+ while (static_cast<uint32_t>(null_masks_bytes_per_row * 8) < num_cols) {
+ null_masks_bytes_per_row *= 2;
+ }
+}
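+
+// Editorial example (not part of the original sources): for four key columns
+// with fixed widths {1, 8, varbinary, 4}, the comparator above yields
+// column_order == {1, 3, 2, 0}: the 8-byte column first, then the 4-byte
+// fixed-length column, then the varbinary column (whose fixed-length part is
+// its 4-byte end-offset field), then the 1-byte column. Wider fields come
+// first, fixed-length beats varying-length at equal width, and the original
+// index is the final tie-breaker.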
+
+void KeyEncoder::Init(const std::vector<KeyColumnMetadata>& cols, KeyEncoderContext* ctx,
+ int row_alignment, int string_alignment) {
+ ctx_ = ctx;
+ row_metadata_.FromColumnMetadataVector(cols, row_alignment, string_alignment);
+ uint32_t num_cols = row_metadata_.num_cols();
+ uint32_t num_varbinary_cols = row_metadata_.num_varbinary_cols();
+ batch_all_cols_.resize(num_cols);
+ batch_varbinary_cols_.resize(num_varbinary_cols);
+ batch_varbinary_cols_base_offsets_.resize(num_varbinary_cols);
+}
+
+void KeyEncoder::PrepareKeyColumnArrays(int64_t start_row, int64_t num_rows,
+ const std::vector<KeyColumnArray>& cols_in) {
+ const auto num_cols = static_cast<uint32_t>(cols_in.size());
+ DCHECK(batch_all_cols_.size() == num_cols);
+
+ uint32_t num_varbinary_visited = 0;
+ for (uint32_t i = 0; i < num_cols; ++i) {
+ const KeyColumnArray& col = cols_in[row_metadata_.column_order[i]];
+ KeyColumnArray col_window(col, start_row, num_rows);
+ batch_all_cols_[i] = col_window;
+ if (!col.metadata().is_fixed_length) {
+ DCHECK(num_varbinary_visited < batch_varbinary_cols_.size());
+      // If the start row is zero, the base offset of the varbinary column is also zero.
+ if (start_row == 0) {
+ batch_varbinary_cols_base_offsets_[num_varbinary_visited] = 0;
+ } else {
+ batch_varbinary_cols_base_offsets_[num_varbinary_visited] =
+ col.offsets()[start_row];
+ }
+ batch_varbinary_cols_[num_varbinary_visited++] = col_window;
+ }
+ }
+}
+
+Status KeyEncoder::PrepareOutputForEncode(int64_t start_row, int64_t num_rows,
+ KeyRowArray* rows,
+ const std::vector<KeyColumnArray>& all_cols) {
+ int64_t num_bytes_required = 0;
+
+ int64_t fixed_part = row_metadata_.fixed_length * num_rows;
+ int64_t var_part = 0;
+ for (const auto& col : all_cols) {
+ if (!col.metadata().is_fixed_length) {
+ DCHECK(col.length() >= start_row + num_rows);
+ const uint32_t* offsets = col.offsets();
+ var_part += offsets[start_row + num_rows] - offsets[start_row];
+ // Include maximum padding that can be added to align the start of varbinary fields.
+ var_part += num_rows * row_metadata_.string_alignment;
+ }
+ }
+ // Include maximum padding that can be added to align the start of the rows.
+ if (!row_metadata_.is_fixed_length) {
+ fixed_part += row_metadata_.row_alignment * num_rows;
+ }
+ num_bytes_required = fixed_part + var_part;
+
+ rows->Clean();
+ RETURN_NOT_OK(rows->AppendEmpty(static_cast<uint32_t>(num_rows),
+ static_cast<uint32_t>(num_bytes_required)));
+
+ return Status::OK();
+}
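+
+// Editorial note (not part of the original sources): the size computed above
+// is an upper bound rather than an exact measurement. Every row may need up
+// to row_alignment - 1 padding bytes and every varbinary field up to
+// string_alignment - 1, so the full alignment values are reserved per row
+// unconditionally and AppendEmpty() can never come up short.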
+
+void KeyEncoder::Encode(int64_t start_row, int64_t num_rows, KeyRowArray* rows,
+ const std::vector<KeyColumnArray>& cols) {
+ // Prepare column array vectors
+ PrepareKeyColumnArrays(start_row, num_rows, cols);
+
+ // Create two temp vectors with 16-bit elements
+ auto temp_buffer_holder_A =
+ util::TempVectorHolder<uint16_t>(ctx_->stack, static_cast<uint32_t>(num_rows));
+ auto temp_buffer_A = KeyColumnArray(
+ KeyColumnMetadata(true, sizeof(uint16_t)), num_rows, nullptr,
+ reinterpret_cast<uint8_t*>(temp_buffer_holder_A.mutable_data()), nullptr);
+ auto temp_buffer_holder_B =
+ util::TempVectorHolder<uint16_t>(ctx_->stack, static_cast<uint32_t>(num_rows));
+ auto temp_buffer_B = KeyColumnArray(
+ KeyColumnMetadata(true, sizeof(uint16_t)), num_rows, nullptr,
+ reinterpret_cast<uint8_t*>(temp_buffer_holder_B.mutable_data()), nullptr);
+
+ bool is_row_fixed_length = row_metadata_.is_fixed_length;
+ if (!is_row_fixed_length) {
+    // This call generates and fills in data for both:
+    // - offsets of the encoded rows within the output array
+    // - end offsets of the individual varbinary fields within each row
+ EncoderOffsets::Encode(rows, batch_varbinary_cols_, ctx_);
+
+ for (size_t i = 0; i < batch_varbinary_cols_.size(); ++i) {
+      // Memcpy varbinary fields into the positions in the output row
+      // buffer that were precomputed in the previous step.
+ EncoderVarBinary::Encode(static_cast<uint32_t>(i), rows, batch_varbinary_cols_[i],
+ ctx_);
+ }
+ }
+
+ // Process fixed length columns
+ const auto num_cols = static_cast<uint32_t>(batch_all_cols_.size());
+ for (uint32_t i = 0; i < num_cols;) {
+ if (!batch_all_cols_[i].metadata().is_fixed_length) {
+ i += 1;
+ continue;
+ }
+ bool can_process_pair =
+ (i + 1 < num_cols) && batch_all_cols_[i + 1].metadata().is_fixed_length &&
+ EncoderBinaryPair::CanProcessPair(batch_all_cols_[i].metadata(),
+ batch_all_cols_[i + 1].metadata());
+ if (!can_process_pair) {
+ EncoderBinary::Encode(row_metadata_.column_offsets[i], rows, batch_all_cols_[i],
+ ctx_, &temp_buffer_A);
+ i += 1;
+ } else {
+ EncoderBinaryPair::Encode(row_metadata_.column_offsets[i], rows, batch_all_cols_[i],
+ batch_all_cols_[i + 1], ctx_, &temp_buffer_A,
+ &temp_buffer_B);
+ i += 2;
+ }
+ }
+
+ // Process nulls
+ EncoderNulls::Encode(rows, batch_all_cols_, ctx_, &temp_buffer_A);
+}
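+
+// Editorial sketch (not part of the original sources): a typical caller
+// sequence, assuming `encoder`, `ctx`, `rows` and `cols` have been set up
+// elsewhere:
+//
+//   encoder.Init(column_metadatas, &ctx, /*row_alignment=*/8,
+//                /*string_alignment=*/8);
+//   RETURN_NOT_OK(encoder.PrepareOutputForEncode(0, num_rows, &rows, cols));
+//   encoder.Encode(0, num_rows, &rows, cols);
+//
+// Decoding mirrors this with DecodeFixedLengthBuffers() first (which also
+// recovers the varbinary lengths) followed by DecodeVaryingLengthBuffers().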
+
+void KeyEncoder::DecodeFixedLengthBuffers(int64_t start_row_input,
+ int64_t start_row_output, int64_t num_rows,
+ const KeyRowArray& rows,
+ std::vector<KeyColumnArray>* cols) {
+ // Prepare column array vectors
+ PrepareKeyColumnArrays(start_row_output, num_rows, *cols);
+
+ // Create two temp vectors with 16-bit elements
+ auto temp_buffer_holder_A =
+ util::TempVectorHolder<uint16_t>(ctx_->stack, static_cast<uint32_t>(num_rows));
+ auto temp_buffer_A = KeyColumnArray(
+ KeyColumnMetadata(true, sizeof(uint16_t)), num_rows, nullptr,
+ reinterpret_cast<uint8_t*>(temp_buffer_holder_A.mutable_data()), nullptr);
+ auto temp_buffer_holder_B =
+ util::TempVectorHolder<uint16_t>(ctx_->stack, static_cast<uint32_t>(num_rows));
+ auto temp_buffer_B = KeyColumnArray(
+ KeyColumnMetadata(true, sizeof(uint16_t)), num_rows, nullptr,
+ reinterpret_cast<uint8_t*>(temp_buffer_holder_B.mutable_data()), nullptr);
+
+ bool is_row_fixed_length = row_metadata_.is_fixed_length;
+ if (!is_row_fixed_length) {
+ EncoderOffsets::Decode(static_cast<uint32_t>(start_row_input),
+ static_cast<uint32_t>(num_rows), rows, &batch_varbinary_cols_,
+ batch_varbinary_cols_base_offsets_, ctx_);
+ }
+
+ // Process fixed length columns
+ const auto num_cols = static_cast<uint32_t>(batch_all_cols_.size());
+ for (uint32_t i = 0; i < num_cols;) {
+ if (!batch_all_cols_[i].metadata().is_fixed_length) {
+ i += 1;
+ continue;
+ }
+ bool can_process_pair =
+ (i + 1 < num_cols) && batch_all_cols_[i + 1].metadata().is_fixed_length &&
+ EncoderBinaryPair::CanProcessPair(batch_all_cols_[i].metadata(),
+ batch_all_cols_[i + 1].metadata());
+ if (!can_process_pair) {
+ EncoderBinary::Decode(static_cast<uint32_t>(start_row_input),
+ static_cast<uint32_t>(num_rows),
+ row_metadata_.column_offsets[i], rows, &batch_all_cols_[i],
+ ctx_, &temp_buffer_A);
+ i += 1;
+ } else {
+ EncoderBinaryPair::Decode(
+ static_cast<uint32_t>(start_row_input), static_cast<uint32_t>(num_rows),
+ row_metadata_.column_offsets[i], rows, &batch_all_cols_[i],
+ &batch_all_cols_[i + 1], ctx_, &temp_buffer_A, &temp_buffer_B);
+ i += 2;
+ }
+ }
+
+ // Process nulls
+ EncoderNulls::Decode(static_cast<uint32_t>(start_row_input),
+ static_cast<uint32_t>(num_rows), rows, &batch_all_cols_);
+}
+
+void KeyEncoder::DecodeVaryingLengthBuffers(int64_t start_row_input,
+ int64_t start_row_output, int64_t num_rows,
+ const KeyRowArray& rows,
+ std::vector<KeyColumnArray>* cols) {
+ // Prepare column array vectors
+ PrepareKeyColumnArrays(start_row_output, num_rows, *cols);
+
+ bool is_row_fixed_length = row_metadata_.is_fixed_length;
+ if (!is_row_fixed_length) {
+ for (size_t i = 0; i < batch_varbinary_cols_.size(); ++i) {
+      // Memcpy varbinary fields out of the row buffer into the column
+      // positions precomputed in the previous step.
+ EncoderVarBinary::Decode(static_cast<uint32_t>(start_row_input),
+ static_cast<uint32_t>(num_rows), static_cast<uint32_t>(i),
+ rows, &batch_varbinary_cols_[i], ctx_);
+ }
+ }
+}
+
+} // namespace compute
+} // namespace arrow
diff --git a/contrib/libs/apache/arrow/cpp/src/arrow/compute/exec/key_encode.h b/contrib/libs/apache/arrow/cpp/src/arrow/compute/exec/key_encode.h
index f59690e0e6c..e5397b9dfd4 100644
--- a/contrib/libs/apache/arrow/cpp/src/arrow/compute/exec/key_encode.h
+++ b/contrib/libs/apache/arrow/cpp/src/arrow/compute/exec/key_encode.h
@@ -1,635 +1,635 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-#pragma once
-
-#include <cstdint>
-#include <memory>
-#include <vector>
-
-#include "arrow/compute/exec/util.h"
-#include "arrow/memory_pool.h"
-#include "arrow/result.h"
-#include "arrow/status.h"
-#include "arrow/util/bit_util.h"
-
-namespace arrow {
-namespace compute {
-
-class KeyColumnMetadata;
-
-/// Converts between key representation as a collection of arrays for
-/// individual columns and another representation as a single array of rows
-/// combining data from all columns into one value.
-/// This conversion is reversible.
-/// Row-oriented storage is beneficial when there is a need for random access
-/// of individual rows and at the same time all included columns are likely to
-/// be accessed together, as in the case of a hash table key.
-class KeyEncoder {
- public:
- struct KeyEncoderContext {
- bool has_avx2() const {
- return (hardware_flags & arrow::internal::CpuInfo::AVX2) > 0;
- }
- int64_t hardware_flags;
- util::TempVectorStack* stack;
- };
-
- /// Description of a storage format of a single key column as needed
- /// for the purpose of row encoding.
- struct KeyColumnMetadata {
- KeyColumnMetadata() = default;
- KeyColumnMetadata(bool is_fixed_length_in, uint32_t fixed_length_in)
- : is_fixed_length(is_fixed_length_in), fixed_length(fixed_length_in) {}
- /// Is column storing a varying-length binary, using offsets array
- /// to find a beginning of a value, or is it a fixed-length binary.
- bool is_fixed_length;
- /// For a fixed-length binary column: number of bytes per value.
- /// Zero has a special meaning, indicating a bit vector with one bit per value.
- /// For a varying-length binary column: number of bytes per offset.
- uint32_t fixed_length;
- };
-
- /// Description of a storage format for rows produced by encoder.
- struct KeyRowMetadata {
- /// Is row a varying-length binary, using offsets array to find a beginning of a row,
- /// or is it a fixed-length binary.
- bool is_fixed_length;
-
- /// For a fixed-length binary row, common size of rows in bytes,
- /// rounded up to the multiple of alignment.
- ///
- /// For a varying-length binary, size of all encoded fixed-length key columns,
- /// including lengths of varying-length columns, rounded up to the multiple of string
- /// alignment.
- uint32_t fixed_length;
-
- /// Offset within a row to the array of 32-bit offsets within a row of
- /// ends of varbinary fields.
- /// Used only when the row is not fixed-length, zero for fixed-length row.
- /// There are N elements for N varbinary fields.
- /// Each element is the offset within a row of the first byte after
- /// the corresponding varbinary field bytes in that row.
- /// If varbinary fields begin at aligned addresses, then the end of the previous
- /// varbinary field needs to be rounded up according to the specified alignment
- /// to obtain the beginning of the next varbinary field.
- /// The first varbinary field starts at offset specified by fixed_length,
- /// which should already be aligned.
- uint32_t varbinary_end_array_offset;
-
- /// Fixed number of bytes per row that are used to encode null masks.
- /// Null masks indicate for a single row which of its key columns are null.
- /// Nth bit in the sequence of bytes assigned to a row represents null
- /// information for Nth field according to the order in which they are encoded.
- int null_masks_bytes_per_row;
-
- /// Power of 2. Every row will start at the offset aligned to that number of bytes.
- int row_alignment;
-
- /// Power of 2. Must be no greater than row alignment.
- /// Every non-power-of-2 binary field and every varbinary field bytes
- /// will start aligned to that number of bytes.
- int string_alignment;
-
- /// Metadata of encoded columns in their original order.
- std::vector<KeyColumnMetadata> column_metadatas;
-
- /// Order in which fields are encoded.
- std::vector<uint32_t> column_order;
-
- /// Offsets within a row to fields in their encoding order.
- std::vector<uint32_t> column_offsets;
-
- /// Rounding up offset to the nearest multiple of alignment value.
- /// Alignment must be a power of 2.
- static inline uint32_t padding_for_alignment(uint32_t offset,
- int required_alignment) {
- ARROW_DCHECK(ARROW_POPCOUNT64(required_alignment) == 1);
- return static_cast<uint32_t>((-static_cast<int32_t>(offset)) &
- (required_alignment - 1));
- }
-
- /// Rounding up offset to the beginning of the next column,
- /// choosing the required alignment based on the data type of that column.
- static inline uint32_t padding_for_alignment(uint32_t offset, int string_alignment,
- const KeyColumnMetadata& col_metadata) {
- if (!col_metadata.is_fixed_length ||
- ARROW_POPCOUNT64(col_metadata.fixed_length) <= 1) {
- return 0;
- } else {
- return padding_for_alignment(offset, string_alignment);
- }
- }
-
- /// Returns an array of offsets within a row of ends of varbinary fields.
- inline const uint32_t* varbinary_end_array(const uint8_t* row) const {
- ARROW_DCHECK(!is_fixed_length);
- return reinterpret_cast<const uint32_t*>(row + varbinary_end_array_offset);
- }
- inline uint32_t* varbinary_end_array(uint8_t* row) const {
- ARROW_DCHECK(!is_fixed_length);
- return reinterpret_cast<uint32_t*>(row + varbinary_end_array_offset);
- }
-
- /// Returns the offset within the row and length of the first varbinary field.
- inline void first_varbinary_offset_and_length(const uint8_t* row, uint32_t* offset,
- uint32_t* length) const {
- ARROW_DCHECK(!is_fixed_length);
- *offset = fixed_length;
- *length = varbinary_end_array(row)[0] - fixed_length;
- }
-
- /// Returns the offset within the row and length of the second and further varbinary
- /// fields.
- inline void nth_varbinary_offset_and_length(const uint8_t* row, int varbinary_id,
- uint32_t* out_offset,
- uint32_t* out_length) const {
- ARROW_DCHECK(!is_fixed_length);
- ARROW_DCHECK(varbinary_id > 0);
- const uint32_t* varbinary_end = varbinary_end_array(row);
- uint32_t offset = varbinary_end[varbinary_id - 1];
- offset += padding_for_alignment(offset, string_alignment);
- *out_offset = offset;
- *out_length = varbinary_end[varbinary_id] - offset;
- }
-
- uint32_t encoded_field_order(uint32_t icol) const { return column_order[icol]; }
-
- uint32_t encoded_field_offset(uint32_t icol) const { return column_offsets[icol]; }
-
- uint32_t num_cols() const { return static_cast<uint32_t>(column_metadatas.size()); }
-
- uint32_t num_varbinary_cols() const;
-
- void FromColumnMetadataVector(const std::vector<KeyColumnMetadata>& cols,
- int in_row_alignment, int in_string_alignment);
-
- bool is_compatible(const KeyRowMetadata& other) const;
- };
-
- class KeyRowArray {
- public:
- KeyRowArray();
- Status Init(MemoryPool* pool, const KeyRowMetadata& metadata);
- void Clean();
- Status AppendEmpty(uint32_t num_rows_to_append, uint32_t num_extra_bytes_to_append);
- Status AppendSelectionFrom(const KeyRowArray& from, uint32_t num_rows_to_append,
- const uint16_t* source_row_ids);
- const KeyRowMetadata& metadata() const { return metadata_; }
- int64_t length() const { return num_rows_; }
- const uint8_t* data(int i) const {
- ARROW_DCHECK(i >= 0 && i <= max_buffers_);
- return buffers_[i];
- }
- uint8_t* mutable_data(int i) {
- ARROW_DCHECK(i >= 0 && i <= max_buffers_);
- return mutable_buffers_[i];
- }
- const uint32_t* offsets() const { return reinterpret_cast<const uint32_t*>(data(1)); }
- uint32_t* mutable_offsets() { return reinterpret_cast<uint32_t*>(mutable_data(1)); }
- const uint8_t* null_masks() const { return null_masks_->data(); }
- uint8_t* null_masks() { return null_masks_->mutable_data(); }
-
- bool has_any_nulls(const KeyEncoderContext* ctx) const;
-
- private:
- Status ResizeFixedLengthBuffers(int64_t num_extra_rows);
- Status ResizeOptionalVaryingLengthBuffer(int64_t num_extra_bytes);
-
- int64_t size_null_masks(int64_t num_rows);
- int64_t size_offsets(int64_t num_rows);
- int64_t size_rows_fixed_length(int64_t num_rows);
- int64_t size_rows_varying_length(int64_t num_bytes);
- void update_buffer_pointers();
-
- static constexpr int64_t padding_for_vectors = 64;
- MemoryPool* pool_;
- KeyRowMetadata metadata_;
- /// Buffers can only expand during lifetime and never shrink.
- std::unique_ptr<ResizableBuffer> null_masks_;
- std::unique_ptr<ResizableBuffer> offsets_;
- std::unique_ptr<ResizableBuffer> rows_;
- static constexpr int max_buffers_ = 3;
- const uint8_t* buffers_[max_buffers_];
- uint8_t* mutable_buffers_[max_buffers_];
- int64_t num_rows_;
- int64_t rows_capacity_;
- int64_t bytes_capacity_;
-
- // Mutable to allow lazy evaluation
- mutable int64_t num_rows_for_has_any_nulls_;
- mutable bool has_any_nulls_;
- };
-
- /// A lightweight description of an array representing one of key columns.
- class KeyColumnArray {
- public:
- KeyColumnArray() = default;
- /// Create as a mix of buffers according to the mask from two descriptions
- /// (Nth bit is set to 0 if Nth buffer from the first input
- /// should be used and is set to 1 otherwise).
- /// Metadata is inherited from the first input.
- KeyColumnArray(const KeyColumnMetadata& metadata, const KeyColumnArray& left,
- const KeyColumnArray& right, int buffer_id_to_replace);
- /// Create for reading
- KeyColumnArray(const KeyColumnMetadata& metadata, int64_t length,
- const uint8_t* buffer0, const uint8_t* buffer1, const uint8_t* buffer2,
- int bit_offset0 = 0, int bit_offset1 = 0);
- /// Create for writing
- KeyColumnArray(const KeyColumnMetadata& metadata, int64_t length, uint8_t* buffer0,
- uint8_t* buffer1, uint8_t* buffer2, int bit_offset0 = 0,
- int bit_offset1 = 0);
- /// Create as a window view of original description that is offset
- /// by a given number of rows.
- /// The number of rows used in offset must be divisible by 8
- /// in order to not split bit vectors within a single byte.
- KeyColumnArray(const KeyColumnArray& from, int64_t start, int64_t length);
- uint8_t* mutable_data(int i) {
- ARROW_DCHECK(i >= 0 && i <= max_buffers_);
- return mutable_buffers_[i];
- }
- const uint8_t* data(int i) const {
- ARROW_DCHECK(i >= 0 && i <= max_buffers_);
- return buffers_[i];
- }
- uint32_t* mutable_offsets() { return reinterpret_cast<uint32_t*>(mutable_data(1)); }
- const uint32_t* offsets() const { return reinterpret_cast<const uint32_t*>(data(1)); }
- const KeyColumnMetadata& metadata() const { return metadata_; }
- int64_t length() const { return length_; }
- int bit_offset(int i) const {
- ARROW_DCHECK(i >= 0 && i < max_buffers_);
- return bit_offset_[i];
- }
-
- private:
- static constexpr int max_buffers_ = 3;
- const uint8_t* buffers_[max_buffers_];
- uint8_t* mutable_buffers_[max_buffers_];
- KeyColumnMetadata metadata_;
- int64_t length_;
- // Starting bit offset within the first byte (between 0 and 7)
- // to be used when accessing buffers that store bit vectors.
- int bit_offset_[max_buffers_ - 1];
- };
-
- void Init(const std::vector<KeyColumnMetadata>& cols, KeyEncoderContext* ctx,
- int row_alignment, int string_alignment);
-
- const KeyRowMetadata& row_metadata() { return row_metadata_; }
-
- /// Find out the required sizes of all output buffers for encoding
- /// (including varying-length buffers).
- /// Use that information to resize the provided row array so that it can fit
- /// the encoded data.
- Status PrepareOutputForEncode(int64_t start_input_row, int64_t num_input_rows,
- KeyRowArray* rows,
- const std::vector<KeyColumnArray>& all_cols);
-
- /// Encode a window of column oriented data into the entire output
- /// row oriented storage.
- /// The output buffers for encoding need to be correctly sized before
- /// starting encoding.
- void Encode(int64_t start_input_row, int64_t num_input_rows, KeyRowArray* rows,
- const std::vector<KeyColumnArray>& cols);
-
- /// Decode a window of row oriented data into a corresponding
- /// window of column oriented storage.
- /// The output buffers need to be correctly allocated and sized before
- /// calling each method.
- /// For that reason decoding is split into two functions.
- /// The output of the first one, that processes everything except for
- /// varying length buffers, can be used to find out required varying
- /// length buffers sizes.
- void DecodeFixedLengthBuffers(int64_t start_row_input, int64_t start_row_output,
- int64_t num_rows, const KeyRowArray& rows,
- std::vector<KeyColumnArray>* cols);
-
- void DecodeVaryingLengthBuffers(int64_t start_row_input, int64_t start_row_output,
- int64_t num_rows, const KeyRowArray& rows,
- std::vector<KeyColumnArray>* cols);
-
- private:
- /// Prepare column array vectors.
- /// Output column arrays represent a range of input column arrays
- /// specified by starting row and number of rows.
- /// Three vectors are generated:
- /// - all columns
- /// - fixed-length columns only
- /// - varying-length columns only
- void PrepareKeyColumnArrays(int64_t start_row, int64_t num_rows,
- const std::vector<KeyColumnArray>& cols_in);
-
- class TransformBoolean {
- public:
- static KeyColumnArray ArrayReplace(const KeyColumnArray& column,
- const KeyColumnArray& temp);
- static void PreEncode(const KeyColumnArray& input, KeyColumnArray* output,
- KeyEncoderContext* ctx);
- static void PostDecode(const KeyColumnArray& input, KeyColumnArray* output,
- KeyEncoderContext* ctx);
- };
-
- class EncoderInteger {
- public:
- static void Encode(uint32_t offset_within_row, KeyRowArray* rows,
- const KeyColumnArray& col, KeyEncoderContext* ctx,
- KeyColumnArray* temp);
- static void Decode(uint32_t start_row, uint32_t num_rows, uint32_t offset_within_row,
- const KeyRowArray& rows, KeyColumnArray* col,
- KeyEncoderContext* ctx, KeyColumnArray* temp);
- static bool UsesTransform(const KeyColumnArray& column);
- static KeyColumnArray ArrayReplace(const KeyColumnArray& column,
- const KeyColumnArray& temp);
- static void PreEncode(const KeyColumnArray& input, KeyColumnArray* output,
- KeyEncoderContext* ctx);
- static void PostDecode(const KeyColumnArray& input, KeyColumnArray* output,
- KeyEncoderContext* ctx);
-
- private:
- static bool IsBoolean(const KeyColumnMetadata& metadata);
- };
-
- class EncoderBinary {
- public:
- static void Encode(uint32_t offset_within_row, KeyRowArray* rows,
- const KeyColumnArray& col, KeyEncoderContext* ctx,
- KeyColumnArray* temp);
- static void Decode(uint32_t start_row, uint32_t num_rows, uint32_t offset_within_row,
- const KeyRowArray& rows, KeyColumnArray* col,
- KeyEncoderContext* ctx, KeyColumnArray* temp);
- static bool IsInteger(const KeyColumnMetadata& metadata);
-
- private:
- template <bool is_row_fixed_length, bool is_encoding, class COPY_FN>
- static inline void EncodeDecodeHelper(uint32_t start_row, uint32_t num_rows,
- uint32_t offset_within_row,
- const KeyRowArray* rows_const,
- KeyRowArray* rows_mutable_maybe_null,
- const KeyColumnArray* col_const,
- KeyColumnArray* col_mutable_maybe_null,
- COPY_FN copy_fn);
- template <bool is_row_fixed_length>
- static void EncodeImp(uint32_t offset_within_row, KeyRowArray* rows,
- const KeyColumnArray& col);
- template <bool is_row_fixed_length>
- static void DecodeImp(uint32_t start_row, uint32_t num_rows,
- uint32_t offset_within_row, const KeyRowArray& rows,
- KeyColumnArray* col);
-#if defined(ARROW_HAVE_AVX2)
- static void EncodeHelper_avx2(bool is_row_fixed_length, uint32_t offset_within_row,
- KeyRowArray* rows, const KeyColumnArray& col);
- static void DecodeHelper_avx2(bool is_row_fixed_length, uint32_t start_row,
- uint32_t num_rows, uint32_t offset_within_row,
- const KeyRowArray& rows, KeyColumnArray* col);
- template <bool is_row_fixed_length>
- static void EncodeImp_avx2(uint32_t offset_within_row, KeyRowArray* rows,
- const KeyColumnArray& col);
- template <bool is_row_fixed_length>
- static void DecodeImp_avx2(uint32_t start_row, uint32_t num_rows,
- uint32_t offset_within_row, const KeyRowArray& rows,
- KeyColumnArray* col);
-#endif
- static void ColumnMemsetNulls(uint32_t offset_within_row, KeyRowArray* rows,
- const KeyColumnArray& col, KeyEncoderContext* ctx,
- KeyColumnArray* temp_vector_16bit, uint8_t byte_value);
- template <bool is_row_fixed_length, uint32_t col_width>
- static void ColumnMemsetNullsImp(uint32_t offset_within_row, KeyRowArray* rows,
- const KeyColumnArray& col, KeyEncoderContext* ctx,
- KeyColumnArray* temp_vector_16bit,
- uint8_t byte_value);
- };
-
- class EncoderBinaryPair {
- public:
- static bool CanProcessPair(const KeyColumnMetadata& col1,
- const KeyColumnMetadata& col2) {
- return EncoderBinary::IsInteger(col1) && EncoderBinary::IsInteger(col2);
- }
- static void Encode(uint32_t offset_within_row, KeyRowArray* rows,
- const KeyColumnArray& col1, const KeyColumnArray& col2,
- KeyEncoderContext* ctx, KeyColumnArray* temp1,
- KeyColumnArray* temp2);
- static void Decode(uint32_t start_row, uint32_t num_rows, uint32_t offset_within_row,
- const KeyRowArray& rows, KeyColumnArray* col1,
- KeyColumnArray* col2, KeyEncoderContext* ctx,
- KeyColumnArray* temp1, KeyColumnArray* temp2);
-
- private:
- template <bool is_row_fixed_length, typename col1_type, typename col2_type>
- static void EncodeImp(uint32_t num_rows_to_skip, uint32_t offset_within_row,
- KeyRowArray* rows, const KeyColumnArray& col1,
- const KeyColumnArray& col2);
- template <bool is_row_fixed_length, typename col1_type, typename col2_type>
- static void DecodeImp(uint32_t num_rows_to_skip, uint32_t start_row,
- uint32_t num_rows, uint32_t offset_within_row,
- const KeyRowArray& rows, KeyColumnArray* col1,
- KeyColumnArray* col2);
-#if defined(ARROW_HAVE_AVX2)
- static uint32_t EncodeHelper_avx2(bool is_row_fixed_length, uint32_t col_width,
- uint32_t offset_within_row, KeyRowArray* rows,
- const KeyColumnArray& col1,
- const KeyColumnArray& col2);
- static uint32_t DecodeHelper_avx2(bool is_row_fixed_length, uint32_t col_width,
- uint32_t start_row, uint32_t num_rows,
- uint32_t offset_within_row, const KeyRowArray& rows,
- KeyColumnArray* col1, KeyColumnArray* col2);
- template <bool is_row_fixed_length, uint32_t col_width>
- static uint32_t EncodeImp_avx2(uint32_t offset_within_row, KeyRowArray* rows,
- const KeyColumnArray& col1,
- const KeyColumnArray& col2);
- template <bool is_row_fixed_length, uint32_t col_width>
- static uint32_t DecodeImp_avx2(uint32_t start_row, uint32_t num_rows,
- uint32_t offset_within_row, const KeyRowArray& rows,
- KeyColumnArray* col1, KeyColumnArray* col2);
-#endif
- };
-
- class EncoderOffsets {
- public:
- // In order not to do the same work twice,
- // encoding computes in a single pass both:
- // a) row offsets for varying-length rows
- // b) within each new row, the cumulative length array
- //    of varying-length values within that row.
- static void Encode(KeyRowArray* rows,
- const std::vector<KeyColumnArray>& varbinary_cols,
- KeyEncoderContext* ctx);
- static void Decode(uint32_t start_row, uint32_t num_rows, const KeyRowArray& rows,
- std::vector<KeyColumnArray>* varbinary_cols,
- const std::vector<uint32_t>& varbinary_cols_base_offset,
- KeyEncoderContext* ctx);
-
- private:
- static void EncodeImp(uint32_t num_rows_already_processed, KeyRowArray* rows,
- const std::vector<KeyColumnArray>& varbinary_cols);
-#if defined(ARROW_HAVE_AVX2)
- static uint32_t EncodeImp_avx2(KeyRowArray* rows,
- const std::vector<KeyColumnArray>& varbinary_cols,
- KeyColumnArray* temp_buffer_32B_per_col);
-#endif
- };
-
- class EncoderVarBinary {
- public:
- static void Encode(uint32_t varbinary_col_id, KeyRowArray* rows,
- const KeyColumnArray& col, KeyEncoderContext* ctx);
- static void Decode(uint32_t start_row, uint32_t num_rows, uint32_t varbinary_col_id,
- const KeyRowArray& rows, KeyColumnArray* col,
- KeyEncoderContext* ctx);
-
- private:
- template <bool first_varbinary_col, bool is_encoding, class COPY_FN>
- static inline void EncodeDecodeHelper(uint32_t start_row, uint32_t num_rows,
- uint32_t varbinary_col_id,
- const KeyRowArray* rows_const,
- KeyRowArray* rows_mutable_maybe_null,
- const KeyColumnArray* col_const,
- KeyColumnArray* col_mutable_maybe_null,
- COPY_FN copy_fn);
- template <bool first_varbinary_col>
- static void EncodeImp(uint32_t varbinary_col_id, KeyRowArray* rows,
- const KeyColumnArray& col);
- template <bool first_varbinary_col>
- static void DecodeImp(uint32_t start_row, uint32_t num_rows,
- uint32_t varbinary_col_id, const KeyRowArray& rows,
- KeyColumnArray* col);
-#if defined(ARROW_HAVE_AVX2)
- static void EncodeHelper_avx2(uint32_t varbinary_col_id, KeyRowArray* rows,
- const KeyColumnArray& col);
- static void DecodeHelper_avx2(uint32_t start_row, uint32_t num_rows,
- uint32_t varbinary_col_id, const KeyRowArray& rows,
- KeyColumnArray* col);
- template <bool first_varbinary_col>
- static void EncodeImp_avx2(uint32_t varbinary_col_id, KeyRowArray* rows,
- const KeyColumnArray& col);
- template <bool first_varbinary_col>
- static void DecodeImp_avx2(uint32_t start_row, uint32_t num_rows,
- uint32_t varbinary_col_id, const KeyRowArray& rows,
- KeyColumnArray* col);
-#endif
- };
-
- class EncoderNulls {
- public:
- static void Encode(KeyRowArray* rows, const std::vector<KeyColumnArray>& cols,
- KeyEncoderContext* ctx, KeyColumnArray* temp_vector_16bit);
- static void Decode(uint32_t start_row, uint32_t num_rows, const KeyRowArray& rows,
- std::vector<KeyColumnArray>* cols);
- };
-
- KeyEncoderContext* ctx_;
-
- // Data initialized once, based on data types of key columns
- KeyRowMetadata row_metadata_;
-
- // Data initialized for each input batch.
- // All elements are ordered according to the order of encoded fields in a row.
- std::vector<KeyColumnArray> batch_all_cols_;
- std::vector<KeyColumnArray> batch_varbinary_cols_;
- std::vector<uint32_t> batch_varbinary_cols_base_offsets_;
-};
-
-template <bool is_row_fixed_length, bool is_encoding, class COPY_FN>
-inline void KeyEncoder::EncoderBinary::EncodeDecodeHelper(
- uint32_t start_row, uint32_t num_rows, uint32_t offset_within_row,
- const KeyRowArray* rows_const, KeyRowArray* rows_mutable_maybe_null,
- const KeyColumnArray* col_const, KeyColumnArray* col_mutable_maybe_null,
- COPY_FN copy_fn) {
- ARROW_DCHECK(col_const && col_const->metadata().is_fixed_length);
- uint32_t col_width = col_const->metadata().fixed_length;
-
- if (is_row_fixed_length) {
- uint32_t row_width = rows_const->metadata().fixed_length;
- for (uint32_t i = 0; i < num_rows; ++i) {
- const uint8_t* src;
- uint8_t* dst;
- if (is_encoding) {
- src = col_const->data(1) + col_width * i;
- dst = rows_mutable_maybe_null->mutable_data(1) + row_width * (start_row + i) +
- offset_within_row;
- } else {
- src = rows_const->data(1) + row_width * (start_row + i) + offset_within_row;
- dst = col_mutable_maybe_null->mutable_data(1) + col_width * i;
- }
- copy_fn(dst, src, col_width);
- }
- } else {
- const uint32_t* row_offsets = rows_const->offsets();
- for (uint32_t i = 0; i < num_rows; ++i) {
- const uint8_t* src;
- uint8_t* dst;
- if (is_encoding) {
- src = col_const->data(1) + col_width * i;
- dst = rows_mutable_maybe_null->mutable_data(2) + row_offsets[start_row + i] +
- offset_within_row;
- } else {
- src = rows_const->data(2) + row_offsets[start_row + i] + offset_within_row;
- dst = col_mutable_maybe_null->mutable_data(1) + col_width * i;
- }
- copy_fn(dst, src, col_width);
- }
- }
-}
-
-template <bool first_varbinary_col, bool is_encoding, class COPY_FN>
-inline void KeyEncoder::EncoderVarBinary::EncodeDecodeHelper(
- uint32_t start_row, uint32_t num_rows, uint32_t varbinary_col_id,
- const KeyRowArray* rows_const, KeyRowArray* rows_mutable_maybe_null,
- const KeyColumnArray* col_const, KeyColumnArray* col_mutable_maybe_null,
- COPY_FN copy_fn) {
- // Column and rows need to be varying length
- ARROW_DCHECK(!rows_const->metadata().is_fixed_length &&
- !col_const->metadata().is_fixed_length);
-
- const uint32_t* row_offsets_for_batch = rows_const->offsets() + start_row;
- const uint32_t* col_offsets = col_const->offsets();
-
- uint32_t col_offset_next = col_offsets[0];
- for (uint32_t i = 0; i < num_rows; ++i) {
- uint32_t col_offset = col_offset_next;
- col_offset_next = col_offsets[i + 1];
-
- uint32_t row_offset = row_offsets_for_batch[i];
- const uint8_t* row = rows_const->data(2) + row_offset;
-
- uint32_t offset_within_row;
- uint32_t length;
- if (first_varbinary_col) {
- rows_const->metadata().first_varbinary_offset_and_length(row, &offset_within_row,
- &length);
- } else {
- rows_const->metadata().nth_varbinary_offset_and_length(row, varbinary_col_id,
- &offset_within_row, &length);
- }
-
- row_offset += offset_within_row;
-
- const uint8_t* src;
- uint8_t* dst;
- if (is_encoding) {
- src = col_const->data(2) + col_offset;
- dst = rows_mutable_maybe_null->mutable_data(2) + row_offset;
- } else {
- src = rows_const->data(2) + row_offset;
- dst = col_mutable_maybe_null->mutable_data(2) + col_offset;
- }
- copy_fn(dst, src, length);
- }
-}
-
-} // namespace compute
-} // namespace arrow
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+
+#include <cstdint>
+#include <memory>
+#include <vector>
+
+#include "arrow/compute/exec/util.h"
+#include "arrow/memory_pool.h"
+#include "arrow/result.h"
+#include "arrow/status.h"
+#include "arrow/util/bit_util.h"
+
+namespace arrow {
+namespace compute {
+
+class KeyColumnMetadata;
+
+/// Converts between key representation as a collection of arrays for
+/// individual columns and another representation as a single array of rows
+/// combining data from all columns into one value.
+/// This conversion is reversible.
+/// Row-oriented storage is beneficial when there is a need for random access
+/// of individual rows and at the same time all included columns are likely to
+/// be accessed together, as in the case of a hash table key.
+class KeyEncoder {
+ public:
+ struct KeyEncoderContext {
+ bool has_avx2() const {
+ return (hardware_flags & arrow::internal::CpuInfo::AVX2) > 0;
+ }
+ int64_t hardware_flags;
+ util::TempVectorStack* stack;
+ };
+
+ /// Description of a storage format of a single key column as needed
+ /// for the purpose of row encoding.
+ struct KeyColumnMetadata {
+ KeyColumnMetadata() = default;
+ KeyColumnMetadata(bool is_fixed_length_in, uint32_t fixed_length_in)
+ : is_fixed_length(is_fixed_length_in), fixed_length(fixed_length_in) {}
+    /// Whether the column stores varying-length binary, using an offsets
+    /// array to find the beginning of each value, or fixed-length binary.
+ bool is_fixed_length;
+ /// For a fixed-length binary column: number of bytes per value.
+ /// Zero has a special meaning, indicating a bit vector with one bit per value.
+ /// For a varying-length binary column: number of bytes per offset.
+ uint32_t fixed_length;
+ };
+
+ /// Description of a storage format for rows produced by encoder.
+ struct KeyRowMetadata {
+    /// Whether each row is varying-length binary, using an offsets array to find
+    /// the beginning of a row, or fixed-length binary.
+ bool is_fixed_length;
+
+    /// For a fixed-length binary row, the common size of rows in bytes,
+    /// rounded up to a multiple of the alignment.
+    ///
+    /// For a varying-length binary row, the size of all encoded fixed-length key
+    /// columns, including the lengths of varying-length columns, rounded up to a
+    /// multiple of the string alignment.
+ uint32_t fixed_length;
+
+    /// Offset within a row to the array of 32-bit end offsets of the
+    /// varbinary fields in that row.
+ /// Used only when the row is not fixed-length, zero for fixed-length row.
+ /// There are N elements for N varbinary fields.
+ /// Each element is the offset within a row of the first byte after
+ /// the corresponding varbinary field bytes in that row.
+    /// If varbinary fields begin at aligned addresses, then the end of the previous
+ /// varbinary field needs to be rounded up according to the specified alignment
+ /// to obtain the beginning of the next varbinary field.
+ /// The first varbinary field starts at offset specified by fixed_length,
+ /// which should already be aligned.
+ uint32_t varbinary_end_array_offset;
+
+ /// Fixed number of bytes per row that are used to encode null masks.
+ /// Null masks indicate for a single row which of its key columns are null.
+    /// The Nth bit in the sequence of bytes assigned to a row represents the null
+    /// information for the Nth field, according to the order in which they are encoded.
+ int null_masks_bytes_per_row;
+
+ /// Power of 2. Every row will start at the offset aligned to that number of bytes.
+ int row_alignment;
+
+ /// Power of 2. Must be no greater than row alignment.
+    /// The bytes of every non-power-of-2 binary field and of every varbinary field
+    /// will start aligned to that number of bytes.
+ int string_alignment;
+
+ /// Metadata of encoded columns in their original order.
+ std::vector<KeyColumnMetadata> column_metadatas;
+
+ /// Order in which fields are encoded.
+ std::vector<uint32_t> column_order;
+
+ /// Offsets within a row to fields in their encoding order.
+ std::vector<uint32_t> column_offsets;
+
+    /// Returns the number of padding bytes needed to round the offset up
+    /// to the nearest multiple of the alignment value.
+ /// Alignment must be a power of 2.
+ static inline uint32_t padding_for_alignment(uint32_t offset,
+ int required_alignment) {
+ ARROW_DCHECK(ARROW_POPCOUNT64(required_alignment) == 1);
+ return static_cast<uint32_t>((-static_cast<int32_t>(offset)) &
+ (required_alignment - 1));
+ }
+
+    /// Returns the padding needed to round the offset up to the beginning of the
+    /// next column, choosing the required alignment based on its data type.
+ static inline uint32_t padding_for_alignment(uint32_t offset, int string_alignment,
+ const KeyColumnMetadata& col_metadata) {
+ if (!col_metadata.is_fixed_length ||
+ ARROW_POPCOUNT64(col_metadata.fixed_length) <= 1) {
+ return 0;
+ } else {
+ return padding_for_alignment(offset, string_alignment);
+ }
+ }
+
+    /// Returns the array of end offsets of the varbinary fields within a row.
+ inline const uint32_t* varbinary_end_array(const uint8_t* row) const {
+ ARROW_DCHECK(!is_fixed_length);
+ return reinterpret_cast<const uint32_t*>(row + varbinary_end_array_offset);
+ }
+ inline uint32_t* varbinary_end_array(uint8_t* row) const {
+ ARROW_DCHECK(!is_fixed_length);
+ return reinterpret_cast<uint32_t*>(row + varbinary_end_array_offset);
+ }
+
+ /// Returns the offset within the row and length of the first varbinary field.
+ inline void first_varbinary_offset_and_length(const uint8_t* row, uint32_t* offset,
+ uint32_t* length) const {
+ ARROW_DCHECK(!is_fixed_length);
+ *offset = fixed_length;
+ *length = varbinary_end_array(row)[0] - fixed_length;
+ }
+
+    /// Returns the offset within the row and length of the second and subsequent
+    /// varbinary fields.
+ inline void nth_varbinary_offset_and_length(const uint8_t* row, int varbinary_id,
+ uint32_t* out_offset,
+ uint32_t* out_length) const {
+ ARROW_DCHECK(!is_fixed_length);
+ ARROW_DCHECK(varbinary_id > 0);
+ const uint32_t* varbinary_end = varbinary_end_array(row);
+ uint32_t offset = varbinary_end[varbinary_id - 1];
+ offset += padding_for_alignment(offset, string_alignment);
+ *out_offset = offset;
+ *out_length = varbinary_end[varbinary_id] - offset;
+ }
+
+ uint32_t encoded_field_order(uint32_t icol) const { return column_order[icol]; }
+
+ uint32_t encoded_field_offset(uint32_t icol) const { return column_offsets[icol]; }
+
+ uint32_t num_cols() const { return static_cast<uint32_t>(column_metadatas.size()); }
+
+ uint32_t num_varbinary_cols() const;
+
+ void FromColumnMetadataVector(const std::vector<KeyColumnMetadata>& cols,
+ int in_row_alignment, int in_string_alignment);
+
+ bool is_compatible(const KeyRowMetadata& other) const;
+ };
+
+ class KeyRowArray {
+ public:
+ KeyRowArray();
+ Status Init(MemoryPool* pool, const KeyRowMetadata& metadata);
+ void Clean();
+ Status AppendEmpty(uint32_t num_rows_to_append, uint32_t num_extra_bytes_to_append);
+ Status AppendSelectionFrom(const KeyRowArray& from, uint32_t num_rows_to_append,
+ const uint16_t* source_row_ids);
+ const KeyRowMetadata& metadata() const { return metadata_; }
+ int64_t length() const { return num_rows_; }
+ const uint8_t* data(int i) const {
+ ARROW_DCHECK(i >= 0 && i <= max_buffers_);
+ return buffers_[i];
+ }
+ uint8_t* mutable_data(int i) {
+ ARROW_DCHECK(i >= 0 && i <= max_buffers_);
+ return mutable_buffers_[i];
+ }
+ const uint32_t* offsets() const { return reinterpret_cast<const uint32_t*>(data(1)); }
+ uint32_t* mutable_offsets() { return reinterpret_cast<uint32_t*>(mutable_data(1)); }
+ const uint8_t* null_masks() const { return null_masks_->data(); }
+ uint8_t* null_masks() { return null_masks_->mutable_data(); }
+
+ bool has_any_nulls(const KeyEncoderContext* ctx) const;
+
+ private:
+ Status ResizeFixedLengthBuffers(int64_t num_extra_rows);
+ Status ResizeOptionalVaryingLengthBuffer(int64_t num_extra_bytes);
+
+ int64_t size_null_masks(int64_t num_rows);
+ int64_t size_offsets(int64_t num_rows);
+ int64_t size_rows_fixed_length(int64_t num_rows);
+ int64_t size_rows_varying_length(int64_t num_bytes);
+ void update_buffer_pointers();
+
+ static constexpr int64_t padding_for_vectors = 64;
+ MemoryPool* pool_;
+ KeyRowMetadata metadata_;
+ /// Buffers can only expand during lifetime and never shrink.
+ std::unique_ptr<ResizableBuffer> null_masks_;
+ std::unique_ptr<ResizableBuffer> offsets_;
+ std::unique_ptr<ResizableBuffer> rows_;
+ static constexpr int max_buffers_ = 3;
+ const uint8_t* buffers_[max_buffers_];
+ uint8_t* mutable_buffers_[max_buffers_];
+ int64_t num_rows_;
+ int64_t rows_capacity_;
+ int64_t bytes_capacity_;
+
+ // Mutable to allow lazy evaluation
+ mutable int64_t num_rows_for_has_any_nulls_;
+ mutable bool has_any_nulls_;
+ };
+
+  /// A lightweight description of an array representing one of the key columns.
+ class KeyColumnArray {
+ public:
+ KeyColumnArray() = default;
+ /// Create as a mix of buffers according to the mask from two descriptions
+ /// (Nth bit is set to 0 if Nth buffer from the first input
+ /// should be used and is set to 1 otherwise).
+ /// Metadata is inherited from the first input.
+ KeyColumnArray(const KeyColumnMetadata& metadata, const KeyColumnArray& left,
+ const KeyColumnArray& right, int buffer_id_to_replace);
+ /// Create for reading
+ KeyColumnArray(const KeyColumnMetadata& metadata, int64_t length,
+ const uint8_t* buffer0, const uint8_t* buffer1, const uint8_t* buffer2,
+ int bit_offset0 = 0, int bit_offset1 = 0);
+ /// Create for writing
+ KeyColumnArray(const KeyColumnMetadata& metadata, int64_t length, uint8_t* buffer0,
+ uint8_t* buffer1, uint8_t* buffer2, int bit_offset0 = 0,
+ int bit_offset1 = 0);
+    /// Create as a window view of the original array that is offset
+    /// by a given number of rows.
+    /// The row offset must be divisible by 8 so that bit vectors
+    /// are not split within a single byte.
+ KeyColumnArray(const KeyColumnArray& from, int64_t start, int64_t length);
+ uint8_t* mutable_data(int i) {
+ ARROW_DCHECK(i >= 0 && i <= max_buffers_);
+ return mutable_buffers_[i];
+ }
+ const uint8_t* data(int i) const {
+ ARROW_DCHECK(i >= 0 && i <= max_buffers_);
+ return buffers_[i];
+ }
+ uint32_t* mutable_offsets() { return reinterpret_cast<uint32_t*>(mutable_data(1)); }
+ const uint32_t* offsets() const { return reinterpret_cast<const uint32_t*>(data(1)); }
+ const KeyColumnMetadata& metadata() const { return metadata_; }
+ int64_t length() const { return length_; }
+ int bit_offset(int i) const {
+ ARROW_DCHECK(i >= 0 && i < max_buffers_);
+ return bit_offset_[i];
+ }
+
+ private:
+ static constexpr int max_buffers_ = 3;
+ const uint8_t* buffers_[max_buffers_];
+ uint8_t* mutable_buffers_[max_buffers_];
+ KeyColumnMetadata metadata_;
+ int64_t length_;
+ // Starting bit offset within the first byte (between 0 and 7)
+ // to be used when accessing buffers that store bit vectors.
+ int bit_offset_[max_buffers_ - 1];
+ };
+
+ void Init(const std::vector<KeyColumnMetadata>& cols, KeyEncoderContext* ctx,
+ int row_alignment, int string_alignment);
+
+ const KeyRowMetadata& row_metadata() { return row_metadata_; }
+
+  /// Find out the required sizes of all output buffers for encoding
+  /// (including varying-length buffers).
+  /// Use that information to resize the provided row array so that it can fit
+  /// the encoded data.
+ Status PrepareOutputForEncode(int64_t start_input_row, int64_t num_input_rows,
+ KeyRowArray* rows,
+ const std::vector<KeyColumnArray>& all_cols);
+
+  /// Encode a window of column-oriented data into the entire output
+  /// row-oriented storage.
+ /// The output buffers for encoding need to be correctly sized before
+ /// starting encoding.
+ void Encode(int64_t start_input_row, int64_t num_input_rows, KeyRowArray* rows,
+ const std::vector<KeyColumnArray>& cols);
+
+  /// Decode a window of row-oriented data into a corresponding
+  /// window of column-oriented storage.
+  /// The output buffers need to be correctly allocated and sized before
+  /// calling each method.
+  /// For that reason decoding is split into two functions.
+  /// The output of the first one, which processes everything except
+  /// varying-length buffers, can be used to find out the required
+  /// varying-length buffer sizes.
+ void DecodeFixedLengthBuffers(int64_t start_row_input, int64_t start_row_output,
+ int64_t num_rows, const KeyRowArray& rows,
+ std::vector<KeyColumnArray>* cols);
+
+ void DecodeVaryingLengthBuffers(int64_t start_row_input, int64_t start_row_output,
+ int64_t num_rows, const KeyRowArray& rows,
+ std::vector<KeyColumnArray>* cols);
+
+ private:
+ /// Prepare column array vectors.
+ /// Output column arrays represent a range of input column arrays
+ /// specified by starting row and number of rows.
+ /// Three vectors are generated:
+ /// - all columns
+ /// - fixed-length columns only
+ /// - varying-length columns only
+ void PrepareKeyColumnArrays(int64_t start_row, int64_t num_rows,
+ const std::vector<KeyColumnArray>& cols_in);
+
+ class TransformBoolean {
+ public:
+ static KeyColumnArray ArrayReplace(const KeyColumnArray& column,
+ const KeyColumnArray& temp);
+ static void PreEncode(const KeyColumnArray& input, KeyColumnArray* output,
+ KeyEncoderContext* ctx);
+ static void PostDecode(const KeyColumnArray& input, KeyColumnArray* output,
+ KeyEncoderContext* ctx);
+ };
+
+ class EncoderInteger {
+ public:
+ static void Encode(uint32_t offset_within_row, KeyRowArray* rows,
+ const KeyColumnArray& col, KeyEncoderContext* ctx,
+ KeyColumnArray* temp);
+ static void Decode(uint32_t start_row, uint32_t num_rows, uint32_t offset_within_row,
+ const KeyRowArray& rows, KeyColumnArray* col,
+ KeyEncoderContext* ctx, KeyColumnArray* temp);
+ static bool UsesTransform(const KeyColumnArray& column);
+ static KeyColumnArray ArrayReplace(const KeyColumnArray& column,
+ const KeyColumnArray& temp);
+ static void PreEncode(const KeyColumnArray& input, KeyColumnArray* output,
+ KeyEncoderContext* ctx);
+ static void PostDecode(const KeyColumnArray& input, KeyColumnArray* output,
+ KeyEncoderContext* ctx);
+
+ private:
+ static bool IsBoolean(const KeyColumnMetadata& metadata);
+ };
+
+ class EncoderBinary {
+ public:
+ static void Encode(uint32_t offset_within_row, KeyRowArray* rows,
+ const KeyColumnArray& col, KeyEncoderContext* ctx,
+ KeyColumnArray* temp);
+ static void Decode(uint32_t start_row, uint32_t num_rows, uint32_t offset_within_row,
+ const KeyRowArray& rows, KeyColumnArray* col,
+ KeyEncoderContext* ctx, KeyColumnArray* temp);
+ static bool IsInteger(const KeyColumnMetadata& metadata);
+
+ private:
+ template <bool is_row_fixed_length, bool is_encoding, class COPY_FN>
+ static inline void EncodeDecodeHelper(uint32_t start_row, uint32_t num_rows,
+ uint32_t offset_within_row,
+ const KeyRowArray* rows_const,
+ KeyRowArray* rows_mutable_maybe_null,
+ const KeyColumnArray* col_const,
+ KeyColumnArray* col_mutable_maybe_null,
+ COPY_FN copy_fn);
+ template <bool is_row_fixed_length>
+ static void EncodeImp(uint32_t offset_within_row, KeyRowArray* rows,
+ const KeyColumnArray& col);
+ template <bool is_row_fixed_length>
+ static void DecodeImp(uint32_t start_row, uint32_t num_rows,
+ uint32_t offset_within_row, const KeyRowArray& rows,
+ KeyColumnArray* col);
+#if defined(ARROW_HAVE_AVX2)
+ static void EncodeHelper_avx2(bool is_row_fixed_length, uint32_t offset_within_row,
+ KeyRowArray* rows, const KeyColumnArray& col);
+ static void DecodeHelper_avx2(bool is_row_fixed_length, uint32_t start_row,
+ uint32_t num_rows, uint32_t offset_within_row,
+ const KeyRowArray& rows, KeyColumnArray* col);
+ template <bool is_row_fixed_length>
+ static void EncodeImp_avx2(uint32_t offset_within_row, KeyRowArray* rows,
+ const KeyColumnArray& col);
+ template <bool is_row_fixed_length>
+ static void DecodeImp_avx2(uint32_t start_row, uint32_t num_rows,
+ uint32_t offset_within_row, const KeyRowArray& rows,
+ KeyColumnArray* col);
+#endif
+ static void ColumnMemsetNulls(uint32_t offset_within_row, KeyRowArray* rows,
+ const KeyColumnArray& col, KeyEncoderContext* ctx,
+ KeyColumnArray* temp_vector_16bit, uint8_t byte_value);
+ template <bool is_row_fixed_length, uint32_t col_width>
+ static void ColumnMemsetNullsImp(uint32_t offset_within_row, KeyRowArray* rows,
+ const KeyColumnArray& col, KeyEncoderContext* ctx,
+ KeyColumnArray* temp_vector_16bit,
+ uint8_t byte_value);
+ };
+
+ class EncoderBinaryPair {
+ public:
+ static bool CanProcessPair(const KeyColumnMetadata& col1,
+ const KeyColumnMetadata& col2) {
+ return EncoderBinary::IsInteger(col1) && EncoderBinary::IsInteger(col2);
+ }
+ static void Encode(uint32_t offset_within_row, KeyRowArray* rows,
+ const KeyColumnArray& col1, const KeyColumnArray& col2,
+ KeyEncoderContext* ctx, KeyColumnArray* temp1,
+ KeyColumnArray* temp2);
+ static void Decode(uint32_t start_row, uint32_t num_rows, uint32_t offset_within_row,
+ const KeyRowArray& rows, KeyColumnArray* col1,
+ KeyColumnArray* col2, KeyEncoderContext* ctx,
+ KeyColumnArray* temp1, KeyColumnArray* temp2);
+
+ private:
+ template <bool is_row_fixed_length, typename col1_type, typename col2_type>
+ static void EncodeImp(uint32_t num_rows_to_skip, uint32_t offset_within_row,
+ KeyRowArray* rows, const KeyColumnArray& col1,
+ const KeyColumnArray& col2);
+ template <bool is_row_fixed_length, typename col1_type, typename col2_type>
+ static void DecodeImp(uint32_t num_rows_to_skip, uint32_t start_row,
+ uint32_t num_rows, uint32_t offset_within_row,
+ const KeyRowArray& rows, KeyColumnArray* col1,
+ KeyColumnArray* col2);
+#if defined(ARROW_HAVE_AVX2)
+ static uint32_t EncodeHelper_avx2(bool is_row_fixed_length, uint32_t col_width,
+ uint32_t offset_within_row, KeyRowArray* rows,
+ const KeyColumnArray& col1,
+ const KeyColumnArray& col2);
+ static uint32_t DecodeHelper_avx2(bool is_row_fixed_length, uint32_t col_width,
+ uint32_t start_row, uint32_t num_rows,
+ uint32_t offset_within_row, const KeyRowArray& rows,
+ KeyColumnArray* col1, KeyColumnArray* col2);
+ template <bool is_row_fixed_length, uint32_t col_width>
+ static uint32_t EncodeImp_avx2(uint32_t offset_within_row, KeyRowArray* rows,
+ const KeyColumnArray& col1,
+ const KeyColumnArray& col2);
+ template <bool is_row_fixed_length, uint32_t col_width>
+ static uint32_t DecodeImp_avx2(uint32_t start_row, uint32_t num_rows,
+ uint32_t offset_within_row, const KeyRowArray& rows,
+ KeyColumnArray* col1, KeyColumnArray* col2);
+#endif
+ };
+
+ class EncoderOffsets {
+ public:
+    // To avoid repeating work, encoding computes both of these
+    // in a single pass:
+    // a) row offsets for varying-length rows,
+    // b) within each new row, the cumulative length array
+    //    of varying-length values within that row.
+ static void Encode(KeyRowArray* rows,
+ const std::vector<KeyColumnArray>& varbinary_cols,
+ KeyEncoderContext* ctx);
+ static void Decode(uint32_t start_row, uint32_t num_rows, const KeyRowArray& rows,
+ std::vector<KeyColumnArray>* varbinary_cols,
+ const std::vector<uint32_t>& varbinary_cols_base_offset,
+ KeyEncoderContext* ctx);
+
+ private:
+ static void EncodeImp(uint32_t num_rows_already_processed, KeyRowArray* rows,
+ const std::vector<KeyColumnArray>& varbinary_cols);
+#if defined(ARROW_HAVE_AVX2)
+ static uint32_t EncodeImp_avx2(KeyRowArray* rows,
+ const std::vector<KeyColumnArray>& varbinary_cols,
+ KeyColumnArray* temp_buffer_32B_per_col);
+#endif
+ };
+
+ class EncoderVarBinary {
+ public:
+ static void Encode(uint32_t varbinary_col_id, KeyRowArray* rows,
+ const KeyColumnArray& col, KeyEncoderContext* ctx);
+ static void Decode(uint32_t start_row, uint32_t num_rows, uint32_t varbinary_col_id,
+ const KeyRowArray& rows, KeyColumnArray* col,
+ KeyEncoderContext* ctx);
+
+ private:
+ template <bool first_varbinary_col, bool is_encoding, class COPY_FN>
+ static inline void EncodeDecodeHelper(uint32_t start_row, uint32_t num_rows,
+ uint32_t varbinary_col_id,
+ const KeyRowArray* rows_const,
+ KeyRowArray* rows_mutable_maybe_null,
+ const KeyColumnArray* col_const,
+ KeyColumnArray* col_mutable_maybe_null,
+ COPY_FN copy_fn);
+ template <bool first_varbinary_col>
+ static void EncodeImp(uint32_t varbinary_col_id, KeyRowArray* rows,
+ const KeyColumnArray& col);
+ template <bool first_varbinary_col>
+ static void DecodeImp(uint32_t start_row, uint32_t num_rows,
+ uint32_t varbinary_col_id, const KeyRowArray& rows,
+ KeyColumnArray* col);
+#if defined(ARROW_HAVE_AVX2)
+ static void EncodeHelper_avx2(uint32_t varbinary_col_id, KeyRowArray* rows,
+ const KeyColumnArray& col);
+ static void DecodeHelper_avx2(uint32_t start_row, uint32_t num_rows,
+ uint32_t varbinary_col_id, const KeyRowArray& rows,
+ KeyColumnArray* col);
+ template <bool first_varbinary_col>
+ static void EncodeImp_avx2(uint32_t varbinary_col_id, KeyRowArray* rows,
+ const KeyColumnArray& col);
+ template <bool first_varbinary_col>
+ static void DecodeImp_avx2(uint32_t start_row, uint32_t num_rows,
+ uint32_t varbinary_col_id, const KeyRowArray& rows,
+ KeyColumnArray* col);
+#endif
+ };
+
+ class EncoderNulls {
+ public:
+ static void Encode(KeyRowArray* rows, const std::vector<KeyColumnArray>& cols,
+ KeyEncoderContext* ctx, KeyColumnArray* temp_vector_16bit);
+ static void Decode(uint32_t start_row, uint32_t num_rows, const KeyRowArray& rows,
+ std::vector<KeyColumnArray>* cols);
+ };
+
+ KeyEncoderContext* ctx_;
+
+ // Data initialized once, based on data types of key columns
+ KeyRowMetadata row_metadata_;
+
+ // Data initialized for each input batch.
+ // All elements are ordered according to the order of encoded fields in a row.
+ std::vector<KeyColumnArray> batch_all_cols_;
+ std::vector<KeyColumnArray> batch_varbinary_cols_;
+ std::vector<uint32_t> batch_varbinary_cols_base_offsets_;
+};
+
+template <bool is_row_fixed_length, bool is_encoding, class COPY_FN>
+inline void KeyEncoder::EncoderBinary::EncodeDecodeHelper(
+ uint32_t start_row, uint32_t num_rows, uint32_t offset_within_row,
+ const KeyRowArray* rows_const, KeyRowArray* rows_mutable_maybe_null,
+ const KeyColumnArray* col_const, KeyColumnArray* col_mutable_maybe_null,
+ COPY_FN copy_fn) {
+ ARROW_DCHECK(col_const && col_const->metadata().is_fixed_length);
+ uint32_t col_width = col_const->metadata().fixed_length;
+
+ if (is_row_fixed_length) {
+ uint32_t row_width = rows_const->metadata().fixed_length;
+ for (uint32_t i = 0; i < num_rows; ++i) {
+ const uint8_t* src;
+ uint8_t* dst;
+ if (is_encoding) {
+ src = col_const->data(1) + col_width * i;
+ dst = rows_mutable_maybe_null->mutable_data(1) + row_width * (start_row + i) +
+ offset_within_row;
+ } else {
+ src = rows_const->data(1) + row_width * (start_row + i) + offset_within_row;
+ dst = col_mutable_maybe_null->mutable_data(1) + col_width * i;
+ }
+ copy_fn(dst, src, col_width);
+ }
+ } else {
+ const uint32_t* row_offsets = rows_const->offsets();
+ for (uint32_t i = 0; i < num_rows; ++i) {
+ const uint8_t* src;
+ uint8_t* dst;
+ if (is_encoding) {
+ src = col_const->data(1) + col_width * i;
+ dst = rows_mutable_maybe_null->mutable_data(2) + row_offsets[start_row + i] +
+ offset_within_row;
+ } else {
+ src = rows_const->data(2) + row_offsets[start_row + i] + offset_within_row;
+ dst = col_mutable_maybe_null->mutable_data(1) + col_width * i;
+ }
+ copy_fn(dst, src, col_width);
+ }
+ }
+}
+
+template <bool first_varbinary_col, bool is_encoding, class COPY_FN>
+inline void KeyEncoder::EncoderVarBinary::EncodeDecodeHelper(
+ uint32_t start_row, uint32_t num_rows, uint32_t varbinary_col_id,
+ const KeyRowArray* rows_const, KeyRowArray* rows_mutable_maybe_null,
+ const KeyColumnArray* col_const, KeyColumnArray* col_mutable_maybe_null,
+ COPY_FN copy_fn) {
+  // Columns and rows need to be varying-length.
+ ARROW_DCHECK(!rows_const->metadata().is_fixed_length &&
+ !col_const->metadata().is_fixed_length);
+
+ const uint32_t* row_offsets_for_batch = rows_const->offsets() + start_row;
+ const uint32_t* col_offsets = col_const->offsets();
+
+ uint32_t col_offset_next = col_offsets[0];
+ for (uint32_t i = 0; i < num_rows; ++i) {
+ uint32_t col_offset = col_offset_next;
+ col_offset_next = col_offsets[i + 1];
+
+ uint32_t row_offset = row_offsets_for_batch[i];
+ const uint8_t* row = rows_const->data(2) + row_offset;
+
+ uint32_t offset_within_row;
+ uint32_t length;
+ if (first_varbinary_col) {
+ rows_const->metadata().first_varbinary_offset_and_length(row, &offset_within_row,
+ &length);
+ } else {
+ rows_const->metadata().nth_varbinary_offset_and_length(row, varbinary_col_id,
+ &offset_within_row, &length);
+ }
+
+ row_offset += offset_within_row;
+
+ const uint8_t* src;
+ uint8_t* dst;
+ if (is_encoding) {
+ src = col_const->data(2) + col_offset;
+ dst = rows_mutable_maybe_null->mutable_data(2) + row_offset;
+ } else {
+ src = rows_const->data(2) + row_offset;
+ dst = col_mutable_maybe_null->mutable_data(2) + col_offset;
+ }
+ copy_fn(dst, src, length);
+ }
+}
+
+} // namespace compute
+} // namespace arrow
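
The padding_for_alignment helper declared above relies on a two's-complement
identity: for a power-of-two alignment, (-offset) & (alignment - 1) is exactly
the number of bytes from offset up to the next aligned boundary (zero when the
offset is already aligned). A minimal standalone sketch of that formula follows;
it is independent of the Arrow headers, and the main function with its sample
values is illustrative only:

#include <cassert>
#include <cstdint>
#include <cstdio>

// Same formula as KeyEncoder::KeyRowMetadata::padding_for_alignment:
// negating the offset in two's complement and masking with (alignment - 1)
// yields the distance to the next aligned boundary.
static uint32_t padding_for_alignment(uint32_t offset, int required_alignment) {
  return static_cast<uint32_t>((-static_cast<int32_t>(offset)) &
                               (required_alignment - 1));
}

int main() {
  assert(padding_for_alignment(0, 8) == 0);   // already aligned
  assert(padding_for_alignment(1, 8) == 7);   // 1 rounds up to 8
  assert(padding_for_alignment(13, 4) == 3);  // 13 rounds up to 16
  std::printf("padding checks passed\n");
  return 0;
}
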
diff --git a/contrib/libs/apache/arrow/cpp/src/arrow/compute/exec/key_hash.cc b/contrib/libs/apache/arrow/cpp/src/arrow/compute/exec/key_hash.cc
index db69ac37d1d..081411e708e 100644
--- a/contrib/libs/apache/arrow/cpp/src/arrow/compute/exec/key_hash.cc
+++ b/contrib/libs/apache/arrow/cpp/src/arrow/compute/exec/key_hash.cc
@@ -1,238 +1,238 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-#include "arrow/compute/exec/key_hash.h"
-
-#include <memory.h>
-
-#include <algorithm>
-#include <cstdint>
-
-#include "arrow/compute/exec/util.h"
-
-namespace arrow {
-namespace compute {
-
-inline uint32_t Hashing::avalanche_helper(uint32_t acc) {
- acc ^= (acc >> 15);
- acc *= PRIME32_2;
- acc ^= (acc >> 13);
- acc *= PRIME32_3;
- acc ^= (acc >> 16);
- return acc;
-}
-
-void Hashing::avalanche(int64_t hardware_flags, uint32_t num_keys, uint32_t* hashes) {
- uint32_t processed = 0;
-#if defined(ARROW_HAVE_AVX2)
- if (hardware_flags & arrow::internal::CpuInfo::AVX2) {
- int tail = num_keys % 8;
- avalanche_avx2(num_keys - tail, hashes);
- processed = num_keys - tail;
- }
-#endif
- for (uint32_t i = processed; i < num_keys; ++i) {
- hashes[i] = avalanche_helper(hashes[i]);
- }
-}
-
-inline uint32_t Hashing::combine_accumulators(const uint32_t acc1, const uint32_t acc2,
- const uint32_t acc3, const uint32_t acc4) {
- return ROTL(acc1, 1) + ROTL(acc2, 7) + ROTL(acc3, 12) + ROTL(acc4, 18);
-}
-
-inline void Hashing::helper_8B(uint32_t key_length, uint32_t num_keys,
- const uint8_t* keys, uint32_t* hashes) {
- ARROW_DCHECK(key_length <= 8);
- uint64_t mask = ~0ULL >> (8 * (8 - key_length));
- constexpr uint64_t multiplier = 14029467366897019727ULL;
- uint32_t offset = 0;
- for (uint32_t ikey = 0; ikey < num_keys; ++ikey) {
- uint64_t x = *reinterpret_cast<const uint64_t*>(keys + offset);
- x &= mask;
- hashes[ikey] = static_cast<uint32_t>(BYTESWAP(x * multiplier));
- offset += key_length;
- }
-}
-
-inline void Hashing::helper_stripe(uint32_t offset, uint64_t mask_hi, const uint8_t* keys,
- uint32_t& acc1, uint32_t& acc2, uint32_t& acc3,
- uint32_t& acc4) {
- uint64_t v1 = reinterpret_cast<const uint64_t*>(keys + offset)[0];
- // We do not need to mask v1, because we will not process a stripe
- // unless at least 9 bytes of it are part of the key.
- uint64_t v2 = reinterpret_cast<const uint64_t*>(keys + offset)[1];
- v2 &= mask_hi;
- uint32_t x1 = static_cast<uint32_t>(v1);
- uint32_t x2 = static_cast<uint32_t>(v1 >> 32);
- uint32_t x3 = static_cast<uint32_t>(v2);
- uint32_t x4 = static_cast<uint32_t>(v2 >> 32);
- acc1 += x1 * PRIME32_2;
- acc1 = ROTL(acc1, 13) * PRIME32_1;
- acc2 += x2 * PRIME32_2;
- acc2 = ROTL(acc2, 13) * PRIME32_1;
- acc3 += x3 * PRIME32_2;
- acc3 = ROTL(acc3, 13) * PRIME32_1;
- acc4 += x4 * PRIME32_2;
- acc4 = ROTL(acc4, 13) * PRIME32_1;
-}
-
-void Hashing::helper_stripes(int64_t hardware_flags, uint32_t num_keys,
- uint32_t key_length, const uint8_t* keys, uint32_t* hash) {
- uint32_t processed = 0;
-#if defined(ARROW_HAVE_AVX2)
- if (hardware_flags & arrow::internal::CpuInfo::AVX2) {
- int tail = num_keys % 2;
- helper_stripes_avx2(num_keys - tail, key_length, keys, hash);
- processed = num_keys - tail;
- }
-#endif
-
-  // If the length modulo the stripe length is less than or equal to 8, round down to
-  // the nearest 16B boundary (the 8B ending is processed separately), otherwise round up.
- const uint32_t num_stripes = (key_length + 7) / 16;
- uint64_t mask_hi =
- ~0ULL >>
- (8 * ((num_stripes * 16 > key_length) ? num_stripes * 16 - key_length : 0));
-
- for (uint32_t i = processed; i < num_keys; ++i) {
- uint32_t acc1, acc2, acc3, acc4;
- acc1 = static_cast<uint32_t>(
- (static_cast<uint64_t>(PRIME32_1) + static_cast<uint64_t>(PRIME32_2)) &
- 0xffffffff);
- acc2 = PRIME32_2;
- acc3 = 0;
- acc4 = static_cast<uint32_t>(-static_cast<int32_t>(PRIME32_1));
- uint32_t offset = i * key_length;
- for (uint32_t stripe = 0; stripe < num_stripes - 1; ++stripe) {
- helper_stripe(offset, ~0ULL, keys, acc1, acc2, acc3, acc4);
- offset += 16;
- }
- helper_stripe(offset, mask_hi, keys, acc1, acc2, acc3, acc4);
- hash[i] = combine_accumulators(acc1, acc2, acc3, acc4);
- }
-}
-
-inline uint32_t Hashing::helper_tail(uint32_t offset, uint64_t mask, const uint8_t* keys,
- uint32_t acc) {
- uint64_t v = reinterpret_cast<const uint64_t*>(keys + offset)[0];
- v &= mask;
- uint32_t x1 = static_cast<uint32_t>(v);
- uint32_t x2 = static_cast<uint32_t>(v >> 32);
- acc += x1 * PRIME32_3;
- acc = ROTL(acc, 17) * PRIME32_4;
- acc += x2 * PRIME32_3;
- acc = ROTL(acc, 17) * PRIME32_4;
- return acc;
-}
-
-void Hashing::helper_tails(int64_t hardware_flags, uint32_t num_keys, uint32_t key_length,
- const uint8_t* keys, uint32_t* hash) {
- uint32_t processed = 0;
-#if defined(ARROW_HAVE_AVX2)
- if (hardware_flags & arrow::internal::CpuInfo::AVX2) {
- int tail = num_keys % 8;
- helper_tails_avx2(num_keys - tail, key_length, keys, hash);
- processed = num_keys - tail;
- }
-#endif
- uint64_t mask = ~0ULL >> (8 * (((key_length % 8) == 0) ? 0 : 8 - (key_length % 8)));
- uint32_t offset = key_length / 16 * 16;
- offset += processed * key_length;
- for (uint32_t i = processed; i < num_keys; ++i) {
- hash[i] = helper_tail(offset, mask, keys, hash[i]);
- offset += key_length;
- }
-}
-
-void Hashing::hash_fixed(int64_t hardware_flags, uint32_t num_keys, uint32_t length_key,
- const uint8_t* keys, uint32_t* hashes) {
- ARROW_DCHECK(length_key > 0);
-
- if (length_key <= 8) {
- helper_8B(length_key, num_keys, keys, hashes);
- return;
- }
- helper_stripes(hardware_flags, num_keys, length_key, keys, hashes);
- if ((length_key % 16) > 0 && (length_key % 16) <= 8) {
- helper_tails(hardware_flags, num_keys, length_key, keys, hashes);
- }
- avalanche(hardware_flags, num_keys, hashes);
-}
-
-void Hashing::hash_varlen_helper(uint32_t length, const uint8_t* key, uint32_t* acc) {
- for (uint32_t i = 0; i < length / 16; ++i) {
- for (int j = 0; j < 4; ++j) {
- uint32_t lane = reinterpret_cast<const uint32_t*>(key)[i * 4 + j];
- acc[j] += (lane * PRIME32_2);
- acc[j] = ROTL(acc[j], 13);
- acc[j] *= PRIME32_1;
- }
- }
-
- int tail = length % 16;
- if (tail) {
- uint64_t last_stripe[2];
- const uint64_t* last_stripe_base =
- reinterpret_cast<const uint64_t*>(key + length - (length % 16));
- last_stripe[0] = last_stripe_base[0];
- uint64_t mask = ~0ULL >> (8 * ((length + 7) / 8 * 8 - length));
- if (tail <= 8) {
- last_stripe[1] = 0;
- last_stripe[0] &= mask;
- } else {
- last_stripe[1] = last_stripe_base[1];
- last_stripe[1] &= mask;
- }
- for (int j = 0; j < 4; ++j) {
- uint32_t lane = reinterpret_cast<const uint32_t*>(last_stripe)[j];
- acc[j] += (lane * PRIME32_2);
- acc[j] = ROTL(acc[j], 13);
- acc[j] *= PRIME32_1;
- }
- }
-}
-
-void Hashing::hash_varlen(int64_t hardware_flags, uint32_t num_rows,
- const uint32_t* offsets, const uint8_t* concatenated_keys,
- uint32_t* temp_buffer, // Needs to hold 4 x 32-bit per row
- uint32_t* hashes) {
-#if defined(ARROW_HAVE_AVX2)
- if (hardware_flags & arrow::internal::CpuInfo::AVX2) {
- hash_varlen_avx2(num_rows, offsets, concatenated_keys, temp_buffer, hashes);
- } else {
-#endif
- for (uint32_t i = 0; i < num_rows; ++i) {
- uint32_t acc[4];
- acc[0] = static_cast<uint32_t>(
- (static_cast<uint64_t>(PRIME32_1) + static_cast<uint64_t>(PRIME32_2)) &
- 0xffffffff);
- acc[1] = PRIME32_2;
- acc[2] = 0;
- acc[3] = static_cast<uint32_t>(-static_cast<int32_t>(PRIME32_1));
- uint32_t length = offsets[i + 1] - offsets[i];
- hash_varlen_helper(length, concatenated_keys + offsets[i], acc);
- hashes[i] = combine_accumulators(acc[0], acc[1], acc[2], acc[3]);
- }
- avalanche(hardware_flags, num_rows, hashes);
-#if defined(ARROW_HAVE_AVX2)
- }
-#endif
-}
-
-} // namespace compute
-} // namespace arrow
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "arrow/compute/exec/key_hash.h"
+
+#include <memory.h>
+
+#include <algorithm>
+#include <cstdint>
+
+#include "arrow/compute/exec/util.h"
+
+namespace arrow {
+namespace compute {
+
+inline uint32_t Hashing::avalanche_helper(uint32_t acc) {
+ acc ^= (acc >> 15);
+ acc *= PRIME32_2;
+ acc ^= (acc >> 13);
+ acc *= PRIME32_3;
+ acc ^= (acc >> 16);
+ return acc;
+}
+
+void Hashing::avalanche(int64_t hardware_flags, uint32_t num_keys, uint32_t* hashes) {
+ uint32_t processed = 0;
+#if defined(ARROW_HAVE_AVX2)
+ if (hardware_flags & arrow::internal::CpuInfo::AVX2) {
+ int tail = num_keys % 8;
+ avalanche_avx2(num_keys - tail, hashes);
+ processed = num_keys - tail;
+ }
+#endif
+ for (uint32_t i = processed; i < num_keys; ++i) {
+ hashes[i] = avalanche_helper(hashes[i]);
+ }
+}
+
+inline uint32_t Hashing::combine_accumulators(const uint32_t acc1, const uint32_t acc2,
+ const uint32_t acc3, const uint32_t acc4) {
+ return ROTL(acc1, 1) + ROTL(acc2, 7) + ROTL(acc3, 12) + ROTL(acc4, 18);
+}
+
+inline void Hashing::helper_8B(uint32_t key_length, uint32_t num_keys,
+ const uint8_t* keys, uint32_t* hashes) {
+ ARROW_DCHECK(key_length <= 8);
+ uint64_t mask = ~0ULL >> (8 * (8 - key_length));
+ constexpr uint64_t multiplier = 14029467366897019727ULL;
+ uint32_t offset = 0;
+ for (uint32_t ikey = 0; ikey < num_keys; ++ikey) {
+ uint64_t x = *reinterpret_cast<const uint64_t*>(keys + offset);
+ x &= mask;
+ hashes[ikey] = static_cast<uint32_t>(BYTESWAP(x * multiplier));
+ offset += key_length;
+ }
+}
+
+inline void Hashing::helper_stripe(uint32_t offset, uint64_t mask_hi, const uint8_t* keys,
+ uint32_t& acc1, uint32_t& acc2, uint32_t& acc3,
+ uint32_t& acc4) {
+ uint64_t v1 = reinterpret_cast<const uint64_t*>(keys + offset)[0];
+ // We do not need to mask v1, because we will not process a stripe
+ // unless at least 9 bytes of it are part of the key.
+ uint64_t v2 = reinterpret_cast<const uint64_t*>(keys + offset)[1];
+ v2 &= mask_hi;
+ uint32_t x1 = static_cast<uint32_t>(v1);
+ uint32_t x2 = static_cast<uint32_t>(v1 >> 32);
+ uint32_t x3 = static_cast<uint32_t>(v2);
+ uint32_t x4 = static_cast<uint32_t>(v2 >> 32);
+ acc1 += x1 * PRIME32_2;
+ acc1 = ROTL(acc1, 13) * PRIME32_1;
+ acc2 += x2 * PRIME32_2;
+ acc2 = ROTL(acc2, 13) * PRIME32_1;
+ acc3 += x3 * PRIME32_2;
+ acc3 = ROTL(acc3, 13) * PRIME32_1;
+ acc4 += x4 * PRIME32_2;
+ acc4 = ROTL(acc4, 13) * PRIME32_1;
+}
+
+void Hashing::helper_stripes(int64_t hardware_flags, uint32_t num_keys,
+ uint32_t key_length, const uint8_t* keys, uint32_t* hash) {
+ uint32_t processed = 0;
+#if defined(ARROW_HAVE_AVX2)
+ if (hardware_flags & arrow::internal::CpuInfo::AVX2) {
+ int tail = num_keys % 2;
+ helper_stripes_avx2(num_keys - tail, key_length, keys, hash);
+ processed = num_keys - tail;
+ }
+#endif
+
+  // If the length modulo the stripe length is less than or equal to 8, round down to
+  // the nearest 16B boundary (the 8B ending is processed separately), otherwise round up.
+ const uint32_t num_stripes = (key_length + 7) / 16;
+ uint64_t mask_hi =
+ ~0ULL >>
+ (8 * ((num_stripes * 16 > key_length) ? num_stripes * 16 - key_length : 0));
+
+ for (uint32_t i = processed; i < num_keys; ++i) {
+ uint32_t acc1, acc2, acc3, acc4;
+ acc1 = static_cast<uint32_t>(
+ (static_cast<uint64_t>(PRIME32_1) + static_cast<uint64_t>(PRIME32_2)) &
+ 0xffffffff);
+ acc2 = PRIME32_2;
+ acc3 = 0;
+ acc4 = static_cast<uint32_t>(-static_cast<int32_t>(PRIME32_1));
+ uint32_t offset = i * key_length;
+ for (uint32_t stripe = 0; stripe < num_stripes - 1; ++stripe) {
+ helper_stripe(offset, ~0ULL, keys, acc1, acc2, acc3, acc4);
+ offset += 16;
+ }
+ helper_stripe(offset, mask_hi, keys, acc1, acc2, acc3, acc4);
+ hash[i] = combine_accumulators(acc1, acc2, acc3, acc4);
+ }
+}
+
+inline uint32_t Hashing::helper_tail(uint32_t offset, uint64_t mask, const uint8_t* keys,
+ uint32_t acc) {
+ uint64_t v = reinterpret_cast<const uint64_t*>(keys + offset)[0];
+ v &= mask;
+ uint32_t x1 = static_cast<uint32_t>(v);
+ uint32_t x2 = static_cast<uint32_t>(v >> 32);
+ acc += x1 * PRIME32_3;
+ acc = ROTL(acc, 17) * PRIME32_4;
+ acc += x2 * PRIME32_3;
+ acc = ROTL(acc, 17) * PRIME32_4;
+ return acc;
+}
+
+void Hashing::helper_tails(int64_t hardware_flags, uint32_t num_keys, uint32_t key_length,
+ const uint8_t* keys, uint32_t* hash) {
+ uint32_t processed = 0;
+#if defined(ARROW_HAVE_AVX2)
+ if (hardware_flags & arrow::internal::CpuInfo::AVX2) {
+ int tail = num_keys % 8;
+ helper_tails_avx2(num_keys - tail, key_length, keys, hash);
+ processed = num_keys - tail;
+ }
+#endif
+ uint64_t mask = ~0ULL >> (8 * (((key_length % 8) == 0) ? 0 : 8 - (key_length % 8)));
+ uint32_t offset = key_length / 16 * 16;
+ offset += processed * key_length;
+ for (uint32_t i = processed; i < num_keys; ++i) {
+ hash[i] = helper_tail(offset, mask, keys, hash[i]);
+ offset += key_length;
+ }
+}
+
+void Hashing::hash_fixed(int64_t hardware_flags, uint32_t num_keys, uint32_t length_key,
+ const uint8_t* keys, uint32_t* hashes) {
+ ARROW_DCHECK(length_key > 0);
+
+ if (length_key <= 8) {
+ helper_8B(length_key, num_keys, keys, hashes);
+ return;
+ }
+ helper_stripes(hardware_flags, num_keys, length_key, keys, hashes);
+ if ((length_key % 16) > 0 && (length_key % 16) <= 8) {
+ helper_tails(hardware_flags, num_keys, length_key, keys, hashes);
+ }
+ avalanche(hardware_flags, num_keys, hashes);
+}
+
+void Hashing::hash_varlen_helper(uint32_t length, const uint8_t* key, uint32_t* acc) {
+ for (uint32_t i = 0; i < length / 16; ++i) {
+ for (int j = 0; j < 4; ++j) {
+ uint32_t lane = reinterpret_cast<const uint32_t*>(key)[i * 4 + j];
+ acc[j] += (lane * PRIME32_2);
+ acc[j] = ROTL(acc[j], 13);
+ acc[j] *= PRIME32_1;
+ }
+ }
+
+ int tail = length % 16;
+ if (tail) {
+ uint64_t last_stripe[2];
+ const uint64_t* last_stripe_base =
+ reinterpret_cast<const uint64_t*>(key + length - (length % 16));
+ last_stripe[0] = last_stripe_base[0];
+ uint64_t mask = ~0ULL >> (8 * ((length + 7) / 8 * 8 - length));
+ if (tail <= 8) {
+ last_stripe[1] = 0;
+ last_stripe[0] &= mask;
+ } else {
+ last_stripe[1] = last_stripe_base[1];
+ last_stripe[1] &= mask;
+ }
+ for (int j = 0; j < 4; ++j) {
+ uint32_t lane = reinterpret_cast<const uint32_t*>(last_stripe)[j];
+ acc[j] += (lane * PRIME32_2);
+ acc[j] = ROTL(acc[j], 13);
+ acc[j] *= PRIME32_1;
+ }
+ }
+}
+
+void Hashing::hash_varlen(int64_t hardware_flags, uint32_t num_rows,
+ const uint32_t* offsets, const uint8_t* concatenated_keys,
+ uint32_t* temp_buffer, // Needs to hold 4 x 32-bit per row
+ uint32_t* hashes) {
+#if defined(ARROW_HAVE_AVX2)
+ if (hardware_flags & arrow::internal::CpuInfo::AVX2) {
+ hash_varlen_avx2(num_rows, offsets, concatenated_keys, temp_buffer, hashes);
+ } else {
+#endif
+ for (uint32_t i = 0; i < num_rows; ++i) {
+ uint32_t acc[4];
+ acc[0] = static_cast<uint32_t>(
+ (static_cast<uint64_t>(PRIME32_1) + static_cast<uint64_t>(PRIME32_2)) &
+ 0xffffffff);
+ acc[1] = PRIME32_2;
+ acc[2] = 0;
+ acc[3] = static_cast<uint32_t>(-static_cast<int32_t>(PRIME32_1));
+ uint32_t length = offsets[i + 1] - offsets[i];
+ hash_varlen_helper(length, concatenated_keys + offsets[i], acc);
+ hashes[i] = combine_accumulators(acc[0], acc[1], acc[2], acc[3]);
+ }
+ avalanche(hardware_flags, num_rows, hashes);
+#if defined(ARROW_HAVE_AVX2)
+ }
+#endif
+}
+
+} // namespace compute
+} // namespace arrow
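
Hashing::hash_fixed above dispatches to the AVX2 helpers only when the AVX2 bit
is set in hardware_flags; passing 0 keeps execution on the scalar stripe and
tail code shown in this file. A hedged usage sketch follows; the key width,
sample bytes, and main function are illustrative assumptions, not part of the
library:

#include <cstdint>
#include <cstdio>
#include <vector>

#include "arrow/compute/exec/key_hash.h"

int main() {
  constexpr uint32_t kNumKeys = 4;
  constexpr uint32_t kKeyLength = 16;  // > 8 bytes, so the stripe path runs
  std::vector<uint8_t> keys(kNumKeys * kKeyLength);
  for (size_t i = 0; i < keys.size(); ++i) keys[i] = static_cast<uint8_t>(i);

  std::vector<uint32_t> hashes(kNumKeys);
  // hardware_flags = 0 skips the ARROW_HAVE_AVX2 branches.
  arrow::compute::Hashing::hash_fixed(/*hardware_flags=*/0, kNumKeys, kKeyLength,
                                      keys.data(), hashes.data());
  for (uint32_t i = 0; i < kNumKeys; ++i) {
    std::printf("hash[%u] = 0x%08x\n", i, hashes[i]);
  }
  return 0;
}
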
diff --git a/contrib/libs/apache/arrow/cpp/src/arrow/compute/exec/key_hash.h b/contrib/libs/apache/arrow/cpp/src/arrow/compute/exec/key_hash.h
index 4d36c9aa585..7f8ab5185cc 100644
--- a/contrib/libs/apache/arrow/cpp/src/arrow/compute/exec/key_hash.h
+++ b/contrib/libs/apache/arrow/cpp/src/arrow/compute/exec/key_hash.h
@@ -1,94 +1,94 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-#pragma once
-
-#if defined(ARROW_HAVE_AVX2)
-#include <immintrin.h>
-#endif
-
-#include <cstdint>
-
-#include "arrow/compute/exec/util.h"
-
-namespace arrow {
-namespace compute {
-
-// Implementations are based on the xxh3 32-bit algorithm description from:
-// https://github.com/Cyan4973/xxHash/blob/dev/doc/xxhash_spec.md
-//
-class Hashing {
- public:
- static void hash_fixed(int64_t hardware_flags, uint32_t num_keys, uint32_t length_key,
- const uint8_t* keys, uint32_t* hashes);
-
- static void hash_varlen(int64_t hardware_flags, uint32_t num_rows,
- const uint32_t* offsets, const uint8_t* concatenated_keys,
- uint32_t* temp_buffer, // Needs to hold 4 x 32-bit per row
- uint32_t* hashes);
-
- private:
- static const uint32_t PRIME32_1 = 0x9E3779B1; // 0b10011110001101110111100110110001
- static const uint32_t PRIME32_2 = 0x85EBCA77; // 0b10000101111010111100101001110111
- static const uint32_t PRIME32_3 = 0xC2B2AE3D; // 0b11000010101100101010111000111101
- static const uint32_t PRIME32_4 = 0x27D4EB2F; // 0b00100111110101001110101100101111
- static const uint32_t PRIME32_5 = 0x165667B1; // 0b00010110010101100110011110110001
-
- // Avalanche
- static inline uint32_t avalanche_helper(uint32_t acc);
-#if defined(ARROW_HAVE_AVX2)
- static void avalanche_avx2(uint32_t num_keys, uint32_t* hashes);
-#endif
- static void avalanche(int64_t hardware_flags, uint32_t num_keys, uint32_t* hashes);
-
- // Accumulator combine
- static inline uint32_t combine_accumulators(const uint32_t acc1, const uint32_t acc2,
- const uint32_t acc3, const uint32_t acc4);
-#if defined(ARROW_HAVE_AVX2)
- static inline uint64_t combine_accumulators_avx2(__m256i acc);
-#endif
-
- // Helpers
- static inline void helper_8B(uint32_t key_length, uint32_t num_keys,
- const uint8_t* keys, uint32_t* hashes);
- static inline void helper_stripe(uint32_t offset, uint64_t mask_hi, const uint8_t* keys,
- uint32_t& acc1, uint32_t& acc2, uint32_t& acc3,
- uint32_t& acc4);
- static inline uint32_t helper_tail(uint32_t offset, uint64_t mask, const uint8_t* keys,
- uint32_t acc);
-#if defined(ARROW_HAVE_AVX2)
- static void helper_stripes_avx2(uint32_t num_keys, uint32_t key_length,
- const uint8_t* keys, uint32_t* hash);
- static void helper_tails_avx2(uint32_t num_keys, uint32_t key_length,
- const uint8_t* keys, uint32_t* hash);
-#endif
- static void helper_stripes(int64_t hardware_flags, uint32_t num_keys,
- uint32_t key_length, const uint8_t* keys, uint32_t* hash);
- static void helper_tails(int64_t hardware_flags, uint32_t num_keys, uint32_t key_length,
- const uint8_t* keys, uint32_t* hash);
-
- static void hash_varlen_helper(uint32_t length, const uint8_t* key, uint32_t* acc);
-#if defined(ARROW_HAVE_AVX2)
- static void hash_varlen_avx2(uint32_t num_rows, const uint32_t* offsets,
- const uint8_t* concatenated_keys,
- uint32_t* temp_buffer, // Needs to hold 4 x 32-bit per row
- uint32_t* hashes);
-#endif
-};
-
-} // namespace compute
-} // namespace arrow
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+
+#if defined(ARROW_HAVE_AVX2)
+#include <immintrin.h>
+#endif
+
+#include <cstdint>
+
+#include "arrow/compute/exec/util.h"
+
+namespace arrow {
+namespace compute {
+
+// Implementations are based on the xxh3 32-bit algorithm description from:
+// https://github.com/Cyan4973/xxHash/blob/dev/doc/xxhash_spec.md
+//
+class Hashing {
+ public:
+ static void hash_fixed(int64_t hardware_flags, uint32_t num_keys, uint32_t length_key,
+ const uint8_t* keys, uint32_t* hashes);
+
+ static void hash_varlen(int64_t hardware_flags, uint32_t num_rows,
+ const uint32_t* offsets, const uint8_t* concatenated_keys,
+ uint32_t* temp_buffer, // Needs to hold 4 x 32-bit per row
+ uint32_t* hashes);
+
+ private:
+ static const uint32_t PRIME32_1 = 0x9E3779B1; // 0b10011110001101110111100110110001
+ static const uint32_t PRIME32_2 = 0x85EBCA77; // 0b10000101111010111100101001110111
+ static const uint32_t PRIME32_3 = 0xC2B2AE3D; // 0b11000010101100101010111000111101
+ static const uint32_t PRIME32_4 = 0x27D4EB2F; // 0b00100111110101001110101100101111
+ static const uint32_t PRIME32_5 = 0x165667B1; // 0b00010110010101100110011110110001
+
+ // Avalanche
+ static inline uint32_t avalanche_helper(uint32_t acc);
+#if defined(ARROW_HAVE_AVX2)
+ static void avalanche_avx2(uint32_t num_keys, uint32_t* hashes);
+#endif
+ static void avalanche(int64_t hardware_flags, uint32_t num_keys, uint32_t* hashes);
+
+ // Accumulator combine
+ static inline uint32_t combine_accumulators(const uint32_t acc1, const uint32_t acc2,
+ const uint32_t acc3, const uint32_t acc4);
+#if defined(ARROW_HAVE_AVX2)
+ static inline uint64_t combine_accumulators_avx2(__m256i acc);
+#endif
+
+ // Helpers
+ static inline void helper_8B(uint32_t key_length, uint32_t num_keys,
+ const uint8_t* keys, uint32_t* hashes);
+ static inline void helper_stripe(uint32_t offset, uint64_t mask_hi, const uint8_t* keys,
+ uint32_t& acc1, uint32_t& acc2, uint32_t& acc3,
+ uint32_t& acc4);
+ static inline uint32_t helper_tail(uint32_t offset, uint64_t mask, const uint8_t* keys,
+ uint32_t acc);
+#if defined(ARROW_HAVE_AVX2)
+ static void helper_stripes_avx2(uint32_t num_keys, uint32_t key_length,
+ const uint8_t* keys, uint32_t* hash);
+ static void helper_tails_avx2(uint32_t num_keys, uint32_t key_length,
+ const uint8_t* keys, uint32_t* hash);
+#endif
+ static void helper_stripes(int64_t hardware_flags, uint32_t num_keys,
+ uint32_t key_length, const uint8_t* keys, uint32_t* hash);
+ static void helper_tails(int64_t hardware_flags, uint32_t num_keys, uint32_t key_length,
+ const uint8_t* keys, uint32_t* hash);
+
+ static void hash_varlen_helper(uint32_t length, const uint8_t* key, uint32_t* acc);
+#if defined(ARROW_HAVE_AVX2)
+ static void hash_varlen_avx2(uint32_t num_rows, const uint32_t* offsets,
+ const uint8_t* concatenated_keys,
+ uint32_t* temp_buffer, // Needs to hold 4 x 32-bit per row
+ uint32_t* hashes);
+#endif
+};
+
+} // namespace compute
+} // namespace arrow
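
The avalanche step declared above (and defined in key_hash.cc) is an
xxHash-style finalizer: alternating xor-shifts and prime multiplications spread
small input differences across all 32 output bits. A standalone restatement
using the PRIME32_2 and PRIME32_3 constants from this header; the main function
is illustrative only:

#include <cstdint>
#include <cstdio>

// Mirrors Hashing::avalanche_helper from key_hash.cc.
static uint32_t avalanche(uint32_t acc) {
  acc ^= (acc >> 15);
  acc *= 0x85EBCA77u;  // PRIME32_2
  acc ^= (acc >> 13);
  acc *= 0xC2B2AE3Du;  // PRIME32_3
  acc ^= (acc >> 16);
  return acc;
}

int main() {
  // Adjacent inputs yield widely differing outputs after the mixing rounds.
  std::printf("avalanche(1) = 0x%08x\n", avalanche(1));
  std::printf("avalanche(2) = 0x%08x\n", avalanche(2));
  return 0;
}
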
diff --git a/contrib/libs/apache/arrow/cpp/src/arrow/compute/exec/key_map.cc b/contrib/libs/apache/arrow/cpp/src/arrow/compute/exec/key_map.cc
index 5cc4105f45c..ac47c04403c 100644
--- a/contrib/libs/apache/arrow/cpp/src/arrow/compute/exec/key_map.cc
+++ b/contrib/libs/apache/arrow/cpp/src/arrow/compute/exec/key_map.cc
@@ -1,610 +1,610 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-#include "arrow/compute/exec/key_map.h"
-
-#include <memory.h>
-
-#include <algorithm>
-#include <cstdint>
-
-#include "arrow/util/bit_util.h"
-#include "arrow/util/bitmap_ops.h"
-#include "arrow/util/ubsan.h"
-
-namespace arrow {
-
-using BitUtil::CountLeadingZeros;
-
-namespace compute {
-
-constexpr uint64_t kHighBitOfEachByte = 0x8080808080808080ULL;
-
-// Search status bytes inside a block of 8 slots (64-bit word).
-// Try to find a slot that contains a 7-bit stamp matching the one provided.
-// There are three possible outcomes:
-// 1. A matching slot is found.
-// -> Return its index between 0 and 7 and set match found flag.
-// 2. A matching slot is not found and there is an empty slot in the block.
-// -> Return the index of the first empty slot and clear match found flag.
-// 3. A matching slot is not found and there are no empty slots in the block.
-// -> Return 8 as the output slot index and clear match found flag.
-//
-// Optionally an index of the first slot to start the search from can be specified.
-// In this case slots before it will be ignored.
-//
-template <bool use_start_slot>
-inline void SwissTable::search_block(uint64_t block, int stamp, int start_slot,
- int* out_slot, int* out_match_found) {
- // Filled slot bytes have the highest bit set to 0 and empty slots are equal to 0x80.
- uint64_t block_high_bits = block & kHighBitOfEachByte;
-
- // Replicate 7-bit stamp to all non-empty slots, leaving zeroes for empty slots.
- uint64_t stamp_pattern = stamp * ((block_high_bits ^ kHighBitOfEachByte) >> 7);
-
-  // If we XOR this pattern with the block status bytes, we get in each byte:
- // a) 0x00, for filled slots matching the stamp,
- // b) 0x00 < x < 0x80, for filled slots not matching the stamp,
- // c) 0x80, for empty slots.
- uint64_t block_xor_pattern = block ^ stamp_pattern;
-
- // If we then add 0x7f to every byte, we get:
- // a) 0x7F
- // b) 0x80 <= x < 0xFF
- // c) 0xFF
- uint64_t match_base = block_xor_pattern + ~kHighBitOfEachByte;
-
- // The highest bit now tells us if we have a match (0) or not (1).
- // We will negate the bits so that match is represented by a set bit.
- uint64_t matches = ~match_base;
-
- // Clear 7 non-relevant bits in each byte.
- // Also clear bytes that correspond to slots that we were supposed to
- // skip due to provided start slot index.
- // Note: the highest byte corresponds to the first slot.
- if (use_start_slot) {
- matches &= kHighBitOfEachByte >> (8 * start_slot);
- } else {
- matches &= kHighBitOfEachByte;
- }
-
- // We get 0 if there are no matches
- *out_match_found = (matches == 0 ? 0 : 1);
-
- // Now if we or with the highest bits of the block and scan zero bits in reverse,
- // we get 8x slot index that we were looking for.
- // This formula works in all three cases a), b) and c).
- *out_slot = static_cast<int>(CountLeadingZeros(matches | block_high_bits) >> 3);
-}
-
-// This call follows the call to search_block.
-// The input slot index is the output returned by it, which is a value from 0 to 8,
-// with 8 indicating that both: no match was found and there were no empty slots.
-//
-// If the slot index refers to a non-empty slot, return the group id associated with it.
-// Otherwise return any group id from any of the slots or
-// zero, which is the default value stored in empty slots.
-//
-inline uint64_t SwissTable::extract_group_id(const uint8_t* block_ptr, int slot,
- uint64_t group_id_mask) {
- // Input slot can be equal to 8, in which case we need to output any valid group id
- // value, so we take the one from slot 0 in the block.
- int clamped_slot = slot & 7;
-
- // Group id values for all 8 slots in the block are bit-packed and follow the status
- // bytes. We assume here that the number of bits is rounded up to 8, 16, 32 or 64. In
- // that case we can extract group id using aligned 64-bit word access.
- int num_groupid_bits = static_cast<int>(ARROW_POPCOUNT64(group_id_mask));
- ARROW_DCHECK(num_groupid_bits == 8 || num_groupid_bits == 16 ||
- num_groupid_bits == 32 || num_groupid_bits == 64);
-
- int bit_offset = clamped_slot * num_groupid_bits;
- const uint64_t* group_id_bytes =
- reinterpret_cast<const uint64_t*>(block_ptr) + 1 + (bit_offset >> 6);
- uint64_t group_id = (*group_id_bytes >> (bit_offset & 63)) & group_id_mask;
-
- return group_id;
-}
-
-// Return global slot id (the index including the information about the block)
-// where the search should continue if the first comparison fails.
-// This function always follows search_block and receives the slot id returned by it.
-//
-inline uint64_t SwissTable::next_slot_to_visit(uint64_t block_index, int slot,
- int match_found) {
- // The result should be taken modulo the number of all slots in all blocks,
- // but here we allow it to take a value one above the last slot index.
- // Modulo operation is postponed to later.
- return block_index * 8 + slot + match_found;
-}
-
-// Implements first (fast-path, optimistic) lookup.
-// Searches for a match only within the start block,
-// trying only the first slot with a matching stamp.
-//
-// The comparison callback needed for match verification is done outside of this function.
-// The match bit vector filled here only indicates that a matching stamp was found in a slot.
-//
-template <bool use_selection>
-void SwissTable::lookup_1(const uint16_t* selection, const int num_keys,
- const uint32_t* hashes, uint8_t* out_match_bitvector,
- uint32_t* out_groupids, uint32_t* out_slot_ids) {
- // Clear the output bit vector
- memset(out_match_bitvector, 0, (num_keys + 7) / 8);
-
- // Based on the size of the table, prepare bit number constants.
- uint32_t stamp_mask = (1 << bits_stamp_) - 1;
- int num_groupid_bits = num_groupid_bits_from_log_blocks(log_blocks_);
- uint32_t groupid_mask = (1 << num_groupid_bits) - 1;
-
- for (int i = 0; i < num_keys; ++i) {
- int id;
- if (use_selection) {
- id = util::SafeLoad(&selection[i]);
- } else {
- id = i;
- }
-
- // Extract from hash: block index and stamp
- //
- uint32_t hash = hashes[id];
- uint32_t iblock = hash >> (bits_hash_ - bits_stamp_ - log_blocks_);
- uint32_t stamp = iblock & stamp_mask;
- iblock >>= bits_stamp_;
-
- uint32_t num_block_bytes = num_groupid_bits + 8;
- const uint8_t* blockbase = reinterpret_cast<const uint8_t*>(blocks_) +
- static_cast<uint64_t>(iblock) * num_block_bytes;
- uint64_t block = util::SafeLoadAs<uint64_t>(blockbase);
-
- // Call helper functions to obtain the output triplet:
- // - match (of a stamp) found flag
- // - group id for key comparison
- // - slot to resume search from in case of no match or false positive
- int match_found;
- int islot_in_block;
- search_block<false>(block, stamp, 0, &islot_in_block, &match_found);
- uint64_t groupid = extract_group_id(blockbase, islot_in_block, groupid_mask);
- ARROW_DCHECK(groupid < num_inserted_ || num_inserted_ == 0);
- uint64_t islot = next_slot_to_visit(iblock, islot_in_block, match_found);
-
- out_match_bitvector[id / 8] |= match_found << (id & 7);
- util::SafeStore(&out_groupids[id], static_cast<uint32_t>(groupid));
- util::SafeStore(&out_slot_ids[id], static_cast<uint32_t>(islot));
- }
-}
-
-// How many groups we can keep in the hash table without the need for resizing.
-// When we reach this limit, we need to break processing of any further rows and resize.
-//
-uint64_t SwissTable::num_groups_for_resize() const {
- // Resize small hash tables when 50% full (up to 12KB).
- // Resize large hash tables when 75% full.
- constexpr int log_blocks_small_ = 9;
- uint64_t num_slots = 1ULL << (log_blocks_ + 3);
- if (log_blocks_ <= log_blocks_small_) {
- return num_slots / 2;
- } else {
- return num_slots * 3 / 4;
- }
-}
-
-uint64_t SwissTable::wrap_global_slot_id(uint64_t global_slot_id) {
- uint64_t global_slot_id_mask = (1 << (log_blocks_ + 3)) - 1;
- return global_slot_id & global_slot_id_mask;
-}
-
-// Run a single round of slot search - comparison / insert - filter unprocessed.
-// Update selection vector to reflect which items have been processed.
-// Ids in selection vector do not have to be sorted.
-//
-Status SwissTable::lookup_2(const uint32_t* hashes, uint32_t* inout_num_selected,
- uint16_t* inout_selection, bool* out_need_resize,
- uint32_t* out_group_ids, uint32_t* inout_next_slot_ids) {
- auto num_groups_limit = num_groups_for_resize();
- ARROW_DCHECK(num_inserted_ < num_groups_limit);
-
- // Temporary arrays are of limited size.
- // The input needs to be split into smaller portions if it exceeds that limit.
- //
- ARROW_DCHECK(*inout_num_selected <= static_cast<uint32_t>(1 << log_minibatch_));
-
- // We will split input row ids into three categories:
- // - needing to visit next block [0]
- // - needing comparison [1]
- // - inserted [2]
- //
- auto ids_inserted_buf =
- util::TempVectorHolder<uint16_t>(temp_stack_, *inout_num_selected);
- auto ids_for_comparison_buf =
- util::TempVectorHolder<uint16_t>(temp_stack_, *inout_num_selected);
- constexpr int category_nomatch = 0;
- constexpr int category_cmp = 1;
- constexpr int category_inserted = 2;
- int num_ids[3];
- num_ids[0] = num_ids[1] = num_ids[2] = 0;
- uint16_t* ids[3]{inout_selection, ids_for_comparison_buf.mutable_data(),
- ids_inserted_buf.mutable_data()};
- auto push_id = [&num_ids, &ids](int category, int id) {
- util::SafeStore(&ids[category][num_ids[category]++], static_cast<uint16_t>(id));
- };
-
- uint64_t num_groupid_bits = num_groupid_bits_from_log_blocks(log_blocks_);
- uint64_t groupid_mask = (1ULL << num_groupid_bits) - 1;
- constexpr uint64_t stamp_mask = 0x7f;
- uint64_t num_block_bytes = (8 + num_groupid_bits);
-
- uint32_t num_processed;
- for (num_processed = 0;
- // Second condition in for loop:
- // We need to break processing and have the caller of this function
- // resize the hash table if we reach the limit on the number of groups present.
- num_processed < *inout_num_selected &&
- num_inserted_ + num_ids[category_inserted] < num_groups_limit;
- ++num_processed) {
- // row id in original batch
- int id = util::SafeLoad(&inout_selection[num_processed]);
-
- uint64_t slot_id = wrap_global_slot_id(util::SafeLoad(&inout_next_slot_ids[id]));
- uint64_t block_id = slot_id >> 3;
- uint32_t hash = hashes[id];
- uint8_t* blockbase = blocks_ + num_block_bytes * block_id;
- uint64_t block = *reinterpret_cast<uint64_t*>(blockbase);
- uint64_t stamp = (hash >> (bits_hash_ - log_blocks_ - bits_stamp_)) & stamp_mask;
- int start_slot = (slot_id & 7);
-
- bool isempty = (blockbase[7 - start_slot] == 0x80);
- if (isempty) {
- // If we reach an empty slot, we insert the key for a new group.
-
- blockbase[7 - start_slot] = static_cast<uint8_t>(stamp);
- uint32_t group_id = num_inserted_ + num_ids[category_inserted];
- int groupid_bit_offset = static_cast<int>(start_slot * num_groupid_bits);
-
- // We assume here that the number of bits is rounded up to 8, 16, 32 or 64.
- // In that case we can insert group id value using aligned 64-bit word access.
- ARROW_DCHECK(num_groupid_bits == 8 || num_groupid_bits == 16 ||
- num_groupid_bits == 32 || num_groupid_bits == 64);
- uint64_t* ptr =
- &reinterpret_cast<uint64_t*>(blockbase + 8)[groupid_bit_offset >> 6];
- util::SafeStore(ptr, util::SafeLoad(ptr) | (static_cast<uint64_t>(group_id)
- << (groupid_bit_offset & 63)));
-
- hashes_[slot_id] = hash;
- util::SafeStore(&out_group_ids[id], group_id);
- push_id(category_inserted, id);
- } else {
- // We search for a slot with a matching stamp within a single block.
- // We append row id to the appropriate sequence of ids based on
- // whether the match has been found or not.
-
- int new_match_found;
- int new_slot;
- search_block<true>(block, static_cast<int>(stamp), start_slot, &new_slot,
- &new_match_found);
- auto new_groupid =
- static_cast<uint32_t>(extract_group_id(blockbase, new_slot, groupid_mask));
- ARROW_DCHECK(new_groupid < num_inserted_ + num_ids[category_inserted]);
- new_slot =
- static_cast<int>(next_slot_to_visit(block_id, new_slot, new_match_found));
- util::SafeStore(&inout_next_slot_ids[id], new_slot);
- util::SafeStore(&out_group_ids[id], new_groupid);
- push_id(new_match_found, id);
- }
- }
-
- // Copy keys for newly inserted rows using callback
- RETURN_NOT_OK(append_impl_(num_ids[category_inserted], ids[category_inserted]));
- num_inserted_ += num_ids[category_inserted];
-
- // Evaluate comparisons and append the ids of rows that failed them to the non-match set.
- uint32_t num_not_equal;
- equal_impl_(num_ids[category_cmp], ids[category_cmp], out_group_ids, &num_not_equal,
- ids[category_nomatch] + num_ids[category_nomatch]);
- num_ids[category_nomatch] += num_not_equal;
-
- // Append ids of any unprocessed entries if we aborted processing due to the need
- // to resize.
- if (num_processed < *inout_num_selected) {
- memmove(ids[category_nomatch] + num_ids[category_nomatch],
- inout_selection + num_processed,
- sizeof(uint16_t) * (*inout_num_selected - num_processed));
- num_ids[category_nomatch] += (*inout_num_selected - num_processed);
- }
-
- *out_need_resize = (num_inserted_ == num_groups_limit);
- *inout_num_selected = num_ids[category_nomatch];
- return Status::OK();
-}
-
-// Use hashes and callbacks to find group ids for already existing keys and
-// to insert and report newly assigned group ids for new keys.
-//
-Status SwissTable::map(const int num_keys, const uint32_t* hashes,
- uint32_t* out_groupids) {
- // Temporary buffers have limited size.
- // Caller is responsible for splitting larger input arrays into smaller chunks.
- ARROW_DCHECK(num_keys <= (1 << log_minibatch_));
-
- // Allocate temporary buffers with a lifetime of this function
- auto match_bitvector_buf = util::TempVectorHolder<uint8_t>(temp_stack_, num_keys);
- uint8_t* match_bitvector = match_bitvector_buf.mutable_data();
- auto slot_ids_buf = util::TempVectorHolder<uint32_t>(temp_stack_, num_keys);
- uint32_t* slot_ids = slot_ids_buf.mutable_data();
- auto ids_buf = util::TempVectorHolder<uint16_t>(temp_stack_, num_keys);
- uint16_t* ids = ids_buf.mutable_data();
- uint32_t num_ids;
-
- // First-pass processing.
- // Optimistically use simplified lookup involving only a start block to find
- // a single group id candidate for every input.
-#if defined(ARROW_HAVE_AVX2)
- if (hardware_flags_ & arrow::internal::CpuInfo::AVX2) {
- if (log_blocks_ <= 4) {
- int tail = num_keys % 32;
- int delta = num_keys - tail;
- lookup_1_avx2_x32(num_keys - tail, hashes, match_bitvector, out_groupids, slot_ids);
- lookup_1_avx2_x8(tail, hashes + delta, match_bitvector + delta / 8,
- out_groupids + delta, slot_ids + delta);
- } else {
- lookup_1_avx2_x8(num_keys, hashes, match_bitvector, out_groupids, slot_ids);
- }
- } else {
-#endif
- lookup_1<false>(nullptr, num_keys, hashes, match_bitvector, out_groupids, slot_ids);
-#if defined(ARROW_HAVE_AVX2)
- }
-#endif
-
- int64_t num_matches =
- arrow::internal::CountSetBits(match_bitvector, /*offset=*/0, num_keys);
-
- // After the first-pass processing count rows with matches (based on stamp comparison)
- // and decide based on their percentage whether to call dense or sparse comparison
- // function. Dense comparison means evaluating it for all inputs, even if the matching
- // stamp was not found. It may be cheaper to evaluate comparison for all inputs if the
- // extra cost of filtering is higher than the wasted processing of rows with no match.
- //
- // Dense comparison can only be used if there is at least one inserted key,
- // because otherwise there is no key to compare to.
- //
- if (num_inserted_ > 0 && num_matches > 0 && num_matches > 3 * num_keys / 4) {
- // Dense comparisons
- equal_impl_(num_keys, nullptr, out_groupids, &num_ids, ids);
- } else {
- // Sparse comparisons that involve filtering the input set of keys
- auto ids_cmp_buf = util::TempVectorHolder<uint16_t>(temp_stack_, num_keys);
- uint16_t* ids_cmp = ids_cmp_buf.mutable_data();
- int num_ids_result;
- util::BitUtil::bits_split_indexes(hardware_flags_, num_keys, match_bitvector,
- &num_ids_result, ids, ids_cmp);
- num_ids = num_ids_result;
- uint32_t num_not_equal;
- equal_impl_(num_keys - num_ids, ids_cmp, out_groupids, &num_not_equal, ids + num_ids);
- num_ids += num_not_equal;
- }
-
- do {
- // A single round of slow-pass (robust) lookup or insert.
- // A single round ends with either a single comparison verifying the match candidate
- // or inserting a new key. A single round of slow-pass may return early if we reach
- // the limit of the number of groups due to inserts of new keys. In that case we
- // need to resize and recalculate the starting global slot ids for the new, bigger
- // hash table.
- bool out_of_capacity;
- RETURN_NOT_OK(
- lookup_2(hashes, &num_ids, ids, &out_of_capacity, out_groupids, slot_ids));
- if (out_of_capacity) {
- RETURN_NOT_OK(grow_double());
- // Reset start slot ids for still unprocessed input keys.
- //
- for (uint32_t i = 0; i < num_ids; ++i) {
- // First slot in the new starting block
- const uint16_t id = util::SafeLoad(&ids[i]);
- util::SafeStore(&slot_ids[id], (hashes[id] >> (bits_hash_ - log_blocks_)) * 8);
- }
- }
- } while (num_ids > 0);
-
- return Status::OK();
-}
-
-Status SwissTable::grow_double() {
- // Before and after metadata
- int num_group_id_bits_before = num_groupid_bits_from_log_blocks(log_blocks_);
- int num_group_id_bits_after = num_groupid_bits_from_log_blocks(log_blocks_ + 1);
- uint64_t group_id_mask_before = ~0ULL >> (64 - num_group_id_bits_before);
- int log_blocks_before = log_blocks_;
- int log_blocks_after = log_blocks_ + 1;
- uint64_t block_size_before = (8 + num_group_id_bits_before);
- uint64_t block_size_after = (8 + num_group_id_bits_after);
- uint64_t block_size_total_before = (block_size_before << log_blocks_before) + padding_;
- uint64_t block_size_total_after = (block_size_after << log_blocks_after) + padding_;
- uint64_t hashes_size_total_before =
- (bits_hash_ / 8 * (1 << (log_blocks_before + 3))) + padding_;
- uint64_t hashes_size_total_after =
- (bits_hash_ / 8 * (1 << (log_blocks_after + 3))) + padding_;
- constexpr uint32_t stamp_mask = (1 << bits_stamp_) - 1;
-
- // Allocate new buffers
- uint8_t* blocks_new;
- RETURN_NOT_OK(pool_->Allocate(block_size_total_after, &blocks_new));
- memset(blocks_new, 0, block_size_total_after);
- uint8_t* hashes_new_8B;
- uint32_t* hashes_new;
- RETURN_NOT_OK(pool_->Allocate(hashes_size_total_after, &hashes_new_8B));
- hashes_new = reinterpret_cast<uint32_t*>(hashes_new_8B);
-
- // First pass over all old blocks.
- // Reinsert entries that were not in an overflow block
- // (i.e. a block other than the one selected by the hash bits of the entry).
- for (int i = 0; i < (1 << log_blocks_); ++i) {
- // How many full slots in this block
- uint8_t* block_base = blocks_ + i * block_size_before;
- uint8_t* double_block_base_new = blocks_new + 2 * i * block_size_after;
- uint64_t block = *reinterpret_cast<const uint64_t*>(block_base);
-
- auto full_slots =
- static_cast<int>(CountLeadingZeros(block & kHighBitOfEachByte) >> 3);
- int full_slots_new[2];
- full_slots_new[0] = full_slots_new[1] = 0;
- util::SafeStore(double_block_base_new, kHighBitOfEachByte);
- util::SafeStore(double_block_base_new + block_size_after, kHighBitOfEachByte);
-
- for (int j = 0; j < full_slots; ++j) {
- uint64_t slot_id = i * 8 + j;
- uint32_t hash = hashes_[slot_id];
- uint64_t block_id_new = hash >> (bits_hash_ - log_blocks_after);
- bool is_overflow_entry = ((block_id_new >> 1) != static_cast<uint64_t>(i));
- if (is_overflow_entry) {
- continue;
- }
-
- int ihalf = block_id_new & 1;
- uint8_t stamp_new =
- hash >> ((bits_hash_ - log_blocks_after - bits_stamp_)) & stamp_mask;
- uint64_t group_id_bit_offs = j * num_group_id_bits_before;
- uint64_t group_id =
- (util::SafeLoadAs<uint64_t>(block_base + 8 + (group_id_bit_offs >> 3)) >>
- (group_id_bit_offs & 7)) &
- group_id_mask_before;
-
- uint64_t slot_id_new = i * 16 + ihalf * 8 + full_slots_new[ihalf];
- hashes_new[slot_id_new] = hash;
- uint8_t* block_base_new = double_block_base_new + ihalf * block_size_after;
- block_base_new[7 - full_slots_new[ihalf]] = stamp_new;
- int group_id_bit_offs_new = full_slots_new[ihalf] * num_group_id_bits_after;
- uint64_t* ptr =
- reinterpret_cast<uint64_t*>(block_base_new + 8 + (group_id_bit_offs_new >> 3));
- util::SafeStore(ptr,
- util::SafeLoad(ptr) | (group_id << (group_id_bit_offs_new & 7)));
- full_slots_new[ihalf]++;
- }
- }
-
- // Second pass over all old blocks.
- // Reinsert entries that were in an overflow block.
- for (int i = 0; i < (1 << log_blocks_); ++i) {
- // How many full slots in this block
- uint8_t* block_base = blocks_ + i * block_size_before;
- uint64_t block = util::SafeLoadAs<uint64_t>(block_base);
- int full_slots = static_cast<int>(CountLeadingZeros(block & kHighBitOfEachByte) >> 3);
-
- for (int j = 0; j < full_slots; ++j) {
- uint64_t slot_id = i * 8 + j;
- uint32_t hash = hashes_[slot_id];
- uint64_t block_id_new = hash >> (bits_hash_ - log_blocks_after);
- bool is_overflow_entry = ((block_id_new >> 1) != static_cast<uint64_t>(i));
- if (!is_overflow_entry) {
- continue;
- }
-
- uint64_t group_id_bit_offs = j * num_group_id_bits_before;
- uint64_t group_id =
- (util::SafeLoadAs<uint64_t>(block_base + 8 + (group_id_bit_offs >> 3)) >>
- (group_id_bit_offs & 7)) &
- group_id_mask_before;
- uint8_t stamp_new =
- hash >> ((bits_hash_ - log_blocks_after - bits_stamp_)) & stamp_mask;
-
- uint8_t* block_base_new = blocks_new + block_id_new * block_size_after;
- uint64_t block_new = util::SafeLoadAs<uint64_t>(block_base_new);
- int full_slots_new =
- static_cast<int>(CountLeadingZeros(block_new & kHighBitOfEachByte) >> 3);
- while (full_slots_new == 8) {
- block_id_new = (block_id_new + 1) & ((1 << log_blocks_after) - 1);
- block_base_new = blocks_new + block_id_new * block_size_after;
- block_new = util::SafeLoadAs<uint64_t>(block_base_new);
- full_slots_new =
- static_cast<int>(CountLeadingZeros(block_new & kHighBitOfEachByte) >> 3);
- }
-
- hashes_new[block_id_new * 8 + full_slots_new] = hash;
- block_base_new[7 - full_slots_new] = stamp_new;
- int group_id_bit_offs_new = full_slots_new * num_group_id_bits_after;
- uint64_t* ptr =
- reinterpret_cast<uint64_t*>(block_base_new + 8 + (group_id_bit_offs_new >> 3));
- util::SafeStore(ptr,
- util::SafeLoad(ptr) | (group_id << (group_id_bit_offs_new & 7)));
- }
- }
-
- pool_->Free(blocks_, block_size_total_before);
- pool_->Free(reinterpret_cast<uint8_t*>(hashes_), hashes_size_total_before);
- log_blocks_ = log_blocks_after;
- blocks_ = blocks_new;
- hashes_ = hashes_new;
-
- return Status::OK();
-}
-
-Status SwissTable::init(int64_t hardware_flags, MemoryPool* pool,
- util::TempVectorStack* temp_stack, int log_minibatch,
- EqualImpl equal_impl, AppendImpl append_impl) {
- hardware_flags_ = hardware_flags;
- pool_ = pool;
- temp_stack_ = temp_stack;
- log_minibatch_ = log_minibatch;
- equal_impl_ = equal_impl;
- append_impl_ = append_impl;
-
- log_blocks_ = 0;
- int num_groupid_bits = num_groupid_bits_from_log_blocks(log_blocks_);
- num_inserted_ = 0;
-
- const uint64_t block_bytes = 8 + num_groupid_bits;
- const uint64_t slot_bytes = (block_bytes << log_blocks_) + padding_;
- RETURN_NOT_OK(pool_->Allocate(slot_bytes, &blocks_));
-
- // Make sure group ids are initially set to zero for all slots.
- memset(blocks_, 0, slot_bytes);
-
- // Initialize all status bytes to represent an empty slot.
- for (uint64_t i = 0; i < (static_cast<uint64_t>(1) << log_blocks_); ++i) {
- util::SafeStore(blocks_ + i * block_bytes, kHighBitOfEachByte);
- }
-
- uint64_t num_slots = 1ULL << (log_blocks_ + 3);
- const uint64_t hash_size = sizeof(uint32_t);
- const uint64_t hash_bytes = hash_size * num_slots + padding_;
- uint8_t* hashes8;
- RETURN_NOT_OK(pool_->Allocate(hash_bytes, &hashes8));
- hashes_ = reinterpret_cast<uint32_t*>(hashes8);
-
- return Status::OK();
-}
-
-void SwissTable::cleanup() {
- if (blocks_) {
- int num_groupid_bits = num_groupid_bits_from_log_blocks(log_blocks_);
- const uint64_t block_bytes = 8 + num_groupid_bits;
- const uint64_t slot_bytes = (block_bytes << log_blocks_) + padding_;
- pool_->Free(blocks_, slot_bytes);
- blocks_ = nullptr;
- }
- if (hashes_) {
- uint64_t num_slots = 1ULL << (log_blocks_ + 3);
- const uint64_t hash_size = sizeof(uint32_t);
- const uint64_t hash_bytes = hash_size * num_slots + padding_;
- pool_->Free(reinterpret_cast<uint8_t*>(hashes_), hash_bytes);
- hashes_ = nullptr;
- }
- log_blocks_ = 0;
- num_inserted_ = 0;
-}
-
-} // namespace compute
-} // namespace arrow
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "arrow/compute/exec/key_map.h"
+
+#include <memory.h>
+
+#include <algorithm>
+#include <cstdint>
+
+#include "arrow/util/bit_util.h"
+#include "arrow/util/bitmap_ops.h"
+#include "arrow/util/ubsan.h"
+
+namespace arrow {
+
+using BitUtil::CountLeadingZeros;
+
+namespace compute {
+
+constexpr uint64_t kHighBitOfEachByte = 0x8080808080808080ULL;
+
+// Search status bytes inside a block of 8 slots (64-bit word).
+// Try to find a slot that contains a 7-bit stamp matching the one provided.
+// There are three possible outcomes:
+// 1. A matching slot is found.
+// -> Return its index between 0 and 7 and set match found flag.
+// 2. A matching slot is not found and there is an empty slot in the block.
+// -> Return the index of the first empty slot and clear match found flag.
+// 3. A matching slot is not found and there are no empty slots in the block.
+// -> Return 8 as the output slot index and clear match found flag.
+//
+// Optionally an index of the first slot to start the search from can be specified.
+// In this case slots before it will be ignored.
+//
+template <bool use_start_slot>
+inline void SwissTable::search_block(uint64_t block, int stamp, int start_slot,
+ int* out_slot, int* out_match_found) {
+ // Filled slot bytes have the highest bit set to 0 and empty slots are equal to 0x80.
+ uint64_t block_high_bits = block & kHighBitOfEachByte;
+
+ // Replicate 7-bit stamp to all non-empty slots, leaving zeroes for empty slots.
+ uint64_t stamp_pattern = stamp * ((block_high_bits ^ kHighBitOfEachByte) >> 7);
+
+ // XOR-ing this pattern with the block status bytes gives, in individual bytes:
+ // a) 0x00, for filled slots matching the stamp,
+ // b) 0x00 < x < 0x80, for filled slots not matching the stamp,
+ // c) 0x80, for empty slots.
+ uint64_t block_xor_pattern = block ^ stamp_pattern;
+
+ // If we then add 0x7f to every byte, we get:
+ // a) 0x7F
+ // b) 0x80 <= x < 0xFF
+ // c) 0xFF
+ uint64_t match_base = block_xor_pattern + ~kHighBitOfEachByte;
+
+ // The highest bit now tells us if we have a match (0) or not (1).
+ // We will negate the bits so that match is represented by a set bit.
+ uint64_t matches = ~match_base;
+
+ // Clear 7 non-relevant bits in each byte.
+ // Also clear bytes that correspond to slots that we were supposed to
+ // skip due to provided start slot index.
+ // Note: the highest byte corresponds to the first slot.
+ if (use_start_slot) {
+ matches &= kHighBitOfEachByte >> (8 * start_slot);
+ } else {
+ matches &= kHighBitOfEachByte;
+ }
+
+ // We get 0 if there are no matches
+ *out_match_found = (matches == 0 ? 0 : 1);
+
+ // Now if we OR the matches with the high bits of the block and count leading
+ // zero bits, we get 8 times the slot index that we were looking for (hence >> 3).
+ // This formula works in all three cases a), b) and c).
+ *out_slot = static_cast<int>(CountLeadingZeros(matches | block_high_bits) >> 3);
+}
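+
+// Worked example (illustrative; the block contents below are assumed):
+// status bytes from slot 0 (highest byte) to slot 7 (lowest byte):
+//
+// 0x11 0x5C 0x11 0x80 0x80 0x80 0x80 0x80 (stamps 0x11, 0x5C, 0x11; 5 empty)
+//
+// Searching for stamp 0x11 with start_slot = 0: the xor pattern zeroes the
+// bytes of slots 0 and 2, adding 0x7F keeps their high bit clear, and after
+// negation and masking the highest set bit lies in the byte of slot 0, so
+// CountLeadingZeros(...) >> 3 == 0 and *out_match_found == 1.
+// Searching for stamp 0x22 sets no match bits; matches | block_high_bits then
+// has 24 leading zero bits, so *out_slot == 3 (the first empty slot) and
+// *out_match_found == 0.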
+
+// This call follows the call to search_block.
+// The input slot index is the output returned by it: a value from 0 to 8,
+// with 8 indicating that no match was found and that there were no empty slots.
+//
+// If the index refers to a non-empty slot, return the group id associated with it.
+// Otherwise return an arbitrary group id, possibly
+// zero, which is the default value stored in empty slots.
+//
+inline uint64_t SwissTable::extract_group_id(const uint8_t* block_ptr, int slot,
+ uint64_t group_id_mask) {
+ // Input slot can be equal to 8, in which case we need to output any valid group id
+ // value, so we take the one from slot 0 in the block.
+ int clamped_slot = slot & 7;
+
+ // Group id values for all 8 slots in the block are bit-packed and follow the status
+ // bytes. We assume here that the number of bits is rounded up to 8, 16, 32 or 64. In
+ // that case we can extract group id using aligned 64-bit word access.
+ int num_groupid_bits = static_cast<int>(ARROW_POPCOUNT64(group_id_mask));
+ ARROW_DCHECK(num_groupid_bits == 8 || num_groupid_bits == 16 ||
+ num_groupid_bits == 32 || num_groupid_bits == 64);
+
+ int bit_offset = clamped_slot * num_groupid_bits;
+ const uint64_t* group_id_bytes =
+ reinterpret_cast<const uint64_t*>(block_ptr) + 1 + (bit_offset >> 6);
+ uint64_t group_id = (*group_id_bytes >> (bit_offset & 63)) & group_id_mask;
+
+ return group_id;
+}
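+
+// Worked example (illustrative; log_blocks_ = 6 is assumed): group ids then
+// use num_groupid_bits_from_log_blocks(6) = 16 bits, so a block is 8 status
+// bytes followed by 8 x 16-bit group ids (24 bytes in total). For slot 5 the
+// bit offset is 5 * 16 = 80, so the id is read from the second 64-bit word
+// after the status bytes (80 >> 6 == 1), starting at bit 80 & 63 == 16.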
+
+// Return global slot id (the index including the information about the block)
+// where the search should continue if the first comparison fails.
+// This function always follows search_block and receives the slot id returned by it.
+//
+inline uint64_t SwissTable::next_slot_to_visit(uint64_t block_index, int slot,
+ int match_found) {
+ // The result should be taken modulo the number of all slots in all blocks,
+ // but here we allow it to take a value one above the last slot index.
+ // The modulo operation is postponed until later.
+ return block_index * 8 + slot + match_found;
+}
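+
+// For example (illustrative numbers): a false-positive match in block 3,
+// slot 2 yields 3 * 8 + 2 + 1 = 27, so the retry resumes from the slot right
+// after the one whose full key comparison failed; with no match and no empty
+// slot (slot == 8) the result 3 * 8 + 8 points at the start of the next block.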
+
+// Implements first (fast-path, optimistic) lookup.
+// Searches for a match only within the start block,
+// trying only the first slot with a matching stamp.
+//
+// The comparison callback needed for match verification is invoked outside of this
+// function. The match bit vector filled here only indicates that a matching stamp
+// was found in a slot.
+//
+template <bool use_selection>
+void SwissTable::lookup_1(const uint16_t* selection, const int num_keys,
+ const uint32_t* hashes, uint8_t* out_match_bitvector,
+ uint32_t* out_groupids, uint32_t* out_slot_ids) {
+ // Clear the output bit vector
+ memset(out_match_bitvector, 0, (num_keys + 7) / 8);
+
+ // Based on the size of the table, prepare bit number constants.
+ uint32_t stamp_mask = (1 << bits_stamp_) - 1;
+ int num_groupid_bits = num_groupid_bits_from_log_blocks(log_blocks_);
+ uint32_t groupid_mask = (1 << num_groupid_bits) - 1;
+
+ for (int i = 0; i < num_keys; ++i) {
+ int id;
+ if (use_selection) {
+ id = util::SafeLoad(&selection[i]);
+ } else {
+ id = i;
+ }
+
+ // Extract from hash: block index and stamp
+ //
+ uint32_t hash = hashes[id];
+ uint32_t iblock = hash >> (bits_hash_ - bits_stamp_ - log_blocks_);
+ uint32_t stamp = iblock & stamp_mask;
+ iblock >>= bits_stamp_;
+
+ uint32_t num_block_bytes = num_groupid_bits + 8;
+ const uint8_t* blockbase = reinterpret_cast<const uint8_t*>(blocks_) +
+ static_cast<uint64_t>(iblock) * num_block_bytes;
+ uint64_t block = util::SafeLoadAs<uint64_t>(blockbase);
+
+ // Call helper functions to obtain the output triplet:
+ // - match (of a stamp) found flag
+ // - group id for key comparison
+ // - slot to resume search from in case of no match or false positive
+ int match_found;
+ int islot_in_block;
+ search_block<false>(block, stamp, 0, &islot_in_block, &match_found);
+ uint64_t groupid = extract_group_id(blockbase, islot_in_block, groupid_mask);
+ ARROW_DCHECK(groupid < num_inserted_ || num_inserted_ == 0);
+ uint64_t islot = next_slot_to_visit(iblock, islot_in_block, match_found);
+
+ out_match_bitvector[id / 8] |= match_found << (id & 7);
+ util::SafeStore(&out_groupids[id], static_cast<uint32_t>(groupid));
+ util::SafeStore(&out_slot_ids[id], static_cast<uint32_t>(islot));
+ }
+}
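+
+// Worked example (illustrative; the sizes are assumed): with bits_hash_ = 32,
+// bits_stamp_ = 7 and log_blocks_ = 10 a hash is consumed from the top down:
+//
+// [ 10 bits: block index ][ 7 bits: stamp ][ 15 low bits: unused here ]
+//
+// iblock = hash >> 15 keeps the block index together with the stamp, the low
+// 7 bits of that value are the stamp, and iblock >>= 7 leaves the block index.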
+
+// How many groups we can keep in the hash table without the need for resizing.
+// When we reach this limit, we need to break processing of any further rows and resize.
+//
+uint64_t SwissTable::num_groups_for_resize() const {
+ // Resize small hash tables when 50% full (up to 12KB).
+ // Resize large hash tables when 75% full.
+ constexpr int log_blocks_small_ = 9;
+ uint64_t num_slots = 1ULL << (log_blocks_ + 3);
+ if (log_blocks_ <= log_blocks_small_) {
+ return num_slots / 2;
+ } else {
+ return num_slots * 3 / 4;
+ }
+}
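+
+// For example (illustrative numbers): with log_blocks_ = 4 there are
+// 16 * 8 = 128 slots and resizing is triggered after 64 inserts (50%); with
+// log_blocks_ = 12 there are 32768 slots and the limit is 24576 (75%).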
+
+uint64_t SwissTable::wrap_global_slot_id(uint64_t global_slot_id) {
+ uint64_t global_slot_id_mask = (1 << (log_blocks_ + 3)) - 1;
+ return global_slot_id & global_slot_id_mask;
+}
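+
+// For example (illustrative): with log_blocks_ = 2 there are 32 slots in
+// total, the mask is 31, and the out-of-range id 32 that next_slot_to_visit
+// may produce for the last slot wraps back to slot 0.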
+
+// Run a single round of slot search - comparison / insert - filter unprocessed.
+// Update selection vector to reflect which items have been processed.
+// Ids in selection vector do not have to be sorted.
+//
+Status SwissTable::lookup_2(const uint32_t* hashes, uint32_t* inout_num_selected,
+ uint16_t* inout_selection, bool* out_need_resize,
+ uint32_t* out_group_ids, uint32_t* inout_next_slot_ids) {
+ auto num_groups_limit = num_groups_for_resize();
+ ARROW_DCHECK(num_inserted_ < num_groups_limit);
+
+ // Temporary arrays are of limited size.
+ // The input needs to be split into smaller portions if it exceeds that limit.
+ //
+ ARROW_DCHECK(*inout_num_selected <= static_cast<uint32_t>(1 << log_minibatch_));
+
+ // We will split input row ids into three categories:
+ // - needing to visit next block [0]
+ // - needing comparison [1]
+ // - inserted [2]
+ //
+ auto ids_inserted_buf =
+ util::TempVectorHolder<uint16_t>(temp_stack_, *inout_num_selected);
+ auto ids_for_comparison_buf =
+ util::TempVectorHolder<uint16_t>(temp_stack_, *inout_num_selected);
+ constexpr int category_nomatch = 0;
+ constexpr int category_cmp = 1;
+ constexpr int category_inserted = 2;
+ int num_ids[3];
+ num_ids[0] = num_ids[1] = num_ids[2] = 0;
+ uint16_t* ids[3]{inout_selection, ids_for_comparison_buf.mutable_data(),
+ ids_inserted_buf.mutable_data()};
+ auto push_id = [&num_ids, &ids](int category, int id) {
+ util::SafeStore(&ids[category][num_ids[category]++], static_cast<uint16_t>(id));
+ };
+
+ uint64_t num_groupid_bits = num_groupid_bits_from_log_blocks(log_blocks_);
+ uint64_t groupid_mask = (1ULL << num_groupid_bits) - 1;
+ constexpr uint64_t stamp_mask = 0x7f;
+ uint64_t num_block_bytes = (8 + num_groupid_bits);
+
+ uint32_t num_processed;
+ for (num_processed = 0;
+ // Second condition in for loop:
+ // We need to break processing and have the caller of this function
+ // resize the hash table if we reach the limit on the number of groups present.
+ num_processed < *inout_num_selected &&
+ num_inserted_ + num_ids[category_inserted] < num_groups_limit;
+ ++num_processed) {
+ // row id in original batch
+ int id = util::SafeLoad(&inout_selection[num_processed]);
+
+ uint64_t slot_id = wrap_global_slot_id(util::SafeLoad(&inout_next_slot_ids[id]));
+ uint64_t block_id = slot_id >> 3;
+ uint32_t hash = hashes[id];
+ uint8_t* blockbase = blocks_ + num_block_bytes * block_id;
+ uint64_t block = *reinterpret_cast<uint64_t*>(blockbase);
+ uint64_t stamp = (hash >> (bits_hash_ - log_blocks_ - bits_stamp_)) & stamp_mask;
+ int start_slot = (slot_id & 7);
+
+ bool isempty = (blockbase[7 - start_slot] == 0x80);
+ if (isempty) {
+ // If we reach an empty slot, we insert the key for a new group.
+
+ blockbase[7 - start_slot] = static_cast<uint8_t>(stamp);
+ uint32_t group_id = num_inserted_ + num_ids[category_inserted];
+ int groupid_bit_offset = static_cast<int>(start_slot * num_groupid_bits);
+
+ // We assume here that the number of bits is rounded up to 8, 16, 32 or 64.
+ // In that case we can insert group id value using aligned 64-bit word access.
+ ARROW_DCHECK(num_groupid_bits == 8 || num_groupid_bits == 16 ||
+ num_groupid_bits == 32 || num_groupid_bits == 64);
+ uint64_t* ptr =
+ &reinterpret_cast<uint64_t*>(blockbase + 8)[groupid_bit_offset >> 6];
+ util::SafeStore(ptr, util::SafeLoad(ptr) | (static_cast<uint64_t>(group_id)
+ << (groupid_bit_offset & 63)));
+
+ hashes_[slot_id] = hash;
+ util::SafeStore(&out_group_ids[id], group_id);
+ push_id(category_inserted, id);
+ } else {
+ // We search for a slot with a matching stamp within a single block.
+ // We append row id to the appropriate sequence of ids based on
+ // whether the match has been found or not.
+
+ int new_match_found;
+ int new_slot;
+ search_block<true>(block, static_cast<int>(stamp), start_slot, &new_slot,
+ &new_match_found);
+ auto new_groupid =
+ static_cast<uint32_t>(extract_group_id(blockbase, new_slot, groupid_mask));
+ ARROW_DCHECK(new_groupid < num_inserted_ + num_ids[category_inserted]);
+ new_slot =
+ static_cast<int>(next_slot_to_visit(block_id, new_slot, new_match_found));
+ util::SafeStore(&inout_next_slot_ids[id], new_slot);
+ util::SafeStore(&out_group_ids[id], new_groupid);
+ push_id(new_match_found, id);
+ }
+ }
+
+ // Copy keys for newly inserted rows using callback
+ RETURN_NOT_OK(append_impl_(num_ids[category_inserted], ids[category_inserted]));
+ num_inserted_ += num_ids[category_inserted];
+
+ // Evaluate comparisons and append the ids of rows that failed them to the non-match set.
+ uint32_t num_not_equal;
+ equal_impl_(num_ids[category_cmp], ids[category_cmp], out_group_ids, &num_not_equal,
+ ids[category_nomatch] + num_ids[category_nomatch]);
+ num_ids[category_nomatch] += num_not_equal;
+
+ // Append ids of any unprocessed entries if we aborted processing due to the need
+ // to resize.
+ if (num_processed < *inout_num_selected) {
+ memmove(ids[category_nomatch] + num_ids[category_nomatch],
+ inout_selection + num_processed,
+ sizeof(uint16_t) * (*inout_num_selected - num_processed));
+ num_ids[category_nomatch] += (*inout_num_selected - num_processed);
+ }
+
+ *out_need_resize = (num_inserted_ == num_groups_limit);
+ *inout_num_selected = num_ids[category_nomatch];
+ return Status::OK();
+}
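+
+// Illustrative walk-through (the inputs are assumed): for a selection
+// {5, 9, 12, 40}, suppose row 5 hits an empty slot (inserted), rows 9 and 12
+// find a matching stamp (sent to comparison, where row 12 fails) and row 40
+// finds neither. After the round the selection shrinks to {40, 12}, to be
+// retried from their updated next-slot ids, and num_inserted_ has grown by one.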
+
+// Use hashes and callbacks to find group ids for already existing keys and
+// to insert and report newly assigned group ids for new keys.
+//
+Status SwissTable::map(const int num_keys, const uint32_t* hashes,
+ uint32_t* out_groupids) {
+ // Temporary buffers have limited size.
+ // Caller is responsible for splitting larger input arrays into smaller chunks.
+ ARROW_DCHECK(num_keys <= (1 << log_minibatch_));
+
+ // Allocate temporary buffers with a lifetime of this function
+ auto match_bitvector_buf = util::TempVectorHolder<uint8_t>(temp_stack_, num_keys);
+ uint8_t* match_bitvector = match_bitvector_buf.mutable_data();
+ auto slot_ids_buf = util::TempVectorHolder<uint32_t>(temp_stack_, num_keys);
+ uint32_t* slot_ids = slot_ids_buf.mutable_data();
+ auto ids_buf = util::TempVectorHolder<uint16_t>(temp_stack_, num_keys);
+ uint16_t* ids = ids_buf.mutable_data();
+ uint32_t num_ids;
+
+ // First-pass processing.
+ // Optimistically use simplified lookup involving only a start block to find
+ // a single group id candidate for every input.
+#if defined(ARROW_HAVE_AVX2)
+ if (hardware_flags_ & arrow::internal::CpuInfo::AVX2) {
+ if (log_blocks_ <= 4) {
+ int tail = num_keys % 32;
+ int delta = num_keys - tail;
+ lookup_1_avx2_x32(num_keys - tail, hashes, match_bitvector, out_groupids, slot_ids);
+ lookup_1_avx2_x8(tail, hashes + delta, match_bitvector + delta / 8,
+ out_groupids + delta, slot_ids + delta);
+ } else {
+ lookup_1_avx2_x8(num_keys, hashes, match_bitvector, out_groupids, slot_ids);
+ }
+ } else {
+#endif
+ lookup_1<false>(nullptr, num_keys, hashes, match_bitvector, out_groupids, slot_ids);
+#if defined(ARROW_HAVE_AVX2)
+ }
+#endif
+
+ int64_t num_matches =
+ arrow::internal::CountSetBits(match_bitvector, /*offset=*/0, num_keys);
+
+ // After the first-pass processing count rows with matches (based on stamp comparison)
+ // and decide based on their percentage whether to call dense or sparse comparison
+ // function. Dense comparison means evaluating it for all inputs, even if the matching
+ // stamp was not found. It may be cheaper to evaluate comparison for all inputs if the
+ // extra cost of filtering is higher than the wasted processing of rows with no match.
+ //
+ // Dense comparison can only be used if there is at least one inserted key,
+ // because otherwise there is no key to compare to.
+ //
+ if (num_inserted_ > 0 && num_matches > 0 && num_matches > 3 * num_keys / 4) {
+ // Dense comparisons
+ equal_impl_(num_keys, nullptr, out_groupids, &num_ids, ids);
+ } else {
+ // Sparse comparisons that involve filtering the input set of keys
+ auto ids_cmp_buf = util::TempVectorHolder<uint16_t>(temp_stack_, num_keys);
+ uint16_t* ids_cmp = ids_cmp_buf.mutable_data();
+ int num_ids_result;
+ util::BitUtil::bits_split_indexes(hardware_flags_, num_keys, match_bitvector,
+ &num_ids_result, ids, ids_cmp);
+ num_ids = num_ids_result;
+ uint32_t num_not_equal;
+ equal_impl_(num_keys - num_ids, ids_cmp, out_groupids, &num_not_equal, ids + num_ids);
+ num_ids += num_not_equal;
+ }
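+
+ // For example (illustrative numbers): with num_keys = 1024 and 900 first-pass
+ // stamp matches, 900 > 3 * 1024 / 4 = 768, so the dense path compares all
+ // keys directly instead of first filtering them through the match bit vector.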
+
+ do {
+ // A single round of slow-pass (robust) lookup or insert.
+ // A single round ends with either a single comparison verifying the match candidate
+ // or inserting a new key. A single round of slow-pass may return early if we reach
+ // the limit of the number of groups due to inserts of new keys. In that case we
+ // need to resize and recalculate the starting global slot ids for the new, bigger
+ // hash table.
+ bool out_of_capacity;
+ RETURN_NOT_OK(
+ lookup_2(hashes, &num_ids, ids, &out_of_capacity, out_groupids, slot_ids));
+ if (out_of_capacity) {
+ RETURN_NOT_OK(grow_double());
+ // Reset start slot ids for still unprocessed input keys.
+ //
+ for (uint32_t i = 0; i < num_ids; ++i) {
+ // First slot in the new starting block
+ const uint16_t id = util::SafeLoad(&ids[i]);
+ util::SafeStore(&slot_ids[id], (hashes[id] >> (bits_hash_ - log_blocks_)) * 8);
+ }
+ }
+ } while (num_ids > 0);
+
+ return Status::OK();
+}
+
+Status SwissTable::grow_double() {
+ // Before and after metadata
+ int num_group_id_bits_before = num_groupid_bits_from_log_blocks(log_blocks_);
+ int num_group_id_bits_after = num_groupid_bits_from_log_blocks(log_blocks_ + 1);
+ uint64_t group_id_mask_before = ~0ULL >> (64 - num_group_id_bits_before);
+ int log_blocks_before = log_blocks_;
+ int log_blocks_after = log_blocks_ + 1;
+ uint64_t block_size_before = (8 + num_group_id_bits_before);
+ uint64_t block_size_after = (8 + num_group_id_bits_after);
+ uint64_t block_size_total_before = (block_size_before << log_blocks_before) + padding_;
+ uint64_t block_size_total_after = (block_size_after << log_blocks_after) + padding_;
+ uint64_t hashes_size_total_before =
+ (bits_hash_ / 8 * (1 << (log_blocks_before + 3))) + padding_;
+ uint64_t hashes_size_total_after =
+ (bits_hash_ / 8 * (1 << (log_blocks_after + 3))) + padding_;
+ constexpr uint32_t stamp_mask = (1 << bits_stamp_) - 1;
+
+ // Allocate new buffers
+ uint8_t* blocks_new;
+ RETURN_NOT_OK(pool_->Allocate(block_size_total_after, &blocks_new));
+ memset(blocks_new, 0, block_size_total_after);
+ uint8_t* hashes_new_8B;
+ uint32_t* hashes_new;
+ RETURN_NOT_OK(pool_->Allocate(hashes_size_total_after, &hashes_new_8B));
+ hashes_new = reinterpret_cast<uint32_t*>(hashes_new_8B);
+
+ // First pass over all old blocks.
+ // Reinsert entries that were not in an overflow block
+ // (i.e. a block other than the one selected by the hash bits of the entry).
+ for (int i = 0; i < (1 << log_blocks_); ++i) {
+ // How many full slots in this block
+ uint8_t* block_base = blocks_ + i * block_size_before;
+ uint8_t* double_block_base_new = blocks_new + 2 * i * block_size_after;
+ uint64_t block = *reinterpret_cast<const uint64_t*>(block_base);
+
+ auto full_slots =
+ static_cast<int>(CountLeadingZeros(block & kHighBitOfEachByte) >> 3);
+ int full_slots_new[2];
+ full_slots_new[0] = full_slots_new[1] = 0;
+ util::SafeStore(double_block_base_new, kHighBitOfEachByte);
+ util::SafeStore(double_block_base_new + block_size_after, kHighBitOfEachByte);
+
+ for (int j = 0; j < full_slots; ++j) {
+ uint64_t slot_id = i * 8 + j;
+ uint32_t hash = hashes_[slot_id];
+ uint64_t block_id_new = hash >> (bits_hash_ - log_blocks_after);
+ bool is_overflow_entry = ((block_id_new >> 1) != static_cast<uint64_t>(i));
+ if (is_overflow_entry) {
+ continue;
+ }
+
+ int ihalf = block_id_new & 1;
+ uint8_t stamp_new =
+ hash >> ((bits_hash_ - log_blocks_after - bits_stamp_)) & stamp_mask;
+ uint64_t group_id_bit_offs = j * num_group_id_bits_before;
+ uint64_t group_id =
+ (util::SafeLoadAs<uint64_t>(block_base + 8 + (group_id_bit_offs >> 3)) >>
+ (group_id_bit_offs & 7)) &
+ group_id_mask_before;
+
+ uint64_t slot_id_new = i * 16 + ihalf * 8 + full_slots_new[ihalf];
+ hashes_new[slot_id_new] = hash;
+ uint8_t* block_base_new = double_block_base_new + ihalf * block_size_after;
+ block_base_new[7 - full_slots_new[ihalf]] = stamp_new;
+ int group_id_bit_offs_new = full_slots_new[ihalf] * num_group_id_bits_after;
+ uint64_t* ptr =
+ reinterpret_cast<uint64_t*>(block_base_new + 8 + (group_id_bit_offs_new >> 3));
+ util::SafeStore(ptr,
+ util::SafeLoad(ptr) | (group_id << (group_id_bit_offs_new & 7)));
+ full_slots_new[ihalf]++;
+ }
+ }
+
+ // Second pass over all old blocks.
+ // Reinsert entries that were in an overflow block.
+ for (int i = 0; i < (1 << log_blocks_); ++i) {
+ // How many full slots in this block
+ uint8_t* block_base = blocks_ + i * block_size_before;
+ uint64_t block = util::SafeLoadAs<uint64_t>(block_base);
+ int full_slots = static_cast<int>(CountLeadingZeros(block & kHighBitOfEachByte) >> 3);
+
+ for (int j = 0; j < full_slots; ++j) {
+ uint64_t slot_id = i * 8 + j;
+ uint32_t hash = hashes_[slot_id];
+ uint64_t block_id_new = hash >> (bits_hash_ - log_blocks_after);
+ bool is_overflow_entry = ((block_id_new >> 1) != static_cast<uint64_t>(i));
+ if (!is_overflow_entry) {
+ continue;
+ }
+
+ uint64_t group_id_bit_offs = j * num_group_id_bits_before;
+ uint64_t group_id =
+ (util::SafeLoadAs<uint64_t>(block_base + 8 + (group_id_bit_offs >> 3)) >>
+ (group_id_bit_offs & 7)) &
+ group_id_mask_before;
+ uint8_t stamp_new =
+ hash >> ((bits_hash_ - log_blocks_after - bits_stamp_)) & stamp_mask;
+
+ uint8_t* block_base_new = blocks_new + block_id_new * block_size_after;
+ uint64_t block_new = util::SafeLoadAs<uint64_t>(block_base_new);
+ int full_slots_new =
+ static_cast<int>(CountLeadingZeros(block_new & kHighBitOfEachByte) >> 3);
+ while (full_slots_new == 8) {
+ block_id_new = (block_id_new + 1) & ((1 << log_blocks_after) - 1);
+ block_base_new = blocks_new + block_id_new * block_size_after;
+ block_new = util::SafeLoadAs<uint64_t>(block_base_new);
+ full_slots_new =
+ static_cast<int>(CountLeadingZeros(block_new & kHighBitOfEachByte) >> 3);
+ }
+
+ hashes_new[block_id_new * 8 + full_slots_new] = hash;
+ block_base_new[7 - full_slots_new] = stamp_new;
+ int group_id_bit_offs_new = full_slots_new * num_group_id_bits_after;
+ uint64_t* ptr =
+ reinterpret_cast<uint64_t*>(block_base_new + 8 + (group_id_bit_offs_new >> 3));
+ util::SafeStore(ptr,
+ util::SafeLoad(ptr) | (group_id << (group_id_bit_offs_new & 7)));
+ }
+ }
+
+ pool_->Free(blocks_, block_size_total_before);
+ pool_->Free(reinterpret_cast<uint8_t*>(hashes_), hashes_size_total_before);
+ log_blocks_ = log_blocks_after;
+ blocks_ = blocks_new;
+ hashes_ = hashes_new;
+
+ return Status::OK();
+}
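+
+// Illustrative sketch of the two passes (sizes assumed): when log_blocks_
+// grows from 2 to 3, old block i splits into new blocks 2*i and 2*i + 1. An
+// entry in old block 1 whose top 3 hash bits are 0b011 goes to new block 3
+// (ihalf = 1) in the first pass; an entry stored in block 1 only because its
+// home block was full is an overflow entry and is reinserted in the second
+// pass, which probes blocks linearly until it finds a free slot.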
+
+Status SwissTable::init(int64_t hardware_flags, MemoryPool* pool,
+ util::TempVectorStack* temp_stack, int log_minibatch,
+ EqualImpl equal_impl, AppendImpl append_impl) {
+ hardware_flags_ = hardware_flags;
+ pool_ = pool;
+ temp_stack_ = temp_stack;
+ log_minibatch_ = log_minibatch;
+ equal_impl_ = equal_impl;
+ append_impl_ = append_impl;
+
+ log_blocks_ = 0;
+ int num_groupid_bits = num_groupid_bits_from_log_blocks(log_blocks_);
+ num_inserted_ = 0;
+
+ const uint64_t block_bytes = 8 + num_groupid_bits;
+ const uint64_t slot_bytes = (block_bytes << log_blocks_) + padding_;
+ RETURN_NOT_OK(pool_->Allocate(slot_bytes, &blocks_));
+
+ // Make sure group ids are initially set to zero for all slots.
+ memset(blocks_, 0, slot_bytes);
+
+ // Initialize all status bytes to represent an empty slot.
+ for (uint64_t i = 0; i < (static_cast<uint64_t>(1) << log_blocks_); ++i) {
+ util::SafeStore(blocks_ + i * block_bytes, kHighBitOfEachByte);
+ }
+
+ uint64_t num_slots = 1ULL << (log_blocks_ + 3);
+ const uint64_t hash_size = sizeof(uint32_t);
+ const uint64_t hash_bytes = hash_size * num_slots + padding_;
+ uint8_t* hashes8;
+ RETURN_NOT_OK(pool_->Allocate(hash_bytes, &hashes8));
+ hashes_ = reinterpret_cast<uint32_t*>(hashes8);
+
+ return Status::OK();
+}
+
+void SwissTable::cleanup() {
+ if (blocks_) {
+ int num_groupid_bits = num_groupid_bits_from_log_blocks(log_blocks_);
+ const uint64_t block_bytes = 8 + num_groupid_bits;
+ const uint64_t slot_bytes = (block_bytes << log_blocks_) + padding_;
+ pool_->Free(blocks_, slot_bytes);
+ blocks_ = nullptr;
+ }
+ if (hashes_) {
+ uint64_t num_slots = 1ULL << (log_blocks_ + 3);
+ const uint64_t hash_size = sizeof(uint32_t);
+ const uint64_t hash_bytes = hash_size * num_slots + padding_;
+ pool_->Free(reinterpret_cast<uint8_t*>(hashes_), hash_bytes);
+ hashes_ = nullptr;
+ }
+ log_blocks_ = 0;
+ num_inserted_ = 0;
+}
+
+} // namespace compute
+} // namespace arrow
diff --git a/contrib/libs/apache/arrow/cpp/src/arrow/compute/exec/key_map.h b/contrib/libs/apache/arrow/cpp/src/arrow/compute/exec/key_map.h
index da50db91040..8c472736ec4 100644
--- a/contrib/libs/apache/arrow/cpp/src/arrow/compute/exec/key_map.h
+++ b/contrib/libs/apache/arrow/cpp/src/arrow/compute/exec/key_map.h
@@ -1,172 +1,172 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-#pragma once
-
-#include <functional>
-
-#include "arrow/compute/exec/util.h"
-#include "arrow/memory_pool.h"
-#include "arrow/result.h"
-#include "arrow/status.h"
-
-namespace arrow {
-namespace compute {
-
-class SwissTable {
- public:
- SwissTable() = default;
- ~SwissTable() { cleanup(); }
-
- using EqualImpl =
- std::function<void(int num_keys, const uint16_t* selection /* may be null */,
- const uint32_t* group_ids, uint32_t* out_num_keys_mismatch,
- uint16_t* out_selection_mismatch)>;
- using AppendImpl = std::function<Status(int num_keys, const uint16_t* selection)>;
-
- Status init(int64_t hardware_flags, MemoryPool* pool, util::TempVectorStack* temp_stack,
- int log_minibatch, EqualImpl equal_impl, AppendImpl append_impl);
- void cleanup();
-
- Status map(const int ckeys, const uint32_t* hashes, uint32_t* outgroupids);
-
- private:
- // Lookup helpers
-
- /// \brief Scan bytes in block in reverse and stop as soon
- /// as a position of interest is found.
- ///
- /// Positions of interest:
- /// a) slot with a matching stamp is encountered,
- /// b) first empty slot is encountered,
- /// c) we reach the end of the block.
- ///
- /// \param[in] block 8 byte block of hash table
- /// \param[in] stamp 7 bits of hash used as a stamp
- /// \param[in] start_slot Index of the first slot in the block to start search from. We
- /// assume that this index always points to a non-empty slot, equivalently
- /// that it comes before any empty slots. (Used only by one template
- /// variant.)
- /// \param[out] out_slot index corresponding to the discovered position of interest (8
- /// represents end of block).
- /// \param[out] out_match_found an integer flag (0 or 1) indicating if we found a
- /// matching stamp.
- template <bool use_start_slot>
- inline void search_block(uint64_t block, int stamp, int start_slot, int* out_slot,
- int* out_match_found);
-
- /// \brief Extract group id for a given slot in a given block.
- ///
- /// Group ids follow in memory after 64-bit block data.
- /// Maximum number of groups inserted is equal to the number
- /// of all slots in all blocks, which is 8 * the number of blocks.
- /// Group ids are bit packed using that maximum to determine the necessary number of
- /// bits.
- inline uint64_t extract_group_id(const uint8_t* block_ptr, int slot,
- uint64_t group_id_mask);
-
- inline uint64_t next_slot_to_visit(uint64_t block_index, int slot, int match_found);
-
- inline void insert(uint8_t* block_base, uint64_t slot_id, uint32_t hash, uint8_t stamp,
- uint32_t group_id);
-
- inline uint64_t num_groups_for_resize() const;
-
- inline uint64_t wrap_global_slot_id(uint64_t global_slot_id);
-
- // First hash table access
- // Find the first match in the start block if one exists.
- // Possible cases:
- // 1. Stamp match in a block
- // 2. No stamp match in a block, no empty buckets in a block
- // 3. No stamp match in a block, empty buckets in a block
- //
- template <bool use_selection>
- void lookup_1(const uint16_t* selection, const int num_keys, const uint32_t* hashes,
- uint8_t* out_match_bitvector, uint32_t* out_group_ids,
- uint32_t* out_slot_ids);
-#if defined(ARROW_HAVE_AVX2)
- void lookup_1_avx2_x8(const int num_hashes, const uint32_t* hashes,
- uint8_t* out_match_bitvector, uint32_t* out_group_ids,
- uint32_t* out_next_slot_ids);
- void lookup_1_avx2_x32(const int num_hashes, const uint32_t* hashes,
- uint8_t* out_match_bitvector, uint32_t* out_group_ids,
- uint32_t* out_next_slot_ids);
-#endif
-
- // Completing hash table lookup post first access
- Status lookup_2(const uint32_t* hashes, uint32_t* inout_num_selected,
- uint16_t* inout_selection, bool* out_need_resize,
- uint32_t* out_group_ids, uint32_t* out_next_slot_ids);
-
- // Resize small hash tables when 50% full (up to 12KB).
- // Resize large hash tables when 75% full.
- Status grow_double();
-
- static int num_groupid_bits_from_log_blocks(int log_blocks) {
- int required_bits = log_blocks + 3;
- return required_bits <= 8 ? 8
- : required_bits <= 16 ? 16 : required_bits <= 32 ? 32 : 64;
- }
-
- // Use 32-bit hash for now
- static constexpr int bits_hash_ = 32;
-
- // Number of hash bits stored in slots in a block.
- // The highest bits of hash determine block id.
- // The next set of highest bits is a "stamp" stored in a slot in a block.
- static constexpr int bits_stamp_ = 7;
-
- // Padding bytes added at the end of buffers for ease of SIMD access
- static constexpr int padding_ = 64;
-
- int log_minibatch_;
- // Base 2 log of the number of blocks
- int log_blocks_ = 0;
- // Number of keys inserted into hash table
- uint32_t num_inserted_ = 0;
-
- // Data for blocks.
- // Each block has 8 status bytes for 8 slots, followed by 8 bit packed group ids for
- // these slots. In 8B status word, the order of bytes is reversed. Group ids are in
- // normal order. There is 64B padding at the end.
- //
- // byte 0 - bucket 7 | byte 1 - bucket 6 | ...
- // ---------------------------------------------------
- // | Empty bit* | Empty bit |
- // ---------------------------------------------------
- // | 7-bit hash | 7-bit hash |
- // ---------------------------------------------------
- // * Empty bucket has value 0x80. Non-empty bucket has highest bit set to 0.
- //
- uint8_t* blocks_;
-
- // Array of hashes of values inserted into slots.
- // Undefined if the corresponding slot is empty.
- // There is 64B padding at the end.
- uint32_t* hashes_;
-
- int64_t hardware_flags_;
- MemoryPool* pool_;
- util::TempVectorStack* temp_stack_;
-
- EqualImpl equal_impl_;
- AppendImpl append_impl_;
-};
-
-} // namespace compute
-} // namespace arrow
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+
+#include <functional>
+
+#include "arrow/compute/exec/util.h"
+#include "arrow/memory_pool.h"
+#include "arrow/result.h"
+#include "arrow/status.h"
+
+namespace arrow {
+namespace compute {
+
+class SwissTable {
+ public:
+ SwissTable() = default;
+ ~SwissTable() { cleanup(); }
+
+ using EqualImpl =
+ std::function<void(int num_keys, const uint16_t* selection /* may be null */,
+ const uint32_t* group_ids, uint32_t* out_num_keys_mismatch,
+ uint16_t* out_selection_mismatch)>;
+ using AppendImpl = std::function<Status(int num_keys, const uint16_t* selection)>;
+
+ Status init(int64_t hardware_flags, MemoryPool* pool, util::TempVectorStack* temp_stack,
+ int log_minibatch, EqualImpl equal_impl, AppendImpl append_impl);
+ void cleanup();
+
+ Status map(const int ckeys, const uint32_t* hashes, uint32_t* outgroupids);
+
+ private:
+ // Lookup helpers
+
+ /// \brief Scan bytes in block in reverse and stop as soon
+ /// as a position of interest is found.
+ ///
+ /// Positions of interest:
+ /// a) slot with a matching stamp is encountered,
+ /// b) first empty slot is encountered,
+ /// c) we reach the end of the block.
+ ///
+ /// \param[in] block 8 byte block of hash table
+ /// \param[in] stamp 7 bits of hash used as a stamp
+ /// \param[in] start_slot Index of the first slot in the block to start search from. We
+ /// assume that this index always points to a non-empty slot, equivalently
+ /// that it comes before any empty slots. (Used only by one template
+ /// variant.)
+ /// \param[out] out_slot index corresponding to the discovered position of interest (8
+ /// represents end of block).
+ /// \param[out] out_match_found an integer flag (0 or 1) indicating if we found a
+ /// matching stamp.
+ template <bool use_start_slot>
+ inline void search_block(uint64_t block, int stamp, int start_slot, int* out_slot,
+ int* out_match_found);
+
+ /// \brief Extract group id for a given slot in a given block.
+ ///
+ /// Group ids follow in memory after 64-bit block data.
+ /// Maximum number of groups inserted is equal to the number
+ /// of all slots in all blocks, which is 8 * the number of blocks.
+ /// Group ids are bit packed using that maximum to determine the necessary number of
+ /// bits.
+ inline uint64_t extract_group_id(const uint8_t* block_ptr, int slot,
+ uint64_t group_id_mask);
+
+ inline uint64_t next_slot_to_visit(uint64_t block_index, int slot, int match_found);
+
+ inline void insert(uint8_t* block_base, uint64_t slot_id, uint32_t hash, uint8_t stamp,
+ uint32_t group_id);
+
+ inline uint64_t num_groups_for_resize() const;
+
+ inline uint64_t wrap_global_slot_id(uint64_t global_slot_id);
+
+ // First hash table access
+ // Find the first match in the start block if one exists.
+ // Possible cases:
+ // 1. Stamp match in a block
+ // 2. No stamp match in a block, no empty buckets in a block
+ // 3. No stamp match in a block, empty buckets in a block
+ //
+ template <bool use_selection>
+ void lookup_1(const uint16_t* selection, const int num_keys, const uint32_t* hashes,
+ uint8_t* out_match_bitvector, uint32_t* out_group_ids,
+ uint32_t* out_slot_ids);
+#if defined(ARROW_HAVE_AVX2)
+ void lookup_1_avx2_x8(const int num_hashes, const uint32_t* hashes,
+ uint8_t* out_match_bitvector, uint32_t* out_group_ids,
+ uint32_t* out_next_slot_ids);
+ void lookup_1_avx2_x32(const int num_hashes, const uint32_t* hashes,
+ uint8_t* out_match_bitvector, uint32_t* out_group_ids,
+ uint32_t* out_next_slot_ids);
+#endif
+
+ // Completing hash table lookup post first access
+ Status lookup_2(const uint32_t* hashes, uint32_t* inout_num_selected,
+ uint16_t* inout_selection, bool* out_need_resize,
+ uint32_t* out_group_ids, uint32_t* out_next_slot_ids);
+
+ // Resize small hash tables when 50% full (up to 12KB).
+ // Resize large hash tables when 75% full.
+ Status grow_double();
+
+ static int num_groupid_bits_from_log_blocks(int log_blocks) {
+ int required_bits = log_blocks + 3;
+ return required_bits <= 8 ? 8
+ : required_bits <= 16 ? 16 : required_bits <= 32 ? 32 : 64;
+ }
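+
+ // For example (illustrative): log_blocks = 5 gives required_bits = 8 and
+ // 8-bit group ids, while log_blocks = 9 gives required_bits = 12, rounded up
+ // to 16 bits so that packed group ids stay byte-aligned.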
+
+ // Use 32-bit hash for now
+ static constexpr int bits_hash_ = 32;
+
+ // Number of hash bits stored in slots in a block.
+ // The highest bits of hash determine block id.
+ // The next set of highest bits is a "stamp" stored in a slot in a block.
+ static constexpr int bits_stamp_ = 7;
+
+ // Padding bytes added at the end of buffers for ease of SIMD access
+ static constexpr int padding_ = 64;
+
+ int log_minibatch_;
+ // Base 2 log of the number of blocks
+ int log_blocks_ = 0;
+ // Number of keys inserted into hash table
+ uint32_t num_inserted_ = 0;
+
+ // Data for blocks.
+ // Each block has 8 status bytes for 8 slots, followed by 8 bit packed group ids for
+ // these slots. In 8B status word, the order of bytes is reversed. Group ids are in
+ // normal order. There is 64B padding at the end.
+ //
+ // byte 0 - bucket 7 | byte 1 - bucket 6 | ...
+ // ---------------------------------------------------
+ // | Empty bit* | Empty bit |
+ // ---------------------------------------------------
+ // | 7-bit hash | 7-bit hash |
+ // ---------------------------------------------------
+ // * Empty bucket has value 0x80. Non-empty bucket has highest bit set to 0.
+ //
+ uint8_t* blocks_;
+
+ // Array of hashes of values inserted into slots.
+ // Undefined if the corresponding slot is empty.
+ // There is 64B padding at the end.
+ uint32_t* hashes_;
+
+ int64_t hardware_flags_;
+ MemoryPool* pool_;
+ util::TempVectorStack* temp_stack_;
+
+ EqualImpl equal_impl_;
+ AppendImpl append_impl_;
+};
+
+} // namespace compute
+} // namespace arrow
diff --git a/contrib/libs/apache/arrow/cpp/src/arrow/compute/exec/util.cc b/contrib/libs/apache/arrow/cpp/src/arrow/compute/exec/util.cc
index b667afc65bb..a44676c2f0d 100644
--- a/contrib/libs/apache/arrow/cpp/src/arrow/compute/exec/util.cc
+++ b/contrib/libs/apache/arrow/cpp/src/arrow/compute/exec/util.cc
@@ -1,278 +1,278 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-#include "arrow/compute/exec/util.h"
-
-#include "arrow/util/bit_util.h"
-#include "arrow/util/bitmap_ops.h"
-#include "arrow/util/ubsan.h"
-
-namespace arrow {
-
-using BitUtil::CountTrailingZeros;
-
-namespace util {
-
-inline void BitUtil::bits_to_indexes_helper(uint64_t word, uint16_t base_index,
- int* num_indexes, uint16_t* indexes) {
- int n = *num_indexes;
- while (word) {
- indexes[n++] = base_index + static_cast<uint16_t>(CountTrailingZeros(word));
- word &= word - 1;
- }
- *num_indexes = n;
-}
-
-inline void BitUtil::bits_filter_indexes_helper(uint64_t word,
- const uint16_t* input_indexes,
- int* num_indexes, uint16_t* indexes) {
- int n = *num_indexes;
- while (word) {
- indexes[n++] = input_indexes[CountTrailingZeros(word)];
- word &= word - 1;
- }
- *num_indexes = n;
-}
-
-template <int bit_to_search, bool filter_input_indexes>
-void BitUtil::bits_to_indexes_internal(int64_t hardware_flags, const int num_bits,
- const uint8_t* bits, const uint16_t* input_indexes,
- int* num_indexes, uint16_t* indexes) {
- // 64 bits at a time
- constexpr int unroll = 64;
- int tail = num_bits % unroll;
-#if defined(ARROW_HAVE_AVX2)
- if (hardware_flags & arrow::internal::CpuInfo::AVX2) {
- if (filter_input_indexes) {
- bits_filter_indexes_avx2(bit_to_search, num_bits - tail, bits, input_indexes,
- num_indexes, indexes);
- } else {
- bits_to_indexes_avx2(bit_to_search, num_bits - tail, bits, num_indexes, indexes);
- }
- } else {
-#endif
- *num_indexes = 0;
- for (int i = 0; i < num_bits / unroll; ++i) {
- uint64_t word = util::SafeLoad(&reinterpret_cast<const uint64_t*>(bits)[i]);
- if (bit_to_search == 0) {
- word = ~word;
- }
- if (filter_input_indexes) {
- bits_filter_indexes_helper(word, input_indexes + i * 64, num_indexes, indexes);
- } else {
- bits_to_indexes_helper(word, i * 64, num_indexes, indexes);
- }
- }
-#if defined(ARROW_HAVE_AVX2)
- }
-#endif
-  // Process the last partial word, if any, masking out the bits outside the range
- if (tail) {
- uint64_t word =
- util::SafeLoad(&reinterpret_cast<const uint64_t*>(bits)[num_bits / unroll]);
- if (bit_to_search == 0) {
- word = ~word;
- }
- word &= ~0ULL >> (64 - tail);
- if (filter_input_indexes) {
- bits_filter_indexes_helper(word, input_indexes + num_bits - tail, num_indexes,
- indexes);
- } else {
- bits_to_indexes_helper(word, num_bits - tail, num_indexes, indexes);
- }
- }
-}
-
-void BitUtil::bits_to_indexes(int bit_to_search, int64_t hardware_flags,
- const int num_bits, const uint8_t* bits, int* num_indexes,
- uint16_t* indexes, int bit_offset) {
- bits += bit_offset / 8;
- bit_offset %= 8;
- if (bit_offset != 0) {
- int num_indexes_head = 0;
- uint64_t bits_head =
- util::SafeLoad(reinterpret_cast<const uint64_t*>(bits)) >> bit_offset;
- int bits_in_first_byte = std::min(num_bits, 8 - bit_offset);
- bits_to_indexes(bit_to_search, hardware_flags, bits_in_first_byte,
- reinterpret_cast<const uint8_t*>(&bits_head), &num_indexes_head,
- indexes);
- int num_indexes_tail = 0;
- if (num_bits > bits_in_first_byte) {
- bits_to_indexes(bit_to_search, hardware_flags, num_bits - bits_in_first_byte,
- bits + 1, &num_indexes_tail, indexes + num_indexes_head);
- }
- *num_indexes = num_indexes_head + num_indexes_tail;
- return;
- }
-
- if (bit_to_search == 0) {
- bits_to_indexes_internal<0, false>(hardware_flags, num_bits, bits, nullptr,
- num_indexes, indexes);
- } else {
- ARROW_DCHECK(bit_to_search == 1);
- bits_to_indexes_internal<1, false>(hardware_flags, num_bits, bits, nullptr,
- num_indexes, indexes);
- }
-}
-
-void BitUtil::bits_filter_indexes(int bit_to_search, int64_t hardware_flags,
- const int num_bits, const uint8_t* bits,
- const uint16_t* input_indexes, int* num_indexes,
- uint16_t* indexes, int bit_offset) {
- bits += bit_offset / 8;
- bit_offset %= 8;
- if (bit_offset != 0) {
- int num_indexes_head = 0;
- uint64_t bits_head =
- util::SafeLoad(reinterpret_cast<const uint64_t*>(bits)) >> bit_offset;
- int bits_in_first_byte = std::min(num_bits, 8 - bit_offset);
- bits_filter_indexes(bit_to_search, hardware_flags, bits_in_first_byte,
- reinterpret_cast<const uint8_t*>(&bits_head), input_indexes,
- &num_indexes_head, indexes);
- int num_indexes_tail = 0;
- if (num_bits > bits_in_first_byte) {
- bits_filter_indexes(bit_to_search, hardware_flags, num_bits - bits_in_first_byte,
- bits + 1, input_indexes + bits_in_first_byte, &num_indexes_tail,
- indexes + num_indexes_head);
- }
- *num_indexes = num_indexes_head + num_indexes_tail;
- return;
- }
-
- if (bit_to_search == 0) {
- bits_to_indexes_internal<0, true>(hardware_flags, num_bits, bits, input_indexes,
- num_indexes, indexes);
- } else {
- ARROW_DCHECK(bit_to_search == 1);
- bits_to_indexes_internal<1, true>(hardware_flags, num_bits, bits, input_indexes,
- num_indexes, indexes);
- }
-}
-
-void BitUtil::bits_split_indexes(int64_t hardware_flags, const int num_bits,
- const uint8_t* bits, int* num_indexes_bit0,
- uint16_t* indexes_bit0, uint16_t* indexes_bit1,
- int bit_offset) {
- bits_to_indexes(0, hardware_flags, num_bits, bits, num_indexes_bit0, indexes_bit0,
- bit_offset);
- int num_indexes_bit1;
- bits_to_indexes(1, hardware_flags, num_bits, bits, &num_indexes_bit1, indexes_bit1,
- bit_offset);
-}
-
-void BitUtil::bits_to_bytes(int64_t hardware_flags, const int num_bits,
- const uint8_t* bits, uint8_t* bytes, int bit_offset) {
- bits += bit_offset / 8;
- bit_offset %= 8;
- if (bit_offset != 0) {
- uint64_t bits_head =
- util::SafeLoad(reinterpret_cast<const uint64_t*>(bits)) >> bit_offset;
- int bits_in_first_byte = std::min(num_bits, 8 - bit_offset);
- bits_to_bytes(hardware_flags, bits_in_first_byte,
- reinterpret_cast<const uint8_t*>(&bits_head), bytes);
- if (num_bits > bits_in_first_byte) {
- bits_to_bytes(hardware_flags, num_bits - bits_in_first_byte, bits + 1,
- bytes + bits_in_first_byte);
- }
- return;
- }
-
- int num_processed = 0;
-#if defined(ARROW_HAVE_AVX2)
- if (hardware_flags & arrow::internal::CpuInfo::AVX2) {
- // The function call below processes whole 32 bit chunks together.
- num_processed = num_bits - (num_bits % 32);
- bits_to_bytes_avx2(num_processed, bits, bytes);
- }
-#endif
-  // Process 8 bits at a time
- constexpr int unroll = 8;
- for (int i = num_processed / unroll; i < (num_bits + unroll - 1) / unroll; ++i) {
- uint8_t bits_next = bits[i];
-    // Clear the lowest bit, then make 7 copies of the remaining 7 bits, each shifted
-    // 7 bits further left, so that input bit k ends up at bit position 8 * k.
- uint64_t unpacked = static_cast<uint64_t>(bits_next & 0xfe) *
- ((1ULL << 7) | (1ULL << 14) | (1ULL << 21) | (1ULL << 28) |
- (1ULL << 35) | (1ULL << 42) | (1ULL << 49));
- unpacked |= (bits_next & 1);
- unpacked &= 0x0101010101010101ULL;
- unpacked *= 255;
- util::SafeStore(&reinterpret_cast<uint64_t*>(bytes)[i], unpacked);
- }
-}
-
-void BitUtil::bytes_to_bits(int64_t hardware_flags, const int num_bits,
- const uint8_t* bytes, uint8_t* bits, int bit_offset) {
- bits += bit_offset / 8;
- bit_offset %= 8;
- if (bit_offset != 0) {
- uint64_t bits_head;
- int bits_in_first_byte = std::min(num_bits, 8 - bit_offset);
- bytes_to_bits(hardware_flags, bits_in_first_byte, bytes,
- reinterpret_cast<uint8_t*>(&bits_head));
- uint8_t mask = (1 << bit_offset) - 1;
- *bits = static_cast<uint8_t>((*bits & mask) | (bits_head << bit_offset));
-
- if (num_bits > bits_in_first_byte) {
- bytes_to_bits(hardware_flags, num_bits - bits_in_first_byte,
- bytes + bits_in_first_byte, bits + 1);
- }
- return;
- }
-
- int num_processed = 0;
-#if defined(ARROW_HAVE_AVX2)
- if (hardware_flags & arrow::internal::CpuInfo::AVX2) {
- // The function call below processes whole 32 bit chunks together.
- num_processed = num_bits - (num_bits % 32);
- bytes_to_bits_avx2(num_processed, bytes, bits);
- }
-#endif
- // Process 8 bits at a time
- constexpr int unroll = 8;
- for (int i = num_processed / unroll; i < (num_bits + unroll - 1) / unroll; ++i) {
- uint64_t bytes_next = util::SafeLoad(&reinterpret_cast<const uint64_t*>(bytes)[i]);
- bytes_next &= 0x0101010101010101ULL;
- bytes_next |= (bytes_next >> 7); // Pairs of adjacent output bits in individual bytes
- bytes_next |= (bytes_next >> 14); // 4 adjacent output bits in individual bytes
- bytes_next |= (bytes_next >> 28); // All 8 output bits in the lowest byte
- bits[i] = static_cast<uint8_t>(bytes_next & 0xff);
- }
-}
-
-bool BitUtil::are_all_bytes_zero(int64_t hardware_flags, const uint8_t* bytes,
- uint32_t num_bytes) {
-#if defined(ARROW_HAVE_AVX2)
- if (hardware_flags & arrow::internal::CpuInfo::AVX2) {
- return are_all_bytes_zero_avx2(bytes, num_bytes);
- }
-#endif
- uint64_t result_or = 0;
- uint32_t i;
- for (i = 0; i < num_bytes / 8; ++i) {
- uint64_t x = util::SafeLoad(&reinterpret_cast<const uint64_t*>(bytes)[i]);
- result_or |= x;
- }
- if (num_bytes % 8 > 0) {
- uint64_t tail = 0;
- result_or |= memcmp(bytes + i * 8, &tail, num_bytes % 8);
- }
- return result_or == 0;
-}
-
-} // namespace util
-} // namespace arrow
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "arrow/compute/exec/util.h"
+
+#include "arrow/util/bit_util.h"
+#include "arrow/util/bitmap_ops.h"
+#include "arrow/util/ubsan.h"
+
+namespace arrow {
+
+using BitUtil::CountTrailingZeros;
+
+namespace util {
+
+inline void BitUtil::bits_to_indexes_helper(uint64_t word, uint16_t base_index,
+ int* num_indexes, uint16_t* indexes) {
+ int n = *num_indexes;
+ while (word) {
+ indexes[n++] = base_index + static_cast<uint16_t>(CountTrailingZeros(word));
+ word &= word - 1;
+ }
+ *num_indexes = n;
+}
+
+inline void BitUtil::bits_filter_indexes_helper(uint64_t word,
+ const uint16_t* input_indexes,
+ int* num_indexes, uint16_t* indexes) {
+ int n = *num_indexes;
+ while (word) {
+ indexes[n++] = input_indexes[CountTrailingZeros(word)];
+ word &= word - 1;
+ }
+ *num_indexes = n;
+}
+
+template <int bit_to_search, bool filter_input_indexes>
+void BitUtil::bits_to_indexes_internal(int64_t hardware_flags, const int num_bits,
+ const uint8_t* bits, const uint16_t* input_indexes,
+ int* num_indexes, uint16_t* indexes) {
+ // 64 bits at a time
+ constexpr int unroll = 64;
+ int tail = num_bits % unroll;
+#if defined(ARROW_HAVE_AVX2)
+ if (hardware_flags & arrow::internal::CpuInfo::AVX2) {
+ if (filter_input_indexes) {
+ bits_filter_indexes_avx2(bit_to_search, num_bits - tail, bits, input_indexes,
+ num_indexes, indexes);
+ } else {
+ bits_to_indexes_avx2(bit_to_search, num_bits - tail, bits, num_indexes, indexes);
+ }
+ } else {
+#endif
+ *num_indexes = 0;
+ for (int i = 0; i < num_bits / unroll; ++i) {
+ uint64_t word = util::SafeLoad(&reinterpret_cast<const uint64_t*>(bits)[i]);
+ if (bit_to_search == 0) {
+ word = ~word;
+ }
+ if (filter_input_indexes) {
+ bits_filter_indexes_helper(word, input_indexes + i * 64, num_indexes, indexes);
+ } else {
+ bits_to_indexes_helper(word, i * 64, num_indexes, indexes);
+ }
+ }
+#if defined(ARROW_HAVE_AVX2)
+ }
+#endif
+  // Process the last partial word, if any, masking out the bits outside the range
+ if (tail) {
+ uint64_t word =
+ util::SafeLoad(&reinterpret_cast<const uint64_t*>(bits)[num_bits / unroll]);
+ if (bit_to_search == 0) {
+ word = ~word;
+ }
+ word &= ~0ULL >> (64 - tail);
+ if (filter_input_indexes) {
+ bits_filter_indexes_helper(word, input_indexes + num_bits - tail, num_indexes,
+ indexes);
+ } else {
+ bits_to_indexes_helper(word, num_bits - tail, num_indexes, indexes);
+ }
+ }
+}
+
+void BitUtil::bits_to_indexes(int bit_to_search, int64_t hardware_flags,
+ const int num_bits, const uint8_t* bits, int* num_indexes,
+ uint16_t* indexes, int bit_offset) {
+ bits += bit_offset / 8;
+ bit_offset %= 8;
+ if (bit_offset != 0) {
+ int num_indexes_head = 0;
+ uint64_t bits_head =
+ util::SafeLoad(reinterpret_cast<const uint64_t*>(bits)) >> bit_offset;
+ int bits_in_first_byte = std::min(num_bits, 8 - bit_offset);
+ bits_to_indexes(bit_to_search, hardware_flags, bits_in_first_byte,
+ reinterpret_cast<const uint8_t*>(&bits_head), &num_indexes_head,
+ indexes);
+ int num_indexes_tail = 0;
+ if (num_bits > bits_in_first_byte) {
+ bits_to_indexes(bit_to_search, hardware_flags, num_bits - bits_in_first_byte,
+ bits + 1, &num_indexes_tail, indexes + num_indexes_head);
+ }
+ *num_indexes = num_indexes_head + num_indexes_tail;
+ return;
+ }
+
+ if (bit_to_search == 0) {
+ bits_to_indexes_internal<0, false>(hardware_flags, num_bits, bits, nullptr,
+ num_indexes, indexes);
+ } else {
+ ARROW_DCHECK(bit_to_search == 1);
+ bits_to_indexes_internal<1, false>(hardware_flags, num_bits, bits, nullptr,
+ num_indexes, indexes);
+ }
+}
+
+void BitUtil::bits_filter_indexes(int bit_to_search, int64_t hardware_flags,
+ const int num_bits, const uint8_t* bits,
+ const uint16_t* input_indexes, int* num_indexes,
+ uint16_t* indexes, int bit_offset) {
+ bits += bit_offset / 8;
+ bit_offset %= 8;
+ if (bit_offset != 0) {
+ int num_indexes_head = 0;
+ uint64_t bits_head =
+ util::SafeLoad(reinterpret_cast<const uint64_t*>(bits)) >> bit_offset;
+ int bits_in_first_byte = std::min(num_bits, 8 - bit_offset);
+ bits_filter_indexes(bit_to_search, hardware_flags, bits_in_first_byte,
+ reinterpret_cast<const uint8_t*>(&bits_head), input_indexes,
+ &num_indexes_head, indexes);
+ int num_indexes_tail = 0;
+ if (num_bits > bits_in_first_byte) {
+ bits_filter_indexes(bit_to_search, hardware_flags, num_bits - bits_in_first_byte,
+ bits + 1, input_indexes + bits_in_first_byte, &num_indexes_tail,
+ indexes + num_indexes_head);
+ }
+ *num_indexes = num_indexes_head + num_indexes_tail;
+ return;
+ }
+
+ if (bit_to_search == 0) {
+ bits_to_indexes_internal<0, true>(hardware_flags, num_bits, bits, input_indexes,
+ num_indexes, indexes);
+ } else {
+ ARROW_DCHECK(bit_to_search == 1);
+ bits_to_indexes_internal<1, true>(hardware_flags, num_bits, bits, input_indexes,
+ num_indexes, indexes);
+ }
+}
+
+void BitUtil::bits_split_indexes(int64_t hardware_flags, const int num_bits,
+ const uint8_t* bits, int* num_indexes_bit0,
+ uint16_t* indexes_bit0, uint16_t* indexes_bit1,
+ int bit_offset) {
+ bits_to_indexes(0, hardware_flags, num_bits, bits, num_indexes_bit0, indexes_bit0,
+ bit_offset);
+ int num_indexes_bit1;
+ bits_to_indexes(1, hardware_flags, num_bits, bits, &num_indexes_bit1, indexes_bit1,
+ bit_offset);
+}
+
+void BitUtil::bits_to_bytes(int64_t hardware_flags, const int num_bits,
+ const uint8_t* bits, uint8_t* bytes, int bit_offset) {
+ bits += bit_offset / 8;
+ bit_offset %= 8;
+ if (bit_offset != 0) {
+ uint64_t bits_head =
+ util::SafeLoad(reinterpret_cast<const uint64_t*>(bits)) >> bit_offset;
+ int bits_in_first_byte = std::min(num_bits, 8 - bit_offset);
+ bits_to_bytes(hardware_flags, bits_in_first_byte,
+ reinterpret_cast<const uint8_t*>(&bits_head), bytes);
+ if (num_bits > bits_in_first_byte) {
+ bits_to_bytes(hardware_flags, num_bits - bits_in_first_byte, bits + 1,
+ bytes + bits_in_first_byte);
+ }
+ return;
+ }
+
+ int num_processed = 0;
+#if defined(ARROW_HAVE_AVX2)
+ if (hardware_flags & arrow::internal::CpuInfo::AVX2) {
+ // The function call below processes whole 32 bit chunks together.
+ num_processed = num_bits - (num_bits % 32);
+ bits_to_bytes_avx2(num_processed, bits, bytes);
+ }
+#endif
+  // Process 8 bits at a time
+ constexpr int unroll = 8;
+ for (int i = num_processed / unroll; i < (num_bits + unroll - 1) / unroll; ++i) {
+ uint8_t bits_next = bits[i];
+    // Clear the lowest bit, then make 7 copies of the remaining 7 bits, each shifted
+    // 7 bits further left, so that input bit k ends up at bit position 8 * k.
+ uint64_t unpacked = static_cast<uint64_t>(bits_next & 0xfe) *
+ ((1ULL << 7) | (1ULL << 14) | (1ULL << 21) | (1ULL << 28) |
+ (1ULL << 35) | (1ULL << 42) | (1ULL << 49));
+ unpacked |= (bits_next & 1);
+ unpacked &= 0x0101010101010101ULL;
+ unpacked *= 255;
+ util::SafeStore(&reinterpret_cast<uint64_t*>(bytes)[i], unpacked);
+ }
+}
+
+void BitUtil::bytes_to_bits(int64_t hardware_flags, const int num_bits,
+ const uint8_t* bytes, uint8_t* bits, int bit_offset) {
+ bits += bit_offset / 8;
+ bit_offset %= 8;
+ if (bit_offset != 0) {
+ uint64_t bits_head;
+ int bits_in_first_byte = std::min(num_bits, 8 - bit_offset);
+ bytes_to_bits(hardware_flags, bits_in_first_byte, bytes,
+ reinterpret_cast<uint8_t*>(&bits_head));
+ uint8_t mask = (1 << bit_offset) - 1;
+ *bits = static_cast<uint8_t>((*bits & mask) | (bits_head << bit_offset));
+
+ if (num_bits > bits_in_first_byte) {
+ bytes_to_bits(hardware_flags, num_bits - bits_in_first_byte,
+ bytes + bits_in_first_byte, bits + 1);
+ }
+ return;
+ }
+
+ int num_processed = 0;
+#if defined(ARROW_HAVE_AVX2)
+ if (hardware_flags & arrow::internal::CpuInfo::AVX2) {
+ // The function call below processes whole 32 bit chunks together.
+ num_processed = num_bits - (num_bits % 32);
+ bytes_to_bits_avx2(num_processed, bytes, bits);
+ }
+#endif
+ // Process 8 bits at a time
+ constexpr int unroll = 8;
+ for (int i = num_processed / unroll; i < (num_bits + unroll - 1) / unroll; ++i) {
+ uint64_t bytes_next = util::SafeLoad(&reinterpret_cast<const uint64_t*>(bytes)[i]);
+ bytes_next &= 0x0101010101010101ULL;
+ bytes_next |= (bytes_next >> 7); // Pairs of adjacent output bits in individual bytes
+ bytes_next |= (bytes_next >> 14); // 4 adjacent output bits in individual bytes
+ bytes_next |= (bytes_next >> 28); // All 8 output bits in the lowest byte
+ bits[i] = static_cast<uint8_t>(bytes_next & 0xff);
+ }
+}
+
+bool BitUtil::are_all_bytes_zero(int64_t hardware_flags, const uint8_t* bytes,
+ uint32_t num_bytes) {
+#if defined(ARROW_HAVE_AVX2)
+ if (hardware_flags & arrow::internal::CpuInfo::AVX2) {
+ return are_all_bytes_zero_avx2(bytes, num_bytes);
+ }
+#endif
+ uint64_t result_or = 0;
+ uint32_t i;
+ for (i = 0; i < num_bytes / 8; ++i) {
+ uint64_t x = util::SafeLoad(&reinterpret_cast<const uint64_t*>(bytes)[i]);
+ result_or |= x;
+ }
+ if (num_bytes % 8 > 0) {
+ uint64_t tail = 0;
+ result_or |= memcmp(bytes + i * 8, &tail, num_bytes % 8);
+ }
+ return result_or == 0;
+}
+
+} // namespace util
+} // namespace arrow
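
[Editorial note] The scalar fallback in bits_to_bytes above hinges on a multiply-and-mask trick; the standalone sketch below shows one input byte expanding into eight 0x00/0xFF bytes. A little-endian target is assumed (as it is by the SafeStore of a uint64_t in the vendored code); everything else is taken directly from the loop body above.

#include <cstdint>
#include <cstdio>
#include <cstring>

int main() {
  uint8_t bits_next = 0b10110010;  // bits set at positions 1, 4, 5, 7
  // The multiply makes 7 copies of the 7 high bits, each shifted 7 bits
  // further, so input bit k lands at bit position 8 * k.
  uint64_t unpacked = static_cast<uint64_t>(bits_next & 0xfe) *
                      ((1ULL << 7) | (1ULL << 14) | (1ULL << 21) | (1ULL << 28) |
                       (1ULL << 35) | (1ULL << 42) | (1ULL << 49));
  unpacked |= (bits_next & 1);        // bit 0 goes to byte 0 directly
  unpacked &= 0x0101010101010101ULL;  // keep only bit k of byte k
  unpacked *= 255;                    // turn each 0x01 byte into 0xFF

  uint8_t bytes[8];
  std::memcpy(bytes, &unpacked, sizeof(bytes));  // little-endian assumed
  for (int i = 0; i < 8; ++i) std::printf("%02X ", bytes[i]);
  std::printf("\n");  // prints: 00 FF 00 00 FF FF 00 FF
  return 0;
}
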
diff --git a/contrib/libs/apache/arrow/cpp/src/arrow/compute/exec/util.h b/contrib/libs/apache/arrow/cpp/src/arrow/compute/exec/util.h
index 1025476ac63..471cc332220 100644
--- a/contrib/libs/apache/arrow/cpp/src/arrow/compute/exec/util.h
+++ b/contrib/libs/apache/arrow/cpp/src/arrow/compute/exec/util.h
@@ -1,171 +1,171 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-#pragma once
-
-#include <cstdint>
-#include <vector>
-
-#include "arrow/buffer.h"
-#include "arrow/memory_pool.h"
-#include "arrow/result.h"
-#include "arrow/status.h"
-#include "arrow/util/cpu_info.h"
-#include "arrow/util/logging.h"
-
-#if defined(__clang__) || defined(__GNUC__)
-#define BYTESWAP(x) __builtin_bswap64(x)
-#define ROTL(x, n) (((x) << (n)) | ((x) >> (32 - (n))))
-#elif defined(_MSC_VER)
-#include <intrin.h>
-#define BYTESWAP(x) _byteswap_uint64(x)
-#define ROTL(x, n) _rotl((x), (n))
-#endif
-
-namespace arrow {
-namespace util {
-
-// Some platforms typedef int64_t as long int instead of long long int,
-// which breaks the _mm256_i64gather_epi64 and _mm256_i32gather_epi64
-// intrinsics, since they expect long long arguments.
-// We cast to the type below in these intrinsics to make the code
-// compile in all cases.
-//
-using int64_for_gather_t = const long long int; // NOLINT runtime-int
-
-/// Storage used to allocate temporary vectors of a batch size.
-/// Temporary vectors resemble temporary variables allocated on the stack,
-/// except that in vectorized processing we need a vector of temporaries
-/// instead of a single value.
-class TempVectorStack {
- template <typename>
- friend class TempVectorHolder;
-
- public:
- Status Init(MemoryPool* pool, int64_t size) {
- num_vectors_ = 0;
- top_ = 0;
- buffer_size_ = size;
- ARROW_ASSIGN_OR_RAISE(auto buffer, AllocateResizableBuffer(size, pool));
- buffer_ = std::move(buffer);
- return Status::OK();
- }
-
- private:
- void alloc(uint32_t num_bytes, uint8_t** data, int* id) {
- int64_t old_top = top_;
- top_ += num_bytes + padding;
- // Stack overflow check
- ARROW_DCHECK(top_ <= buffer_size_);
- *data = buffer_->mutable_data() + old_top;
- *id = num_vectors_++;
- }
- void release(int id, uint32_t num_bytes) {
- ARROW_DCHECK(num_vectors_ == id + 1);
- int64_t size = num_bytes + padding;
- ARROW_DCHECK(top_ >= size);
- top_ -= size;
- --num_vectors_;
- }
- static constexpr int64_t padding = 64;
- int num_vectors_;
- int64_t top_;
- std::unique_ptr<Buffer> buffer_;
- int64_t buffer_size_;
-};
-
-template <typename T>
-class TempVectorHolder {
- friend class TempVectorStack;
-
- public:
- ~TempVectorHolder() { stack_->release(id_, num_elements_ * sizeof(T)); }
- T* mutable_data() { return reinterpret_cast<T*>(data_); }
- TempVectorHolder(TempVectorStack* stack, uint32_t num_elements) {
- stack_ = stack;
- num_elements_ = num_elements;
- stack_->alloc(num_elements * sizeof(T), &data_, &id_);
- }
-
- private:
- TempVectorStack* stack_;
- uint8_t* data_;
- int id_;
- uint32_t num_elements_;
-};
-
-class BitUtil {
- public:
- static void bits_to_indexes(int bit_to_search, int64_t hardware_flags,
- const int num_bits, const uint8_t* bits, int* num_indexes,
- uint16_t* indexes, int bit_offset = 0);
-
- static void bits_filter_indexes(int bit_to_search, int64_t hardware_flags,
- const int num_bits, const uint8_t* bits,
- const uint16_t* input_indexes, int* num_indexes,
- uint16_t* indexes, int bit_offset = 0);
-
- // Input and output indexes may be pointing to the same data (in-place filtering).
- static void bits_split_indexes(int64_t hardware_flags, const int num_bits,
- const uint8_t* bits, int* num_indexes_bit0,
- uint16_t* indexes_bit0, uint16_t* indexes_bit1,
- int bit_offset = 0);
-
-  // Expands each bit into a byte: a set bit becomes 0xFF, a clear bit becomes 0x00.
- static void bits_to_bytes(int64_t hardware_flags, const int num_bits,
- const uint8_t* bits, uint8_t* bytes, int bit_offset = 0);
-
- // Return highest bit of each byte.
- static void bytes_to_bits(int64_t hardware_flags, const int num_bits,
- const uint8_t* bytes, uint8_t* bits, int bit_offset = 0);
-
- static bool are_all_bytes_zero(int64_t hardware_flags, const uint8_t* bytes,
- uint32_t num_bytes);
-
- private:
- inline static void bits_to_indexes_helper(uint64_t word, uint16_t base_index,
- int* num_indexes, uint16_t* indexes);
- inline static void bits_filter_indexes_helper(uint64_t word,
- const uint16_t* input_indexes,
- int* num_indexes, uint16_t* indexes);
- template <int bit_to_search, bool filter_input_indexes>
- static void bits_to_indexes_internal(int64_t hardware_flags, const int num_bits,
- const uint8_t* bits, const uint16_t* input_indexes,
- int* num_indexes, uint16_t* indexes);
-
-#if defined(ARROW_HAVE_AVX2)
- static void bits_to_indexes_avx2(int bit_to_search, const int num_bits,
- const uint8_t* bits, int* num_indexes,
- uint16_t* indexes);
- static void bits_filter_indexes_avx2(int bit_to_search, const int num_bits,
- const uint8_t* bits, const uint16_t* input_indexes,
- int* num_indexes, uint16_t* indexes);
- template <int bit_to_search>
- static void bits_to_indexes_imp_avx2(const int num_bits, const uint8_t* bits,
- int* num_indexes, uint16_t* indexes);
- template <int bit_to_search>
- static void bits_filter_indexes_imp_avx2(const int num_bits, const uint8_t* bits,
- const uint16_t* input_indexes,
- int* num_indexes, uint16_t* indexes);
- static void bits_to_bytes_avx2(const int num_bits, const uint8_t* bits, uint8_t* bytes);
- static void bytes_to_bits_avx2(const int num_bits, const uint8_t* bytes, uint8_t* bits);
- static bool are_all_bytes_zero_avx2(const uint8_t* bytes, uint32_t num_bytes);
-#endif
-};
-
-} // namespace util
-} // namespace arrow
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+
+#include <cstdint>
+#include <vector>
+
+#include "arrow/buffer.h"
+#include "arrow/memory_pool.h"
+#include "arrow/result.h"
+#include "arrow/status.h"
+#include "arrow/util/cpu_info.h"
+#include "arrow/util/logging.h"
+
+#if defined(__clang__) || defined(__GNUC__)
+#define BYTESWAP(x) __builtin_bswap64(x)
+#define ROTL(x, n) (((x) << (n)) | ((x) >> (32 - (n))))
+#elif defined(_MSC_VER)
+#include <intrin.h>
+#define BYTESWAP(x) _byteswap_uint64(x)
+#define ROTL(x, n) _rotl((x), (n))
+#endif
+
+namespace arrow {
+namespace util {
+
+// Some platforms typedef int64_t as long int instead of long long int,
+// which breaks the _mm256_i64gather_epi64 and _mm256_i32gather_epi64
+// intrinsics, since they expect long long arguments.
+// We cast to the type below in these intrinsics to make the code
+// compile in all cases.
+//
+using int64_for_gather_t = const long long int; // NOLINT runtime-int
+
+/// Storage used to allocate temporary vectors of a batch size.
+/// Temporary vectors resemble temporary variables allocated on the stack,
+/// except that in vectorized processing we need a vector of temporaries
+/// instead of a single value.
+class TempVectorStack {
+ template <typename>
+ friend class TempVectorHolder;
+
+ public:
+ Status Init(MemoryPool* pool, int64_t size) {
+ num_vectors_ = 0;
+ top_ = 0;
+ buffer_size_ = size;
+ ARROW_ASSIGN_OR_RAISE(auto buffer, AllocateResizableBuffer(size, pool));
+ buffer_ = std::move(buffer);
+ return Status::OK();
+ }
+
+ private:
+ void alloc(uint32_t num_bytes, uint8_t** data, int* id) {
+ int64_t old_top = top_;
+ top_ += num_bytes + padding;
+ // Stack overflow check
+ ARROW_DCHECK(top_ <= buffer_size_);
+ *data = buffer_->mutable_data() + old_top;
+ *id = num_vectors_++;
+ }
+ void release(int id, uint32_t num_bytes) {
+ ARROW_DCHECK(num_vectors_ == id + 1);
+ int64_t size = num_bytes + padding;
+ ARROW_DCHECK(top_ >= size);
+ top_ -= size;
+ --num_vectors_;
+ }
+ static constexpr int64_t padding = 64;
+ int num_vectors_;
+ int64_t top_;
+ std::unique_ptr<Buffer> buffer_;
+ int64_t buffer_size_;
+};
+
+template <typename T>
+class TempVectorHolder {
+ friend class TempVectorStack;
+
+ public:
+ ~TempVectorHolder() { stack_->release(id_, num_elements_ * sizeof(T)); }
+ T* mutable_data() { return reinterpret_cast<T*>(data_); }
+ TempVectorHolder(TempVectorStack* stack, uint32_t num_elements) {
+ stack_ = stack;
+ num_elements_ = num_elements;
+ stack_->alloc(num_elements * sizeof(T), &data_, &id_);
+ }
+
+ private:
+ TempVectorStack* stack_;
+ uint8_t* data_;
+ int id_;
+ uint32_t num_elements_;
+};
+
+class BitUtil {
+ public:
+ static void bits_to_indexes(int bit_to_search, int64_t hardware_flags,
+ const int num_bits, const uint8_t* bits, int* num_indexes,
+ uint16_t* indexes, int bit_offset = 0);
+
+ static void bits_filter_indexes(int bit_to_search, int64_t hardware_flags,
+ const int num_bits, const uint8_t* bits,
+ const uint16_t* input_indexes, int* num_indexes,
+ uint16_t* indexes, int bit_offset = 0);
+
+ // Input and output indexes may be pointing to the same data (in-place filtering).
+ static void bits_split_indexes(int64_t hardware_flags, const int num_bits,
+ const uint8_t* bits, int* num_indexes_bit0,
+ uint16_t* indexes_bit0, uint16_t* indexes_bit1,
+ int bit_offset = 0);
+
+  // Expands each bit into a byte: a set bit becomes 0xFF, a clear bit becomes 0x00.
+ static void bits_to_bytes(int64_t hardware_flags, const int num_bits,
+ const uint8_t* bits, uint8_t* bytes, int bit_offset = 0);
+
+ // Return highest bit of each byte.
+ static void bytes_to_bits(int64_t hardware_flags, const int num_bits,
+ const uint8_t* bytes, uint8_t* bits, int bit_offset = 0);
+
+ static bool are_all_bytes_zero(int64_t hardware_flags, const uint8_t* bytes,
+ uint32_t num_bytes);
+
+ private:
+ inline static void bits_to_indexes_helper(uint64_t word, uint16_t base_index,
+ int* num_indexes, uint16_t* indexes);
+ inline static void bits_filter_indexes_helper(uint64_t word,
+ const uint16_t* input_indexes,
+ int* num_indexes, uint16_t* indexes);
+ template <int bit_to_search, bool filter_input_indexes>
+ static void bits_to_indexes_internal(int64_t hardware_flags, const int num_bits,
+ const uint8_t* bits, const uint16_t* input_indexes,
+ int* num_indexes, uint16_t* indexes);
+
+#if defined(ARROW_HAVE_AVX2)
+ static void bits_to_indexes_avx2(int bit_to_search, const int num_bits,
+ const uint8_t* bits, int* num_indexes,
+ uint16_t* indexes);
+ static void bits_filter_indexes_avx2(int bit_to_search, const int num_bits,
+ const uint8_t* bits, const uint16_t* input_indexes,
+ int* num_indexes, uint16_t* indexes);
+ template <int bit_to_search>
+ static void bits_to_indexes_imp_avx2(const int num_bits, const uint8_t* bits,
+ int* num_indexes, uint16_t* indexes);
+ template <int bit_to_search>
+ static void bits_filter_indexes_imp_avx2(const int num_bits, const uint8_t* bits,
+ const uint16_t* input_indexes,
+ int* num_indexes, uint16_t* indexes);
+ static void bits_to_bytes_avx2(const int num_bits, const uint8_t* bits, uint8_t* bytes);
+ static void bytes_to_bits_avx2(const int num_bits, const uint8_t* bytes, uint8_t* bits);
+ static bool are_all_bytes_zero_avx2(const uint8_t* bytes, uint32_t num_bytes);
+#endif
+};
+
+} // namespace util
+} // namespace arrow
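
[Editorial note] A minimal usage sketch for the two helpers declared above: a TempVectorStack provides scratch space, a TempVectorHolder scopes one temporary vector, and BitUtil::bits_to_indexes collects positions of set bits into it. Passing 0 as hardware_flags forces the portable (non-AVX2) path; the function name and buffer sizes are illustrative, and the bitmap is padded to a whole 64-bit word because the implementation loads whole words.

#include <cstdint>

#include "arrow/compute/exec/util.h"
#include "arrow/memory_pool.h"
#include "arrow/status.h"

arrow::Status IndexesOfSetBits(arrow::MemoryPool* pool) {
  arrow::util::TempVectorStack stack;
  ARROW_RETURN_NOT_OK(stack.Init(pool, /*size=*/16 * 1024));

  // 16 valid bits, padded to 8 bytes since whole 64-bit words are read.
  const uint8_t bits[8] = {0b10110010, 0b00000001, 0, 0, 0, 0, 0, 0};
  arrow::util::TempVectorHolder<uint16_t> indexes(&stack, /*num_elements=*/16);
  int num_indexes = 0;
  arrow::util::BitUtil::bits_to_indexes(/*bit_to_search=*/1, /*hardware_flags=*/0,
                                        /*num_bits=*/16, bits, &num_indexes,
                                        indexes.mutable_data());
  // num_indexes == 5; the collected indexes are {1, 4, 5, 7, 8}.
  return arrow::Status::OK();
}  // the holder releases its scratch space here, LIFO order
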
diff --git a/contrib/libs/apache/arrow/cpp/src/arrow/compute/exec_internal.h b/contrib/libs/apache/arrow/cpp/src/arrow/compute/exec_internal.h
index abc9861537f..55daa243cd3 100644
--- a/contrib/libs/apache/arrow/cpp/src/arrow/compute/exec_internal.h
+++ b/contrib/libs/apache/arrow/cpp/src/arrow/compute/exec_internal.h
@@ -84,14 +84,14 @@ class ARROW_EXPORT ExecListener {
class DatumAccumulator : public ExecListener {
public:
- DatumAccumulator() = default;
+ DatumAccumulator() = default;
Status OnResult(Datum value) override {
values_.emplace_back(value);
return Status::OK();
}
- std::vector<Datum> values() { return std::move(values_); }
+ std::vector<Datum> values() { return std::move(values_); }
private:
std::vector<Datum> values_;
@@ -102,17 +102,17 @@ class DatumAccumulator : public ExecListener {
/// inputs will be split into non-chunked ExecBatch values for execution
Status CheckAllValues(const std::vector<Datum>& values);
-class ARROW_EXPORT KernelExecutor {
+class ARROW_EXPORT KernelExecutor {
public:
- virtual ~KernelExecutor() = default;
-
- /// The Kernel's `init` method must be called and any KernelState set in the
- /// KernelContext *before* KernelExecutor::Init is called. This is to facilitate
- /// the case where init may be expensive and does not need to be called again for
- /// each execution of the kernel, for example the same lookup table can be re-used
- /// for all scanned batches in a dataset filter.
- virtual Status Init(KernelContext*, KernelInitArgs) = 0;
-
+ virtual ~KernelExecutor() = default;
+
+ /// The Kernel's `init` method must be called and any KernelState set in the
+ /// KernelContext *before* KernelExecutor::Init is called. This is to facilitate
+ /// the case where init may be expensive and does not need to be called again for
+ /// each execution of the kernel, for example the same lookup table can be re-used
+ /// for all scanned batches in a dataset filter.
+ virtual Status Init(KernelContext*, KernelInitArgs) = 0;
+
/// XXX: Better configurability for listener
/// Not thread-safe
virtual Status Execute(const std::vector<Datum>& args, ExecListener* listener) = 0;
@@ -120,9 +120,9 @@ class ARROW_EXPORT KernelExecutor {
virtual Datum WrapResults(const std::vector<Datum>& args,
const std::vector<Datum>& outputs) = 0;
- static std::unique_ptr<KernelExecutor> MakeScalar();
- static std::unique_ptr<KernelExecutor> MakeVector();
- static std::unique_ptr<KernelExecutor> MakeScalarAggregate();
+ static std::unique_ptr<KernelExecutor> MakeScalar();
+ static std::unique_ptr<KernelExecutor> MakeVector();
+ static std::unique_ptr<KernelExecutor> MakeScalarAggregate();
};
/// \brief Populate validity bitmap with the intersection of the nullity of the
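
[Editorial note] The init-before-Init contract documented on KernelExecutor above is easiest to see as code. The sketch below mirrors the sequence Function::Execute uses in the next file's diff; the helper name is illustrative, and the kernel, inputs and options are assumed to come from dispatch as shown there.

#include <memory>
#include <vector>

#include "arrow/compute/exec_internal.h"

arrow::Status PrepareScalarExecutor(
    arrow::compute::ExecContext* ctx, const arrow::compute::Kernel* kernel,
    const std::vector<arrow::compute::ValueDescr>& inputs,
    const arrow::compute::FunctionOptions* options) {
  std::unique_ptr<arrow::compute::KernelState> state;
  arrow::compute::KernelContext kernel_ctx{ctx};
  // Run the kernel's own init first and publish its state...
  if (kernel->init) {
    ARROW_ASSIGN_OR_RAISE(state, kernel->init(&kernel_ctx, {kernel, inputs, options}));
    kernel_ctx.SetState(state.get());
  }
  // ...and only then initialize the executor.
  // NOTE: in real use, `state` must outlive all Execute() calls.
  auto executor = arrow::compute::detail::KernelExecutor::MakeScalar();
  return executor->Init(&kernel_ctx, {kernel, inputs, options});
}
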
diff --git a/contrib/libs/apache/arrow/cpp/src/arrow/compute/function.cc b/contrib/libs/apache/arrow/cpp/src/arrow/compute/function.cc
index 1958f442849..05d14d03b16 100644
--- a/contrib/libs/apache/arrow/cpp/src/arrow/compute/function.cc
+++ b/contrib/libs/apache/arrow/cpp/src/arrow/compute/function.cc
@@ -21,108 +21,108 @@
#include <memory>
#include <sstream>
-#include "arrow/compute/api_scalar.h"
-#include "arrow/compute/cast.h"
+#include "arrow/compute/api_scalar.h"
+#include "arrow/compute/cast.h"
#include "arrow/compute/exec.h"
#include "arrow/compute/exec_internal.h"
-#include "arrow/compute/function_internal.h"
-#include "arrow/compute/kernels/common.h"
-#include "arrow/compute/registry.h"
+#include "arrow/compute/function_internal.h"
+#include "arrow/compute/kernels/common.h"
+#include "arrow/compute/registry.h"
#include "arrow/datum.h"
#include "arrow/util/cpu_info.h"
namespace arrow {
-
-using internal::checked_cast;
-
+
+using internal::checked_cast;
+
namespace compute {
-Result<std::shared_ptr<Buffer>> FunctionOptionsType::Serialize(
- const FunctionOptions&) const {
- return Status::NotImplemented("Serialize for ", type_name());
-}
-
-Result<std::unique_ptr<FunctionOptions>> FunctionOptionsType::Deserialize(
- const Buffer& buffer) const {
- return Status::NotImplemented("Deserialize for ", type_name());
-}
-
-std::string FunctionOptions::ToString() const { return options_type()->Stringify(*this); }
-
-bool FunctionOptions::Equals(const FunctionOptions& other) const {
- if (this == &other) return true;
- if (options_type() != other.options_type()) return false;
- return options_type()->Compare(*this, other);
-}
-
-Result<std::shared_ptr<Buffer>> FunctionOptions::Serialize() const {
- return options_type()->Serialize(*this);
-}
-
-Result<std::unique_ptr<FunctionOptions>> FunctionOptions::Deserialize(
- const std::string& type_name, const Buffer& buffer) {
- ARROW_ASSIGN_OR_RAISE(auto options,
- GetFunctionRegistry()->GetFunctionOptionsType(type_name));
- return options->Deserialize(buffer);
-}
-
-void PrintTo(const FunctionOptions& options, std::ostream* os) {
- *os << options.ToString();
-}
-
-static const FunctionDoc kEmptyFunctionDoc{};
-
-const FunctionDoc& FunctionDoc::Empty() { return kEmptyFunctionDoc; }
-
-static Status CheckArityImpl(const Function* function, int passed_num_args,
- const char* passed_num_args_label) {
- if (function->arity().is_varargs && passed_num_args < function->arity().num_args) {
- return Status::Invalid("VarArgs function ", function->name(), " needs at least ",
- function->arity().num_args, " arguments but ",
- passed_num_args_label, " only ", passed_num_args);
+Result<std::shared_ptr<Buffer>> FunctionOptionsType::Serialize(
+ const FunctionOptions&) const {
+ return Status::NotImplemented("Serialize for ", type_name());
+}
+
+Result<std::unique_ptr<FunctionOptions>> FunctionOptionsType::Deserialize(
+ const Buffer& buffer) const {
+ return Status::NotImplemented("Deserialize for ", type_name());
+}
+
+std::string FunctionOptions::ToString() const { return options_type()->Stringify(*this); }
+
+bool FunctionOptions::Equals(const FunctionOptions& other) const {
+ if (this == &other) return true;
+ if (options_type() != other.options_type()) return false;
+ return options_type()->Compare(*this, other);
+}
+
+Result<std::shared_ptr<Buffer>> FunctionOptions::Serialize() const {
+ return options_type()->Serialize(*this);
+}
+
+Result<std::unique_ptr<FunctionOptions>> FunctionOptions::Deserialize(
+ const std::string& type_name, const Buffer& buffer) {
+ ARROW_ASSIGN_OR_RAISE(auto options,
+ GetFunctionRegistry()->GetFunctionOptionsType(type_name));
+ return options->Deserialize(buffer);
+}
+
+void PrintTo(const FunctionOptions& options, std::ostream* os) {
+ *os << options.ToString();
+}
+
+static const FunctionDoc kEmptyFunctionDoc{};
+
+const FunctionDoc& FunctionDoc::Empty() { return kEmptyFunctionDoc; }
+
+static Status CheckArityImpl(const Function* function, int passed_num_args,
+ const char* passed_num_args_label) {
+ if (function->arity().is_varargs && passed_num_args < function->arity().num_args) {
+ return Status::Invalid("VarArgs function ", function->name(), " needs at least ",
+ function->arity().num_args, " arguments but ",
+ passed_num_args_label, " only ", passed_num_args);
}
-
- if (!function->arity().is_varargs && passed_num_args != function->arity().num_args) {
- return Status::Invalid("Function ", function->name(), " accepts ",
- function->arity().num_args, " arguments but ",
- passed_num_args_label, " ", passed_num_args);
- }
-
+
+ if (!function->arity().is_varargs && passed_num_args != function->arity().num_args) {
+ return Status::Invalid("Function ", function->name(), " accepts ",
+ function->arity().num_args, " arguments but ",
+ passed_num_args_label, " ", passed_num_args);
+ }
+
return Status::OK();
}
-Status Function::CheckArity(const std::vector<InputType>& in_types) const {
- return CheckArityImpl(this, static_cast<int>(in_types.size()), "kernel accepts");
+Status Function::CheckArity(const std::vector<InputType>& in_types) const {
+ return CheckArityImpl(this, static_cast<int>(in_types.size()), "kernel accepts");
+}
+
+Status Function::CheckArity(const std::vector<ValueDescr>& descrs) const {
+ return CheckArityImpl(this, static_cast<int>(descrs.size()),
+ "attempted to look up kernel(s) with");
+}
+
+namespace detail {
+
+Status NoMatchingKernel(const Function* func, const std::vector<ValueDescr>& descrs) {
+ return Status::NotImplemented("Function ", func->name(),
+ " has no kernel matching input types ",
+ ValueDescr::ToString(descrs));
}
-Status Function::CheckArity(const std::vector<ValueDescr>& descrs) const {
- return CheckArityImpl(this, static_cast<int>(descrs.size()),
- "attempted to look up kernel(s) with");
-}
-
-namespace detail {
-
-Status NoMatchingKernel(const Function* func, const std::vector<ValueDescr>& descrs) {
- return Status::NotImplemented("Function ", func->name(),
- " has no kernel matching input types ",
- ValueDescr::ToString(descrs));
-}
-
-template <typename KernelType>
-const KernelType* DispatchExactImpl(const std::vector<KernelType*>& kernels,
- const std::vector<ValueDescr>& values) {
- const KernelType* kernel_matches[SimdLevel::MAX] = {nullptr};
-
+template <typename KernelType>
+const KernelType* DispatchExactImpl(const std::vector<KernelType*>& kernels,
+ const std::vector<ValueDescr>& values) {
+ const KernelType* kernel_matches[SimdLevel::MAX] = {nullptr};
+
// Validate arity
for (const auto& kernel : kernels) {
- if (kernel->signature->MatchesInputs(values)) {
- kernel_matches[kernel->simd_level] = kernel;
+ if (kernel->signature->MatchesInputs(values)) {
+ kernel_matches[kernel->simd_level] = kernel;
}
}
// Dispatch as the CPU feature
-#if defined(ARROW_HAVE_RUNTIME_AVX512) || defined(ARROW_HAVE_RUNTIME_AVX2)
+#if defined(ARROW_HAVE_RUNTIME_AVX512) || defined(ARROW_HAVE_RUNTIME_AVX2)
auto cpu_info = arrow::internal::CpuInfo::GetInstance();
-#endif
+#endif
#if defined(ARROW_HAVE_RUNTIME_AVX512)
if (cpu_info->IsSupported(arrow::internal::CpuInfo::AVX512)) {
if (kernel_matches[SimdLevel::AVX512]) {
@@ -141,54 +141,54 @@ const KernelType* DispatchExactImpl(const std::vector<KernelType*>& kernels,
return kernel_matches[SimdLevel::NONE];
}
- return nullptr;
+ return nullptr;
+}
+
+const Kernel* DispatchExactImpl(const Function* func,
+ const std::vector<ValueDescr>& values) {
+ if (func->kind() == Function::SCALAR) {
+ return DispatchExactImpl(checked_cast<const ScalarFunction*>(func)->kernels(),
+ values);
+ }
+
+ if (func->kind() == Function::VECTOR) {
+ return DispatchExactImpl(checked_cast<const VectorFunction*>(func)->kernels(),
+ values);
+ }
+
+ if (func->kind() == Function::SCALAR_AGGREGATE) {
+ return DispatchExactImpl(
+ checked_cast<const ScalarAggregateFunction*>(func)->kernels(), values);
+ }
+
+ if (func->kind() == Function::HASH_AGGREGATE) {
+ return DispatchExactImpl(checked_cast<const HashAggregateFunction*>(func)->kernels(),
+ values);
+ }
+
+ return nullptr;
+}
+
+} // namespace detail
+
+Result<const Kernel*> Function::DispatchExact(
+ const std::vector<ValueDescr>& values) const {
+ if (kind_ == Function::META) {
+ return Status::NotImplemented("Dispatch for a MetaFunction's Kernels");
+ }
+ RETURN_NOT_OK(CheckArity(values));
+
+ if (auto kernel = detail::DispatchExactImpl(this, values)) {
+ return kernel;
+ }
+ return detail::NoMatchingKernel(this, values);
+}
+
+Result<const Kernel*> Function::DispatchBest(std::vector<ValueDescr>* values) const {
+ // TODO(ARROW-11508) permit generic conversions here
+ return DispatchExact(*values);
}
-const Kernel* DispatchExactImpl(const Function* func,
- const std::vector<ValueDescr>& values) {
- if (func->kind() == Function::SCALAR) {
- return DispatchExactImpl(checked_cast<const ScalarFunction*>(func)->kernels(),
- values);
- }
-
- if (func->kind() == Function::VECTOR) {
- return DispatchExactImpl(checked_cast<const VectorFunction*>(func)->kernels(),
- values);
- }
-
- if (func->kind() == Function::SCALAR_AGGREGATE) {
- return DispatchExactImpl(
- checked_cast<const ScalarAggregateFunction*>(func)->kernels(), values);
- }
-
- if (func->kind() == Function::HASH_AGGREGATE) {
- return DispatchExactImpl(checked_cast<const HashAggregateFunction*>(func)->kernels(),
- values);
- }
-
- return nullptr;
-}
-
-} // namespace detail
-
-Result<const Kernel*> Function::DispatchExact(
- const std::vector<ValueDescr>& values) const {
- if (kind_ == Function::META) {
- return Status::NotImplemented("Dispatch for a MetaFunction's Kernels");
- }
- RETURN_NOT_OK(CheckArity(values));
-
- if (auto kernel = detail::DispatchExactImpl(this, values)) {
- return kernel;
- }
- return detail::NoMatchingKernel(this, values);
-}
-
-Result<const Kernel*> Function::DispatchBest(std::vector<ValueDescr>* values) const {
- // TODO(ARROW-11508) permit generic conversions here
- return DispatchExact(*values);
-}
-
Result<Datum> Function::Execute(const std::vector<Datum>& args,
const FunctionOptions* options, ExecContext* ctx) const {
if (options == nullptr) {
@@ -198,63 +198,63 @@ Result<Datum> Function::Execute(const std::vector<Datum>& args,
ExecContext default_ctx;
return Execute(args, options, &default_ctx);
}
-
+
// type-check Datum arguments here. Really we'd like to avoid this as much as
// possible
RETURN_NOT_OK(detail::CheckAllValues(args));
- std::vector<ValueDescr> inputs(args.size());
- for (size_t i = 0; i != args.size(); ++i) {
- inputs[i] = args[i].descr();
- }
-
- ARROW_ASSIGN_OR_RAISE(auto kernel, DispatchBest(&inputs));
- ARROW_ASSIGN_OR_RAISE(auto implicitly_cast_args, Cast(args, inputs, ctx));
-
- std::unique_ptr<KernelState> state;
-
- KernelContext kernel_ctx{ctx};
- if (kernel->init) {
- ARROW_ASSIGN_OR_RAISE(state, kernel->init(&kernel_ctx, {kernel, inputs, options}));
- kernel_ctx.SetState(state.get());
- }
-
- std::unique_ptr<detail::KernelExecutor> executor;
- if (kind() == Function::SCALAR) {
- executor = detail::KernelExecutor::MakeScalar();
- } else if (kind() == Function::VECTOR) {
- executor = detail::KernelExecutor::MakeVector();
- } else if (kind() == Function::SCALAR_AGGREGATE) {
- executor = detail::KernelExecutor::MakeScalarAggregate();
- } else {
- return Status::NotImplemented("Direct execution of HASH_AGGREGATE functions");
- }
- RETURN_NOT_OK(executor->Init(&kernel_ctx, {kernel, inputs, options}));
-
+ std::vector<ValueDescr> inputs(args.size());
+ for (size_t i = 0; i != args.size(); ++i) {
+ inputs[i] = args[i].descr();
+ }
+
+ ARROW_ASSIGN_OR_RAISE(auto kernel, DispatchBest(&inputs));
+ ARROW_ASSIGN_OR_RAISE(auto implicitly_cast_args, Cast(args, inputs, ctx));
+
+ std::unique_ptr<KernelState> state;
+
+ KernelContext kernel_ctx{ctx};
+ if (kernel->init) {
+ ARROW_ASSIGN_OR_RAISE(state, kernel->init(&kernel_ctx, {kernel, inputs, options}));
+ kernel_ctx.SetState(state.get());
+ }
+
+ std::unique_ptr<detail::KernelExecutor> executor;
+ if (kind() == Function::SCALAR) {
+ executor = detail::KernelExecutor::MakeScalar();
+ } else if (kind() == Function::VECTOR) {
+ executor = detail::KernelExecutor::MakeVector();
+ } else if (kind() == Function::SCALAR_AGGREGATE) {
+ executor = detail::KernelExecutor::MakeScalarAggregate();
+ } else {
+ return Status::NotImplemented("Direct execution of HASH_AGGREGATE functions");
+ }
+ RETURN_NOT_OK(executor->Init(&kernel_ctx, {kernel, inputs, options}));
+
auto listener = std::make_shared<detail::DatumAccumulator>();
- RETURN_NOT_OK(executor->Execute(implicitly_cast_args, listener.get()));
- return executor->WrapResults(implicitly_cast_args, listener->values());
+ RETURN_NOT_OK(executor->Execute(implicitly_cast_args, listener.get()));
+ return executor->WrapResults(implicitly_cast_args, listener->values());
+}
+
+Status Function::Validate() const {
+ if (!doc_->summary.empty()) {
+ // Documentation given, check its contents
+ int arg_count = static_cast<int>(doc_->arg_names.size());
+ if (arg_count == arity_.num_args) {
+ return Status::OK();
+ }
+ if (arity_.is_varargs && arg_count == arity_.num_args + 1) {
+ return Status::OK();
+ }
+ return Status::Invalid(
+ "In function '", name_,
+ "': ", "number of argument names for function documentation != function arity");
+ }
+ return Status::OK();
}
-Status Function::Validate() const {
- if (!doc_->summary.empty()) {
- // Documentation given, check its contents
- int arg_count = static_cast<int>(doc_->arg_names.size());
- if (arg_count == arity_.num_args) {
- return Status::OK();
- }
- if (arity_.is_varargs && arg_count == arity_.num_args + 1) {
- return Status::OK();
- }
- return Status::Invalid(
- "In function '", name_,
- "': ", "number of argument names for function documentation != function arity");
- }
- return Status::OK();
-}
-
Status ScalarFunction::AddKernel(std::vector<InputType> in_types, OutputType out_type,
ArrayKernelExec exec, KernelInit init) {
- RETURN_NOT_OK(CheckArity(in_types));
+ RETURN_NOT_OK(CheckArity(in_types));
if (arity_.is_varargs && in_types.size() != 1) {
return Status::Invalid("VarArgs signatures must have exactly one input type");
@@ -266,7 +266,7 @@ Status ScalarFunction::AddKernel(std::vector<InputType> in_types, OutputType out
}
Status ScalarFunction::AddKernel(ScalarKernel kernel) {
- RETURN_NOT_OK(CheckArity(kernel.signature->in_types()));
+ RETURN_NOT_OK(CheckArity(kernel.signature->in_types()));
if (arity_.is_varargs && !kernel.signature->is_varargs()) {
return Status::Invalid("Function accepts varargs but kernel signature does not");
}
@@ -276,7 +276,7 @@ Status ScalarFunction::AddKernel(ScalarKernel kernel) {
Status VectorFunction::AddKernel(std::vector<InputType> in_types, OutputType out_type,
ArrayKernelExec exec, KernelInit init) {
- RETURN_NOT_OK(CheckArity(in_types));
+ RETURN_NOT_OK(CheckArity(in_types));
if (arity_.is_varargs && in_types.size() != 1) {
return Status::Invalid("VarArgs signatures must have exactly one input type");
@@ -288,7 +288,7 @@ Status VectorFunction::AddKernel(std::vector<InputType> in_types, OutputType out
}
Status VectorFunction::AddKernel(VectorKernel kernel) {
- RETURN_NOT_OK(CheckArity(kernel.signature->in_types()));
+ RETURN_NOT_OK(CheckArity(kernel.signature->in_types()));
if (arity_.is_varargs && !kernel.signature->is_varargs()) {
return Status::Invalid("Function accepts varargs but kernel signature does not");
}
@@ -297,7 +297,7 @@ Status VectorFunction::AddKernel(VectorKernel kernel) {
}
Status ScalarAggregateFunction::AddKernel(ScalarAggregateKernel kernel) {
- RETURN_NOT_OK(CheckArity(kernel.signature->in_types()));
+ RETURN_NOT_OK(CheckArity(kernel.signature->in_types()));
if (arity_.is_varargs && !kernel.signature->is_varargs()) {
return Status::Invalid("Function accepts varargs but kernel signature does not");
}
@@ -305,21 +305,21 @@ Status ScalarAggregateFunction::AddKernel(ScalarAggregateKernel kernel) {
return Status::OK();
}
-Status HashAggregateFunction::AddKernel(HashAggregateKernel kernel) {
- RETURN_NOT_OK(CheckArity(kernel.signature->in_types()));
- if (arity_.is_varargs && !kernel.signature->is_varargs()) {
- return Status::Invalid("Function accepts varargs but kernel signature does not");
- }
- kernels_.emplace_back(std::move(kernel));
- return Status::OK();
+Status HashAggregateFunction::AddKernel(HashAggregateKernel kernel) {
+ RETURN_NOT_OK(CheckArity(kernel.signature->in_types()));
+ if (arity_.is_varargs && !kernel.signature->is_varargs()) {
+ return Status::Invalid("Function accepts varargs but kernel signature does not");
+ }
+ kernels_.emplace_back(std::move(kernel));
+ return Status::OK();
}
Result<Datum> MetaFunction::Execute(const std::vector<Datum>& args,
const FunctionOptions* options,
ExecContext* ctx) const {
- RETURN_NOT_OK(
- CheckArityImpl(this, static_cast<int>(args.size()), "attempted to Execute with"));
-
+ RETURN_NOT_OK(
+ CheckArityImpl(this, static_cast<int>(args.size()), "attempted to Execute with"));
+
if (options == nullptr) {
options = default_options();
}
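
[Editorial note] As a usage note for the execution path in this hunk: the public entry point arrow::compute::CallFunction (declared in arrow/compute/exec.h, pulled in via arrow/compute/api.h) resolves a function by name in the default registry and funnels into the Function::Execute sequence shown above. A minimal sketch, with an illustrative helper name:

#include <cstdint>

#include "arrow/compute/api.h"
#include "arrow/scalar.h"

arrow::Result<arrow::Datum> AddTwoScalars() {
  auto a = arrow::MakeScalar(int64_t{40});
  auto b = arrow::MakeScalar(int64_t{2});
  // Dispatches to the int64 "add" kernel and returns a scalar Datum of 42.
  return arrow::compute::CallFunction("add", {arrow::Datum(a), arrow::Datum(b)});
}
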
diff --git a/contrib/libs/apache/arrow/cpp/src/arrow/compute/function.h b/contrib/libs/apache/arrow/cpp/src/arrow/compute/function.h
index e50ba155244..bd854bbb28e 100644
--- a/contrib/libs/apache/arrow/cpp/src/arrow/compute/function.h
+++ b/contrib/libs/apache/arrow/cpp/src/arrow/compute/function.h
@@ -29,7 +29,7 @@
#include "arrow/datum.h"
#include "arrow/result.h"
#include "arrow/status.h"
-#include "arrow/util/compare.h"
+#include "arrow/util/compare.h"
#include "arrow/util/macros.h"
#include "arrow/util/visibility.h"
@@ -40,50 +40,50 @@ namespace compute {
///
/// @{
-/// \brief Extension point for defining options outside libarrow (but
-/// still within this project).
-class ARROW_EXPORT FunctionOptionsType {
- public:
- virtual ~FunctionOptionsType() = default;
-
- virtual const char* type_name() const = 0;
- virtual std::string Stringify(const FunctionOptions&) const = 0;
- virtual bool Compare(const FunctionOptions&, const FunctionOptions&) const = 0;
- virtual Result<std::shared_ptr<Buffer>> Serialize(const FunctionOptions&) const;
- virtual Result<std::unique_ptr<FunctionOptions>> Deserialize(
- const Buffer& buffer) const;
-};
-
+/// \brief Extension point for defining options outside libarrow (but
+/// still within this project).
+class ARROW_EXPORT FunctionOptionsType {
+ public:
+ virtual ~FunctionOptionsType() = default;
+
+ virtual const char* type_name() const = 0;
+ virtual std::string Stringify(const FunctionOptions&) const = 0;
+ virtual bool Compare(const FunctionOptions&, const FunctionOptions&) const = 0;
+ virtual Result<std::shared_ptr<Buffer>> Serialize(const FunctionOptions&) const;
+ virtual Result<std::unique_ptr<FunctionOptions>> Deserialize(
+ const Buffer& buffer) const;
+};
+
/// \brief Base class for specifying options configuring a function's behavior,
/// such as error handling.
-class ARROW_EXPORT FunctionOptions : public util::EqualityComparable<FunctionOptions> {
- public:
- virtual ~FunctionOptions() = default;
-
- const FunctionOptionsType* options_type() const { return options_type_; }
- const char* type_name() const { return options_type()->type_name(); }
-
- bool Equals(const FunctionOptions& other) const;
- using util::EqualityComparable<FunctionOptions>::Equals;
- using util::EqualityComparable<FunctionOptions>::operator==;
- using util::EqualityComparable<FunctionOptions>::operator!=;
- std::string ToString() const;
- /// \brief Serialize an options struct to a buffer.
- Result<std::shared_ptr<Buffer>> Serialize() const;
- /// \brief Deserialize an options struct from a buffer.
- /// Note: this will only look for `type_name` in the default FunctionRegistry;
- /// to use a custom FunctionRegistry, look up the FunctionOptionsType, then
- /// call FunctionOptionsType::Deserialize().
- static Result<std::unique_ptr<FunctionOptions>> Deserialize(
- const std::string& type_name, const Buffer& buffer);
-
- protected:
- explicit FunctionOptions(const FunctionOptionsType* type) : options_type_(type) {}
- const FunctionOptionsType* options_type_;
-};
-
-ARROW_EXPORT void PrintTo(const FunctionOptions&, std::ostream*);
-
+class ARROW_EXPORT FunctionOptions : public util::EqualityComparable<FunctionOptions> {
+ public:
+ virtual ~FunctionOptions() = default;
+
+ const FunctionOptionsType* options_type() const { return options_type_; }
+ const char* type_name() const { return options_type()->type_name(); }
+
+ bool Equals(const FunctionOptions& other) const;
+ using util::EqualityComparable<FunctionOptions>::Equals;
+ using util::EqualityComparable<FunctionOptions>::operator==;
+ using util::EqualityComparable<FunctionOptions>::operator!=;
+ std::string ToString() const;
+ /// \brief Serialize an options struct to a buffer.
+ Result<std::shared_ptr<Buffer>> Serialize() const;
+ /// \brief Deserialize an options struct from a buffer.
+ /// Note: this will only look for `type_name` in the default FunctionRegistry;
+ /// to use a custom FunctionRegistry, look up the FunctionOptionsType, then
+ /// call FunctionOptionsType::Deserialize().
+ static Result<std::unique_ptr<FunctionOptions>> Deserialize(
+ const std::string& type_name, const Buffer& buffer);
+
+ protected:
+ explicit FunctionOptions(const FunctionOptionsType* type) : options_type_(type) {}
+ const FunctionOptionsType* options_type_;
+};
+
+ARROW_EXPORT void PrintTo(const FunctionOptions&, std::ostream*);
+
/// \brief Contains the number of required arguments for the function.
///
/// Naming conventions taken from https://en.wikipedia.org/wiki/Arity.
@@ -118,37 +118,37 @@ struct ARROW_EXPORT Arity {
bool is_varargs = false;
};
-struct ARROW_EXPORT FunctionDoc {
- /// \brief A one-line summary of the function, using a verb.
- ///
- /// For example, "Add two numeric arrays or scalars".
- std::string summary;
-
- /// \brief A detailed description of the function, meant to follow the summary.
- std::string description;
-
- /// \brief Symbolic names (identifiers) for the function arguments.
- ///
- /// Some bindings may use this to generate nicer function signatures.
- std::vector<std::string> arg_names;
-
- // TODO add argument descriptions?
-
- /// \brief Name of the options class, if any.
- std::string options_class;
-
- FunctionDoc() = default;
-
- FunctionDoc(std::string summary, std::string description,
- std::vector<std::string> arg_names, std::string options_class = "")
- : summary(std::move(summary)),
- description(std::move(description)),
- arg_names(std::move(arg_names)),
- options_class(std::move(options_class)) {}
-
- static const FunctionDoc& Empty();
-};
-
+struct ARROW_EXPORT FunctionDoc {
+ /// \brief A one-line summary of the function, using a verb.
+ ///
+ /// For example, "Add two numeric arrays or scalars".
+ std::string summary;
+
+ /// \brief A detailed description of the function, meant to follow the summary.
+ std::string description;
+
+ /// \brief Symbolic names (identifiers) for the function arguments.
+ ///
+ /// Some bindings may use this to generate nicer function signatures.
+ std::vector<std::string> arg_names;
+
+ // TODO add argument descriptions?
+
+ /// \brief Name of the options class, if any.
+ std::string options_class;
+
+ FunctionDoc() = default;
+
+ FunctionDoc(std::string summary, std::string description,
+ std::vector<std::string> arg_names, std::string options_class = "")
+ : summary(std::move(summary)),
+ description(std::move(description)),
+ arg_names(std::move(arg_names)),
+ options_class(std::move(options_class)) {}
+
+ static const FunctionDoc& Empty();
+};
+
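+// --- Editor's sketch (not part of the upstream commit): kernel registration
+// code typically declares a FunctionDoc as a static constant; the names and
+// wording below are illustrative only:
+//
+//   const arrow::compute::FunctionDoc add_doc{
+//       "Add two numeric arrays or scalars",
+//       "Null values in either input produce null output.",
+//       {"x", "y"}};
+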
/// \brief Base class for compute functions. Function implementations contain a
/// collection of "kernels" which are implementations of the function for
/// specific argument types. Selecting a viable kernel for executing a function
@@ -172,10 +172,10 @@ class ARROW_EXPORT Function {
/// A function that computes scalar summary statistics from array input.
SCALAR_AGGREGATE,
- /// A function that computes grouped summary statistics from array input
- /// and an array of group identifiers.
- HASH_AGGREGATE,
-
+ /// A function that computes grouped summary statistics from array input
+ /// and an array of group identifiers.
+ HASH_AGGREGATE,
+
/// A function that dispatches to other functions and does not contain its
/// own kernels.
META
@@ -194,27 +194,27 @@ class ARROW_EXPORT Function {
/// function accepts variable numbers of arguments.
const Arity& arity() const { return arity_; }
- /// \brief Return the function documentation
- const FunctionDoc& doc() const { return *doc_; }
-
+ /// \brief Return the function documentation
+ const FunctionDoc& doc() const { return *doc_; }
+
/// \brief Returns the number of registered kernels for this function.
virtual int num_kernels() const = 0;
- /// \brief Return a kernel that can execute the function given the exact
- /// argument types (without implicit type casts or scalar->array promotions).
- ///
- /// NB: This function is overridden in CastFunction.
- virtual Result<const Kernel*> DispatchExact(
- const std::vector<ValueDescr>& values) const;
-
- /// \brief Return a best-match kernel that can execute the function given the argument
- /// types, after implicit casts are applied.
- ///
- /// \param[in,out] values Argument types. An element may be modified to indicate that
- /// the returned kernel only approximately matches the input value descriptors; callers
- /// are responsible for casting inputs to the type and shape required by the kernel.
- virtual Result<const Kernel*> DispatchBest(std::vector<ValueDescr>* values) const;
-
+ /// \brief Return a kernel that can execute the function given the exact
+ /// argument types (without implicit type casts or scalar->array promotions).
+ ///
+ /// NB: This function is overridden in CastFunction.
+ virtual Result<const Kernel*> DispatchExact(
+ const std::vector<ValueDescr>& values) const;
+
+ /// \brief Return a best-match kernel that can execute the function given the argument
+ /// types, after implicit casts are applied.
+ ///
+ /// \param[in,out] values Argument types. An element may be modified to indicate that
+ /// the returned kernel only approximately matches the input value descriptors; callers
+ /// are responsible for casting inputs to the type and shape required by the kernel.
+ virtual Result<const Kernel*> DispatchBest(std::vector<ValueDescr>* values) const;
+
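+  // --- Editor's sketch (not part of the upstream commit): dispatching against
+  // exact argument descriptors; "add" is a built-in scalar function:
+  //
+  //   auto func =
+  //       arrow::compute::GetFunctionRegistry()->GetFunction("add").ValueOrDie();
+  //   std::vector<arrow::compute::ValueDescr> args = {
+  //       arrow::compute::ValueDescr::Array(arrow::int64()),
+  //       arrow::compute::ValueDescr::Array(arrow::int64())};
+  //   const arrow::compute::Kernel* kernel =
+  //       func->DispatchExact(args).ValueOrDie();
+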
/// \brief Execute the function eagerly with the passed input arguments with
/// kernel dispatch, batch iteration, and memory allocation details taken
/// care of.
@@ -231,24 +231,24 @@ class ARROW_EXPORT Function {
/// that default_options() is valid to pass to Execute as options.
const FunctionOptions* default_options() const { return default_options_; }
- virtual Status Validate() const;
-
+ virtual Status Validate() const;
+
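+  // --- Editor's sketch (not part of the upstream commit): most callers reach
+  // Execute() through the CallFunction convenience wrapper, e.g.
+  //
+  //   arrow::Datum sum =
+  //       arrow::compute::CallFunction("add", {a, b}).ValueOrDie();
+  //
+  // where `a` and `b` are Datum-convertible arrays or scalars (illustrative).
+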
protected:
Function(std::string name, Function::Kind kind, const Arity& arity,
- const FunctionDoc* doc, const FunctionOptions* default_options)
+ const FunctionDoc* doc, const FunctionOptions* default_options)
: name_(std::move(name)),
kind_(kind),
arity_(arity),
- doc_(doc ? doc : &FunctionDoc::Empty()),
+ doc_(doc ? doc : &FunctionDoc::Empty()),
default_options_(default_options) {}
- Status CheckArity(const std::vector<InputType>&) const;
- Status CheckArity(const std::vector<ValueDescr>&) const;
+ Status CheckArity(const std::vector<InputType>&) const;
+ Status CheckArity(const std::vector<ValueDescr>&) const;
std::string name_;
Function::Kind kind_;
Arity arity_;
- const FunctionDoc* doc_;
+ const FunctionDoc* doc_;
const FunctionOptions* default_options_ = NULLPTR;
};
@@ -270,20 +270,20 @@ class FunctionImpl : public Function {
protected:
FunctionImpl(std::string name, Function::Kind kind, const Arity& arity,
- const FunctionDoc* doc, const FunctionOptions* default_options)
- : Function(std::move(name), kind, arity, doc, default_options) {}
+ const FunctionDoc* doc, const FunctionOptions* default_options)
+ : Function(std::move(name), kind, arity, doc, default_options) {}
std::vector<KernelType> kernels_;
};
-/// \brief Look up a kernel in a function. If no Kernel is found, nullptr is returned.
-ARROW_EXPORT
-const Kernel* DispatchExactImpl(const Function* func, const std::vector<ValueDescr>&);
-
-/// \brief Return an error Status if no Kernel is found.
-ARROW_EXPORT
-Status NoMatchingKernel(const Function* func, const std::vector<ValueDescr>&);
-
+/// \brief Look up a kernel in a function. If no Kernel is found, nullptr is returned.
+ARROW_EXPORT
+const Kernel* DispatchExactImpl(const Function* func, const std::vector<ValueDescr>&);
+
+/// \brief Return an error Status if no Kernel is found.
+ARROW_EXPORT
+Status NoMatchingKernel(const Function* func, const std::vector<ValueDescr>&);
+
} // namespace detail
/// \brief A function that executes elementwise operations on arrays or
@@ -295,9 +295,9 @@ class ARROW_EXPORT ScalarFunction : public detail::FunctionImpl<ScalarKernel> {
public:
using KernelType = ScalarKernel;
- ScalarFunction(std::string name, const Arity& arity, const FunctionDoc* doc,
+ ScalarFunction(std::string name, const Arity& arity, const FunctionDoc* doc,
const FunctionOptions* default_options = NULLPTR)
- : detail::FunctionImpl<ScalarKernel>(std::move(name), Function::SCALAR, arity, doc,
+ : detail::FunctionImpl<ScalarKernel>(std::move(name), Function::SCALAR, arity, doc,
default_options) {}
/// \brief Add a kernel with given input/output types, no required state
@@ -319,9 +319,9 @@ class ARROW_EXPORT VectorFunction : public detail::FunctionImpl<VectorKernel> {
public:
using KernelType = VectorKernel;
- VectorFunction(std::string name, const Arity& arity, const FunctionDoc* doc,
+ VectorFunction(std::string name, const Arity& arity, const FunctionDoc* doc,
const FunctionOptions* default_options = NULLPTR)
- : detail::FunctionImpl<VectorKernel>(std::move(name), Function::VECTOR, arity, doc,
+ : detail::FunctionImpl<VectorKernel>(std::move(name), Function::VECTOR, arity, doc,
default_options) {}
/// \brief Add a simple kernel with given input/output types, no required
@@ -340,29 +340,29 @@ class ARROW_EXPORT ScalarAggregateFunction
public:
using KernelType = ScalarAggregateKernel;
- ScalarAggregateFunction(std::string name, const Arity& arity, const FunctionDoc* doc,
+ ScalarAggregateFunction(std::string name, const Arity& arity, const FunctionDoc* doc,
const FunctionOptions* default_options = NULLPTR)
: detail::FunctionImpl<ScalarAggregateKernel>(
- std::move(name), Function::SCALAR_AGGREGATE, arity, doc, default_options) {}
+ std::move(name), Function::SCALAR_AGGREGATE, arity, doc, default_options) {}
/// \brief Add a kernel (function implementation). Returns error if the
/// kernel's signature does not match the function's arity.
Status AddKernel(ScalarAggregateKernel kernel);
-};
-
-class ARROW_EXPORT HashAggregateFunction
- : public detail::FunctionImpl<HashAggregateKernel> {
- public:
- using KernelType = HashAggregateKernel;
-
- HashAggregateFunction(std::string name, const Arity& arity, const FunctionDoc* doc,
- const FunctionOptions* default_options = NULLPTR)
- : detail::FunctionImpl<HashAggregateKernel>(
- std::move(name), Function::HASH_AGGREGATE, arity, doc, default_options) {}
-
- /// \brief Add a kernel (function implementation). Returns error if the
- /// kernel's signature does not match the function's arity.
- Status AddKernel(HashAggregateKernel kernel);
+};
+
+class ARROW_EXPORT HashAggregateFunction
+ : public detail::FunctionImpl<HashAggregateKernel> {
+ public:
+ using KernelType = HashAggregateKernel;
+
+ HashAggregateFunction(std::string name, const Arity& arity, const FunctionDoc* doc,
+ const FunctionOptions* default_options = NULLPTR)
+ : detail::FunctionImpl<HashAggregateKernel>(
+ std::move(name), Function::HASH_AGGREGATE, arity, doc, default_options) {}
+
+ /// \brief Add a kernel (function implementation). Returns error if the
+ /// kernel's signature does not match the function's arity.
+ Status AddKernel(HashAggregateKernel kernel);
};
/// \brief A function that dispatches to other functions. Must implement
@@ -382,9 +382,9 @@ class ARROW_EXPORT MetaFunction : public Function {
const FunctionOptions* options,
ExecContext* ctx) const = 0;
- MetaFunction(std::string name, const Arity& arity, const FunctionDoc* doc,
+ MetaFunction(std::string name, const Arity& arity, const FunctionDoc* doc,
const FunctionOptions* default_options = NULLPTR)
- : Function(std::move(name), Function::META, arity, doc, default_options) {}
+ : Function(std::move(name), Function::META, arity, doc, default_options) {}
};
/// @}
diff --git a/contrib/libs/apache/arrow/cpp/src/arrow/compute/function_internal.cc b/contrib/libs/apache/arrow/cpp/src/arrow/compute/function_internal.cc
index 8515d957cbd..0a926e0a39c 100644
--- a/contrib/libs/apache/arrow/cpp/src/arrow/compute/function_internal.cc
+++ b/contrib/libs/apache/arrow/cpp/src/arrow/compute/function_internal.cc
@@ -1,113 +1,113 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-#include "arrow/compute/function_internal.h"
-
-#include "arrow/array/util.h"
-#include "arrow/compute/function.h"
-#include "arrow/compute/registry.h"
-#include "arrow/io/memory.h"
-#include "arrow/ipc/reader.h"
-#include "arrow/ipc/writer.h"
-#include "arrow/record_batch.h"
-#include "arrow/scalar.h"
-#include "arrow/util/checked_cast.h"
-
-namespace arrow {
-namespace compute {
-namespace internal {
-using ::arrow::internal::checked_cast;
-
-constexpr char kTypeNameField[] = "_type_name";
-
-Result<std::shared_ptr<StructScalar>> FunctionOptionsToStructScalar(
- const FunctionOptions& options) {
- std::vector<std::string> field_names;
- std::vector<std::shared_ptr<Scalar>> values;
- const auto* options_type =
- dynamic_cast<const GenericOptionsType*>(options.options_type());
- if (!options_type) {
- return Status::NotImplemented("serializing ", options.type_name(),
- " to StructScalar");
- }
- RETURN_NOT_OK(options_type->ToStructScalar(options, &field_names, &values));
- field_names.push_back(kTypeNameField);
- const char* options_name = options.type_name();
- values.emplace_back(
- new BinaryScalar(Buffer::Wrap(options_name, std::strlen(options_name))));
- return StructScalar::Make(std::move(values), std::move(field_names));
-}
-
-Result<std::unique_ptr<FunctionOptions>> FunctionOptionsFromStructScalar(
- const StructScalar& scalar) {
- ARROW_ASSIGN_OR_RAISE(auto type_name_holder, scalar.field(kTypeNameField));
- const std::string type_name =
- checked_cast<const BinaryScalar&>(*type_name_holder).value->ToString();
- ARROW_ASSIGN_OR_RAISE(auto raw_options_type,
- GetFunctionRegistry()->GetFunctionOptionsType(type_name));
- const auto* options_type = checked_cast<const GenericOptionsType*>(raw_options_type);
- return options_type->FromStructScalar(scalar);
-}
-
-Result<std::shared_ptr<Buffer>> GenericOptionsType::Serialize(
- const FunctionOptions& options) const {
- ARROW_ASSIGN_OR_RAISE(auto scalar, FunctionOptionsToStructScalar(options));
- ARROW_ASSIGN_OR_RAISE(auto array, MakeArrayFromScalar(*scalar, 1));
- auto batch =
- RecordBatch::Make(schema({field("", array->type())}), /*num_rows=*/1, {array});
- ARROW_ASSIGN_OR_RAISE(auto stream, io::BufferOutputStream::Create());
- ARROW_ASSIGN_OR_RAISE(auto writer, ipc::MakeFileWriter(stream, batch->schema()));
- RETURN_NOT_OK(writer->WriteRecordBatch(*batch));
- RETURN_NOT_OK(writer->Close());
- return stream->Finish();
-}
-
-Result<std::unique_ptr<FunctionOptions>> GenericOptionsType::Deserialize(
- const Buffer& buffer) const {
- return DeserializeFunctionOptions(buffer);
-}
-
-Result<std::unique_ptr<FunctionOptions>> DeserializeFunctionOptions(
- const Buffer& buffer) {
- io::BufferReader stream(buffer);
- ARROW_ASSIGN_OR_RAISE(auto reader, ipc::RecordBatchFileReader::Open(&stream));
- ARROW_ASSIGN_OR_RAISE(auto batch, reader->ReadRecordBatch(0));
- if (batch->num_rows() != 1) {
- return Status::Invalid(
- "serialized FunctionOptions's batch repr was not a single row - had ",
- batch->num_rows());
- }
- if (batch->num_columns() != 1) {
- return Status::Invalid(
- "serialized FunctionOptions's batch repr was not a single column - had ",
- batch->num_columns());
- }
- auto column = batch->column(0);
- if (column->type()->id() != Type::STRUCT) {
- return Status::Invalid(
- "serialized FunctionOptions's batch repr was not a struct column - was ",
- column->type()->ToString());
- }
- ARROW_ASSIGN_OR_RAISE(auto raw_scalar,
- checked_cast<const StructArray&>(*column).GetScalar(0));
- auto scalar = checked_cast<const StructScalar&>(*raw_scalar);
- return FunctionOptionsFromStructScalar(scalar);
-}
-
-} // namespace internal
-} // namespace compute
-} // namespace arrow
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "arrow/compute/function_internal.h"
+
+#include "arrow/array/util.h"
+#include "arrow/compute/function.h"
+#include "arrow/compute/registry.h"
+#include "arrow/io/memory.h"
+#include "arrow/ipc/reader.h"
+#include "arrow/ipc/writer.h"
+#include "arrow/record_batch.h"
+#include "arrow/scalar.h"
+#include "arrow/util/checked_cast.h"
+
+namespace arrow {
+namespace compute {
+namespace internal {
+using ::arrow::internal::checked_cast;
+
+constexpr char kTypeNameField[] = "_type_name";
+
+Result<std::shared_ptr<StructScalar>> FunctionOptionsToStructScalar(
+ const FunctionOptions& options) {
+ std::vector<std::string> field_names;
+ std::vector<std::shared_ptr<Scalar>> values;
+ const auto* options_type =
+ dynamic_cast<const GenericOptionsType*>(options.options_type());
+ if (!options_type) {
+ return Status::NotImplemented("serializing ", options.type_name(),
+ " to StructScalar");
+ }
+ RETURN_NOT_OK(options_type->ToStructScalar(options, &field_names, &values));
+ field_names.push_back(kTypeNameField);
+ const char* options_name = options.type_name();
+ values.emplace_back(
+ new BinaryScalar(Buffer::Wrap(options_name, std::strlen(options_name))));
+ return StructScalar::Make(std::move(values), std::move(field_names));
+}
+
+Result<std::unique_ptr<FunctionOptions>> FunctionOptionsFromStructScalar(
+ const StructScalar& scalar) {
+ ARROW_ASSIGN_OR_RAISE(auto type_name_holder, scalar.field(kTypeNameField));
+ const std::string type_name =
+ checked_cast<const BinaryScalar&>(*type_name_holder).value->ToString();
+ ARROW_ASSIGN_OR_RAISE(auto raw_options_type,
+ GetFunctionRegistry()->GetFunctionOptionsType(type_name));
+ const auto* options_type = checked_cast<const GenericOptionsType*>(raw_options_type);
+ return options_type->FromStructScalar(scalar);
+}
+
+Result<std::shared_ptr<Buffer>> GenericOptionsType::Serialize(
+ const FunctionOptions& options) const {
+ ARROW_ASSIGN_OR_RAISE(auto scalar, FunctionOptionsToStructScalar(options));
+ ARROW_ASSIGN_OR_RAISE(auto array, MakeArrayFromScalar(*scalar, 1));
+ auto batch =
+ RecordBatch::Make(schema({field("", array->type())}), /*num_rows=*/1, {array});
+ ARROW_ASSIGN_OR_RAISE(auto stream, io::BufferOutputStream::Create());
+ ARROW_ASSIGN_OR_RAISE(auto writer, ipc::MakeFileWriter(stream, batch->schema()));
+ RETURN_NOT_OK(writer->WriteRecordBatch(*batch));
+ RETURN_NOT_OK(writer->Close());
+ return stream->Finish();
+}
+
+Result<std::unique_ptr<FunctionOptions>> GenericOptionsType::Deserialize(
+ const Buffer& buffer) const {
+ return DeserializeFunctionOptions(buffer);
+}
+
+Result<std::unique_ptr<FunctionOptions>> DeserializeFunctionOptions(
+ const Buffer& buffer) {
+ io::BufferReader stream(buffer);
+ ARROW_ASSIGN_OR_RAISE(auto reader, ipc::RecordBatchFileReader::Open(&stream));
+ ARROW_ASSIGN_OR_RAISE(auto batch, reader->ReadRecordBatch(0));
+ if (batch->num_rows() != 1) {
+ return Status::Invalid(
+ "serialized FunctionOptions's batch repr was not a single row - had ",
+ batch->num_rows());
+ }
+ if (batch->num_columns() != 1) {
+ return Status::Invalid(
+ "serialized FunctionOptions's batch repr was not a single column - had ",
+ batch->num_columns());
+ }
+ auto column = batch->column(0);
+ if (column->type()->id() != Type::STRUCT) {
+ return Status::Invalid(
+ "serialized FunctionOptions's batch repr was not a struct column - was ",
+ column->type()->ToString());
+ }
+ ARROW_ASSIGN_OR_RAISE(auto raw_scalar,
+ checked_cast<const StructArray&>(*column).GetScalar(0));
+ auto scalar = checked_cast<const StructScalar&>(*raw_scalar);
+ return FunctionOptionsFromStructScalar(scalar);
+}
+
+} // namespace internal
+} // namespace compute
+} // namespace arrow
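+// --- Editor's note (not part of the upstream commit): the wire format above
+// is a one-row, one-column record batch whose single column is a struct array,
+// written as an IPC file; the "_type_name" struct field selects the options
+// type in the default registry on the way back:
+//
+//   ARROW_ASSIGN_OR_RAISE(auto buf, options_type->Serialize(options));
+//   ARROW_ASSIGN_OR_RAISE(
+//       auto opts, arrow::compute::internal::DeserializeFunctionOptions(*buf));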
diff --git a/contrib/libs/apache/arrow/cpp/src/arrow/compute/function_internal.h b/contrib/libs/apache/arrow/cpp/src/arrow/compute/function_internal.h
index 9ce0c3cc84e..fdd7f09ba1f 100644
--- a/contrib/libs/apache/arrow/cpp/src/arrow/compute/function_internal.h
+++ b/contrib/libs/apache/arrow/cpp/src/arrow/compute/function_internal.h
@@ -1,626 +1,626 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-#pragma once
-
-#include <sstream>
-#include <string>
-#include <vector>
-
-#include "arrow/array/builder_base.h"
-#include "arrow/array/builder_binary.h"
-#include "arrow/array/builder_nested.h"
-#include "arrow/compute/api_vector.h"
-#include "arrow/compute/function.h"
-#include "arrow/compute/type_fwd.h"
-#include "arrow/result.h"
-#include "arrow/status.h"
-#include "arrow/util/checked_cast.h"
-#include "arrow/util/key_value_metadata.h"
-#include "arrow/util/reflection_internal.h"
-#include "arrow/util/string.h"
-#include "arrow/util/visibility.h"
-
-namespace arrow {
-struct Scalar;
-struct StructScalar;
-using ::arrow::internal::checked_cast;
-
-namespace internal {
-template <>
-struct EnumTraits<compute::SortOrder>
- : BasicEnumTraits<compute::SortOrder, compute::SortOrder::Ascending,
- compute::SortOrder::Descending> {
- static std::string name() { return "SortOrder"; }
- static std::string value_name(compute::SortOrder value) {
- switch (value) {
- case compute::SortOrder::Ascending:
- return "Ascending";
- case compute::SortOrder::Descending:
- return "Descending";
- }
- return "<INVALID>";
- }
-};
-} // namespace internal
-
-namespace compute {
-namespace internal {
-
-using arrow::internal::EnumTraits;
-using arrow::internal::has_enum_traits;
-
-template <typename Enum, typename CType = typename std::underlying_type<Enum>::type>
-Result<Enum> ValidateEnumValue(CType raw) {
- for (auto valid : EnumTraits<Enum>::values()) {
- if (raw == static_cast<CType>(valid)) {
- return static_cast<Enum>(raw);
- }
- }
- return Status::Invalid("Invalid value for ", EnumTraits<Enum>::name(), ": ", raw);
-}
-
-class GenericOptionsType : public FunctionOptionsType {
- public:
- Result<std::shared_ptr<Buffer>> Serialize(const FunctionOptions&) const override;
- Result<std::unique_ptr<FunctionOptions>> Deserialize(
- const Buffer& buffer) const override;
- virtual Status ToStructScalar(const FunctionOptions& options,
- std::vector<std::string>* field_names,
- std::vector<std::shared_ptr<Scalar>>* values) const = 0;
- virtual Result<std::unique_ptr<FunctionOptions>> FromStructScalar(
- const StructScalar& scalar) const = 0;
-};
-
-ARROW_EXPORT
-Result<std::shared_ptr<StructScalar>> FunctionOptionsToStructScalar(
- const FunctionOptions&);
-ARROW_EXPORT
-Result<std::unique_ptr<FunctionOptions>> FunctionOptionsFromStructScalar(
- const StructScalar&);
-ARROW_EXPORT
-Result<std::unique_ptr<FunctionOptions>> DeserializeFunctionOptions(const Buffer& buffer);
-
-template <typename T>
-static inline enable_if_t<!has_enum_traits<T>::value, std::string> GenericToString(
- const T& value) {
- std::stringstream ss;
- ss << value;
- return ss.str();
-}
-
-static inline std::string GenericToString(bool value) { return value ? "true" : "false"; }
-
-static inline std::string GenericToString(const std::string& value) {
- std::stringstream ss;
- ss << '"' << value << '"';
- return ss.str();
-}
-
-template <typename T>
-static inline enable_if_t<has_enum_traits<T>::value, std::string> GenericToString(
- const T value) {
- return EnumTraits<T>::value_name(value);
-}
-
-template <typename T>
-static inline std::string GenericToString(const std::shared_ptr<T>& value) {
- std::stringstream ss;
-}
-
-static inline std::string GenericToString(const std::shared_ptr<Scalar>& value) {
- std::stringstream ss;
- ss << value->type->ToString() << ":" << value->ToString();
- return ss.str();
-}
-
-static inline std::string GenericToString(
- const std::shared_ptr<const KeyValueMetadata>& value) {
- std::stringstream ss;
- ss << "KeyValueMetadata{";
- if (value) {
- bool first = true;
- for (const auto& pair : value->sorted_pairs()) {
- if (!first) ss << ", ";
- first = false;
- ss << pair.first << ':' << pair.second;
- }
- }
- ss << '}';
- return ss.str();
-}
-
-static inline std::string GenericToString(const Datum& value) {
- switch (value.kind()) {
- case Datum::NONE:
- return "<NULL DATUM>";
- case Datum::SCALAR:
- return GenericToString(value.scalar());
- case Datum::ARRAY: {
- std::stringstream ss;
- ss << value.type()->ToString() << ':' << value.make_array()->ToString();
- return ss.str();
- }
- case Datum::CHUNKED_ARRAY:
- case Datum::RECORD_BATCH:
- case Datum::TABLE:
- case Datum::COLLECTION:
- return value.ToString();
- }
- return value.ToString();
-}
-
-template <typename T>
-static inline std::string GenericToString(const std::vector<T>& value) {
- std::stringstream ss;
- ss << "[";
- bool first = true;
- // Don't use range-for with auto& to avoid Clang -Wrange-loop-analysis
- for (auto it = value.begin(); it != value.end(); it++) {
- if (!first) ss << ", ";
- first = false;
- ss << GenericToString(*it);
- }
- ss << ']';
- return ss.str();
-}
-
-static inline std::string GenericToString(SortOrder value) {
- switch (value) {
- case SortOrder::Ascending:
- return "Ascending";
- case SortOrder::Descending:
- return "Descending";
- }
- return "<INVALID SORT ORDER>";
-}
-
-static inline std::string GenericToString(const std::vector<SortKey>& value) {
- std::stringstream ss;
- ss << '[';
- bool first = true;
- for (const auto& key : value) {
- if (!first) {
- ss << ", ";
- }
- first = false;
- ss << key.ToString();
- }
- ss << ']';
- return ss.str();
-}
-
-template <typename T>
-static inline bool GenericEquals(const T& left, const T& right) {
- return left == right;
-}
-
-template <typename T>
-static inline bool GenericEquals(const std::shared_ptr<T>& left,
- const std::shared_ptr<T>& right) {
- if (left && right) {
- return left->Equals(*right);
- }
- return left == right;
-}
-
-static inline bool IsEmpty(const std::shared_ptr<const KeyValueMetadata>& meta) {
- return !meta || meta->size() == 0;
-}
-
-static inline bool GenericEquals(const std::shared_ptr<const KeyValueMetadata>& left,
- const std::shared_ptr<const KeyValueMetadata>& right) {
- // Special case since null metadata is considered equivalent to empty
- if (IsEmpty(left) || IsEmpty(right)) {
- return IsEmpty(left) && IsEmpty(right);
- }
- return left->Equals(*right);
-}
-
-template <typename T>
-static inline bool GenericEquals(const std::vector<T>& left,
- const std::vector<T>& right) {
- if (left.size() != right.size()) return false;
- for (size_t i = 0; i < left.size(); i++) {
- if (!GenericEquals(left[i], right[i])) return false;
- }
- return true;
-}
-
-template <typename T>
-static inline decltype(TypeTraits<typename CTypeTraits<T>::ArrowType>::type_singleton())
-GenericTypeSingleton() {
- return TypeTraits<typename CTypeTraits<T>::ArrowType>::type_singleton();
-}
-
-template <typename T>
-static inline enable_if_same<T, std::shared_ptr<const KeyValueMetadata>,
- std::shared_ptr<DataType>>
-GenericTypeSingleton() {
- return map(binary(), binary());
-}
-
-template <typename T>
-static inline enable_if_t<has_enum_traits<T>::value, std::shared_ptr<DataType>>
-GenericTypeSingleton() {
- return TypeTraits<typename EnumTraits<T>::Type>::type_singleton();
-}
-
-template <typename T>
-static inline enable_if_same<T, SortKey, std::shared_ptr<DataType>>
-GenericTypeSingleton() {
- std::vector<std::shared_ptr<Field>> fields;
- fields.emplace_back(new Field("name", GenericTypeSingleton<std::string>()));
- fields.emplace_back(new Field("order", GenericTypeSingleton<SortOrder>()));
- return std::make_shared<StructType>(std::move(fields));
-}
-
-// N.B. ordering of overloads is relatively fragile
-template <typename T>
-static inline Result<decltype(MakeScalar(std::declval<T>()))> GenericToScalar(
- const T& value) {
- return MakeScalar(value);
-}
-
-// For Clang/libc++: when iterating through vector<bool>, we can't
-// pass it by reference so the overload above doesn't apply
-static inline Result<std::shared_ptr<Scalar>> GenericToScalar(bool value) {
- return MakeScalar(value);
-}
-
-template <typename T, typename Enable = enable_if_t<has_enum_traits<T>::value>>
-static inline Result<std::shared_ptr<Scalar>> GenericToScalar(const T value) {
- using CType = typename EnumTraits<T>::CType;
- return GenericToScalar(static_cast<CType>(value));
-}
-
-static inline Result<std::shared_ptr<Scalar>> GenericToScalar(const SortKey& value) {
- ARROW_ASSIGN_OR_RAISE(auto name, GenericToScalar(value.name));
- ARROW_ASSIGN_OR_RAISE(auto order, GenericToScalar(value.order));
- return StructScalar::Make({name, order}, {"name", "order"});
-}
-
-static inline Result<std::shared_ptr<Scalar>> GenericToScalar(
- const std::shared_ptr<const KeyValueMetadata>& value) {
- auto ty = GenericTypeSingleton<std::shared_ptr<const KeyValueMetadata>>();
- std::unique_ptr<ArrayBuilder> builder;
- RETURN_NOT_OK(MakeBuilder(default_memory_pool(), ty, &builder));
- auto* map_builder = checked_cast<MapBuilder*>(builder.get());
- auto* key_builder = checked_cast<BinaryBuilder*>(map_builder->key_builder());
- auto* item_builder = checked_cast<BinaryBuilder*>(map_builder->item_builder());
- RETURN_NOT_OK(map_builder->Append());
- if (value) {
- RETURN_NOT_OK(key_builder->AppendValues(value->keys()));
- RETURN_NOT_OK(item_builder->AppendValues(value->values()));
- }
- std::shared_ptr<Array> arr;
- RETURN_NOT_OK(map_builder->Finish(&arr));
- return arr->GetScalar(0);
-}
-
-template <typename T>
-static inline Result<std::shared_ptr<Scalar>> GenericToScalar(
- const std::vector<T>& value) {
- std::shared_ptr<DataType> type = GenericTypeSingleton<T>();
- std::vector<std::shared_ptr<Scalar>> scalars;
- scalars.reserve(value.size());
- // Don't use range-for with auto& to avoid Clang -Wrange-loop-analysis
- for (auto it = value.begin(); it != value.end(); it++) {
- ARROW_ASSIGN_OR_RAISE(auto scalar, GenericToScalar(*it));
- scalars.push_back(std::move(scalar));
- }
- std::unique_ptr<ArrayBuilder> builder;
- RETURN_NOT_OK(
- MakeBuilder(default_memory_pool(), type ? type : scalars[0]->type, &builder));
- RETURN_NOT_OK(builder->AppendScalars(scalars));
- std::shared_ptr<Array> out;
- RETURN_NOT_OK(builder->Finish(&out));
- return std::make_shared<ListScalar>(std::move(out));
-}
-
-static inline Result<std::shared_ptr<Scalar>> GenericToScalar(
- const std::shared_ptr<DataType>& value) {
- if (!value) {
- return Status::Invalid("shared_ptr<DataType> is nullptr");
- }
- return MakeNullScalar(value);
-}
-
-static inline Result<std::shared_ptr<Scalar>> GenericToScalar(
- const std::shared_ptr<Scalar>& value) {
- return value;
-}
-
-static inline Result<std::shared_ptr<Scalar>> GenericToScalar(
- const std::shared_ptr<Array>& value) {
- return std::make_shared<ListScalar>(value);
-}
-
-static inline Result<std::shared_ptr<Scalar>> GenericToScalar(const Datum& value) {
- // TODO(ARROW-9434): store in a union instead.
- switch (value.kind()) {
- case Datum::ARRAY:
- return GenericToScalar(value.make_array());
- default:
- return Status::NotImplemented("Cannot serialize Datum kind ", value.kind());
- }
-}
-
-template <typename T>
-static inline enable_if_primitive_ctype<typename CTypeTraits<T>::ArrowType, Result<T>>
-GenericFromScalar(const std::shared_ptr<Scalar>& value) {
- using ArrowType = typename CTypeTraits<T>::ArrowType;
- using ScalarType = typename TypeTraits<ArrowType>::ScalarType;
- if (value->type->id() != ArrowType::type_id) {
- return Status::Invalid("Expected type ", ArrowType::type_id, " but got ",
- value->type->ToString());
- }
- const auto& holder = checked_cast<const ScalarType&>(*value);
- if (!holder.is_valid) return Status::Invalid("Got null scalar");
- return holder.value;
-}
-
-template <typename T>
-static inline enable_if_primitive_ctype<typename EnumTraits<T>::Type, Result<T>>
-GenericFromScalar(const std::shared_ptr<Scalar>& value) {
- ARROW_ASSIGN_OR_RAISE(auto raw_val,
- GenericFromScalar<typename EnumTraits<T>::CType>(value));
- return ValidateEnumValue<T>(raw_val);
-}
-
-template <typename T, typename U>
-using enable_if_same_result = enable_if_same<T, U, Result<T>>;
-
-template <typename T>
-static inline enable_if_same_result<T, std::string> GenericFromScalar(
- const std::shared_ptr<Scalar>& value) {
- if (!is_base_binary_like(value->type->id())) {
- return Status::Invalid("Expected binary-like type but got ", value->type->ToString());
- }
- const auto& holder = checked_cast<const BaseBinaryScalar&>(*value);
- if (!holder.is_valid) return Status::Invalid("Got null scalar");
- return holder.value->ToString();
-}
-
-template <typename T>
-static inline enable_if_same_result<T, SortKey> GenericFromScalar(
- const std::shared_ptr<Scalar>& value) {
- if (value->type->id() != Type::STRUCT) {
- return Status::Invalid("Expected type STRUCT but got ", value->type->id());
- }
- if (!value->is_valid) return Status::Invalid("Got null scalar");
- const auto& holder = checked_cast<const StructScalar&>(*value);
- ARROW_ASSIGN_OR_RAISE(auto name_holder, holder.field("name"));
- ARROW_ASSIGN_OR_RAISE(auto order_holder, holder.field("order"));
- ARROW_ASSIGN_OR_RAISE(auto name, GenericFromScalar<std::string>(name_holder));
- ARROW_ASSIGN_OR_RAISE(auto order, GenericFromScalar<SortOrder>(order_holder));
- return SortKey{std::move(name), order};
-}
-
-template <typename T>
-static inline enable_if_same_result<T, std::shared_ptr<DataType>> GenericFromScalar(
- const std::shared_ptr<Scalar>& value) {
- return value->type;
-}
-
-template <typename T>
-static inline enable_if_same_result<T, std::shared_ptr<Scalar>> GenericFromScalar(
- const std::shared_ptr<Scalar>& value) {
- return value;
-}
-
-template <typename T>
-static inline enable_if_same_result<T, std::shared_ptr<const KeyValueMetadata>>
-GenericFromScalar(const std::shared_ptr<Scalar>& value) {
- auto ty = GenericTypeSingleton<std::shared_ptr<const KeyValueMetadata>>();
- if (!value->type->Equals(ty)) {
- return Status::Invalid("Expected ", ty->ToString(), " but got ",
- value->type->ToString());
- }
- const auto& holder = checked_cast<const MapScalar&>(*value);
- std::vector<std::string> keys;
- std::vector<std::string> values;
- const auto& list = checked_cast<const StructArray&>(*holder.value);
- const auto& key_arr = checked_cast<const BinaryArray&>(*list.field(0));
- const auto& value_arr = checked_cast<const BinaryArray&>(*list.field(1));
- for (int64_t i = 0; i < list.length(); i++) {
- keys.push_back(key_arr.GetString(i));
- values.push_back(value_arr.GetString(i));
- }
- return key_value_metadata(std::move(keys), std::move(values));
-}
-
-template <typename T>
-static inline enable_if_same_result<T, Datum> GenericFromScalar(
- const std::shared_ptr<Scalar>& value) {
- if (value->type->id() == Type::LIST) {
- const auto& holder = checked_cast<const BaseListScalar&>(*value);
- return holder.value;
- }
- // TODO(ARROW-9434): handle other possible datum kinds by looking for a union
- return Status::Invalid("Cannot deserialize Datum from ", value->ToString());
-}
-
-template <typename T>
-static enable_if_same<typename CTypeTraits<T>::ArrowType, ListType, Result<T>>
-GenericFromScalar(const std::shared_ptr<Scalar>& value) {
- using ValueType = typename T::value_type;
- if (value->type->id() != Type::LIST) {
- return Status::Invalid("Expected type LIST but got ", value->type->ToString());
- }
- const auto& holder = checked_cast<const BaseListScalar&>(*value);
- if (!holder.is_valid) return Status::Invalid("Got null scalar");
- std::vector<ValueType> result;
-  for (int64_t i = 0; i < holder.value->length(); i++) {
- ARROW_ASSIGN_OR_RAISE(auto scalar, holder.value->GetScalar(i));
- ARROW_ASSIGN_OR_RAISE(auto v, GenericFromScalar<ValueType>(scalar));
- result.push_back(std::move(v));
- }
- return result;
-}
-
-template <typename Options>
-struct StringifyImpl {
- template <typename Tuple>
- StringifyImpl(const Options& obj, const Tuple& props)
- : obj_(obj), members_(props.size()) {
- props.ForEach(*this);
- }
-
- template <typename Property>
- void operator()(const Property& prop, size_t i) {
- std::stringstream ss;
- ss << prop.name() << '=' << GenericToString(prop.get(obj_));
- members_[i] = ss.str();
- }
-
- std::string Finish() {
- return "{" + arrow::internal::JoinStrings(members_, ", ") + "}";
- }
-
- const Options& obj_;
- std::vector<std::string> members_;
-};
-
-template <typename Options>
-struct CompareImpl {
- template <typename Tuple>
- CompareImpl(const Options& l, const Options& r, const Tuple& props)
- : left_(l), right_(r) {
- props.ForEach(*this);
- }
-
- template <typename Property>
- void operator()(const Property& prop, size_t) {
- equal_ &= GenericEquals(prop.get(left_), prop.get(right_));
- }
-
- const Options& left_;
- const Options& right_;
- bool equal_ = true;
-};
-
-template <typename Options>
-struct ToStructScalarImpl {
- template <typename Tuple>
- ToStructScalarImpl(const Options& obj, const Tuple& props,
- std::vector<std::string>* field_names,
- std::vector<std::shared_ptr<Scalar>>* values)
- : obj_(obj), field_names_(field_names), values_(values) {
- props.ForEach(*this);
- }
-
- template <typename Property>
- void operator()(const Property& prop, size_t) {
- if (!status_.ok()) return;
- auto result = GenericToScalar(prop.get(obj_));
- if (!result.ok()) {
- status_ = result.status().WithMessage("Could not serialize field ", prop.name(),
- " of options type ", Options::kTypeName, ": ",
- result.status().message());
- return;
- }
- field_names_->emplace_back(prop.name());
- values_->push_back(result.MoveValueUnsafe());
- }
-
- const Options& obj_;
- Status status_;
- std::vector<std::string>* field_names_;
- std::vector<std::shared_ptr<Scalar>>* values_;
-};
-
-template <typename Options>
-struct FromStructScalarImpl {
- template <typename Tuple>
- FromStructScalarImpl(Options* obj, const StructScalar& scalar, const Tuple& props)
- : obj_(obj), scalar_(scalar) {
- props.ForEach(*this);
- }
-
- template <typename Property>
- void operator()(const Property& prop, size_t) {
- if (!status_.ok()) return;
- auto maybe_holder = scalar_.field(std::string(prop.name()));
- if (!maybe_holder.ok()) {
- status_ = maybe_holder.status().WithMessage(
- "Cannot deserialize field ", prop.name(), " of options type ",
- Options::kTypeName, ": ", maybe_holder.status().message());
- return;
- }
- auto holder = maybe_holder.MoveValueUnsafe();
- auto result = GenericFromScalar<typename Property::Type>(holder);
- if (!result.ok()) {
- status_ = result.status().WithMessage("Cannot deserialize field ", prop.name(),
- " of options type ", Options::kTypeName, ": ",
- result.status().message());
- return;
- }
- prop.set(obj_, result.MoveValueUnsafe());
- }
-
- Options* obj_;
- Status status_;
- const StructScalar& scalar_;
-};
-
-template <typename Options, typename... Properties>
-const FunctionOptionsType* GetFunctionOptionsType(const Properties&... properties) {
- static const class OptionsType : public GenericOptionsType {
- public:
- explicit OptionsType(const arrow::internal::PropertyTuple<Properties...> properties)
- : properties_(properties) {}
-
- const char* type_name() const override { return Options::kTypeName; }
-
- std::string Stringify(const FunctionOptions& options) const override {
- const auto& self = checked_cast<const Options&>(options);
- return StringifyImpl<Options>(self, properties_).Finish();
- }
- bool Compare(const FunctionOptions& options,
- const FunctionOptions& other) const override {
- const auto& lhs = checked_cast<const Options&>(options);
- const auto& rhs = checked_cast<const Options&>(other);
- return CompareImpl<Options>(lhs, rhs, properties_).equal_;
- }
- Status ToStructScalar(const FunctionOptions& options,
- std::vector<std::string>* field_names,
- std::vector<std::shared_ptr<Scalar>>* values) const override {
- const auto& self = checked_cast<const Options&>(options);
- RETURN_NOT_OK(
- ToStructScalarImpl<Options>(self, properties_, field_names, values).status_);
- return Status::OK();
- }
- Result<std::unique_ptr<FunctionOptions>> FromStructScalar(
- const StructScalar& scalar) const override {
- auto options = std::unique_ptr<Options>(new Options());
- RETURN_NOT_OK(
- FromStructScalarImpl<Options>(options.get(), scalar, properties_).status_);
- return std::move(options);
- }
-
- private:
- const arrow::internal::PropertyTuple<Properties...> properties_;
- } instance(arrow::internal::MakeProperties(properties...));
- return &instance;
-}
-
-} // namespace internal
-} // namespace compute
-} // namespace arrow
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+
+#include <sstream>
+#include <string>
+#include <vector>
+
+#include "arrow/array/builder_base.h"
+#include "arrow/array/builder_binary.h"
+#include "arrow/array/builder_nested.h"
+#include "arrow/compute/api_vector.h"
+#include "arrow/compute/function.h"
+#include "arrow/compute/type_fwd.h"
+#include "arrow/result.h"
+#include "arrow/status.h"
+#include "arrow/util/checked_cast.h"
+#include "arrow/util/key_value_metadata.h"
+#include "arrow/util/reflection_internal.h"
+#include "arrow/util/string.h"
+#include "arrow/util/visibility.h"
+
+namespace arrow {
+struct Scalar;
+struct StructScalar;
+using ::arrow::internal::checked_cast;
+
+namespace internal {
+template <>
+struct EnumTraits<compute::SortOrder>
+ : BasicEnumTraits<compute::SortOrder, compute::SortOrder::Ascending,
+ compute::SortOrder::Descending> {
+ static std::string name() { return "SortOrder"; }
+ static std::string value_name(compute::SortOrder value) {
+ switch (value) {
+ case compute::SortOrder::Ascending:
+ return "Ascending";
+ case compute::SortOrder::Descending:
+ return "Descending";
+ }
+ return "<INVALID>";
+ }
+};
+} // namespace internal
+
+namespace compute {
+namespace internal {
+
+using arrow::internal::EnumTraits;
+using arrow::internal::has_enum_traits;
+
+template <typename Enum, typename CType = typename std::underlying_type<Enum>::type>
+Result<Enum> ValidateEnumValue(CType raw) {
+ for (auto valid : EnumTraits<Enum>::values()) {
+ if (raw == static_cast<CType>(valid)) {
+ return static_cast<Enum>(raw);
+ }
+ }
+ return Status::Invalid("Invalid value for ", EnumTraits<Enum>::name(), ": ", raw);
+}
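+// --- Editor's sketch (not part of the upstream commit): ValidateEnumValue
+// rejects raw integers that match no declared enumerator, e.g.
+//
+//   ValidateEnumValue<SortOrder>(0);   // -> SortOrder::Ascending
+//   ValidateEnumValue<SortOrder>(42);  // -> Status::Invalid
+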
+
+class GenericOptionsType : public FunctionOptionsType {
+ public:
+ Result<std::shared_ptr<Buffer>> Serialize(const FunctionOptions&) const override;
+ Result<std::unique_ptr<FunctionOptions>> Deserialize(
+ const Buffer& buffer) const override;
+ virtual Status ToStructScalar(const FunctionOptions& options,
+ std::vector<std::string>* field_names,
+ std::vector<std::shared_ptr<Scalar>>* values) const = 0;
+ virtual Result<std::unique_ptr<FunctionOptions>> FromStructScalar(
+ const StructScalar& scalar) const = 0;
+};
+
+ARROW_EXPORT
+Result<std::shared_ptr<StructScalar>> FunctionOptionsToStructScalar(
+ const FunctionOptions&);
+ARROW_EXPORT
+Result<std::unique_ptr<FunctionOptions>> FunctionOptionsFromStructScalar(
+ const StructScalar&);
+ARROW_EXPORT
+Result<std::unique_ptr<FunctionOptions>> DeserializeFunctionOptions(const Buffer& buffer);
+
+template <typename T>
+static inline enable_if_t<!has_enum_traits<T>::value, std::string> GenericToString(
+ const T& value) {
+ std::stringstream ss;
+ ss << value;
+ return ss.str();
+}
+
+static inline std::string GenericToString(bool value) { return value ? "true" : "false"; }
+
+static inline std::string GenericToString(const std::string& value) {
+ std::stringstream ss;
+ ss << '"' << value << '"';
+ return ss.str();
+}
+
+template <typename T>
+static inline enable_if_t<has_enum_traits<T>::value, std::string> GenericToString(
+ const T value) {
+ return EnumTraits<T>::value_name(value);
+}
+
+template <typename T>
+static inline std::string GenericToString(const std::shared_ptr<T>& value) {
+ return value ? value->ToString() : "<NULLPTR>";
+}
+
+static inline std::string GenericToString(const std::shared_ptr<Scalar>& value) {
+ std::stringstream ss;
+ ss << value->type->ToString() << ":" << value->ToString();
+ return ss.str();
+}
+
+static inline std::string GenericToString(
+ const std::shared_ptr<const KeyValueMetadata>& value) {
+ std::stringstream ss;
+ ss << "KeyValueMetadata{";
+ if (value) {
+ bool first = true;
+ for (const auto& pair : value->sorted_pairs()) {
+ if (!first) ss << ", ";
+ first = false;
+ ss << pair.first << ':' << pair.second;
+ }
+ }
+ ss << '}';
+ return ss.str();
+}
+
+static inline std::string GenericToString(const Datum& value) {
+ switch (value.kind()) {
+ case Datum::NONE:
+ return "<NULL DATUM>";
+ case Datum::SCALAR:
+ return GenericToString(value.scalar());
+ case Datum::ARRAY: {
+ std::stringstream ss;
+ ss << value.type()->ToString() << ':' << value.make_array()->ToString();
+ return ss.str();
+ }
+ case Datum::CHUNKED_ARRAY:
+ case Datum::RECORD_BATCH:
+ case Datum::TABLE:
+ case Datum::COLLECTION:
+ return value.ToString();
+ }
+ return value.ToString();
+}
+
+template <typename T>
+static inline std::string GenericToString(const std::vector<T>& value) {
+ std::stringstream ss;
+ ss << "[";
+ bool first = true;
+ // Don't use range-for with auto& to avoid Clang -Wrange-loop-analysis
+ for (auto it = value.begin(); it != value.end(); it++) {
+ if (!first) ss << ", ";
+ first = false;
+ ss << GenericToString(*it);
+ }
+ ss << ']';
+ return ss.str();
+}
+
+static inline std::string GenericToString(SortOrder value) {
+ switch (value) {
+ case SortOrder::Ascending:
+ return "Ascending";
+ case SortOrder::Descending:
+ return "Descending";
+ }
+ return "<INVALID SORT ORDER>";
+}
+
+static inline std::string GenericToString(const std::vector<SortKey>& value) {
+ std::stringstream ss;
+ ss << '[';
+ bool first = true;
+ for (const auto& key : value) {
+ if (!first) {
+ ss << ", ";
+ }
+ first = false;
+ ss << key.ToString();
+ }
+ ss << ']';
+ return ss.str();
+}
+
+template <typename T>
+static inline bool GenericEquals(const T& left, const T& right) {
+ return left == right;
+}
+
+template <typename T>
+static inline bool GenericEquals(const std::shared_ptr<T>& left,
+ const std::shared_ptr<T>& right) {
+ if (left && right) {
+ return left->Equals(*right);
+ }
+ return left == right;
+}
+
+static inline bool IsEmpty(const std::shared_ptr<const KeyValueMetadata>& meta) {
+ return !meta || meta->size() == 0;
+}
+
+static inline bool GenericEquals(const std::shared_ptr<const KeyValueMetadata>& left,
+ const std::shared_ptr<const KeyValueMetadata>& right) {
+ // Special case since null metadata is considered equivalent to empty
+ if (IsEmpty(left) || IsEmpty(right)) {
+ return IsEmpty(left) && IsEmpty(right);
+ }
+ return left->Equals(*right);
+}
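+// Editor's note (not part of the upstream commit): as a consequence, a null
+// metadata pointer compares equal to empty metadata, e.g.
+//
+//   GenericEquals(std::shared_ptr<const KeyValueMetadata>{},
+//                 arrow::key_value_metadata({}, {}));  // -> true
+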
+
+template <typename T>
+static inline bool GenericEquals(const std::vector<T>& left,
+ const std::vector<T>& right) {
+ if (left.size() != right.size()) return false;
+ for (size_t i = 0; i < left.size(); i++) {
+ if (!GenericEquals(left[i], right[i])) return false;
+ }
+ return true;
+}
+
+template <typename T>
+static inline decltype(TypeTraits<typename CTypeTraits<T>::ArrowType>::type_singleton())
+GenericTypeSingleton() {
+ return TypeTraits<typename CTypeTraits<T>::ArrowType>::type_singleton();
+}
+
+template <typename T>
+static inline enable_if_same<T, std::shared_ptr<const KeyValueMetadata>,
+ std::shared_ptr<DataType>>
+GenericTypeSingleton() {
+ return map(binary(), binary());
+}
+
+template <typename T>
+static inline enable_if_t<has_enum_traits<T>::value, std::shared_ptr<DataType>>
+GenericTypeSingleton() {
+ return TypeTraits<typename EnumTraits<T>::Type>::type_singleton();
+}
+
+template <typename T>
+static inline enable_if_same<T, SortKey, std::shared_ptr<DataType>>
+GenericTypeSingleton() {
+ std::vector<std::shared_ptr<Field>> fields;
+ fields.emplace_back(new Field("name", GenericTypeSingleton<std::string>()));
+ fields.emplace_back(new Field("order", GenericTypeSingleton<SortOrder>()));
+ return std::make_shared<StructType>(std::move(fields));
+}
+
+// N.B. ordering of overloads is relatively fragile
+template <typename T>
+static inline Result<decltype(MakeScalar(std::declval<T>()))> GenericToScalar(
+ const T& value) {
+ return MakeScalar(value);
+}
+
+// For Clang/libc++: when iterating through vector<bool>, we can't
+// pass it by reference so the overload above doesn't apply
+static inline Result<std::shared_ptr<Scalar>> GenericToScalar(bool value) {
+ return MakeScalar(value);
+}
+
+template <typename T, typename Enable = enable_if_t<has_enum_traits<T>::value>>
+static inline Result<std::shared_ptr<Scalar>> GenericToScalar(const T value) {
+ using CType = typename EnumTraits<T>::CType;
+ return GenericToScalar(static_cast<CType>(value));
+}
+
+static inline Result<std::shared_ptr<Scalar>> GenericToScalar(const SortKey& value) {
+ ARROW_ASSIGN_OR_RAISE(auto name, GenericToScalar(value.name));
+ ARROW_ASSIGN_OR_RAISE(auto order, GenericToScalar(value.order));
+ return StructScalar::Make({name, order}, {"name", "order"});
+}
+
+static inline Result<std::shared_ptr<Scalar>> GenericToScalar(
+ const std::shared_ptr<const KeyValueMetadata>& value) {
+ auto ty = GenericTypeSingleton<std::shared_ptr<const KeyValueMetadata>>();
+ std::unique_ptr<ArrayBuilder> builder;
+ RETURN_NOT_OK(MakeBuilder(default_memory_pool(), ty, &builder));
+ auto* map_builder = checked_cast<MapBuilder*>(builder.get());
+ auto* key_builder = checked_cast<BinaryBuilder*>(map_builder->key_builder());
+ auto* item_builder = checked_cast<BinaryBuilder*>(map_builder->item_builder());
+ RETURN_NOT_OK(map_builder->Append());
+ if (value) {
+ RETURN_NOT_OK(key_builder->AppendValues(value->keys()));
+ RETURN_NOT_OK(item_builder->AppendValues(value->values()));
+ }
+ std::shared_ptr<Array> arr;
+ RETURN_NOT_OK(map_builder->Finish(&arr));
+ return arr->GetScalar(0);
+}
+
+template <typename T>
+static inline Result<std::shared_ptr<Scalar>> GenericToScalar(
+ const std::vector<T>& value) {
+ std::shared_ptr<DataType> type = GenericTypeSingleton<T>();
+ std::vector<std::shared_ptr<Scalar>> scalars;
+ scalars.reserve(value.size());
+ // Don't use range-for with auto& to avoid Clang -Wrange-loop-analysis
+ for (auto it = value.begin(); it != value.end(); it++) {
+ ARROW_ASSIGN_OR_RAISE(auto scalar, GenericToScalar(*it));
+ scalars.push_back(std::move(scalar));
+ }
+ std::unique_ptr<ArrayBuilder> builder;
+ RETURN_NOT_OK(
+ MakeBuilder(default_memory_pool(), type ? type : scalars[0]->type, &builder));
+ RETURN_NOT_OK(builder->AppendScalars(scalars));
+ std::shared_ptr<Array> out;
+ RETURN_NOT_OK(builder->Finish(&out));
+ return std::make_shared<ListScalar>(std::move(out));
+}
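+// --- Editor's sketch (not part of the upstream commit): vectors serialize to
+// ListScalars, e.g.
+//
+//   std::vector<int64_t> values{1, 2, 3};
+//   ARROW_ASSIGN_OR_RAISE(auto scalar, GenericToScalar(values));
+//   // `scalar` is a ListScalar wrapping an int64 array of length 3
+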
+
+static inline Result<std::shared_ptr<Scalar>> GenericToScalar(
+ const std::shared_ptr<DataType>& value) {
+ if (!value) {
+ return Status::Invalid("shared_ptr<DataType> is nullptr");
+ }
+ return MakeNullScalar(value);
+}
+
+static inline Result<std::shared_ptr<Scalar>> GenericToScalar(
+ const std::shared_ptr<Scalar>& value) {
+ return value;
+}
+
+static inline Result<std::shared_ptr<Scalar>> GenericToScalar(
+ const std::shared_ptr<Array>& value) {
+ return std::make_shared<ListScalar>(value);
+}
+
+static inline Result<std::shared_ptr<Scalar>> GenericToScalar(const Datum& value) {
+ // TODO(ARROW-9434): store in a union instead.
+ switch (value.kind()) {
+ case Datum::ARRAY:
+ return GenericToScalar(value.make_array());
+ default:
+ return Status::NotImplemented("Cannot serialize Datum kind ", value.kind());
+ }
+}
+
+template <typename T>
+static inline enable_if_primitive_ctype<typename CTypeTraits<T>::ArrowType, Result<T>>
+GenericFromScalar(const std::shared_ptr<Scalar>& value) {
+ using ArrowType = typename CTypeTraits<T>::ArrowType;
+ using ScalarType = typename TypeTraits<ArrowType>::ScalarType;
+ if (value->type->id() != ArrowType::type_id) {
+ return Status::Invalid("Expected type ", ArrowType::type_id, " but got ",
+ value->type->ToString());
+ }
+ const auto& holder = checked_cast<const ScalarType&>(*value);
+ if (!holder.is_valid) return Status::Invalid("Got null scalar");
+ return holder.value;
+}
+
+template <typename T>
+static inline enable_if_primitive_ctype<typename EnumTraits<T>::Type, Result<T>>
+GenericFromScalar(const std::shared_ptr<Scalar>& value) {
+ ARROW_ASSIGN_OR_RAISE(auto raw_val,
+ GenericFromScalar<typename EnumTraits<T>::CType>(value));
+ return ValidateEnumValue<T>(raw_val);
+}
+
+template <typename T, typename U>
+using enable_if_same_result = enable_if_same<T, U, Result<T>>;
+
+template <typename T>
+static inline enable_if_same_result<T, std::string> GenericFromScalar(
+ const std::shared_ptr<Scalar>& value) {
+ if (!is_base_binary_like(value->type->id())) {
+ return Status::Invalid("Expected binary-like type but got ", value->type->ToString());
+ }
+ const auto& holder = checked_cast<const BaseBinaryScalar&>(*value);
+ if (!holder.is_valid) return Status::Invalid("Got null scalar");
+ return holder.value->ToString();
+}
+
+template <typename T>
+static inline enable_if_same_result<T, SortKey> GenericFromScalar(
+ const std::shared_ptr<Scalar>& value) {
+ if (value->type->id() != Type::STRUCT) {
+ return Status::Invalid("Expected type STRUCT but got ", value->type->id());
+ }
+ if (!value->is_valid) return Status::Invalid("Got null scalar");
+ const auto& holder = checked_cast<const StructScalar&>(*value);
+ ARROW_ASSIGN_OR_RAISE(auto name_holder, holder.field("name"));
+ ARROW_ASSIGN_OR_RAISE(auto order_holder, holder.field("order"));
+ ARROW_ASSIGN_OR_RAISE(auto name, GenericFromScalar<std::string>(name_holder));
+ ARROW_ASSIGN_OR_RAISE(auto order, GenericFromScalar<SortOrder>(order_holder));
+ return SortKey{std::move(name), order};
+}
+
+template <typename T>
+static inline enable_if_same_result<T, std::shared_ptr<DataType>> GenericFromScalar(
+ const std::shared_ptr<Scalar>& value) {
+ return value->type;
+}
+
+template <typename T>
+static inline enable_if_same_result<T, std::shared_ptr<Scalar>> GenericFromScalar(
+ const std::shared_ptr<Scalar>& value) {
+ return value;
+}
+
+template <typename T>
+static inline enable_if_same_result<T, std::shared_ptr<const KeyValueMetadata>>
+GenericFromScalar(const std::shared_ptr<Scalar>& value) {
+ auto ty = GenericTypeSingleton<std::shared_ptr<const KeyValueMetadata>>();
+ if (!value->type->Equals(ty)) {
+ return Status::Invalid("Expected ", ty->ToString(), " but got ",
+ value->type->ToString());
+ }
+ const auto& holder = checked_cast<const MapScalar&>(*value);
+ std::vector<std::string> keys;
+ std::vector<std::string> values;
+ const auto& list = checked_cast<const StructArray&>(*holder.value);
+ const auto& key_arr = checked_cast<const BinaryArray&>(*list.field(0));
+ const auto& value_arr = checked_cast<const BinaryArray&>(*list.field(1));
+ for (int64_t i = 0; i < list.length(); i++) {
+ keys.push_back(key_arr.GetString(i));
+ values.push_back(value_arr.GetString(i));
+ }
+ return key_value_metadata(std::move(keys), std::move(values));
+}
+
+template <typename T>
+static inline enable_if_same_result<T, Datum> GenericFromScalar(
+ const std::shared_ptr<Scalar>& value) {
+ if (value->type->id() == Type::LIST) {
+ const auto& holder = checked_cast<const BaseListScalar&>(*value);
+ return holder.value;
+ }
+ // TODO(ARROW-9434): handle other possible datum kinds by looking for a union
+ return Status::Invalid("Cannot deserialize Datum from ", value->ToString());
+}
+
+template <typename T>
+static enable_if_same<typename CTypeTraits<T>::ArrowType, ListType, Result<T>>
+GenericFromScalar(const std::shared_ptr<Scalar>& value) {
+ using ValueType = typename T::value_type;
+ if (value->type->id() != Type::LIST) {
+ return Status::Invalid("Expected type LIST but got ", value->type->ToString());
+ }
+ const auto& holder = checked_cast<const BaseListScalar&>(*value);
+ if (!holder.is_valid) return Status::Invalid("Got null scalar");
+ std::vector<ValueType> result;
+  for (int64_t i = 0; i < holder.value->length(); i++) {
+ ARROW_ASSIGN_OR_RAISE(auto scalar, holder.value->GetScalar(i));
+ ARROW_ASSIGN_OR_RAISE(auto v, GenericFromScalar<ValueType>(scalar));
+ result.push_back(std::move(v));
+ }
+ return result;
+}
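+// --- Editor's sketch (not part of the upstream commit): GenericToScalar and
+// GenericFromScalar compose into a round trip for supported types, e.g.
+//
+//   std::vector<std::string> names{"a", "b"};
+//   ARROW_ASSIGN_OR_RAISE(auto scalar, GenericToScalar(names));
+//   ARROW_ASSIGN_OR_RAISE(auto back,
+//                         GenericFromScalar<std::vector<std::string>>(scalar));
+//   // back == names
+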
+
+template <typename Options>
+struct StringifyImpl {
+ template <typename Tuple>
+ StringifyImpl(const Options& obj, const Tuple& props)
+ : obj_(obj), members_(props.size()) {
+ props.ForEach(*this);
+ }
+
+ template <typename Property>
+ void operator()(const Property& prop, size_t i) {
+ std::stringstream ss;
+ ss << prop.name() << '=' << GenericToString(prop.get(obj_));
+ members_[i] = ss.str();
+ }
+
+ std::string Finish() {
+ return "{" + arrow::internal::JoinStrings(members_, ", ") + "}";
+ }
+
+ const Options& obj_;
+ std::vector<std::string> members_;
+};
+
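To make the output shape concrete (the property names here are hypothetical, chosen for illustration):

// Illustration only: for an options type with reflected members named
// "skip_nulls" and "min_count", StringifyImpl::Finish() yields a string of
// the form:
//   {skip_nulls=true, min_count=1}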
+template <typename Options>
+struct CompareImpl {
+ template <typename Tuple>
+ CompareImpl(const Options& l, const Options& r, const Tuple& props)
+ : left_(l), right_(r) {
+ props.ForEach(*this);
+ }
+
+ template <typename Property>
+ void operator()(const Property& prop, size_t) {
+ equal_ &= GenericEquals(prop.get(left_), prop.get(right_));
+ }
+
+ const Options& left_;
+ const Options& right_;
+ bool equal_ = true;
+};
+
+template <typename Options>
+struct ToStructScalarImpl {
+ template <typename Tuple>
+ ToStructScalarImpl(const Options& obj, const Tuple& props,
+ std::vector<std::string>* field_names,
+ std::vector<std::shared_ptr<Scalar>>* values)
+ : obj_(obj), field_names_(field_names), values_(values) {
+ props.ForEach(*this);
+ }
+
+ template <typename Property>
+ void operator()(const Property& prop, size_t) {
+ if (!status_.ok()) return;
+ auto result = GenericToScalar(prop.get(obj_));
+ if (!result.ok()) {
+ status_ = result.status().WithMessage("Could not serialize field ", prop.name(),
+ " of options type ", Options::kTypeName, ": ",
+ result.status().message());
+ return;
+ }
+ field_names_->emplace_back(prop.name());
+ values_->push_back(result.MoveValueUnsafe());
+ }
+
+ const Options& obj_;
+ Status status_;
+ std::vector<std::string>* field_names_;
+ std::vector<std::shared_ptr<Scalar>>* values_;
+};
+
+template <typename Options>
+struct FromStructScalarImpl {
+ template <typename Tuple>
+ FromStructScalarImpl(Options* obj, const StructScalar& scalar, const Tuple& props)
+ : obj_(obj), scalar_(scalar) {
+ props.ForEach(*this);
+ }
+
+ template <typename Property>
+ void operator()(const Property& prop, size_t) {
+ if (!status_.ok()) return;
+ auto maybe_holder = scalar_.field(std::string(prop.name()));
+ if (!maybe_holder.ok()) {
+ status_ = maybe_holder.status().WithMessage(
+ "Cannot deserialize field ", prop.name(), " of options type ",
+ Options::kTypeName, ": ", maybe_holder.status().message());
+ return;
+ }
+ auto holder = maybe_holder.MoveValueUnsafe();
+ auto result = GenericFromScalar<typename Property::Type>(holder);
+ if (!result.ok()) {
+ status_ = result.status().WithMessage("Cannot deserialize field ", prop.name(),
+ " of options type ", Options::kTypeName, ": ",
+ result.status().message());
+ return;
+ }
+ prop.set(obj_, result.MoveValueUnsafe());
+ }
+
+ Options* obj_;
+ Status status_;
+ const StructScalar& scalar_;
+};
+
+template <typename Options, typename... Properties>
+const FunctionOptionsType* GetFunctionOptionsType(const Properties&... properties) {
+ static const class OptionsType : public GenericOptionsType {
+ public:
+ explicit OptionsType(const arrow::internal::PropertyTuple<Properties...> properties)
+ : properties_(properties) {}
+
+ const char* type_name() const override { return Options::kTypeName; }
+
+ std::string Stringify(const FunctionOptions& options) const override {
+ const auto& self = checked_cast<const Options&>(options);
+ return StringifyImpl<Options>(self, properties_).Finish();
+ }
+ bool Compare(const FunctionOptions& options,
+ const FunctionOptions& other) const override {
+ const auto& lhs = checked_cast<const Options&>(options);
+ const auto& rhs = checked_cast<const Options&>(other);
+ return CompareImpl<Options>(lhs, rhs, properties_).equal_;
+ }
+ Status ToStructScalar(const FunctionOptions& options,
+ std::vector<std::string>* field_names,
+ std::vector<std::shared_ptr<Scalar>>* values) const override {
+ const auto& self = checked_cast<const Options&>(options);
+ RETURN_NOT_OK(
+ ToStructScalarImpl<Options>(self, properties_, field_names, values).status_);
+ return Status::OK();
+ }
+ Result<std::unique_ptr<FunctionOptions>> FromStructScalar(
+ const StructScalar& scalar) const override {
+ auto options = std::unique_ptr<Options>(new Options());
+ RETURN_NOT_OK(
+ FromStructScalarImpl<Options>(options.get(), scalar, properties_).status_);
+ return std::move(options);
+ }
+
+ private:
+ const arrow::internal::PropertyTuple<Properties...> properties_;
+ } instance(arrow::internal::MakeProperties(properties...));
+ return &instance;
+}
+
+} // namespace internal
+} // namespace compute
+} // namespace arrow
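Putting the pieces together, a hypothetical options type could hook into this machinery roughly as follows; MyOptions, its members, and the DataMember spelling are assumptions inferred from the prop.name()/get()/set() contract used above, not a verbatim Arrow example:

// Hedged sketch: reflecting a hypothetical options type.
struct MyOptions : public FunctionOptions {
  MyOptions() : FunctionOptions(GetMyOptionsType()) {}
  static constexpr char const kTypeName[] = "MyOptions";

  bool skip_nulls = true;
  int64_t min_count = 1;

  static const FunctionOptionsType* GetMyOptionsType() {
    // DataMember is assumed to build a Property binding a name to a member,
    // satisfying the interface the Stringify/Compare/To-/FromStructScalar
    // impls above rely on.
    return GetFunctionOptionsType<MyOptions>(
        arrow::internal::DataMember("skip_nulls", &MyOptions::skip_nulls),
        arrow::internal::DataMember("min_count", &MyOptions::min_count));
  }
};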
diff --git a/contrib/libs/apache/arrow/cpp/src/arrow/compute/kernel.cc b/contrib/libs/apache/arrow/cpp/src/arrow/compute/kernel.cc
index c730cbd131a..f131f524d2e 100644
--- a/contrib/libs/apache/arrow/cpp/src/arrow/compute/kernel.cc
+++ b/contrib/libs/apache/arrow/cpp/src/arrow/compute/kernel.cc
@@ -59,25 +59,25 @@ Result<std::shared_ptr<ResizableBuffer>> KernelContext::AllocateBitmap(int64_t n
return result;
}
-Status Kernel::InitAll(KernelContext* ctx, const KernelInitArgs& args,
- std::vector<std::unique_ptr<KernelState>>* states) {
- for (auto& state : *states) {
- ARROW_ASSIGN_OR_RAISE(state, args.kernel->init(ctx, args));
+Status Kernel::InitAll(KernelContext* ctx, const KernelInitArgs& args,
+ std::vector<std::unique_ptr<KernelState>>* states) {
+ for (auto& state : *states) {
+ ARROW_ASSIGN_OR_RAISE(state, args.kernel->init(ctx, args));
}
- return Status::OK();
+ return Status::OK();
}
-Result<std::unique_ptr<KernelState>> ScalarAggregateKernel::MergeAll(
- const ScalarAggregateKernel* kernel, KernelContext* ctx,
- std::vector<std::unique_ptr<KernelState>> states) {
- auto out = std::move(states.back());
- states.pop_back();
- ctx->SetState(out.get());
- for (auto& state : states) {
- RETURN_NOT_OK(kernel->merge(ctx, std::move(*state), out.get()));
- }
- return std::move(out);
-}
+Result<std::unique_ptr<KernelState>> ScalarAggregateKernel::MergeAll(
+ const ScalarAggregateKernel* kernel, KernelContext* ctx,
+ std::vector<std::unique_ptr<KernelState>> states) {
+ auto out = std::move(states.back());
+ states.pop_back();
+ ctx->SetState(out.get());
+ for (auto& state : states) {
+ RETURN_NOT_OK(kernel->merge(ctx, std::move(*state), out.get()));
+ }
+ return std::move(out);
+}
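A hedged sketch of the call pattern these two helpers support (thread dispatch elided):

// Sketch only: one KernelState per thread, merged into a single result.
Status RunScalarAggregate(const ScalarAggregateKernel* kernel, KernelContext* ctx,
                          const KernelInitArgs& init_args, size_t num_threads,
                          Datum* out) {
  std::vector<std::unique_ptr<KernelState>> states(num_threads);
  RETURN_NOT_OK(Kernel::InitAll(ctx, init_args, &states));
  // ... each thread would call kernel->consume(...) against its own state ...
  ARROW_ASSIGN_OR_RAISE(auto merged,
                        ScalarAggregateKernel::MergeAll(kernel, ctx, std::move(states)));
  // MergeAll has already installed the merged state on ctx; `merged` keeps it
  // alive while finalize reads it back out.
  return kernel->finalize(ctx, out);
}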
// ----------------------------------------------------------------------
// Some basic TypeMatcher implementations
@@ -402,7 +402,7 @@ KernelSignature::KernelSignature(std::vector<InputType> in_types, OutputType out
out_type_(std::move(out_type)),
is_varargs_(is_varargs),
hash_code_(0) {
- DCHECK(!is_varargs || (is_varargs && (in_types_.size() >= 1)));
+ DCHECK(!is_varargs || (is_varargs && (in_types_.size() >= 1)));
}
std::shared_ptr<KernelSignature> KernelSignature::Make(std::vector<InputType> in_types,
@@ -429,8 +429,8 @@ bool KernelSignature::Equals(const KernelSignature& other) const {
bool KernelSignature::MatchesInputs(const std::vector<ValueDescr>& args) const {
if (is_varargs_) {
- for (size_t i = 0; i < args.size(); ++i) {
- if (!in_types_[std::min(i, in_types_.size() - 1)].Matches(args[i])) {
+ for (size_t i = 0; i < args.size(); ++i) {
+ if (!in_types_[std::min(i, in_types_.size() - 1)].Matches(args[i])) {
return false;
}
}
@@ -463,19 +463,19 @@ std::string KernelSignature::ToString() const {
std::stringstream ss;
if (is_varargs_) {
- ss << "varargs[";
+ ss << "varargs[";
} else {
ss << "(";
- }
- for (size_t i = 0; i < in_types_.size(); ++i) {
- if (i > 0) {
- ss << ", ";
+ }
+ for (size_t i = 0; i < in_types_.size(); ++i) {
+ if (i > 0) {
+ ss << ", ";
}
- ss << in_types_[i].ToString();
- }
- if (is_varargs_) {
- ss << "]";
- } else {
+ ss << in_types_[i].ToString();
+ }
+ if (is_varargs_) {
+ ss << "]";
+ } else {
ss << ")";
}
ss << " -> " << out_type_.ToString();
diff --git a/contrib/libs/apache/arrow/cpp/src/arrow/compute/kernel.h b/contrib/libs/apache/arrow/cpp/src/arrow/compute/kernel.h
index c90c764f5ec..36d20c7289e 100644
--- a/contrib/libs/apache/arrow/cpp/src/arrow/compute/kernel.h
+++ b/contrib/libs/apache/arrow/cpp/src/arrow/compute/kernel.h
@@ -41,7 +41,7 @@
namespace arrow {
namespace compute {
-class FunctionOptions;
+class FunctionOptions;
/// \brief Base class for opaque kernel-specific state. For example, if there
/// is some kind of initialization required.
@@ -52,7 +52,7 @@ struct ARROW_EXPORT KernelState {
/// \brief Context/state for the execution of a particular kernel.
class ARROW_EXPORT KernelContext {
public:
- explicit KernelContext(ExecContext* exec_ctx) : exec_ctx_(exec_ctx), state_() {}
+ explicit KernelContext(ExecContext* exec_ctx) : exec_ctx_(exec_ctx), state_() {}
/// \brief Allocate buffer from the context's memory pool. The contents are
/// not initialized.
@@ -91,7 +91,7 @@ class ARROW_EXPORT KernelContext {
/// into pre-allocated memory if they are able, though for some kernels
/// (e.g. when a builder like StringBuilder must be employed) this may
/// not be possible.
-using ArrayKernelExec = std::function<Status(KernelContext*, const ExecBatch&, Datum*)>;
+using ArrayKernelExec = std::function<Status(KernelContext*, const ExecBatch&, Datum*)>;
/// \brief A type-checking interface to permit customizable validation rules
/// for use with InputType and KernelSignature. This is for scenarios where the
@@ -321,9 +321,9 @@ class ARROW_EXPORT OutputType {
this->resolver_ = other.resolver_;
}
- OutputType& operator=(const OutputType&) = default;
- OutputType& operator=(OutputType&&) = default;
-
+ OutputType& operator=(const OutputType&) = default;
+ OutputType& operator=(OutputType&&) = default;
+
/// \brief Return the shape and type of the expected output value of the
/// kernel given the value descriptors (shapes and types) of the input
/// arguments. The resolver may make use of state information kept in the
@@ -366,10 +366,10 @@ class ARROW_EXPORT OutputType {
/// \brief Holds the input types and output type of the kernel.
///
-/// VarArgs functions with a minimum of N arguments should pass up to N input types to be
-/// used to validate the input types of a function invocation. The first N-1 types
-/// will be matched against the first N-1 arguments, and the last type will be
-/// matched against the remaining arguments.
+/// VarArgs functions with a minimum of N arguments should pass up to N input types to be
+/// used to validate the input types of a function invocation. The first N-1 types
+/// will be matched against the first N-1 arguments, and the last type will be
+/// matched against the remaining arguments.
class ARROW_EXPORT KernelSignature {
public:
KernelSignature(std::vector<InputType> in_types, OutputType out_type,
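A short sketch of the varargs matching rule described in the comment above:

// Sketch: with is_varargs=true the last input type is recycled for every
// trailing argument (see KernelSignature::MatchesInputs in kernel.cc).
auto varargs_sig = KernelSignature::Make({InputType(utf8()), InputType(int64())},
                                         OutputType(int64()), /*is_varargs=*/true);
// Matches (utf8, int64), (utf8, int64, int64), and so on; per ToString() it
// renders as "varargs[utf8, int64] -> int64".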
@@ -500,8 +500,8 @@ struct KernelInitArgs {
};
/// \brief Common initializer function for all kernel types.
-using KernelInit = std::function<Result<std::unique_ptr<KernelState>>(
- KernelContext*, const KernelInitArgs&)>;
+using KernelInit = std::function<Result<std::unique_ptr<KernelState>>(
+ KernelContext*, const KernelInitArgs&)>;
/// \brief Base type for kernels. Contains the function signature and
/// optionally the state initialization function, along with some common
@@ -513,8 +513,8 @@ struct Kernel {
: signature(std::move(sig)), init(std::move(init)) {}
Kernel(std::vector<InputType> in_types, OutputType out_type, KernelInit init)
- : Kernel(KernelSignature::Make(std::move(in_types), std::move(out_type)),
- std::move(init)) {}
+ : Kernel(KernelSignature::Make(std::move(in_types), std::move(out_type)),
+ std::move(init)) {}
/// \brief The "signature" of the kernel containing the InputType input
/// argument validators and OutputType output type and shape resolver.
@@ -524,10 +524,10 @@ struct Kernel {
/// set up any options or state relevant for execution.
KernelInit init;
- /// \brief Create a vector of new KernelState for invocations of this kernel.
- static Status InitAll(KernelContext*, const KernelInitArgs&,
- std::vector<std::unique_ptr<KernelState>>*);
-
+ /// \brief Create a vector of new KernelState for invocations of this kernel.
+ static Status InitAll(KernelContext*, const KernelInitArgs&,
+ std::vector<std::unique_ptr<KernelState>>*);
+
/// \brief Indicates whether execution can benefit from parallelization
/// (splitting large chunks into smaller chunks and using multiple
/// threads). Some kernels may not support parallel execution at
@@ -547,7 +547,7 @@ struct Kernel {
/// output array values (as opposed to scalar values in the case of aggregate
/// functions).
struct ArrayKernel : public Kernel {
- ArrayKernel() = default;
+ ArrayKernel() = default;
ArrayKernel(std::shared_ptr<KernelSignature> sig, ArrayKernelExec exec,
KernelInit init = NULLPTR)
@@ -555,8 +555,8 @@ struct ArrayKernel : public Kernel {
ArrayKernel(std::vector<InputType> in_types, OutputType out_type, ArrayKernelExec exec,
KernelInit init = NULLPTR)
- : Kernel(std::move(in_types), std::move(out_type), std::move(init)),
- exec(std::move(exec)) {}
+ : Kernel(std::move(in_types), std::move(out_type), std::move(init)),
+ exec(std::move(exec)) {}
/// \brief Perform a single invocation of this kernel. Depending on the
/// implementation, it may only write into preallocated memory, while in some
@@ -588,7 +588,7 @@ struct ScalarKernel : public ArrayKernel {
// VectorKernel (for VectorFunction)
/// \brief See VectorKernel::finalize member for usage
-using VectorFinalize = std::function<Status(KernelContext*, std::vector<Datum>*)>;
+using VectorFinalize = std::function<Status(KernelContext*, std::vector<Datum>*)>;
/// \brief Kernel data structure for implementations of VectorFunction. In
/// addition to the members found in ArrayKernel, contains an optional
@@ -596,10 +596,10 @@ using VectorFinalize = std::function<Status(KernelContext*, std::vector<Datum>*)
/// (which have different defaults from ScalarKernel), and some other
/// execution-related options.
struct VectorKernel : public ArrayKernel {
- VectorKernel() = default;
+ VectorKernel() = default;
VectorKernel(std::shared_ptr<KernelSignature> sig, ArrayKernelExec exec)
- : ArrayKernel(std::move(sig), std::move(exec)) {}
+ : ArrayKernel(std::move(sig), std::move(exec)) {}
VectorKernel(std::vector<InputType> in_types, OutputType out_type, ArrayKernelExec exec,
KernelInit init = NULLPTR, VectorFinalize finalize = NULLPTR)
@@ -643,13 +643,13 @@ struct VectorKernel : public ArrayKernel {
// ----------------------------------------------------------------------
// ScalarAggregateKernel (for ScalarAggregateFunction)
-using ScalarAggregateConsume = std::function<Status(KernelContext*, const ExecBatch&)>;
+using ScalarAggregateConsume = std::function<Status(KernelContext*, const ExecBatch&)>;
using ScalarAggregateMerge =
- std::function<Status(KernelContext*, KernelState&&, KernelState*)>;
+ std::function<Status(KernelContext*, KernelState&&, KernelState*)>;
// Finalize returns Datum to permit multiple return values
-using ScalarAggregateFinalize = std::function<Status(KernelContext*, Datum*)>;
+using ScalarAggregateFinalize = std::function<Status(KernelContext*, Datum*)>;
/// \brief Kernel data structure for implementations of
/// ScalarAggregateFunction. The four necessary components of an aggregation
@@ -662,12 +662,12 @@ using ScalarAggregateFinalize = std::function<Status(KernelContext*, Datum*)>;
/// * finalize: produces the end result of the aggregation using the
/// KernelState in the KernelContext.
struct ScalarAggregateKernel : public Kernel {
- ScalarAggregateKernel() = default;
+ ScalarAggregateKernel() = default;
ScalarAggregateKernel(std::shared_ptr<KernelSignature> sig, KernelInit init,
ScalarAggregateConsume consume, ScalarAggregateMerge merge,
ScalarAggregateFinalize finalize)
- : Kernel(std::move(sig), std::move(init)),
+ : Kernel(std::move(sig), std::move(init)),
consume(std::move(consume)),
merge(std::move(merge)),
finalize(std::move(finalize)) {}
@@ -675,65 +675,65 @@ struct ScalarAggregateKernel : public Kernel {
ScalarAggregateKernel(std::vector<InputType> in_types, OutputType out_type,
KernelInit init, ScalarAggregateConsume consume,
ScalarAggregateMerge merge, ScalarAggregateFinalize finalize)
- : ScalarAggregateKernel(
- KernelSignature::Make(std::move(in_types), std::move(out_type)),
- std::move(init), std::move(consume), std::move(merge), std::move(finalize)) {}
-
- /// \brief Merge a vector of KernelStates into a single KernelState.
- /// The merged state will be returned and will be set on the KernelContext.
- static Result<std::unique_ptr<KernelState>> MergeAll(
- const ScalarAggregateKernel* kernel, KernelContext* ctx,
- std::vector<std::unique_ptr<KernelState>> states);
-
+ : ScalarAggregateKernel(
+ KernelSignature::Make(std::move(in_types), std::move(out_type)),
+ std::move(init), std::move(consume), std::move(merge), std::move(finalize)) {}
+
+ /// \brief Merge a vector of KernelStates into a single KernelState.
+ /// The merged state will be returned and will be set on the KernelContext.
+ static Result<std::unique_ptr<KernelState>> MergeAll(
+ const ScalarAggregateKernel* kernel, KernelContext* ctx,
+ std::vector<std::unique_ptr<KernelState>> states);
+
ScalarAggregateConsume consume;
ScalarAggregateMerge merge;
ScalarAggregateFinalize finalize;
};
-// ----------------------------------------------------------------------
-// HashAggregateKernel (for HashAggregateFunction)
-
-using HashAggregateConsume = std::function<Status(KernelContext*, const ExecBatch&)>;
-
-using HashAggregateMerge =
- std::function<Status(KernelContext*, KernelState&&, KernelState*)>;
-
-// Finalize returns Datum to permit multiple return values
-using HashAggregateFinalize = std::function<Status(KernelContext*, Datum*)>;
-
-/// \brief Kernel data structure for implementations of
-/// HashAggregateFunction. The four necessary components of an aggregation
-/// kernel are the init, consume, merge, and finalize functions.
-///
-/// * init: creates a new KernelState for a kernel.
-/// * consume: processes an ExecBatch (which includes the argument as well
-/// as an array of group identifiers) and updates the KernelState found in the
-/// KernelContext.
-/// * merge: combines one KernelState with another.
-/// * finalize: produces the end result of the aggregation using the
-/// KernelState in the KernelContext.
-struct HashAggregateKernel : public Kernel {
- HashAggregateKernel() = default;
-
- HashAggregateKernel(std::shared_ptr<KernelSignature> sig, KernelInit init,
- HashAggregateConsume consume, HashAggregateMerge merge,
- HashAggregateFinalize finalize)
- : Kernel(std::move(sig), std::move(init)),
- consume(std::move(consume)),
- merge(std::move(merge)),
- finalize(std::move(finalize)) {}
-
- HashAggregateKernel(std::vector<InputType> in_types, OutputType out_type,
- KernelInit init, HashAggregateMerge merge,
- HashAggregateConsume consume, HashAggregateFinalize finalize)
- : HashAggregateKernel(
- KernelSignature::Make(std::move(in_types), std::move(out_type)),
- std::move(init), std::move(consume), std::move(merge), std::move(finalize)) {}
-
- HashAggregateConsume consume;
- HashAggregateMerge merge;
- HashAggregateFinalize finalize;
-};
-
+// ----------------------------------------------------------------------
+// HashAggregateKernel (for HashAggregateFunction)
+
+using HashAggregateConsume = std::function<Status(KernelContext*, const ExecBatch&)>;
+
+using HashAggregateMerge =
+ std::function<Status(KernelContext*, KernelState&&, KernelState*)>;
+
+// Finalize returns Datum to permit multiple return values
+using HashAggregateFinalize = std::function<Status(KernelContext*, Datum*)>;
+
+/// \brief Kernel data structure for implementations of
+/// HashAggregateFunction. The four necessary components of an aggregation
+/// kernel are the init, consume, merge, and finalize functions.
+///
+/// * init: creates a new KernelState for a kernel.
+/// * consume: processes an ExecBatch (which includes the argument as well
+/// as an array of group identifiers) and updates the KernelState found in the
+/// KernelContext.
+/// * merge: combines one KernelState with another.
+/// * finalize: produces the end result of the aggregation using the
+/// KernelState in the KernelContext.
+struct HashAggregateKernel : public Kernel {
+ HashAggregateKernel() = default;
+
+ HashAggregateKernel(std::shared_ptr<KernelSignature> sig, KernelInit init,
+ HashAggregateConsume consume, HashAggregateMerge merge,
+ HashAggregateFinalize finalize)
+ : Kernel(std::move(sig), std::move(init)),
+ consume(std::move(consume)),
+ merge(std::move(merge)),
+ finalize(std::move(finalize)) {}
+
+ HashAggregateKernel(std::vector<InputType> in_types, OutputType out_type,
+ KernelInit init, HashAggregateMerge merge,
+ HashAggregateConsume consume, HashAggregateFinalize finalize)
+ : HashAggregateKernel(
+ KernelSignature::Make(std::move(in_types), std::move(out_type)),
+ std::move(init), std::move(consume), std::move(merge), std::move(finalize)) {}
+
+ HashAggregateConsume consume;
+ HashAggregateMerge merge;
+ HashAggregateFinalize finalize;
+};
+
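For orientation, a stub wiring of the four callbacks; the lambdas do no real grouping, and the uint32 group-id column is an assumption about the ExecBatch layout:

// Hedged sketch: assembling a HashAggregateKernel from stub callbacks.
HashAggregateKernel MakeStubHashAggregateKernel() {
  auto sig = KernelSignature::Make({InputType(int64()), InputType(uint32())},
                                   OutputType(int64()));
  KernelInit init = [](KernelContext*, const KernelInitArgs&)
      -> Result<std::unique_ptr<KernelState>> {
    return ::arrow::internal::make_unique<KernelState>();
  };
  HashAggregateConsume consume = [](KernelContext*, const ExecBatch&) {
    // A real kernel would update per-group accumulators keyed by group id.
    return Status::OK();
  };
  HashAggregateMerge merge = [](KernelContext*, KernelState&&, KernelState*) {
    return Status::OK();
  };
  HashAggregateFinalize finalize = [](KernelContext*, Datum*) { return Status::OK(); };
  return HashAggregateKernel(std::move(sig), std::move(init), std::move(consume),
                             std::move(merge), std::move(finalize));
}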
} // namespace compute
} // namespace arrow
diff --git a/contrib/libs/apache/arrow/cpp/src/arrow/compute/kernels/aggregate_basic.cc b/contrib/libs/apache/arrow/cpp/src/arrow/compute/kernels/aggregate_basic.cc
index 0c9636eae09..a7df66695b2 100644
--- a/contrib/libs/apache/arrow/cpp/src/arrow/compute/kernels/aggregate_basic.cc
+++ b/contrib/libs/apache/arrow/cpp/src/arrow/compute/kernels/aggregate_basic.cc
@@ -25,375 +25,375 @@
namespace arrow {
namespace compute {
-namespace {
-
-Status AggregateConsume(KernelContext* ctx, const ExecBatch& batch) {
- return checked_cast<ScalarAggregator*>(ctx->state())->Consume(ctx, batch);
+namespace {
+
+Status AggregateConsume(KernelContext* ctx, const ExecBatch& batch) {
+ return checked_cast<ScalarAggregator*>(ctx->state())->Consume(ctx, batch);
+}
+
+Status AggregateMerge(KernelContext* ctx, KernelState&& src, KernelState* dst) {
+ return checked_cast<ScalarAggregator*>(dst)->MergeFrom(ctx, std::move(src));
}
-Status AggregateMerge(KernelContext* ctx, KernelState&& src, KernelState* dst) {
- return checked_cast<ScalarAggregator*>(dst)->MergeFrom(ctx, std::move(src));
+Status AggregateFinalize(KernelContext* ctx, Datum* out) {
+ return checked_cast<ScalarAggregator*>(ctx->state())->Finalize(ctx, out);
}
-Status AggregateFinalize(KernelContext* ctx, Datum* out) {
- return checked_cast<ScalarAggregator*>(ctx->state())->Finalize(ctx, out);
+} // namespace
+
+void AddAggKernel(std::shared_ptr<KernelSignature> sig, KernelInit init,
+ ScalarAggregateFunction* func, SimdLevel::type simd_level) {
+ ScalarAggregateKernel kernel(std::move(sig), init, AggregateConsume, AggregateMerge,
+ AggregateFinalize);
+ // Set the simd level
+ kernel.simd_level = simd_level;
+ DCHECK_OK(func->AddKernel(kernel));
}
-} // namespace
-
-void AddAggKernel(std::shared_ptr<KernelSignature> sig, KernelInit init,
- ScalarAggregateFunction* func, SimdLevel::type simd_level) {
- ScalarAggregateKernel kernel(std::move(sig), init, AggregateConsume, AggregateMerge,
- AggregateFinalize);
- // Set the simd level
- kernel.simd_level = simd_level;
- DCHECK_OK(func->AddKernel(kernel));
-}
-
-namespace aggregate {
-
+namespace aggregate {
+
// ----------------------------------------------------------------------
// Count implementation
struct CountImpl : public ScalarAggregator {
- explicit CountImpl(ScalarAggregateOptions options) : options(std::move(options)) {}
-
- Status Consume(KernelContext*, const ExecBatch& batch) override {
- if (batch[0].is_array()) {
- const ArrayData& input = *batch[0].array();
- const int64_t nulls = input.GetNullCount();
- this->nulls += nulls;
- this->non_nulls += input.length - nulls;
- } else {
- const Scalar& input = *batch[0].scalar();
- this->nulls += !input.is_valid * batch.length;
- this->non_nulls += input.is_valid * batch.length;
- }
- return Status::OK();
- }
-
- Status MergeFrom(KernelContext*, KernelState&& src) override {
+ explicit CountImpl(ScalarAggregateOptions options) : options(std::move(options)) {}
+
+ Status Consume(KernelContext*, const ExecBatch& batch) override {
+ if (batch[0].is_array()) {
+ const ArrayData& input = *batch[0].array();
+ const int64_t nulls = input.GetNullCount();
+ this->nulls += nulls;
+ this->non_nulls += input.length - nulls;
+ } else {
+ const Scalar& input = *batch[0].scalar();
+ this->nulls += !input.is_valid * batch.length;
+ this->non_nulls += input.is_valid * batch.length;
+ }
+ return Status::OK();
+ }
+
+ Status MergeFrom(KernelContext*, KernelState&& src) override {
const auto& other_state = checked_cast<const CountImpl&>(src);
this->non_nulls += other_state.non_nulls;
this->nulls += other_state.nulls;
- return Status::OK();
+ return Status::OK();
}
- Status Finalize(KernelContext* ctx, Datum* out) override {
+ Status Finalize(KernelContext* ctx, Datum* out) override {
const auto& state = checked_cast<const CountImpl&>(*ctx->state());
- if (state.options.skip_nulls) {
- *out = Datum(state.non_nulls);
- } else {
- *out = Datum(state.nulls);
+ if (state.options.skip_nulls) {
+ *out = Datum(state.non_nulls);
+ } else {
+ *out = Datum(state.nulls);
}
- return Status::OK();
+ return Status::OK();
}
- ScalarAggregateOptions options;
+ ScalarAggregateOptions options;
int64_t non_nulls = 0;
int64_t nulls = 0;
};
-Result<std::unique_ptr<KernelState>> CountInit(KernelContext*,
- const KernelInitArgs& args) {
+Result<std::unique_ptr<KernelState>> CountInit(KernelContext*,
+ const KernelInitArgs& args) {
return ::arrow::internal::make_unique<CountImpl>(
- static_cast<const ScalarAggregateOptions&>(*args.options));
+ static_cast<const ScalarAggregateOptions&>(*args.options));
}
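From the public API this init path is reached through the registered "count" function; a hedged usage sketch:

// Usage sketch (assumes <arrow/compute/api.h>):
arrow::Result<arrow::Datum> CountNonNulls(const std::shared_ptr<arrow::Array>& arr) {
  arrow::compute::ScalarAggregateOptions options(/*skip_nulls=*/true);
  // Dispatches to CountImpl above via CountInit; yields an Int64Scalar Datum.
  return arrow::compute::CallFunction("count", {arrow::Datum(arr)}, &options);
}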
// ----------------------------------------------------------------------
// Sum implementation
-template <typename ArrowType>
-struct SumImplDefault : public SumImpl<ArrowType, SimdLevel::NONE> {
- explicit SumImplDefault(const ScalarAggregateOptions& options_) {
- this->options = options_;
- }
+template <typename ArrowType>
+struct SumImplDefault : public SumImpl<ArrowType, SimdLevel::NONE> {
+ explicit SumImplDefault(const ScalarAggregateOptions& options_) {
+ this->options = options_;
+ }
};
-template <typename ArrowType>
-struct MeanImplDefault : public MeanImpl<ArrowType, SimdLevel::NONE> {
- explicit MeanImplDefault(const ScalarAggregateOptions& options_) {
- this->options = options_;
- }
+template <typename ArrowType>
+struct MeanImplDefault : public MeanImpl<ArrowType, SimdLevel::NONE> {
+ explicit MeanImplDefault(const ScalarAggregateOptions& options_) {
+ this->options = options_;
+ }
};
-Result<std::unique_ptr<KernelState>> SumInit(KernelContext* ctx,
- const KernelInitArgs& args) {
- SumLikeInit<SumImplDefault> visitor(
- ctx, *args.inputs[0].type,
- static_cast<const ScalarAggregateOptions&>(*args.options));
+Result<std::unique_ptr<KernelState>> SumInit(KernelContext* ctx,
+ const KernelInitArgs& args) {
+ SumLikeInit<SumImplDefault> visitor(
+ ctx, *args.inputs[0].type,
+ static_cast<const ScalarAggregateOptions&>(*args.options));
return visitor.Create();
}
-Result<std::unique_ptr<KernelState>> MeanInit(KernelContext* ctx,
- const KernelInitArgs& args) {
- SumLikeInit<MeanImplDefault> visitor(
- ctx, *args.inputs[0].type,
- static_cast<const ScalarAggregateOptions&>(*args.options));
+Result<std::unique_ptr<KernelState>> MeanInit(KernelContext* ctx,
+ const KernelInitArgs& args) {
+ SumLikeInit<MeanImplDefault> visitor(
+ ctx, *args.inputs[0].type,
+ static_cast<const ScalarAggregateOptions&>(*args.options));
return visitor.Create();
}
// ----------------------------------------------------------------------
// MinMax implementation
-Result<std::unique_ptr<KernelState>> MinMaxInit(KernelContext* ctx,
- const KernelInitArgs& args) {
+Result<std::unique_ptr<KernelState>> MinMaxInit(KernelContext* ctx,
+ const KernelInitArgs& args) {
MinMaxInitState<SimdLevel::NONE> visitor(
ctx, *args.inputs[0].type, args.kernel->signature->out_type().type(),
- static_cast<const ScalarAggregateOptions&>(*args.options));
+ static_cast<const ScalarAggregateOptions&>(*args.options));
return visitor.Create();
}
-// ----------------------------------------------------------------------
-// Any implementation
-
-struct BooleanAnyImpl : public ScalarAggregator {
- explicit BooleanAnyImpl(ScalarAggregateOptions options) : options(std::move(options)) {}
-
- Status Consume(KernelContext*, const ExecBatch& batch) override {
-    // short-circuit if a true value has already been seen
- if (this->any == true) {
- return Status::OK();
- }
- if (batch[0].is_scalar()) {
- const auto& scalar = *batch[0].scalar();
- this->has_nulls = !scalar.is_valid;
- this->any = scalar.is_valid && checked_cast<const BooleanScalar&>(scalar).value;
- return Status::OK();
- }
- const auto& data = *batch[0].array();
- this->has_nulls = data.GetNullCount() > 0;
- arrow::internal::OptionalBinaryBitBlockCounter counter(
- data.buffers[0], data.offset, data.buffers[1], data.offset, data.length);
- int64_t position = 0;
- while (position < data.length) {
- const auto block = counter.NextAndBlock();
- if (block.popcount > 0) {
- this->any = true;
- break;
- }
- position += block.length;
- }
- return Status::OK();
- }
-
- Status MergeFrom(KernelContext*, KernelState&& src) override {
- const auto& other = checked_cast<const BooleanAnyImpl&>(src);
- this->any |= other.any;
- this->has_nulls |= other.has_nulls;
- return Status::OK();
- }
-
- Status Finalize(KernelContext* ctx, Datum* out) override {
- if (!options.skip_nulls && !this->any && this->has_nulls) {
- out->value = std::make_shared<BooleanScalar>();
- } else {
- out->value = std::make_shared<BooleanScalar>(this->any);
- }
- return Status::OK();
- }
-
- bool any = false;
- bool has_nulls = false;
- ScalarAggregateOptions options;
-};
-
-Result<std::unique_ptr<KernelState>> AnyInit(KernelContext*, const KernelInitArgs& args) {
- const ScalarAggregateOptions options =
- static_cast<const ScalarAggregateOptions&>(*args.options);
- return ::arrow::internal::make_unique<BooleanAnyImpl>(
- static_cast<const ScalarAggregateOptions&>(*args.options));
+// ----------------------------------------------------------------------
+// Any implementation
+
+struct BooleanAnyImpl : public ScalarAggregator {
+ explicit BooleanAnyImpl(ScalarAggregateOptions options) : options(std::move(options)) {}
+
+ Status Consume(KernelContext*, const ExecBatch& batch) override {
+    // short-circuit if a true value has already been seen
+ if (this->any == true) {
+ return Status::OK();
+ }
+ if (batch[0].is_scalar()) {
+ const auto& scalar = *batch[0].scalar();
+ this->has_nulls = !scalar.is_valid;
+ this->any = scalar.is_valid && checked_cast<const BooleanScalar&>(scalar).value;
+ return Status::OK();
+ }
+ const auto& data = *batch[0].array();
+ this->has_nulls = data.GetNullCount() > 0;
+ arrow::internal::OptionalBinaryBitBlockCounter counter(
+ data.buffers[0], data.offset, data.buffers[1], data.offset, data.length);
+ int64_t position = 0;
+ while (position < data.length) {
+ const auto block = counter.NextAndBlock();
+ if (block.popcount > 0) {
+ this->any = true;
+ break;
+ }
+ position += block.length;
+ }
+ return Status::OK();
+ }
+
+ Status MergeFrom(KernelContext*, KernelState&& src) override {
+ const auto& other = checked_cast<const BooleanAnyImpl&>(src);
+ this->any |= other.any;
+ this->has_nulls |= other.has_nulls;
+ return Status::OK();
+ }
+
+ Status Finalize(KernelContext* ctx, Datum* out) override {
+ if (!options.skip_nulls && !this->any && this->has_nulls) {
+ out->value = std::make_shared<BooleanScalar>();
+ } else {
+ out->value = std::make_shared<BooleanScalar>(this->any);
+ }
+ return Status::OK();
+ }
+
+ bool any = false;
+ bool has_nulls = false;
+ ScalarAggregateOptions options;
+};
+
+Result<std::unique_ptr<KernelState>> AnyInit(KernelContext*, const KernelInitArgs& args) {
+ const ScalarAggregateOptions options =
+ static_cast<const ScalarAggregateOptions&>(*args.options);
+ return ::arrow::internal::make_unique<BooleanAnyImpl>(
+ static_cast<const ScalarAggregateOptions&>(*args.options));
+}
+
+// ----------------------------------------------------------------------
+// All implementation
+
+struct BooleanAllImpl : public ScalarAggregator {
+ explicit BooleanAllImpl(ScalarAggregateOptions options) : options(std::move(options)) {}
+
+ Status Consume(KernelContext*, const ExecBatch& batch) override {
+    // short-circuit if a false value has already been seen
+ if (this->all == false) {
+ return Status::OK();
+ }
+    // short-circuit if a null has already been seen
+ if (!options.skip_nulls && this->has_nulls) {
+ return Status::OK();
+ }
+ if (batch[0].is_scalar()) {
+ const auto& scalar = *batch[0].scalar();
+ this->has_nulls = !scalar.is_valid;
+ this->all = !scalar.is_valid || checked_cast<const BooleanScalar&>(scalar).value;
+ return Status::OK();
+ }
+ const auto& data = *batch[0].array();
+ this->has_nulls = data.GetNullCount() > 0;
+ arrow::internal::OptionalBinaryBitBlockCounter counter(
+ data.buffers[1], data.offset, data.buffers[0], data.offset, data.length);
+ int64_t position = 0;
+ while (position < data.length) {
+ const auto block = counter.NextOrNotBlock();
+ if (!block.AllSet()) {
+ this->all = false;
+ break;
+ }
+ position += block.length;
+ }
+
+ return Status::OK();
+ }
+
+ Status MergeFrom(KernelContext*, KernelState&& src) override {
+ const auto& other = checked_cast<const BooleanAllImpl&>(src);
+ this->all &= other.all;
+ this->has_nulls |= other.has_nulls;
+ return Status::OK();
+ }
+
+ Status Finalize(KernelContext*, Datum* out) override {
+ if (!options.skip_nulls && this->all && this->has_nulls) {
+ out->value = std::make_shared<BooleanScalar>();
+ } else {
+ out->value = std::make_shared<BooleanScalar>(this->all);
+ }
+ return Status::OK();
+ }
+
+ bool all = true;
+ bool has_nulls = false;
+ ScalarAggregateOptions options;
+};
+
+Result<std::unique_ptr<KernelState>> AllInit(KernelContext*, const KernelInitArgs& args) {
+ return ::arrow::internal::make_unique<BooleanAllImpl>(
+ static_cast<const ScalarAggregateOptions&>(*args.options));
}
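A hedged sketch of the Kleene behaviour both kernels implement when skip_nulls is false:

// Usage sketch (assumes <arrow/compute/api.h>):
arrow::Result<arrow::Datum> KleeneAny(const std::shared_ptr<arrow::Array>& bools) {
  arrow::compute::ScalarAggregateOptions opts(/*skip_nulls=*/false);
  // Under these options: any([false, null]) -> null and any([true, null]) -> true,
  // mirroring BooleanAnyImpl::Finalize above; "all" behaves dually.
  return arrow::compute::CallFunction("any", {arrow::Datum(bools)}, &opts);
}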
-// ----------------------------------------------------------------------
-// All implementation
-
-struct BooleanAllImpl : public ScalarAggregator {
- explicit BooleanAllImpl(ScalarAggregateOptions options) : options(std::move(options)) {}
-
- Status Consume(KernelContext*, const ExecBatch& batch) override {
-    // short-circuit if a false value has already been seen
- if (this->all == false) {
- return Status::OK();
- }
-    // short-circuit if a null has already been seen
- if (!options.skip_nulls && this->has_nulls) {
- return Status::OK();
- }
- if (batch[0].is_scalar()) {
- const auto& scalar = *batch[0].scalar();
- this->has_nulls = !scalar.is_valid;
- this->all = !scalar.is_valid || checked_cast<const BooleanScalar&>(scalar).value;
- return Status::OK();
- }
- const auto& data = *batch[0].array();
- this->has_nulls = data.GetNullCount() > 0;
- arrow::internal::OptionalBinaryBitBlockCounter counter(
- data.buffers[1], data.offset, data.buffers[0], data.offset, data.length);
- int64_t position = 0;
- while (position < data.length) {
- const auto block = counter.NextOrNotBlock();
- if (!block.AllSet()) {
- this->all = false;
- break;
- }
- position += block.length;
- }
-
- return Status::OK();
- }
-
- Status MergeFrom(KernelContext*, KernelState&& src) override {
- const auto& other = checked_cast<const BooleanAllImpl&>(src);
- this->all &= other.all;
- this->has_nulls |= other.has_nulls;
- return Status::OK();
- }
-
- Status Finalize(KernelContext*, Datum* out) override {
- if (!options.skip_nulls && this->all && this->has_nulls) {
- out->value = std::make_shared<BooleanScalar>();
- } else {
- out->value = std::make_shared<BooleanScalar>(this->all);
- }
- return Status::OK();
- }
-
- bool all = true;
- bool has_nulls = false;
- ScalarAggregateOptions options;
-};
-
-Result<std::unique_ptr<KernelState>> AllInit(KernelContext*, const KernelInitArgs& args) {
- return ::arrow::internal::make_unique<BooleanAllImpl>(
- static_cast<const ScalarAggregateOptions&>(*args.options));
-}
-
-// ----------------------------------------------------------------------
-// Index implementation
-
-template <typename ArgType>
-struct IndexImpl : public ScalarAggregator {
- using ArgValue = typename internal::GetViewType<ArgType>::T;
-
- explicit IndexImpl(IndexOptions options, KernelState* raw_state)
- : options(std::move(options)), seen(0), index(-1) {
- if (auto state = static_cast<IndexImpl<ArgType>*>(raw_state)) {
- seen = state->seen;
- index = state->index;
- }
- }
-
- Status Consume(KernelContext* ctx, const ExecBatch& batch) override {
- // short-circuit
- if (index >= 0 || !options.value->is_valid) {
- return Status::OK();
- }
-
- auto input = batch[0].array();
- seen = input->length;
- const ArgValue desired = internal::UnboxScalar<ArgType>::Unbox(*options.value);
- int64_t i = 0;
-
- ARROW_UNUSED(internal::VisitArrayValuesInline<ArgType>(
- *input,
- [&](ArgValue v) -> Status {
- if (v == desired) {
- index = i;
- return Status::Cancelled("Found");
- } else {
- ++i;
- return Status::OK();
- }
- },
- [&]() -> Status {
- ++i;
- return Status::OK();
- }));
-
- return Status::OK();
- }
-
- Status MergeFrom(KernelContext*, KernelState&& src) override {
- const auto& other = checked_cast<const IndexImpl&>(src);
- if (index < 0 && other.index >= 0) {
- index = seen + other.index;
- }
- seen += other.seen;
- return Status::OK();
- }
-
- Status Finalize(KernelContext*, Datum* out) override {
- out->value = std::make_shared<Int64Scalar>(index >= 0 ? index : -1);
- return Status::OK();
- }
-
- const IndexOptions options;
- int64_t seen = 0;
- int64_t index = -1;
-};
-
-struct IndexInit {
- std::unique_ptr<KernelState> state;
- KernelContext* ctx;
- const IndexOptions& options;
- const DataType& type;
-
- IndexInit(KernelContext* ctx, const IndexOptions& options, const DataType& type)
- : ctx(ctx), options(options), type(type) {}
-
- Status Visit(const DataType& type) {
- return Status::NotImplemented("Index kernel not implemented for ", type.ToString());
- }
-
- Status Visit(const BooleanType&) {
- state.reset(new IndexImpl<BooleanType>(options, ctx->state()));
- return Status::OK();
- }
-
- template <typename Type>
- enable_if_number<Type, Status> Visit(const Type&) {
- state.reset(new IndexImpl<Type>(options, ctx->state()));
- return Status::OK();
- }
-
- template <typename Type>
- enable_if_base_binary<Type, Status> Visit(const Type&) {
- state.reset(new IndexImpl<Type>(options, ctx->state()));
- return Status::OK();
- }
-
- template <typename Type>
- enable_if_date<Type, Status> Visit(const Type&) {
- state.reset(new IndexImpl<Type>(options, ctx->state()));
- return Status::OK();
- }
-
- template <typename Type>
- enable_if_time<Type, Status> Visit(const Type&) {
- state.reset(new IndexImpl<Type>(options, ctx->state()));
- return Status::OK();
- }
-
- template <typename Type>
- enable_if_timestamp<Type, Status> Visit(const Type&) {
- state.reset(new IndexImpl<Type>(options, ctx->state()));
- return Status::OK();
- }
-
- Result<std::unique_ptr<KernelState>> Create() {
- RETURN_NOT_OK(VisitTypeInline(type, this));
- return std::move(state);
- }
-
- static Result<std::unique_ptr<KernelState>> Init(KernelContext* ctx,
- const KernelInitArgs& args) {
- IndexInit visitor(ctx, static_cast<const IndexOptions&>(*args.options),
- *args.inputs[0].type);
- return visitor.Create();
- }
-};
-
+// ----------------------------------------------------------------------
+// Index implementation
+
+template <typename ArgType>
+struct IndexImpl : public ScalarAggregator {
+ using ArgValue = typename internal::GetViewType<ArgType>::T;
+
+ explicit IndexImpl(IndexOptions options, KernelState* raw_state)
+ : options(std::move(options)), seen(0), index(-1) {
+ if (auto state = static_cast<IndexImpl<ArgType>*>(raw_state)) {
+ seen = state->seen;
+ index = state->index;
+ }
+ }
+
+ Status Consume(KernelContext* ctx, const ExecBatch& batch) override {
+ // short-circuit
+ if (index >= 0 || !options.value->is_valid) {
+ return Status::OK();
+ }
+
+ auto input = batch[0].array();
+ seen = input->length;
+ const ArgValue desired = internal::UnboxScalar<ArgType>::Unbox(*options.value);
+ int64_t i = 0;
+
+ ARROW_UNUSED(internal::VisitArrayValuesInline<ArgType>(
+ *input,
+ [&](ArgValue v) -> Status {
+ if (v == desired) {
+ index = i;
+ return Status::Cancelled("Found");
+ } else {
+ ++i;
+ return Status::OK();
+ }
+ },
+ [&]() -> Status {
+ ++i;
+ return Status::OK();
+ }));
+
+ return Status::OK();
+ }
+
+ Status MergeFrom(KernelContext*, KernelState&& src) override {
+ const auto& other = checked_cast<const IndexImpl&>(src);
+ if (index < 0 && other.index >= 0) {
+ index = seen + other.index;
+ }
+ seen += other.seen;
+ return Status::OK();
+ }
+
+ Status Finalize(KernelContext*, Datum* out) override {
+ out->value = std::make_shared<Int64Scalar>(index >= 0 ? index : -1);
+ return Status::OK();
+ }
+
+ const IndexOptions options;
+ int64_t seen = 0;
+ int64_t index = -1;
+};
+
+struct IndexInit {
+ std::unique_ptr<KernelState> state;
+ KernelContext* ctx;
+ const IndexOptions& options;
+ const DataType& type;
+
+ IndexInit(KernelContext* ctx, const IndexOptions& options, const DataType& type)
+ : ctx(ctx), options(options), type(type) {}
+
+ Status Visit(const DataType& type) {
+ return Status::NotImplemented("Index kernel not implemented for ", type.ToString());
+ }
+
+ Status Visit(const BooleanType&) {
+ state.reset(new IndexImpl<BooleanType>(options, ctx->state()));
+ return Status::OK();
+ }
+
+ template <typename Type>
+ enable_if_number<Type, Status> Visit(const Type&) {
+ state.reset(new IndexImpl<Type>(options, ctx->state()));
+ return Status::OK();
+ }
+
+ template <typename Type>
+ enable_if_base_binary<Type, Status> Visit(const Type&) {
+ state.reset(new IndexImpl<Type>(options, ctx->state()));
+ return Status::OK();
+ }
+
+ template <typename Type>
+ enable_if_date<Type, Status> Visit(const Type&) {
+ state.reset(new IndexImpl<Type>(options, ctx->state()));
+ return Status::OK();
+ }
+
+ template <typename Type>
+ enable_if_time<Type, Status> Visit(const Type&) {
+ state.reset(new IndexImpl<Type>(options, ctx->state()));
+ return Status::OK();
+ }
+
+ template <typename Type>
+ enable_if_timestamp<Type, Status> Visit(const Type&) {
+ state.reset(new IndexImpl<Type>(options, ctx->state()));
+ return Status::OK();
+ }
+
+ Result<std::unique_ptr<KernelState>> Create() {
+ RETURN_NOT_OK(VisitTypeInline(type, this));
+ return std::move(state);
+ }
+
+ static Result<std::unique_ptr<KernelState>> Init(KernelContext* ctx,
+ const KernelInitArgs& args) {
+ IndexInit visitor(ctx, static_cast<const IndexOptions&>(*args.options),
+ *args.inputs[0].type);
+ return visitor.Create();
+ }
+};
+
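A hedged usage sketch for the "index" kernel that IndexInit wires up:

// Usage sketch (assumes <arrow/compute/api.h>): first occurrence, or -1.
arrow::Result<arrow::Datum> FirstIndexOf(const std::shared_ptr<arrow::Array>& arr,
                                         const std::shared_ptr<arrow::Scalar>& value) {
  arrow::compute::IndexOptions options(value);
  // Returns an Int64Scalar, per IndexImpl::Finalize above.
  return arrow::compute::CallFunction("index", {arrow::Datum(arr)}, &options);
}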
void AddBasicAggKernels(KernelInit init,
const std::vector<std::shared_ptr<DataType>>& types,
std::shared_ptr<DataType> out_ty, ScalarAggregateFunction* func,
@@ -405,33 +405,33 @@ void AddBasicAggKernels(KernelInit init,
}
}
-void AddScalarAggKernels(KernelInit init,
- const std::vector<std::shared_ptr<DataType>>& types,
- std::shared_ptr<DataType> out_ty,
- ScalarAggregateFunction* func) {
- for (const auto& ty : types) {
- // scalar[InT] -> scalar[OutT]
- auto sig = KernelSignature::Make({InputType::Scalar(ty)}, ValueDescr::Scalar(out_ty));
- AddAggKernel(std::move(sig), init, func, SimdLevel::NONE);
- }
-}
-
-void AddArrayScalarAggKernels(KernelInit init,
- const std::vector<std::shared_ptr<DataType>>& types,
- std::shared_ptr<DataType> out_ty,
- ScalarAggregateFunction* func,
- SimdLevel::type simd_level = SimdLevel::NONE) {
- AddBasicAggKernels(init, types, out_ty, func, simd_level);
- AddScalarAggKernels(init, types, out_ty, func);
-}
-
+void AddScalarAggKernels(KernelInit init,
+ const std::vector<std::shared_ptr<DataType>>& types,
+ std::shared_ptr<DataType> out_ty,
+ ScalarAggregateFunction* func) {
+ for (const auto& ty : types) {
+ // scalar[InT] -> scalar[OutT]
+ auto sig = KernelSignature::Make({InputType::Scalar(ty)}, ValueDescr::Scalar(out_ty));
+ AddAggKernel(std::move(sig), init, func, SimdLevel::NONE);
+ }
+}
+
+void AddArrayScalarAggKernels(KernelInit init,
+ const std::vector<std::shared_ptr<DataType>>& types,
+ std::shared_ptr<DataType> out_ty,
+ ScalarAggregateFunction* func,
+ SimdLevel::type simd_level = SimdLevel::NONE) {
+ AddBasicAggKernels(init, types, out_ty, func, simd_level);
+ AddScalarAggKernels(init, types, out_ty, func);
+}
+
void AddMinMaxKernels(KernelInit init,
const std::vector<std::shared_ptr<DataType>>& types,
ScalarAggregateFunction* func, SimdLevel::type simd_level) {
for (const auto& ty : types) {
- // any[T] -> scalar[struct<min: T, max: T>]
+ // any[T] -> scalar[struct<min: T, max: T>]
auto out_ty = struct_({field("min", ty), field("max", ty)});
- auto sig = KernelSignature::Make({InputType(ty)}, ValueDescr::Scalar(out_ty));
+ auto sig = KernelSignature::Make({InputType(ty)}, ValueDescr::Scalar(out_ty));
AddAggKernel(std::move(sig), init, func, simd_level);
}
}
@@ -439,92 +439,92 @@ void AddMinMaxKernels(KernelInit init,
} // namespace aggregate
namespace internal {
-namespace {
-
-const FunctionDoc count_doc{"Count the number of null / non-null values",
- ("By default, only non-null values are counted.\n"
- "This can be changed through ScalarAggregateOptions."),
- {"array"},
- "ScalarAggregateOptions"};
-
-const FunctionDoc sum_doc{
- "Compute the sum of a numeric array",
- ("Null values are ignored by default. Minimum count of non-null\n"
- "values can be set and null is returned if too few are present.\n"
- "This can be changed through ScalarAggregateOptions."),
- {"array"},
- "ScalarAggregateOptions"};
-
-const FunctionDoc mean_doc{
- "Compute the mean of a numeric array",
- ("Null values are ignored by default. Minimum count of non-null\n"
- "values can be set and null is returned if too few are "
- "present.\nThis can be changed through ScalarAggregateOptions.\n"
- "The result is always computed as a double, regardless of the input types."),
- {"array"},
- "ScalarAggregateOptions"};
-
-const FunctionDoc min_max_doc{"Compute the minimum and maximum values of a numeric array",
- ("Null values are ignored by default.\n"
- "This can be changed through ScalarAggregateOptions."),
- {"array"},
- "ScalarAggregateOptions"};
-
-const FunctionDoc any_doc{"Test whether any element in a boolean array evaluates to true",
- ("Null values are ignored by default.\n"
- "If null values are taken into account by setting "
- "ScalarAggregateOptions parameter skip_nulls = false then "
- "Kleene logic is used.\n"
- "See KleeneOr for more details on Kleene logic."),
- {"array"},
- "ScalarAggregateOptions"};
-
-const FunctionDoc all_doc{"Test whether all elements in a boolean array evaluate to true",
- ("Null values are ignored by default.\n"
- "If null values are taken into account by setting "
- "ScalarAggregateOptions parameter skip_nulls = false then "
- "Kleene logic is used.\n"
- "See KleeneAnd for more details on Kleene logic."),
- {"array"},
- "ScalarAggregateOptions"};
-
-const FunctionDoc index_doc{"Find the index of the first occurrence of a given value",
- ("The result is always computed as an int64_t, regardless\n"
- "of the offset type of the input array."),
- {"array"},
- "IndexOptions"};
-
-} // namespace
-
+namespace {
+
+const FunctionDoc count_doc{"Count the number of null / non-null values",
+ ("By default, only non-null values are counted.\n"
+ "This can be changed through ScalarAggregateOptions."),
+ {"array"},
+ "ScalarAggregateOptions"};
+
+const FunctionDoc sum_doc{
+ "Compute the sum of a numeric array",
+ ("Null values are ignored by default. Minimum count of non-null\n"
+ "values can be set and null is returned if too few are present.\n"
+ "This can be changed through ScalarAggregateOptions."),
+ {"array"},
+ "ScalarAggregateOptions"};
+
+const FunctionDoc mean_doc{
+ "Compute the mean of a numeric array",
+ ("Null values are ignored by default. Minimum count of non-null\n"
+ "values can be set and null is returned if too few are "
+ "present.\nThis can be changed through ScalarAggregateOptions.\n"
+ "The result is always computed as a double, regardless of the input types."),
+ {"array"},
+ "ScalarAggregateOptions"};
+
+const FunctionDoc min_max_doc{"Compute the minimum and maximum values of a numeric array",
+ ("Null values are ignored by default.\n"
+ "This can be changed through ScalarAggregateOptions."),
+ {"array"},
+ "ScalarAggregateOptions"};
+
+const FunctionDoc any_doc{"Test whether any element in a boolean array evaluates to true",
+ ("Null values are ignored by default.\n"
+ "If null values are taken into account by setting "
+ "ScalarAggregateOptions parameter skip_nulls = false then "
+ "Kleene logic is used.\n"
+ "See KleeneOr for more details on Kleene logic."),
+ {"array"},
+ "ScalarAggregateOptions"};
+
+const FunctionDoc all_doc{"Test whether all elements in a boolean array evaluate to true",
+ ("Null values are ignored by default.\n"
+ "If null values are taken into account by setting "
+ "ScalarAggregateOptions parameter skip_nulls = false then "
+ "Kleene logic is used.\n"
+ "See KleeneAnd for more details on Kleene logic."),
+ {"array"},
+ "ScalarAggregateOptions"};
+
+const FunctionDoc index_doc{"Find the index of the first occurrence of a given value",
+ ("The result is always computed as an int64_t, regardless\n"
+ "of the offset type of the input array."),
+ {"array"},
+ "IndexOptions"};
+
+} // namespace
+
void RegisterScalarAggregateBasic(FunctionRegistry* registry) {
- static auto default_scalar_aggregate_options = ScalarAggregateOptions::Defaults();
+ static auto default_scalar_aggregate_options = ScalarAggregateOptions::Defaults();
+
+ auto func = std::make_shared<ScalarAggregateFunction>(
+ "count", Arity::Unary(), &count_doc, &default_scalar_aggregate_options);
- auto func = std::make_shared<ScalarAggregateFunction>(
- "count", Arity::Unary(), &count_doc, &default_scalar_aggregate_options);
-
// Takes any array input, outputs int64 scalar
InputType any_array(ValueDescr::ARRAY);
- AddAggKernel(KernelSignature::Make({any_array}, ValueDescr::Scalar(int64())),
- aggregate::CountInit, func.get());
- AddAggKernel(
- KernelSignature::Make({InputType(ValueDescr::SCALAR)}, ValueDescr::Scalar(int64())),
- aggregate::CountInit, func.get());
+ AddAggKernel(KernelSignature::Make({any_array}, ValueDescr::Scalar(int64())),
+ aggregate::CountInit, func.get());
+ AddAggKernel(
+ KernelSignature::Make({InputType(ValueDescr::SCALAR)}, ValueDescr::Scalar(int64())),
+ aggregate::CountInit, func.get());
DCHECK_OK(registry->AddFunction(std::move(func)));
- func = std::make_shared<ScalarAggregateFunction>("sum", Arity::Unary(), &sum_doc,
- &default_scalar_aggregate_options);
- aggregate::AddArrayScalarAggKernels(aggregate::SumInit, {boolean()}, int64(),
- func.get());
- aggregate::AddArrayScalarAggKernels(aggregate::SumInit, SignedIntTypes(), int64(),
- func.get());
- aggregate::AddArrayScalarAggKernels(aggregate::SumInit, UnsignedIntTypes(), uint64(),
- func.get());
- aggregate::AddArrayScalarAggKernels(aggregate::SumInit, FloatingPointTypes(), float64(),
- func.get());
+ func = std::make_shared<ScalarAggregateFunction>("sum", Arity::Unary(), &sum_doc,
+ &default_scalar_aggregate_options);
+ aggregate::AddArrayScalarAggKernels(aggregate::SumInit, {boolean()}, int64(),
+ func.get());
+ aggregate::AddArrayScalarAggKernels(aggregate::SumInit, SignedIntTypes(), int64(),
+ func.get());
+ aggregate::AddArrayScalarAggKernels(aggregate::SumInit, UnsignedIntTypes(), uint64(),
+ func.get());
+ aggregate::AddArrayScalarAggKernels(aggregate::SumInit, FloatingPointTypes(), float64(),
+ func.get());
// Add the SIMD variants for sum
-#if defined(ARROW_HAVE_RUNTIME_AVX2) || defined(ARROW_HAVE_RUNTIME_AVX512)
+#if defined(ARROW_HAVE_RUNTIME_AVX2) || defined(ARROW_HAVE_RUNTIME_AVX512)
auto cpu_info = arrow::internal::CpuInfo::GetInstance();
-#endif
+#endif
#if defined(ARROW_HAVE_RUNTIME_AVX2)
if (cpu_info->IsSupported(arrow::internal::CpuInfo::AVX2)) {
aggregate::AddSumAvx2AggKernels(func.get());
@@ -537,12 +537,12 @@ void RegisterScalarAggregateBasic(FunctionRegistry* registry) {
#endif
DCHECK_OK(registry->AddFunction(std::move(func)));
- func = std::make_shared<ScalarAggregateFunction>("mean", Arity::Unary(), &mean_doc,
- &default_scalar_aggregate_options);
- aggregate::AddArrayScalarAggKernels(aggregate::MeanInit, {boolean()}, float64(),
- func.get());
- aggregate::AddArrayScalarAggKernels(aggregate::MeanInit, NumericTypes(), float64(),
- func.get());
+ func = std::make_shared<ScalarAggregateFunction>("mean", Arity::Unary(), &mean_doc,
+ &default_scalar_aggregate_options);
+ aggregate::AddArrayScalarAggKernels(aggregate::MeanInit, {boolean()}, float64(),
+ func.get());
+ aggregate::AddArrayScalarAggKernels(aggregate::MeanInit, NumericTypes(), float64(),
+ func.get());
// Add the SIMD variants for mean
#if defined(ARROW_HAVE_RUNTIME_AVX2)
if (cpu_info->IsSupported(arrow::internal::CpuInfo::AVX2)) {
@@ -556,8 +556,8 @@ void RegisterScalarAggregateBasic(FunctionRegistry* registry) {
#endif
DCHECK_OK(registry->AddFunction(std::move(func)));
- func = std::make_shared<ScalarAggregateFunction>(
- "min_max", Arity::Unary(), &min_max_doc, &default_scalar_aggregate_options);
+ func = std::make_shared<ScalarAggregateFunction>(
+ "min_max", Arity::Unary(), &min_max_doc, &default_scalar_aggregate_options);
aggregate::AddMinMaxKernels(aggregate::MinMaxInit, {boolean()}, func.get());
aggregate::AddMinMaxKernels(aggregate::MinMaxInit, NumericTypes(), func.get());
// Add the SIMD variants for min max
@@ -574,29 +574,29 @@ void RegisterScalarAggregateBasic(FunctionRegistry* registry) {
DCHECK_OK(registry->AddFunction(std::move(func)));
- // any
- func = std::make_shared<ScalarAggregateFunction>("any", Arity::Unary(), &any_doc,
- &default_scalar_aggregate_options);
- aggregate::AddArrayScalarAggKernels(aggregate::AnyInit, {boolean()}, boolean(),
- func.get());
- DCHECK_OK(registry->AddFunction(std::move(func)));
-
- // all
- func = std::make_shared<ScalarAggregateFunction>("all", Arity::Unary(), &all_doc,
- &default_scalar_aggregate_options);
- aggregate::AddArrayScalarAggKernels(aggregate::AllInit, {boolean()}, boolean(),
- func.get());
- DCHECK_OK(registry->AddFunction(std::move(func)));
-
- // index
- func = std::make_shared<ScalarAggregateFunction>("index", Arity::Unary(), &index_doc);
- aggregate::AddBasicAggKernels(aggregate::IndexInit::Init, BaseBinaryTypes(), int64(),
- func.get());
- aggregate::AddBasicAggKernels(aggregate::IndexInit::Init, PrimitiveTypes(), int64(),
- func.get());
- aggregate::AddBasicAggKernels(aggregate::IndexInit::Init, TemporalTypes(), int64(),
- func.get());
- DCHECK_OK(registry->AddFunction(std::move(func)));
+ // any
+ func = std::make_shared<ScalarAggregateFunction>("any", Arity::Unary(), &any_doc,
+ &default_scalar_aggregate_options);
+ aggregate::AddArrayScalarAggKernels(aggregate::AnyInit, {boolean()}, boolean(),
+ func.get());
+ DCHECK_OK(registry->AddFunction(std::move(func)));
+
+ // all
+ func = std::make_shared<ScalarAggregateFunction>("all", Arity::Unary(), &all_doc,
+ &default_scalar_aggregate_options);
+ aggregate::AddArrayScalarAggKernels(aggregate::AllInit, {boolean()}, boolean(),
+ func.get());
+ DCHECK_OK(registry->AddFunction(std::move(func)));
+
+ // index
+ func = std::make_shared<ScalarAggregateFunction>("index", Arity::Unary(), &index_doc);
+ aggregate::AddBasicAggKernels(aggregate::IndexInit::Init, BaseBinaryTypes(), int64(),
+ func.get());
+ aggregate::AddBasicAggKernels(aggregate::IndexInit::Init, PrimitiveTypes(), int64(),
+ func.get());
+ aggregate::AddBasicAggKernels(aggregate::IndexInit::Init, TemporalTypes(), int64(),
+ func.get());
+ DCHECK_OK(registry->AddFunction(std::move(func)));
}
} // namespace internal
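A hedged usage sketch against the registry populated above; "min_max" yields a StructScalar with "min" and "max" fields, matching the signature registered in AddMinMaxKernels:

// Usage sketch (assumes <arrow/compute/api.h>):
arrow::Result<arrow::Datum> MinMaxOf(const std::shared_ptr<arrow::Array>& arr) {
  arrow::compute::ScalarAggregateOptions opts(/*skip_nulls=*/true);
  return arrow::compute::CallFunction("min_max", {arrow::Datum(arr)}, &opts);
}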
diff --git a/contrib/libs/apache/arrow/cpp/src/arrow/compute/kernels/aggregate_basic_internal.h b/contrib/libs/apache/arrow/cpp/src/arrow/compute/kernels/aggregate_basic_internal.h
index 4b1ae8d3d6c..5163d3fd03d 100644
--- a/contrib/libs/apache/arrow/cpp/src/arrow/compute/kernels/aggregate_basic_internal.h
+++ b/contrib/libs/apache/arrow/cpp/src/arrow/compute/kernels/aggregate_basic_internal.h
@@ -51,68 +51,68 @@ void AddMinMaxAvx512AggKernels(ScalarAggregateFunction* func);
// ----------------------------------------------------------------------
// Sum implementation
-template <typename ArrowType, SimdLevel::type SimdLevel>
-struct SumImpl : public ScalarAggregator {
- using ThisType = SumImpl<ArrowType, SimdLevel>;
- using CType = typename ArrowType::c_type;
+template <typename ArrowType, SimdLevel::type SimdLevel>
+struct SumImpl : public ScalarAggregator {
+ using ThisType = SumImpl<ArrowType, SimdLevel>;
+ using CType = typename ArrowType::c_type;
using SumType = typename FindAccumulatorType<ArrowType>::Type;
- using OutputType = typename TypeTraits<SumType>::ScalarType;
-
- Status Consume(KernelContext*, const ExecBatch& batch) override {
- if (batch[0].is_array()) {
- const auto& data = batch[0].array();
- this->count += data->length - data->GetNullCount();
- if (is_boolean_type<ArrowType>::value) {
- this->sum +=
- static_cast<typename SumType::c_type>(BooleanArray(data).true_count());
- } else {
- this->sum +=
- arrow::compute::detail::SumArray<CType, typename SumType::c_type, SimdLevel>(
- *data);
- }
+ using OutputType = typename TypeTraits<SumType>::ScalarType;
+
+ Status Consume(KernelContext*, const ExecBatch& batch) override {
+ if (batch[0].is_array()) {
+ const auto& data = batch[0].array();
+ this->count += data->length - data->GetNullCount();
+ if (is_boolean_type<ArrowType>::value) {
+ this->sum +=
+ static_cast<typename SumType::c_type>(BooleanArray(data).true_count());
+ } else {
+ this->sum +=
+ arrow::compute::detail::SumArray<CType, typename SumType::c_type, SimdLevel>(
+ *data);
+ }
} else {
- const auto& data = *batch[0].scalar();
- this->count += data.is_valid * batch.length;
- if (data.is_valid) {
- this->sum += internal::UnboxScalar<ArrowType>::Unbox(data) * batch.length;
+ const auto& data = *batch[0].scalar();
+ this->count += data.is_valid * batch.length;
+ if (data.is_valid) {
+ this->sum += internal::UnboxScalar<ArrowType>::Unbox(data) * batch.length;
}
}
- return Status::OK();
+ return Status::OK();
}
- Status MergeFrom(KernelContext*, KernelState&& src) override {
- const auto& other = checked_cast<const ThisType&>(src);
- this->count += other.count;
- this->sum += other.sum;
- return Status::OK();
+ Status MergeFrom(KernelContext*, KernelState&& src) override {
+ const auto& other = checked_cast<const ThisType&>(src);
+ this->count += other.count;
+ this->sum += other.sum;
+ return Status::OK();
}
- Status Finalize(KernelContext*, Datum* out) override {
- if (this->count < options.min_count) {
- out->value = std::make_shared<OutputType>();
+ Status Finalize(KernelContext*, Datum* out) override {
+ if (this->count < options.min_count) {
+ out->value = std::make_shared<OutputType>();
} else {
- out->value = MakeScalar(this->sum);
+ out->value = MakeScalar(this->sum);
}
- return Status::OK();
+ return Status::OK();
}
size_t count = 0;
typename SumType::c_type sum = 0;
- ScalarAggregateOptions options;
+ ScalarAggregateOptions options;
};
-template <typename ArrowType, SimdLevel::type SimdLevel>
-struct MeanImpl : public SumImpl<ArrowType, SimdLevel> {
- Status Finalize(KernelContext*, Datum* out) override {
- if (this->count < options.min_count) {
- out->value = std::make_shared<DoubleScalar>();
+template <typename ArrowType, SimdLevel::type SimdLevel>
+struct MeanImpl : public SumImpl<ArrowType, SimdLevel> {
+ Status Finalize(KernelContext*, Datum* out) override {
+ if (this->count < options.min_count) {
+ out->value = std::make_shared<DoubleScalar>();
} else {
- const double mean = static_cast<double>(this->sum) / this->count;
- out->value = std::make_shared<DoubleScalar>(mean);
+ const double mean = static_cast<double>(this->sum) / this->count;
+ out->value = std::make_shared<DoubleScalar>(mean);
}
- return Status::OK();
+ return Status::OK();
}
- ScalarAggregateOptions options;
+ ScalarAggregateOptions options;
};
template <template <typename> class KernelClass>
@@ -120,11 +120,11 @@ struct SumLikeInit {
std::unique_ptr<KernelState> state;
KernelContext* ctx;
const DataType& type;
- const ScalarAggregateOptions& options;
+ const ScalarAggregateOptions& options;
- SumLikeInit(KernelContext* ctx, const DataType& type,
- const ScalarAggregateOptions& options)
- : ctx(ctx), type(type), options(options) {}
+ SumLikeInit(KernelContext* ctx, const DataType& type,
+ const ScalarAggregateOptions& options)
+ : ctx(ctx), type(type), options(options) {}
Status Visit(const DataType&) { return Status::NotImplemented("No sum implemented"); }
@@ -133,18 +133,18 @@ struct SumLikeInit {
}
Status Visit(const BooleanType&) {
- state.reset(new KernelClass<BooleanType>(options));
+ state.reset(new KernelClass<BooleanType>(options));
return Status::OK();
}
template <typename Type>
enable_if_number<Type, Status> Visit(const Type&) {
- state.reset(new KernelClass<Type>(options));
+ state.reset(new KernelClass<Type>(options));
return Status::OK();
}
- Result<std::unique_ptr<KernelState>> Create() {
- RETURN_NOT_OK(VisitTypeInline(type, this));
+ Result<std::unique_ptr<KernelState>> Create() {
+ RETURN_NOT_OK(VisitTypeInline(type, this));
return std::move(state);
}
};
@@ -233,42 +233,42 @@ struct MinMaxImpl : public ScalarAggregator {
using ThisType = MinMaxImpl<ArrowType, SimdLevel>;
using StateType = MinMaxState<ArrowType, SimdLevel>;
- MinMaxImpl(const std::shared_ptr<DataType>& out_type,
- const ScalarAggregateOptions& options)
+ MinMaxImpl(const std::shared_ptr<DataType>& out_type,
+ const ScalarAggregateOptions& options)
: out_type(out_type), options(options) {}
- Status Consume(KernelContext*, const ExecBatch& batch) override {
- if (batch[0].is_array()) {
- return ConsumeArray(ArrayType(batch[0].array()));
- }
- return ConsumeScalar(*batch[0].scalar());
- }
-
- Status ConsumeScalar(const Scalar& scalar) {
+ Status Consume(KernelContext*, const ExecBatch& batch) override {
+ if (batch[0].is_array()) {
+ return ConsumeArray(ArrayType(batch[0].array()));
+ }
+ return ConsumeScalar(*batch[0].scalar());
+ }
+
+ Status ConsumeScalar(const Scalar& scalar) {
+ StateType local;
+ local.has_nulls = !scalar.is_valid;
+ local.has_values = scalar.is_valid;
+
+ if (local.has_nulls && !options.skip_nulls) {
+ this->state = local;
+ return Status::OK();
+ }
+
+ local.MergeOne(internal::UnboxScalar<ArrowType>::Unbox(scalar));
+ this->state = local;
+ return Status::OK();
+ }
+
+ Status ConsumeArray(const ArrayType& arr) {
StateType local;
- local.has_nulls = !scalar.is_valid;
- local.has_values = scalar.is_valid;
-
- if (local.has_nulls && !options.skip_nulls) {
- this->state = local;
- return Status::OK();
- }
-
- local.MergeOne(internal::UnboxScalar<ArrowType>::Unbox(scalar));
- this->state = local;
- return Status::OK();
- }
-
- Status ConsumeArray(const ArrayType& arr) {
- StateType local;
-
+
const auto null_count = arr.null_count();
local.has_nulls = null_count > 0;
local.has_values = (arr.length() - null_count) > 0;
- if (local.has_nulls && !options.skip_nulls) {
+ if (local.has_nulls && !options.skip_nulls) {
this->state = local;
- return Status::OK();
+ return Status::OK();
}
if (local.has_nulls) {
@@ -279,32 +279,32 @@ struct MinMaxImpl : public ScalarAggregator {
}
}
this->state = local;
- return Status::OK();
+ return Status::OK();
}
- Status MergeFrom(KernelContext*, KernelState&& src) override {
+ Status MergeFrom(KernelContext*, KernelState&& src) override {
const auto& other = checked_cast<const ThisType&>(src);
this->state += other.state;
- return Status::OK();
+ return Status::OK();
}
- Status Finalize(KernelContext*, Datum* out) override {
+ Status Finalize(KernelContext*, Datum* out) override {
using ScalarType = typename TypeTraits<ArrowType>::ScalarType;
std::vector<std::shared_ptr<Scalar>> values;
- if (!state.has_values || (state.has_nulls && !options.skip_nulls)) {
+ if (!state.has_values || (state.has_nulls && !options.skip_nulls)) {
// (null, null)
values = {std::make_shared<ScalarType>(), std::make_shared<ScalarType>()};
} else {
values = {std::make_shared<ScalarType>(state.min),
std::make_shared<ScalarType>(state.max)};
}
- out->value = std::make_shared<StructScalar>(std::move(values), this->out_type);
- return Status::OK();
+ out->value = std::make_shared<StructScalar>(std::move(values), this->out_type);
+ return Status::OK();
}
std::shared_ptr<DataType> out_type;
- ScalarAggregateOptions options;
+ ScalarAggregateOptions options;
MinMaxState<ArrowType, SimdLevel> state;
private:
@@ -373,10 +373,10 @@ struct BooleanMinMaxImpl : public MinMaxImpl<BooleanType, SimdLevel> {
using MinMaxImpl<BooleanType, SimdLevel>::MinMaxImpl;
using MinMaxImpl<BooleanType, SimdLevel>::options;
- Status Consume(KernelContext*, const ExecBatch& batch) override {
- if (ARROW_PREDICT_FALSE(batch[0].is_scalar())) {
- return ConsumeScalar(checked_cast<const BooleanScalar&>(*batch[0].scalar()));
- }
+ Status Consume(KernelContext*, const ExecBatch& batch) override {
+ if (ARROW_PREDICT_FALSE(batch[0].is_scalar())) {
+ return ConsumeScalar(checked_cast<const BooleanScalar&>(*batch[0].scalar()));
+ }
StateType local;
ArrayType arr(batch[0].array());
@@ -386,9 +386,9 @@ struct BooleanMinMaxImpl : public MinMaxImpl<BooleanType, SimdLevel> {
local.has_nulls = null_count > 0;
local.has_values = valid_count > 0;
- if (local.has_nulls && !options.skip_nulls) {
+ if (local.has_nulls && !options.skip_nulls) {
this->state = local;
- return Status::OK();
+ return Status::OK();
}
const auto true_count = arr.true_count();
@@ -397,27 +397,27 @@ struct BooleanMinMaxImpl : public MinMaxImpl<BooleanType, SimdLevel> {
local.min = false_count == 0;
this->state = local;
- return Status::OK();
+ return Status::OK();
+ }
+
+ Status ConsumeScalar(const BooleanScalar& scalar) {
+ StateType local;
+
+ local.has_nulls = !scalar.is_valid;
+ local.has_values = scalar.is_valid;
+ if (local.has_nulls && !options.skip_nulls) {
+ this->state = local;
+ return Status::OK();
+ }
+
+ const int true_count = scalar.is_valid && scalar.value;
+ const int false_count = scalar.is_valid && !scalar.value;
+ local.max = true_count > 0;
+ local.min = false_count == 0;
+
+ this->state = local;
+ return Status::OK();
}
-
- Status ConsumeScalar(const BooleanScalar& scalar) {
- StateType local;
-
- local.has_nulls = !scalar.is_valid;
- local.has_values = scalar.is_valid;
- if (local.has_nulls && !options.skip_nulls) {
- this->state = local;
- return Status::OK();
- }
-
- const int true_count = scalar.is_valid && scalar.value;
- const int false_count = scalar.is_valid && !scalar.value;
- local.max = true_count > 0;
- local.min = false_count == 0;
-
- this->state = local;
- return Status::OK();
- }
};
template <SimdLevel::type SimdLevel>
@@ -426,11 +426,11 @@ struct MinMaxInitState {
KernelContext* ctx;
const DataType& in_type;
const std::shared_ptr<DataType>& out_type;
- const ScalarAggregateOptions& options;
+ const ScalarAggregateOptions& options;
MinMaxInitState(KernelContext* ctx, const DataType& in_type,
- const std::shared_ptr<DataType>& out_type,
- const ScalarAggregateOptions& options)
+ const std::shared_ptr<DataType>& out_type,
+ const ScalarAggregateOptions& options)
: ctx(ctx), in_type(in_type), out_type(out_type), options(options) {}
Status Visit(const DataType&) {
@@ -452,8 +452,8 @@ struct MinMaxInitState {
return Status::OK();
}
- Result<std::unique_ptr<KernelState>> Create() {
- RETURN_NOT_OK(VisitTypeInline(in_type, this));
+ Result<std::unique_ptr<KernelState>> Create() {
+ RETURN_NOT_OK(VisitTypeInline(in_type, this));
return std::move(state);
}
};
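
SumImpl and MinMaxImpl above both follow the three-phase ScalarAggregator contract: Consume folds one batch into local state, MergeFrom combines states built independently (for example on different threads), and Finalize emits a single result. A self-contained toy illustration of that lifecycle, with plain types standing in for Arrow's internal ones:

#include <cstdint>
#include <vector>

// Toy model (not Arrow's internal API) of the Consume/MergeFrom/Finalize
// lifecycle used by the aggregators in this header.
struct ToyCountAggregator {
  int64_t count = 0;

  // Consume: fold one batch into local state.
  void Consume(const std::vector<int64_t>& batch) {
    count += static_cast<int64_t>(batch.size());
  }
  // MergeFrom: absorb a state produced elsewhere (e.g. another thread).
  void MergeFrom(const ToyCountAggregator& other) { count += other.count; }
  // Finalize: emit the result exactly once.
  int64_t Finalize() const { return count; }
};
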
diff --git a/contrib/libs/apache/arrow/cpp/src/arrow/compute/kernels/aggregate_internal.h b/contrib/libs/apache/arrow/cpp/src/arrow/compute/kernels/aggregate_internal.h
index d72cdb14941..ed29f26f2c3 100644
--- a/contrib/libs/apache/arrow/cpp/src/arrow/compute/kernels/aggregate_internal.h
+++ b/contrib/libs/apache/arrow/cpp/src/arrow/compute/kernels/aggregate_internal.h
@@ -19,8 +19,8 @@
#include "arrow/type.h"
#include "arrow/type_traits.h"
-#include "arrow/util/bit_run_reader.h"
-#include "arrow/util/logging.h"
+#include "arrow/util/bit_run_reader.h"
+#include "arrow/util/logging.h"
namespace arrow {
namespace compute {
@@ -49,124 +49,124 @@ struct FindAccumulatorType<I, enable_if_floating_point<I>> {
using Type = DoubleType;
};
-struct ScalarAggregator : public KernelState {
- virtual Status Consume(KernelContext* ctx, const ExecBatch& batch) = 0;
- virtual Status MergeFrom(KernelContext* ctx, KernelState&& src) = 0;
- virtual Status Finalize(KernelContext* ctx, Datum* out) = 0;
-};
-
-void AddAggKernel(std::shared_ptr<KernelSignature> sig, KernelInit init,
- ScalarAggregateFunction* func,
- SimdLevel::type simd_level = SimdLevel::NONE);
-
-namespace detail {
-
-using arrow::internal::VisitSetBitRunsVoid;
-
-// SumArray must be parameterized with the SIMD level since it's called both from
-// translation units with and without vectorization. Normally it gets inlined but
-// if not, without the parameter, we'll have multiple definitions of the same
-// symbol and we'll get unexpected results.
-
-// non-recursive pairwise summation for floating points
-// https://en.wikipedia.org/wiki/Pairwise_summation
-template <typename ValueType, typename SumType, SimdLevel::type SimdLevel,
- typename ValueFunc>
-enable_if_t<std::is_floating_point<SumType>::value, SumType> SumArray(
- const ArrayData& data, ValueFunc&& func) {
- const int64_t data_size = data.length - data.GetNullCount();
- if (data_size == 0) {
- return 0;
- }
-
- // number of inputs to accumulate before merging with another block
- constexpr int kBlockSize = 16; // same as numpy
- // levels (tree depth) = ceil(log2(len)) + 1, a bit larger than necessary
- const int levels = BitUtil::Log2(static_cast<uint64_t>(data_size)) + 1;
- // temporary summation per level
- std::vector<SumType> sum(levels);
- // whether two summations are ready and should be reduced to upper level
- // one bit for each level, bit0 -> level0, ...
- uint64_t mask = 0;
- // level of root node holding the final summation
- int root_level = 0;
-
- // reduce summation of one block (may be smaller than kBlockSize) from leaf node
- // continue reducing to upper level if two summations are ready for non-leaf node
- auto reduce = [&](SumType block_sum) {
- int cur_level = 0;
- uint64_t cur_level_mask = 1ULL;
- sum[cur_level] += block_sum;
- mask ^= cur_level_mask;
- while ((mask & cur_level_mask) == 0) {
- block_sum = sum[cur_level];
- sum[cur_level] = 0;
- ++cur_level;
- DCHECK_LT(cur_level, levels);
- cur_level_mask <<= 1;
- sum[cur_level] += block_sum;
- mask ^= cur_level_mask;
- }
- root_level = std::max(root_level, cur_level);
- };
-
- const ValueType* values = data.GetValues<ValueType>(1);
- VisitSetBitRunsVoid(data.buffers[0], data.offset, data.length,
- [&](int64_t pos, int64_t len) {
- const ValueType* v = &values[pos];
- // unsigned division by constant is cheaper than signed one
- const uint64_t blocks = static_cast<uint64_t>(len) / kBlockSize;
- const uint64_t remains = static_cast<uint64_t>(len) % kBlockSize;
-
- for (uint64_t i = 0; i < blocks; ++i) {
- SumType block_sum = 0;
- for (int j = 0; j < kBlockSize; ++j) {
- block_sum += func(v[j]);
- }
- reduce(block_sum);
- v += kBlockSize;
- }
-
- if (remains > 0) {
- SumType block_sum = 0;
- for (uint64_t i = 0; i < remains; ++i) {
- block_sum += func(v[i]);
- }
- reduce(block_sum);
- }
- });
-
- // reduce intermediate summations from all non-leaf nodes
- for (int i = 1; i <= root_level; ++i) {
- sum[i] += sum[i - 1];
- }
-
- return sum[root_level];
-}
-
-// naive summation for integers
-template <typename ValueType, typename SumType, SimdLevel::type SimdLevel,
- typename ValueFunc>
-enable_if_t<!std::is_floating_point<SumType>::value, SumType> SumArray(
- const ArrayData& data, ValueFunc&& func) {
- SumType sum = 0;
- const ValueType* values = data.GetValues<ValueType>(1);
- VisitSetBitRunsVoid(data.buffers[0], data.offset, data.length,
- [&](int64_t pos, int64_t len) {
- for (int64_t i = 0; i < len; ++i) {
- sum += func(values[pos + i]);
- }
- });
- return sum;
-}
-
-template <typename ValueType, typename SumType, SimdLevel::type SimdLevel>
-SumType SumArray(const ArrayData& data) {
- return SumArray<ValueType, SumType, SimdLevel>(
- data, [](ValueType v) { return static_cast<SumType>(v); });
-}
-
-} // namespace detail
-
+struct ScalarAggregator : public KernelState {
+ virtual Status Consume(KernelContext* ctx, const ExecBatch& batch) = 0;
+ virtual Status MergeFrom(KernelContext* ctx, KernelState&& src) = 0;
+ virtual Status Finalize(KernelContext* ctx, Datum* out) = 0;
+};
+
+void AddAggKernel(std::shared_ptr<KernelSignature> sig, KernelInit init,
+ ScalarAggregateFunction* func,
+ SimdLevel::type simd_level = SimdLevel::NONE);
+
+namespace detail {
+
+using arrow::internal::VisitSetBitRunsVoid;
+
+// SumArray must be parameterized with the SIMD level since it's called both from
+// translation units with and without vectorization. Normally it gets inlined but
+// if not, without the parameter, we'll have multiple definitions of the same
+// symbol and we'll get unexpected results.
+
+// non-recursive pairwise summation for floating points
+// https://en.wikipedia.org/wiki/Pairwise_summation
+template <typename ValueType, typename SumType, SimdLevel::type SimdLevel,
+ typename ValueFunc>
+enable_if_t<std::is_floating_point<SumType>::value, SumType> SumArray(
+ const ArrayData& data, ValueFunc&& func) {
+ const int64_t data_size = data.length - data.GetNullCount();
+ if (data_size == 0) {
+ return 0;
+ }
+
+ // number of inputs to accumulate before merging with another block
+ constexpr int kBlockSize = 16; // same as numpy
+ // levels (tree depth) = ceil(log2(len)) + 1, a bit larger than necessary
+ const int levels = BitUtil::Log2(static_cast<uint64_t>(data_size)) + 1;
+ // temporary summation per level
+ std::vector<SumType> sum(levels);
+ // whether two summations are ready and should be reduced to upper level
+ // one bit for each level, bit0 -> level0, ...
+ uint64_t mask = 0;
+ // level of root node holding the final summation
+ int root_level = 0;
+
+ // reduce summation of one block (may be smaller than kBlockSize) from leaf node
+ // continue reducing to upper level if two summations are ready for non-leaf node
+ auto reduce = [&](SumType block_sum) {
+ int cur_level = 0;
+ uint64_t cur_level_mask = 1ULL;
+ sum[cur_level] += block_sum;
+ mask ^= cur_level_mask;
+ while ((mask & cur_level_mask) == 0) {
+ block_sum = sum[cur_level];
+ sum[cur_level] = 0;
+ ++cur_level;
+ DCHECK_LT(cur_level, levels);
+ cur_level_mask <<= 1;
+ sum[cur_level] += block_sum;
+ mask ^= cur_level_mask;
+ }
+ root_level = std::max(root_level, cur_level);
+ };
+
+ const ValueType* values = data.GetValues<ValueType>(1);
+ VisitSetBitRunsVoid(data.buffers[0], data.offset, data.length,
+ [&](int64_t pos, int64_t len) {
+ const ValueType* v = &values[pos];
+ // unsigned division by constant is cheaper than signed one
+ const uint64_t blocks = static_cast<uint64_t>(len) / kBlockSize;
+ const uint64_t remains = static_cast<uint64_t>(len) % kBlockSize;
+
+ for (uint64_t i = 0; i < blocks; ++i) {
+ SumType block_sum = 0;
+ for (int j = 0; j < kBlockSize; ++j) {
+ block_sum += func(v[j]);
+ }
+ reduce(block_sum);
+ v += kBlockSize;
+ }
+
+ if (remains > 0) {
+ SumType block_sum = 0;
+ for (uint64_t i = 0; i < remains; ++i) {
+ block_sum += func(v[i]);
+ }
+ reduce(block_sum);
+ }
+ });
+
+ // reduce intermediate summations from all non-leaf nodes
+ for (int i = 1; i <= root_level; ++i) {
+ sum[i] += sum[i - 1];
+ }
+
+ return sum[root_level];
+}
+
+// naive summation for integers
+template <typename ValueType, typename SumType, SimdLevel::type SimdLevel,
+ typename ValueFunc>
+enable_if_t<!std::is_floating_point<SumType>::value, SumType> SumArray(
+ const ArrayData& data, ValueFunc&& func) {
+ SumType sum = 0;
+ const ValueType* values = data.GetValues<ValueType>(1);
+ VisitSetBitRunsVoid(data.buffers[0], data.offset, data.length,
+ [&](int64_t pos, int64_t len) {
+ for (int64_t i = 0; i < len; ++i) {
+ sum += func(values[pos + i]);
+ }
+ });
+ return sum;
+}
+
+template <typename ValueType, typename SumType, SimdLevel::type SimdLevel>
+SumType SumArray(const ArrayData& data) {
+ return SumArray<ValueType, SumType, SimdLevel>(
+ data, [](ValueType v) { return static_cast<SumType>(v); });
+}
+
+} // namespace detail
+
} // namespace compute
} // namespace arrow
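
The floating-point SumArray above implements pairwise summation iteratively, keeping one partial sum per tree level plus a readiness bit mask. The recursive form of the same algorithm is easier to follow; a minimal sketch (the function name is ours, kBlockSize matches the kernel's leaf size):

#include <cstddef>

// Recursive sketch of pairwise summation: sum small blocks naively, then
// combine halves in a balanced tree, so floating-point rounding error
// grows as O(log n) instead of O(n). The kernel above is the iterative,
// mask-driven equivalent.
double PairwiseSum(const double* v, std::size_t n) {
  constexpr std::size_t kBlockSize = 16;  // same leaf size as the kernel
  if (n <= kBlockSize) {
    double s = 0.0;
    for (std::size_t i = 0; i < n; ++i) s += v[i];
    return s;
  }
  const std::size_t half = n / 2;
  return PairwiseSum(v, half) + PairwiseSum(v + half, n - half);
}
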
diff --git a/contrib/libs/apache/arrow/cpp/src/arrow/compute/kernels/aggregate_mode.cc b/contrib/libs/apache/arrow/cpp/src/arrow/compute/kernels/aggregate_mode.cc
index b2659355ba9..6ad0eeb6456 100644
--- a/contrib/libs/apache/arrow/cpp/src/arrow/compute/kernels/aggregate_mode.cc
+++ b/contrib/libs/apache/arrow/cpp/src/arrow/compute/kernels/aggregate_mode.cc
@@ -16,377 +16,377 @@
// under the License.
#include <cmath>
-#include <queue>
-#include <utility>
+#include <queue>
+#include <utility>
-#include "arrow/compute/api_aggregate.h"
-#include "arrow/compute/kernels/aggregate_internal.h"
-#include "arrow/compute/kernels/common.h"
-#include "arrow/compute/kernels/util_internal.h"
-#include "arrow/result.h"
-#include "arrow/stl_allocator.h"
-#include "arrow/type_traits.h"
+#include "arrow/compute/api_aggregate.h"
+#include "arrow/compute/kernels/aggregate_internal.h"
+#include "arrow/compute/kernels/common.h"
+#include "arrow/compute/kernels/util_internal.h"
+#include "arrow/result.h"
+#include "arrow/stl_allocator.h"
+#include "arrow/type_traits.h"
namespace arrow {
namespace compute {
-namespace internal {
+namespace internal {
namespace {
-using ModeState = OptionsWrapper<ModeOptions>;
+using ModeState = OptionsWrapper<ModeOptions>;
-constexpr char kModeFieldName[] = "mode";
-constexpr char kCountFieldName[] = "count";
+constexpr char kModeFieldName[] = "mode";
+constexpr char kCountFieldName[] = "count";
-constexpr uint64_t kCountEOF = ~0ULL;
+constexpr uint64_t kCountEOF = ~0ULL;
-template <typename InType, typename CType = typename InType::c_type>
-Result<std::pair<CType*, int64_t*>> PrepareOutput(int64_t n, KernelContext* ctx,
- Datum* out) {
- const auto& mode_type = TypeTraits<InType>::type_singleton();
- const auto& count_type = int64();
+template <typename InType, typename CType = typename InType::c_type>
+Result<std::pair<CType*, int64_t*>> PrepareOutput(int64_t n, KernelContext* ctx,
+ Datum* out) {
+ const auto& mode_type = TypeTraits<InType>::type_singleton();
+ const auto& count_type = int64();
- auto mode_data = ArrayData::Make(mode_type, /*length=*/n, /*null_count=*/0);
- mode_data->buffers.resize(2, nullptr);
- auto count_data = ArrayData::Make(count_type, n, 0);
- count_data->buffers.resize(2, nullptr);
+ auto mode_data = ArrayData::Make(mode_type, /*length=*/n, /*null_count=*/0);
+ mode_data->buffers.resize(2, nullptr);
+ auto count_data = ArrayData::Make(count_type, n, 0);
+ count_data->buffers.resize(2, nullptr);
- CType* mode_buffer = nullptr;
- int64_t* count_buffer = nullptr;
-
- if (n > 0) {
- ARROW_ASSIGN_OR_RAISE(mode_data->buffers[1], ctx->Allocate(n * sizeof(CType)));
- ARROW_ASSIGN_OR_RAISE(count_data->buffers[1], ctx->Allocate(n * sizeof(int64_t)));
- mode_buffer = mode_data->template GetMutableValues<CType>(1);
- count_buffer = count_data->template GetMutableValues<int64_t>(1);
+ CType* mode_buffer = nullptr;
+ int64_t* count_buffer = nullptr;
+
+ if (n > 0) {
+ ARROW_ASSIGN_OR_RAISE(mode_data->buffers[1], ctx->Allocate(n * sizeof(CType)));
+ ARROW_ASSIGN_OR_RAISE(count_data->buffers[1], ctx->Allocate(n * sizeof(int64_t)));
+ mode_buffer = mode_data->template GetMutableValues<CType>(1);
+ count_buffer = count_data->template GetMutableValues<int64_t>(1);
}
- const auto& out_type =
- struct_({field(kModeFieldName, mode_type), field(kCountFieldName, count_type)});
- *out = Datum(ArrayData::Make(out_type, n, {nullptr}, {mode_data, count_data}, 0));
-
- return std::make_pair(mode_buffer, count_buffer);
+ const auto& out_type =
+ struct_({field(kModeFieldName, mode_type), field(kCountFieldName, count_type)});
+ *out = Datum(ArrayData::Make(out_type, n, {nullptr}, {mode_data, count_data}, 0));
+
+ return std::make_pair(mode_buffer, count_buffer);
}
-// find top-n value:count pairs with a min-heap
-// suboptimal for tiny or large n, possibly okay as we're not on the hot path
-template <typename InType, typename Generator>
-Status Finalize(KernelContext* ctx, Datum* out, Generator&& gen) {
- using CType = typename InType::c_type;
-
- using ValueCountPair = std::pair<CType, uint64_t>;
- auto gt = [](const ValueCountPair& lhs, const ValueCountPair& rhs) {
- const bool rhs_is_nan = rhs.first != rhs.first; // nan as largest value
- return lhs.second > rhs.second ||
- (lhs.second == rhs.second && (lhs.first < rhs.first || rhs_is_nan));
- };
-
- std::priority_queue<ValueCountPair, std::vector<ValueCountPair>, decltype(gt)> min_heap(
- std::move(gt));
-
- const ModeOptions& options = ModeState::Get(ctx);
- while (true) {
- const ValueCountPair& value_count = gen();
- DCHECK_NE(value_count.second, 0);
- if (value_count.second == kCountEOF) break;
- if (static_cast<int64_t>(min_heap.size()) < options.n) {
- min_heap.push(value_count);
- } else if (gt(value_count, min_heap.top())) {
- min_heap.pop();
- min_heap.push(value_count);
+// find top-n value:count pairs with a min-heap
+// suboptimal for tiny or large n, possibly okay as we're not on the hot path
+template <typename InType, typename Generator>
+Status Finalize(KernelContext* ctx, Datum* out, Generator&& gen) {
+ using CType = typename InType::c_type;
+
+ using ValueCountPair = std::pair<CType, uint64_t>;
+ auto gt = [](const ValueCountPair& lhs, const ValueCountPair& rhs) {
+ const bool rhs_is_nan = rhs.first != rhs.first; // nan as largest value
+ return lhs.second > rhs.second ||
+ (lhs.second == rhs.second && (lhs.first < rhs.first || rhs_is_nan));
+ };
+
+ std::priority_queue<ValueCountPair, std::vector<ValueCountPair>, decltype(gt)> min_heap(
+ std::move(gt));
+
+ const ModeOptions& options = ModeState::Get(ctx);
+ while (true) {
+ const ValueCountPair& value_count = gen();
+ DCHECK_NE(value_count.second, 0);
+ if (value_count.second == kCountEOF) break;
+ if (static_cast<int64_t>(min_heap.size()) < options.n) {
+ min_heap.push(value_count);
+ } else if (gt(value_count, min_heap.top())) {
+ min_heap.pop();
+ min_heap.push(value_count);
}
}
- const int64_t n = min_heap.size();
-
- CType* mode_buffer;
- int64_t* count_buffer;
- ARROW_ASSIGN_OR_RAISE(std::tie(mode_buffer, count_buffer),
- PrepareOutput<InType>(n, ctx, out));
-
- for (int64_t i = n - 1; i >= 0; --i) {
- std::tie(mode_buffer[i], count_buffer[i]) = min_heap.top();
- min_heap.pop();
- }
-
- return Status::OK();
+ const int64_t n = min_heap.size();
+
+ CType* mode_buffer;
+ int64_t* count_buffer;
+ ARROW_ASSIGN_OR_RAISE(std::tie(mode_buffer, count_buffer),
+ PrepareOutput<InType>(n, ctx, out));
+
+ for (int64_t i = n - 1; i >= 0; --i) {
+ std::tie(mode_buffer[i], count_buffer[i]) = min_heap.top();
+ min_heap.pop();
+ }
+
+ return Status::OK();
}
-// count value occurrences for integers with a narrow value range
-// O(1) space, O(n) time
-template <typename T>
-struct CountModer {
- using CType = typename T::c_type;
+// count value occurrences for integers with a narrow value range
+// O(1) space, O(n) time
+template <typename T>
+struct CountModer {
+ using CType = typename T::c_type;
- CType min;
- std::vector<uint64_t> counts;
+ CType min;
+ std::vector<uint64_t> counts;
- CountModer(CType min, CType max) {
- uint32_t value_range = static_cast<uint32_t>(max - min) + 1;
- DCHECK_LT(value_range, 1 << 20);
- this->min = min;
- this->counts.resize(value_range, 0);
+ CountModer(CType min, CType max) {
+ uint32_t value_range = static_cast<uint32_t>(max - min) + 1;
+ DCHECK_LT(value_range, 1 << 20);
+ this->min = min;
+ this->counts.resize(value_range, 0);
}
- Status Exec(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
- // count values in all chunks, ignore nulls
- const Datum& datum = batch[0];
- CountValues<CType>(this->counts.data(), datum, this->min);
-
- // generator to emit next value:count pair
- int index = 0;
- auto gen = [&]() {
- for (; index < static_cast<int>(counts.size()); ++index) {
- if (counts[index] != 0) {
- auto value_count =
- std::make_pair(static_cast<CType>(index + this->min), counts[index]);
- ++index;
- return value_count;
- }
- }
- return std::pair<CType, uint64_t>(0, kCountEOF);
- };
-
- return Finalize<T>(ctx, out, std::move(gen));
- }
-};
-
-// booleans can be handled more directly
-template <>
-struct CountModer<BooleanType> {
- Status Exec(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
- int64_t counts[2]{};
-
- const Datum& datum = batch[0];
- for (const auto& array : datum.chunks()) {
- if (array->length() > array->null_count()) {
- const int64_t true_count =
- arrow::internal::checked_pointer_cast<BooleanArray>(array)->true_count();
- const int64_t false_count = array->length() - array->null_count() - true_count;
- counts[true] += true_count;
- counts[false] += false_count;
+ Status Exec(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
+ // count values in all chunks, ignore nulls
+ const Datum& datum = batch[0];
+ CountValues<CType>(this->counts.data(), datum, this->min);
+
+ // generator to emit next value:count pair
+ int index = 0;
+ auto gen = [&]() {
+ for (; index < static_cast<int>(counts.size()); ++index) {
+ if (counts[index] != 0) {
+ auto value_count =
+ std::make_pair(static_cast<CType>(index + this->min), counts[index]);
+ ++index;
+ return value_count;
+ }
+ }
+ return std::pair<CType, uint64_t>(0, kCountEOF);
+ };
+
+ return Finalize<T>(ctx, out, std::move(gen));
+ }
+};
+
+// booleans can be handled more directly
+template <>
+struct CountModer<BooleanType> {
+ Status Exec(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
+ int64_t counts[2]{};
+
+ const Datum& datum = batch[0];
+ for (const auto& array : datum.chunks()) {
+ if (array->length() > array->null_count()) {
+ const int64_t true_count =
+ arrow::internal::checked_pointer_cast<BooleanArray>(array)->true_count();
+ const int64_t false_count = array->length() - array->null_count() - true_count;
+ counts[true] += true_count;
+ counts[false] += false_count;
}
}
- const ModeOptions& options = ModeState::Get(ctx);
- const int64_t distinct_values = (counts[0] != 0) + (counts[1] != 0);
- const int64_t n = std::min(options.n, distinct_values);
-
- bool* mode_buffer;
- int64_t* count_buffer;
- ARROW_ASSIGN_OR_RAISE(std::tie(mode_buffer, count_buffer),
- PrepareOutput<BooleanType>(n, ctx, out));
-
- if (n >= 1) {
- const bool index = counts[1] > counts[0];
- mode_buffer[0] = index;
- count_buffer[0] = counts[index];
- if (n == 2) {
- mode_buffer[1] = !index;
- count_buffer[1] = counts[!index];
+ const ModeOptions& options = ModeState::Get(ctx);
+ const int64_t distinct_values = (counts[0] != 0) + (counts[1] != 0);
+ const int64_t n = std::min(options.n, distinct_values);
+
+ bool* mode_buffer;
+ int64_t* count_buffer;
+ ARROW_ASSIGN_OR_RAISE(std::tie(mode_buffer, count_buffer),
+ PrepareOutput<BooleanType>(n, ctx, out));
+
+ if (n >= 1) {
+ const bool index = counts[1] > counts[0];
+ mode_buffer[0] = index;
+ count_buffer[0] = counts[index];
+ if (n == 2) {
+ mode_buffer[1] = !index;
+ count_buffer[1] = counts[!index];
}
}
-
- return Status::OK();
+
+ return Status::OK();
}
};
-// copy-and-sort approach for floating-point values or integers with a wide value range
-// O(n) space, O(n log n) time
-template <typename T>
-struct SortModer {
- using CType = typename T::c_type;
- using Allocator = arrow::stl::allocator<CType>;
-
- Status Exec(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
- // copy all chunks to a buffer, ignore nulls and nans
- std::vector<CType, Allocator> in_buffer(Allocator(ctx->memory_pool()));
-
- uint64_t nan_count = 0;
- const Datum& datum = batch[0];
- const int64_t in_length = datum.length() - datum.null_count();
- if (in_length > 0) {
- in_buffer.resize(in_length);
- CopyNonNullValues(datum, in_buffer.data());
-
- // drop nan
- if (is_floating_type<T>::value) {
- const auto& it = std::remove_if(in_buffer.begin(), in_buffer.end(),
- [](CType v) { return v != v; });
- nan_count = in_buffer.end() - it;
- in_buffer.resize(it - in_buffer.begin());
- }
- }
-
- // sort the input data to count same values
- std::sort(in_buffer.begin(), in_buffer.end());
-
- // generator to emit next value:count pair
- auto it = in_buffer.cbegin();
- auto gen = [&]() {
- if (ARROW_PREDICT_FALSE(it == in_buffer.cend())) {
-        // handle NaN last
- if (nan_count > 0) {
- auto value_count = std::make_pair(static_cast<CType>(NAN), nan_count);
- nan_count = 0;
- return value_count;
- }
- return std::pair<CType, uint64_t>(static_cast<CType>(0), kCountEOF);
- }
- // count same values
- const CType value = *it;
- uint64_t count = 0;
- do {
- ++it;
- ++count;
- } while (it != in_buffer.cend() && *it == value);
- return std::make_pair(value, count);
- };
-
- return Finalize<T>(ctx, out, std::move(gen));
+// copy-and-sort approach for floating-point values or integers with a wide value range
+// O(n) space, O(n log n) time
+template <typename T>
+struct SortModer {
+ using CType = typename T::c_type;
+ using Allocator = arrow::stl::allocator<CType>;
+
+ Status Exec(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
+ // copy all chunks to a buffer, ignore nulls and nans
+ std::vector<CType, Allocator> in_buffer(Allocator(ctx->memory_pool()));
+
+ uint64_t nan_count = 0;
+ const Datum& datum = batch[0];
+ const int64_t in_length = datum.length() - datum.null_count();
+ if (in_length > 0) {
+ in_buffer.resize(in_length);
+ CopyNonNullValues(datum, in_buffer.data());
+
+ // drop nan
+ if (is_floating_type<T>::value) {
+ const auto& it = std::remove_if(in_buffer.begin(), in_buffer.end(),
+ [](CType v) { return v != v; });
+ nan_count = in_buffer.end() - it;
+ in_buffer.resize(it - in_buffer.begin());
+ }
+ }
+
+ // sort the input data to count same values
+ std::sort(in_buffer.begin(), in_buffer.end());
+
+ // generator to emit next value:count pair
+ auto it = in_buffer.cbegin();
+ auto gen = [&]() {
+ if (ARROW_PREDICT_FALSE(it == in_buffer.cend())) {
+        // handle NaN last
+ if (nan_count > 0) {
+ auto value_count = std::make_pair(static_cast<CType>(NAN), nan_count);
+ nan_count = 0;
+ return value_count;
+ }
+ return std::pair<CType, uint64_t>(static_cast<CType>(0), kCountEOF);
+ }
+ // count same values
+ const CType value = *it;
+ uint64_t count = 0;
+ do {
+ ++it;
+ ++count;
+ } while (it != in_buffer.cend() && *it == value);
+ return std::make_pair(value, count);
+ };
+
+ return Finalize<T>(ctx, out, std::move(gen));
}
-};
-
-// pick counting or sorting approach based on the integer value range
-template <typename T>
-struct CountOrSortModer {
- using CType = typename T::c_type;
-
- Status Exec(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
- // cross point to benefit from counting approach
- // about 2x improvement for int32/64 from micro-benchmarking
- static constexpr int kMinArraySize = 8192;
- static constexpr int kMaxValueRange = 32768;
-
- const Datum& datum = batch[0];
- if (datum.length() - datum.null_count() >= kMinArraySize) {
- CType min, max;
- std::tie(min, max) = GetMinMax<CType>(datum);
-
- if (static_cast<uint64_t>(max) - static_cast<uint64_t>(min) <= kMaxValueRange) {
- return CountModer<T>(min, max).Exec(ctx, batch, out);
- }
+};
+
+// pick counting or sorting approach based on the integer value range
+template <typename T>
+struct CountOrSortModer {
+ using CType = typename T::c_type;
+
+ Status Exec(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
+ // cross point to benefit from counting approach
+ // about 2x improvement for int32/64 from micro-benchmarking
+ static constexpr int kMinArraySize = 8192;
+ static constexpr int kMaxValueRange = 32768;
+
+ const Datum& datum = batch[0];
+ if (datum.length() - datum.null_count() >= kMinArraySize) {
+ CType min, max;
+ std::tie(min, max) = GetMinMax<CType>(datum);
+
+ if (static_cast<uint64_t>(max) - static_cast<uint64_t>(min) <= kMaxValueRange) {
+ return CountModer<T>(min, max).Exec(ctx, batch, out);
+ }
}
-
- return SortModer<T>().Exec(ctx, batch, out);
+
+ return SortModer<T>().Exec(ctx, batch, out);
}
-};
-
-template <typename InType, typename Enable = void>
-struct Moder;
-
-template <>
-struct Moder<Int8Type> {
- CountModer<Int8Type> impl;
- Moder() : impl(-128, 127) {}
};
-template <>
-struct Moder<UInt8Type> {
- CountModer<UInt8Type> impl;
- Moder() : impl(0, 255) {}
-};
-
-template <>
-struct Moder<BooleanType> {
- CountModer<BooleanType> impl;
-};
-
-template <typename InType>
-struct Moder<InType, enable_if_t<(is_integer_type<InType>::value &&
- (sizeof(typename InType::c_type) > 1))>> {
- CountOrSortModer<InType> impl;
-};
-
-template <typename InType>
-struct Moder<InType, enable_if_t<is_floating_type<InType>::value>> {
- SortModer<InType> impl;
-};
-
-template <typename T>
-Status ScalarMode(KernelContext* ctx, const Scalar& scalar, Datum* out) {
- using CType = typename T::c_type;
- if (scalar.is_valid) {
- bool called = false;
- return Finalize<T>(ctx, out, [&]() {
- if (!called) {
- called = true;
- return std::pair<CType, uint64_t>(UnboxScalar<T>::Unbox(scalar), 1);
- }
- return std::pair<CType, uint64_t>(static_cast<CType>(0), kCountEOF);
- });
+template <typename InType, typename Enable = void>
+struct Moder;
+
+template <>
+struct Moder<Int8Type> {
+ CountModer<Int8Type> impl;
+ Moder() : impl(-128, 127) {}
+};
+
+template <>
+struct Moder<UInt8Type> {
+ CountModer<UInt8Type> impl;
+ Moder() : impl(0, 255) {}
+};
+
+template <>
+struct Moder<BooleanType> {
+ CountModer<BooleanType> impl;
+};
+
+template <typename InType>
+struct Moder<InType, enable_if_t<(is_integer_type<InType>::value &&
+ (sizeof(typename InType::c_type) > 1))>> {
+ CountOrSortModer<InType> impl;
+};
+
+template <typename InType>
+struct Moder<InType, enable_if_t<is_floating_type<InType>::value>> {
+ SortModer<InType> impl;
+};
+
+template <typename T>
+Status ScalarMode(KernelContext* ctx, const Scalar& scalar, Datum* out) {
+ using CType = typename T::c_type;
+ if (scalar.is_valid) {
+ bool called = false;
+ return Finalize<T>(ctx, out, [&]() {
+ if (!called) {
+ called = true;
+ return std::pair<CType, uint64_t>(UnboxScalar<T>::Unbox(scalar), 1);
+ }
+ return std::pair<CType, uint64_t>(static_cast<CType>(0), kCountEOF);
+ });
}
- return Finalize<T>(ctx, out, []() {
- return std::pair<CType, uint64_t>(static_cast<CType>(0), kCountEOF);
- });
-}
-
-template <typename _, typename InType>
-struct ModeExecutor {
- static Status Exec(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
- if (ctx->state() == nullptr) {
- return Status::Invalid("Mode requires ModeOptions");
- }
- const ModeOptions& options = ModeState::Get(ctx);
- if (options.n <= 0) {
-      return Status::Invalid("ModeOptions::n must be strictly positive");
- }
-
- if (batch[0].is_scalar()) {
- return ScalarMode<InType>(ctx, *batch[0].scalar(), out);
- }
-
- return Moder<InType>().impl.Exec(ctx, batch, out);
+ return Finalize<T>(ctx, out, []() {
+ return std::pair<CType, uint64_t>(static_cast<CType>(0), kCountEOF);
+ });
+}
+
+template <typename _, typename InType>
+struct ModeExecutor {
+ static Status Exec(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
+ if (ctx->state() == nullptr) {
+ return Status::Invalid("Mode requires ModeOptions");
+ }
+ const ModeOptions& options = ModeState::Get(ctx);
+ if (options.n <= 0) {
+      return Status::Invalid("ModeOptions::n must be strictly positive");
+ }
+
+ if (batch[0].is_scalar()) {
+ return ScalarMode<InType>(ctx, *batch[0].scalar(), out);
+ }
+
+ return Moder<InType>().impl.Exec(ctx, batch, out);
}
};
-VectorKernel NewModeKernel(const std::shared_ptr<DataType>& in_type) {
- VectorKernel kernel;
- kernel.init = ModeState::Init;
- kernel.can_execute_chunkwise = false;
- kernel.output_chunked = false;
- auto out_type =
- struct_({field(kModeFieldName, in_type), field(kCountFieldName, int64())});
- kernel.signature =
- KernelSignature::Make({InputType(in_type)}, ValueDescr::Array(out_type));
- return kernel;
+VectorKernel NewModeKernel(const std::shared_ptr<DataType>& in_type) {
+ VectorKernel kernel;
+ kernel.init = ModeState::Init;
+ kernel.can_execute_chunkwise = false;
+ kernel.output_chunked = false;
+ auto out_type =
+ struct_({field(kModeFieldName, in_type), field(kCountFieldName, int64())});
+ kernel.signature =
+ KernelSignature::Make({InputType(in_type)}, ValueDescr::Array(out_type));
+ return kernel;
}
-void AddBooleanModeKernel(VectorFunction* func) {
- VectorKernel kernel = NewModeKernel(boolean());
- kernel.exec = ModeExecutor<StructType, BooleanType>::Exec;
- DCHECK_OK(func->AddKernel(kernel));
-}
-
-void AddNumericModeKernels(VectorFunction* func) {
- for (const auto& type : NumericTypes()) {
- VectorKernel kernel = NewModeKernel(type);
- kernel.exec = GenerateNumeric<ModeExecutor, StructType>(*type);
- DCHECK_OK(func->AddKernel(kernel));
+void AddBooleanModeKernel(VectorFunction* func) {
+ VectorKernel kernel = NewModeKernel(boolean());
+ kernel.exec = ModeExecutor<StructType, BooleanType>::Exec;
+ DCHECK_OK(func->AddKernel(kernel));
+}
+
+void AddNumericModeKernels(VectorFunction* func) {
+ for (const auto& type : NumericTypes()) {
+ VectorKernel kernel = NewModeKernel(type);
+ kernel.exec = GenerateNumeric<ModeExecutor, StructType>(*type);
+ DCHECK_OK(func->AddKernel(kernel));
}
}
-const FunctionDoc mode_doc{
- "Calculate the modal (most common) values of a numeric array",
- ("Returns top-n most common values and number of times they occur in an array.\n"
- "Result is an array of `struct<mode: T, count: int64>`, where T is the input type.\n"
- "Values with larger counts are returned before smaller counts.\n"
-     "If more than one value has the same count, the smaller one is returned first.\n"
-     "Nulls are ignored. If there are no non-null values in the array,\n"
-     "an empty array is returned."),
- {"array"},
- "ModeOptions"};
-
+const FunctionDoc mode_doc{
+ "Calculate the modal (most common) values of a numeric array",
+ ("Returns top-n most common values and number of times they occur in an array.\n"
+ "Result is an array of `struct<mode: T, count: int64>`, where T is the input type.\n"
+ "Values with larger counts are returned before smaller counts.\n"
+     "If more than one value has the same count, the smaller one is returned first.\n"
+     "Nulls are ignored. If there are no non-null values in the array,\n"
+     "an empty array is returned."),
+ {"array"},
+ "ModeOptions"};
+
} // namespace
-void RegisterScalarAggregateMode(FunctionRegistry* registry) {
- static auto default_options = ModeOptions::Defaults();
- auto func = std::make_shared<VectorFunction>("mode", Arity::Unary(), &mode_doc,
- &default_options);
- AddBooleanModeKernel(func.get());
- AddNumericModeKernels(func.get());
- DCHECK_OK(registry->AddFunction(std::move(func)));
+void RegisterScalarAggregateMode(FunctionRegistry* registry) {
+ static auto default_options = ModeOptions::Defaults();
+ auto func = std::make_shared<VectorFunction>("mode", Arity::Unary(), &mode_doc,
+ &default_options);
+ AddBooleanModeKernel(func.get());
+ AddNumericModeKernels(func.get());
+ DCHECK_OK(registry->AddFunction(std::move(func)));
}
-} // namespace internal
+} // namespace internal
} // namespace compute
} // namespace arrow
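
"mode" as registered above is a vector function whose output is a struct array with fields "mode" and "count", ordered from most to least frequent. A usage sketch, assuming this vintage of the compute API; PrintTopModes is a hypothetical helper:

#include <iostream>
#include <memory>

#include "arrow/api.h"
#include "arrow/compute/api.h"

// Sketch: call the "mode" function registered above and print the
// resulting struct<mode: T, count: int64> array.
arrow::Status PrintTopModes(const std::shared_ptr<arrow::Array>& values) {
  auto options = arrow::compute::ModeOptions::Defaults();
  options.n = 2;  // top-2 most common values
  ARROW_ASSIGN_OR_RAISE(
      arrow::Datum out,
      arrow::compute::CallFunction("mode", {values}, &options));
  std::cout << out.make_array()->ToString() << std::endl;
  return arrow::Status::OK();
}
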
diff --git a/contrib/libs/apache/arrow/cpp/src/arrow/compute/kernels/aggregate_quantile.cc b/contrib/libs/apache/arrow/cpp/src/arrow/compute/kernels/aggregate_quantile.cc
index feacedbb96e..7d2ffe0770c 100644
--- a/contrib/libs/apache/arrow/cpp/src/arrow/compute/kernels/aggregate_quantile.cc
+++ b/contrib/libs/apache/arrow/cpp/src/arrow/compute/kernels/aggregate_quantile.cc
@@ -1,493 +1,493 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-#include <cmath>
-#include <vector>
-
-#include "arrow/compute/api_aggregate.h"
-#include "arrow/compute/kernels/common.h"
-#include "arrow/compute/kernels/util_internal.h"
-#include "arrow/stl_allocator.h"
-
-namespace arrow {
-namespace compute {
-namespace internal {
-
-namespace {
-
-using QuantileState = internal::OptionsWrapper<QuantileOptions>;
-
-// output is at some input data point, not interpolated
-bool IsDataPoint(const QuantileOptions& options) {
- // some interpolation methods return exact data point
- return options.interpolation == QuantileOptions::LOWER ||
- options.interpolation == QuantileOptions::HIGHER ||
- options.interpolation == QuantileOptions::NEAREST;
-}
-
-// quantile to exact datapoint index (IsDataPoint == true)
-uint64_t QuantileToDataPoint(size_t length, double q,
- enum QuantileOptions::Interpolation interpolation) {
- const double index = (length - 1) * q;
- uint64_t datapoint_index = static_cast<uint64_t>(index);
- const double fraction = index - datapoint_index;
-
- if (interpolation == QuantileOptions::LINEAR ||
- interpolation == QuantileOptions::MIDPOINT) {
- DCHECK_EQ(fraction, 0);
- }
-
- // convert NEAREST interpolation method to LOWER or HIGHER
- if (interpolation == QuantileOptions::NEAREST) {
- if (fraction < 0.5) {
- interpolation = QuantileOptions::LOWER;
- } else if (fraction > 0.5) {
- interpolation = QuantileOptions::HIGHER;
- } else {
- // round 0.5 to nearest even number, similar to numpy.around
- interpolation =
- (datapoint_index & 1) ? QuantileOptions::HIGHER : QuantileOptions::LOWER;
- }
- }
-
- if (interpolation == QuantileOptions::HIGHER && fraction != 0) {
- ++datapoint_index;
- }
-
- return datapoint_index;
-}
-
-// copy and nth_element approach, large memory footprint
-template <typename InType>
-struct SortQuantiler {
- using CType = typename InType::c_type;
- using Allocator = arrow::stl::allocator<CType>;
-
- Status Exec(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
- const QuantileOptions& options = QuantileState::Get(ctx);
-
- // copy all chunks to a buffer, ignore nulls and nans
- std::vector<CType, Allocator> in_buffer(Allocator(ctx->memory_pool()));
-
- const Datum& datum = batch[0];
- const int64_t in_length = datum.length() - datum.null_count();
- if (in_length > 0) {
- in_buffer.resize(in_length);
- CopyNonNullValues(datum, in_buffer.data());
-
- // drop nan
- if (is_floating_type<InType>::value) {
- const auto& it = std::remove_if(in_buffer.begin(), in_buffer.end(),
- [](CType v) { return v != v; });
- in_buffer.resize(it - in_buffer.begin());
- }
- }
-
- // prepare out array
- int64_t out_length = options.q.size();
- if (in_buffer.empty()) {
- out_length = 0; // input is empty or only contains null and nan, return empty array
- }
- // out type depends on options
- const bool is_datapoint = IsDataPoint(options);
- const std::shared_ptr<DataType> out_type =
- is_datapoint ? TypeTraits<InType>::type_singleton() : float64();
- auto out_data = ArrayData::Make(out_type, out_length, 0);
- out_data->buffers.resize(2, nullptr);
-
- // calculate quantiles
- if (out_length > 0) {
- ARROW_ASSIGN_OR_RAISE(out_data->buffers[1],
- ctx->Allocate(out_length * GetBitWidth(*out_type) / 8));
-
- // find quantiles in descending order
- std::vector<int64_t> q_indices(out_length);
- std::iota(q_indices.begin(), q_indices.end(), 0);
- std::sort(q_indices.begin(), q_indices.end(),
- [&options](int64_t left_index, int64_t right_index) {
- return options.q[right_index] < options.q[left_index];
- });
-
- // input array is partitioned around data point at `last_index` (pivot)
-      // for the next quantile, which is smaller, we only consider inputs left of the pivot
- uint64_t last_index = in_buffer.size();
- if (is_datapoint) {
- CType* out_buffer = out_data->template GetMutableValues<CType>(1);
- for (int64_t i = 0; i < out_length; ++i) {
- const int64_t q_index = q_indices[i];
- out_buffer[q_index] = GetQuantileAtDataPoint(
- in_buffer, &last_index, options.q[q_index], options.interpolation);
- }
- } else {
- double* out_buffer = out_data->template GetMutableValues<double>(1);
- for (int64_t i = 0; i < out_length; ++i) {
- const int64_t q_index = q_indices[i];
- out_buffer[q_index] = GetQuantileByInterp(
- in_buffer, &last_index, options.q[q_index], options.interpolation);
- }
- }
- }
-
- *out = Datum(std::move(out_data));
- return Status::OK();
- }
-
- // return quantile located exactly at some input data point
- CType GetQuantileAtDataPoint(std::vector<CType, Allocator>& in, uint64_t* last_index,
- double q,
- enum QuantileOptions::Interpolation interpolation) {
- const uint64_t datapoint_index = QuantileToDataPoint(in.size(), q, interpolation);
-
- if (datapoint_index != *last_index) {
- DCHECK_LT(datapoint_index, *last_index);
- std::nth_element(in.begin(), in.begin() + datapoint_index,
- in.begin() + *last_index);
- *last_index = datapoint_index;
- }
-
- return in[datapoint_index];
- }
-
- // return quantile interpolated from adjacent input data points
- double GetQuantileByInterp(std::vector<CType, Allocator>& in, uint64_t* last_index,
- double q,
- enum QuantileOptions::Interpolation interpolation) {
- const double index = (in.size() - 1) * q;
- const uint64_t lower_index = static_cast<uint64_t>(index);
- const double fraction = index - lower_index;
-
- if (lower_index != *last_index) {
- DCHECK_LT(lower_index, *last_index);
- std::nth_element(in.begin(), in.begin() + lower_index, in.begin() + *last_index);
- }
-
- const double lower_value = static_cast<double>(in[lower_index]);
- if (fraction == 0) {
- *last_index = lower_index;
- return lower_value;
- }
-
- const uint64_t higher_index = lower_index + 1;
- DCHECK_LT(higher_index, in.size());
- if (lower_index != *last_index && higher_index != *last_index) {
- DCHECK_LT(higher_index, *last_index);
- // higher value must be the minimal value after lower_index
- auto min = std::min_element(in.begin() + higher_index, in.begin() + *last_index);
- std::iter_swap(in.begin() + higher_index, min);
- }
- *last_index = lower_index;
-
- const double higher_value = static_cast<double>(in[higher_index]);
-
- if (interpolation == QuantileOptions::LINEAR) {
- // more stable than naive linear interpolation
- return fraction * higher_value + (1 - fraction) * lower_value;
- } else if (interpolation == QuantileOptions::MIDPOINT) {
- return lower_value / 2 + higher_value / 2;
- } else {
- DCHECK(false);
- return NAN;
- }
- }
-};
-
-// histogram approach with constant memory, only for integers within a limited value range
-template <typename InType>
-struct CountQuantiler {
- using CType = typename InType::c_type;
-
- CType min;
- std::vector<uint64_t> counts; // counts[i]: # of values equals i + min
-
- // indices to adjacent non-empty bins covering current quantile
- struct AdjacentBins {
- int left_index;
- int right_index;
- uint64_t total_count; // accumulated counts till left_index (inclusive)
- };
-
- CountQuantiler(CType min, CType max) {
- uint32_t value_range = static_cast<uint32_t>(max - min) + 1;
- DCHECK_LT(value_range, 1 << 30);
- this->min = min;
- this->counts.resize(value_range, 0);
- }
-
- Status Exec(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
- const QuantileOptions& options = QuantileState::Get(ctx);
-
- // count values in all chunks, ignore nulls
- const Datum& datum = batch[0];
- int64_t in_length = CountValues<CType>(this->counts.data(), datum, this->min);
-
- // prepare out array
- int64_t out_length = options.q.size();
- if (in_length == 0) {
- out_length = 0; // input is empty or only contains null, return empty array
- }
- // out type depends on options
- const bool is_datapoint = IsDataPoint(options);
- const std::shared_ptr<DataType> out_type =
- is_datapoint ? TypeTraits<InType>::type_singleton() : float64();
- auto out_data = ArrayData::Make(out_type, out_length, 0);
- out_data->buffers.resize(2, nullptr);
-
- // calculate quantiles
- if (out_length > 0) {
- ARROW_ASSIGN_OR_RAISE(out_data->buffers[1],
- ctx->Allocate(out_length * GetBitWidth(*out_type) / 8));
-
- // find quantiles in ascending order
- std::vector<int64_t> q_indices(out_length);
- std::iota(q_indices.begin(), q_indices.end(), 0);
- std::sort(q_indices.begin(), q_indices.end(),
- [&options](int64_t left_index, int64_t right_index) {
- return options.q[left_index] < options.q[right_index];
- });
-
- AdjacentBins bins{0, 0, this->counts[0]};
- if (is_datapoint) {
- CType* out_buffer = out_data->template GetMutableValues<CType>(1);
- for (int64_t i = 0; i < out_length; ++i) {
- const int64_t q_index = q_indices[i];
- out_buffer[q_index] = GetQuantileAtDataPoint(
- in_length, &bins, options.q[q_index], options.interpolation);
- }
- } else {
- double* out_buffer = out_data->template GetMutableValues<double>(1);
- for (int64_t i = 0; i < out_length; ++i) {
- const int64_t q_index = q_indices[i];
- out_buffer[q_index] = GetQuantileByInterp(in_length, &bins, options.q[q_index],
- options.interpolation);
- }
- }
- }
-
- *out = Datum(std::move(out_data));
- return Status::OK();
- }
-
- // return quantile located exactly at some input data point
- CType GetQuantileAtDataPoint(int64_t in_length, AdjacentBins* bins, double q,
- enum QuantileOptions::Interpolation interpolation) {
- const uint64_t datapoint_index = QuantileToDataPoint(in_length, q, interpolation);
- while (datapoint_index >= bins->total_count &&
- static_cast<size_t>(bins->left_index) < this->counts.size() - 1) {
- ++bins->left_index;
- bins->total_count += this->counts[bins->left_index];
- }
- DCHECK_LT(datapoint_index, bins->total_count);
- return static_cast<CType>(bins->left_index + this->min);
- }
-
- // return quantile interpolated from adjacent input data points
- double GetQuantileByInterp(int64_t in_length, AdjacentBins* bins, double q,
- enum QuantileOptions::Interpolation interpolation) {
- const double index = (in_length - 1) * q;
- const uint64_t index_floor = static_cast<uint64_t>(index);
- const double fraction = index - index_floor;
-
- while (index_floor >= bins->total_count &&
- static_cast<size_t>(bins->left_index) < this->counts.size() - 1) {
- ++bins->left_index;
- bins->total_count += this->counts[bins->left_index];
- }
- DCHECK_LT(index_floor, bins->total_count);
- const double lower_value = static_cast<double>(bins->left_index + this->min);
-
- // quantile lies in this bin, no interpolation needed
- if (index <= bins->total_count - 1) {
- return lower_value;
- }
-
- // quantile lies across two bins, locate next bin if not already done
- DCHECK_EQ(index_floor, bins->total_count - 1);
- if (bins->right_index <= bins->left_index) {
- bins->right_index = bins->left_index + 1;
- while (static_cast<size_t>(bins->right_index) < this->counts.size() - 1 &&
- this->counts[bins->right_index] == 0) {
- ++bins->right_index;
- }
- }
- DCHECK_LT(static_cast<size_t>(bins->right_index), this->counts.size());
- DCHECK_GT(this->counts[bins->right_index], 0);
- const double higher_value = static_cast<double>(bins->right_index + this->min);
-
- if (interpolation == QuantileOptions::LINEAR) {
- return fraction * higher_value + (1 - fraction) * lower_value;
- } else if (interpolation == QuantileOptions::MIDPOINT) {
- return lower_value / 2 + higher_value / 2;
- } else {
- DCHECK(false);
- return NAN;
- }
- }
-};
-
-// histogram or 'copy & nth_element' approach chosen by value range and size, only for integers
-template <typename InType>
-struct CountOrSortQuantiler {
- using CType = typename InType::c_type;
-
- Status Exec(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
- // cross point to benefit from histogram approach
- // parameters estimated from ad-hoc benchmarks manually
- static constexpr int kMinArraySize = 65536;
- static constexpr int kMaxValueRange = 65536;
-
- const Datum& datum = batch[0];
- if (datum.length() - datum.null_count() >= kMinArraySize) {
- CType min, max;
- std::tie(min, max) = GetMinMax<CType>(datum);
-
- if (static_cast<uint64_t>(max) - static_cast<uint64_t>(min) <= kMaxValueRange) {
- return CountQuantiler<InType>(min, max).Exec(ctx, batch, out);
- }
- }
-
- return SortQuantiler<InType>().Exec(ctx, batch, out);
- }
-};
-
-template <typename InType, typename Enable = void>
-struct ExactQuantiler;
-
-template <>
-struct ExactQuantiler<UInt8Type> {
- CountQuantiler<UInt8Type> impl;
- ExactQuantiler() : impl(0, 255) {}
-};
-
-template <>
-struct ExactQuantiler<Int8Type> {
- CountQuantiler<Int8Type> impl;
- ExactQuantiler() : impl(-128, 127) {}
-};
-
-template <typename InType>
-struct ExactQuantiler<InType, enable_if_t<(is_integer_type<InType>::value &&
- (sizeof(typename InType::c_type) > 1))>> {
- CountOrSortQuantiler<InType> impl;
-};
-
-template <typename InType>
-struct ExactQuantiler<InType, enable_if_t<is_floating_type<InType>::value>> {
- SortQuantiler<InType> impl;
-};
-
-template <typename T>
-Status ScalarQuantile(KernelContext* ctx, const QuantileOptions& options,
- const Scalar& scalar, Datum* out) {
- using CType = typename T::c_type;
- ArrayData* output = out->mutable_array();
- if (!scalar.is_valid) {
- output->length = 0;
- output->null_count = 0;
- return Status::OK();
- }
- auto out_type = IsDataPoint(options) ? scalar.type : float64();
- output->length = options.q.size();
- output->null_count = 0;
- ARROW_ASSIGN_OR_RAISE(
- output->buffers[1],
- ctx->Allocate(output->length * BitUtil::BytesForBits(GetBitWidth(*out_type))));
- if (IsDataPoint(options)) {
- CType* out_buffer = output->template GetMutableValues<CType>(1);
- for (int64_t i = 0; i < output->length; i++) {
- out_buffer[i] = UnboxScalar<T>::Unbox(scalar);
- }
- } else {
- double* out_buffer = output->template GetMutableValues<double>(1);
- for (int64_t i = 0; i < output->length; i++) {
- out_buffer[i] = static_cast<double>(UnboxScalar<T>::Unbox(scalar));
- }
- }
- return Status::OK();
-}
-
-template <typename _, typename InType>
-struct QuantileExecutor {
- static Status Exec(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
- if (ctx->state() == nullptr) {
- return Status::Invalid("Quantile requires QuantileOptions");
- }
-
- const QuantileOptions& options = QuantileState::Get(ctx);
- if (options.q.empty()) {
- return Status::Invalid("Requires quantile argument");
- }
- for (double q : options.q) {
- if (q < 0 || q > 1) {
- return Status::Invalid("Quantile must be between 0 and 1");
- }
- }
-
- if (batch[0].is_scalar()) {
- return ScalarQuantile<InType>(ctx, options, *batch[0].scalar(), out);
- }
-
- return ExactQuantiler<InType>().impl.Exec(ctx, batch, out);
- }
-};
-
-Result<ValueDescr> ResolveOutput(KernelContext* ctx,
- const std::vector<ValueDescr>& args) {
- const QuantileOptions& options = QuantileState::Get(ctx);
- if (IsDataPoint(options)) {
- return ValueDescr::Array(args[0].type);
- } else {
- return ValueDescr::Array(float64());
- }
-}
-
-void AddQuantileKernels(VectorFunction* func) {
- VectorKernel base;
- base.init = QuantileState::Init;
- base.can_execute_chunkwise = false;
- base.output_chunked = false;
-
- for (const auto& ty : NumericTypes()) {
- base.signature = KernelSignature::Make({InputType(ty)}, OutputType(ResolveOutput));
-    // the output type is determined at runtime, so set the template argument to NullType
- base.exec = GenerateNumeric<QuantileExecutor, NullType>(*ty);
- DCHECK_OK(func->AddKernel(base));
- }
-}
-
-const FunctionDoc quantile_doc{
- "Compute an array of quantiles of a numeric array or chunked array",
- ("By default, 0.5 quantile (median) is returned.\n"
- "If quantile lies between two data points, an interpolated value is\n"
- "returned based on selected interpolation method.\n"
- "Nulls and NaNs are ignored.\n"
- "An empty array is returned if there is no valid data point."),
- {"array"},
- "QuantileOptions"};
-
-} // namespace
-
-void RegisterScalarAggregateQuantile(FunctionRegistry* registry) {
- static QuantileOptions default_options;
- auto func = std::make_shared<VectorFunction>("quantile", Arity::Unary(), &quantile_doc,
- &default_options);
- AddQuantileKernels(func.get());
- DCHECK_OK(registry->AddFunction(std::move(func)));
-}
-
-} // namespace internal
-} // namespace compute
-} // namespace arrow
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include <cmath>
+#include <vector>
+
+#include "arrow/compute/api_aggregate.h"
+#include "arrow/compute/kernels/common.h"
+#include "arrow/compute/kernels/util_internal.h"
+#include "arrow/stl_allocator.h"
+
+namespace arrow {
+namespace compute {
+namespace internal {
+
+namespace {
+
+using QuantileState = internal::OptionsWrapper<QuantileOptions>;
+
+// output is at some input data point, not interpolated
+bool IsDataPoint(const QuantileOptions& options) {
+  // some interpolation methods return an exact data point
+ return options.interpolation == QuantileOptions::LOWER ||
+ options.interpolation == QuantileOptions::HIGHER ||
+ options.interpolation == QuantileOptions::NEAREST;
+}
+
+// map a quantile to the exact datapoint index (used when IsDataPoint is true)
+uint64_t QuantileToDataPoint(size_t length, double q,
+ enum QuantileOptions::Interpolation interpolation) {
+ const double index = (length - 1) * q;
+ uint64_t datapoint_index = static_cast<uint64_t>(index);
+ const double fraction = index - datapoint_index;
+
+ if (interpolation == QuantileOptions::LINEAR ||
+ interpolation == QuantileOptions::MIDPOINT) {
+ DCHECK_EQ(fraction, 0);
+ }
+
+ // convert NEAREST interpolation method to LOWER or HIGHER
+ if (interpolation == QuantileOptions::NEAREST) {
+ if (fraction < 0.5) {
+ interpolation = QuantileOptions::LOWER;
+ } else if (fraction > 0.5) {
+ interpolation = QuantileOptions::HIGHER;
+ } else {
+ // round 0.5 to nearest even number, similar to numpy.around
+ interpolation =
+ (datapoint_index & 1) ? QuantileOptions::HIGHER : QuantileOptions::LOWER;
+ }
+ }
+
+ if (interpolation == QuantileOptions::HIGHER && fraction != 0) {
+ ++datapoint_index;
+ }
+
+ return datapoint_index;
+}
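+
+// Worked example of the mapping above (illustrative values): for length = 5
+// and q = 0.25, index = (5 - 1) * 0.25 = 1.0, so the data point at index 1 is
+// returned exactly. For q = 0.375, index = 1.5 and NEAREST rounds the tie to
+// the even index 2 (HIGHER here), matching numpy.around semantics.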
+
+// copy and nth_element approach, large memory footprint
+template <typename InType>
+struct SortQuantiler {
+ using CType = typename InType::c_type;
+ using Allocator = arrow::stl::allocator<CType>;
+
+ Status Exec(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
+ const QuantileOptions& options = QuantileState::Get(ctx);
+
+    // copy all chunks to a buffer, ignoring nulls and NaNs
+ std::vector<CType, Allocator> in_buffer(Allocator(ctx->memory_pool()));
+
+ const Datum& datum = batch[0];
+ const int64_t in_length = datum.length() - datum.null_count();
+ if (in_length > 0) {
+ in_buffer.resize(in_length);
+ CopyNonNullValues(datum, in_buffer.data());
+
+      // drop NaNs (a NaN compares unequal to itself)
+ if (is_floating_type<InType>::value) {
+ const auto& it = std::remove_if(in_buffer.begin(), in_buffer.end(),
+ [](CType v) { return v != v; });
+ in_buffer.resize(it - in_buffer.begin());
+ }
+ }
+
+ // prepare out array
+ int64_t out_length = options.q.size();
+ if (in_buffer.empty()) {
+      out_length = 0;  // input is empty or contains only nulls and NaNs; return an empty array
+ }
+ // out type depends on options
+ const bool is_datapoint = IsDataPoint(options);
+ const std::shared_ptr<DataType> out_type =
+ is_datapoint ? TypeTraits<InType>::type_singleton() : float64();
+ auto out_data = ArrayData::Make(out_type, out_length, 0);
+ out_data->buffers.resize(2, nullptr);
+
+ // calculate quantiles
+ if (out_length > 0) {
+ ARROW_ASSIGN_OR_RAISE(out_data->buffers[1],
+ ctx->Allocate(out_length * GetBitWidth(*out_type) / 8));
+
+ // find quantiles in descending order
+ std::vector<int64_t> q_indices(out_length);
+ std::iota(q_indices.begin(), q_indices.end(), 0);
+ std::sort(q_indices.begin(), q_indices.end(),
+ [&options](int64_t left_index, int64_t right_index) {
+ return options.q[right_index] < options.q[left_index];
+ });
+
+    // the input array is partitioned around the data point at `last_index` (pivot);
+    // for the next, smaller quantile we only consider inputs left of the pivot
+ uint64_t last_index = in_buffer.size();
+ if (is_datapoint) {
+ CType* out_buffer = out_data->template GetMutableValues<CType>(1);
+ for (int64_t i = 0; i < out_length; ++i) {
+ const int64_t q_index = q_indices[i];
+ out_buffer[q_index] = GetQuantileAtDataPoint(
+ in_buffer, &last_index, options.q[q_index], options.interpolation);
+ }
+ } else {
+ double* out_buffer = out_data->template GetMutableValues<double>(1);
+ for (int64_t i = 0; i < out_length; ++i) {
+ const int64_t q_index = q_indices[i];
+ out_buffer[q_index] = GetQuantileByInterp(
+ in_buffer, &last_index, options.q[q_index], options.interpolation);
+ }
+ }
+ }
+
+ *out = Datum(std::move(out_data));
+ return Status::OK();
+ }
+
+ // return quantile located exactly at some input data point
+ CType GetQuantileAtDataPoint(std::vector<CType, Allocator>& in, uint64_t* last_index,
+ double q,
+ enum QuantileOptions::Interpolation interpolation) {
+ const uint64_t datapoint_index = QuantileToDataPoint(in.size(), q, interpolation);
+
+ if (datapoint_index != *last_index) {
+ DCHECK_LT(datapoint_index, *last_index);
+ std::nth_element(in.begin(), in.begin() + datapoint_index,
+ in.begin() + *last_index);
+ *last_index = datapoint_index;
+ }
+
+ return in[datapoint_index];
+ }
+
+ // return quantile interpolated from adjacent input data points
+ double GetQuantileByInterp(std::vector<CType, Allocator>& in, uint64_t* last_index,
+ double q,
+ enum QuantileOptions::Interpolation interpolation) {
+ const double index = (in.size() - 1) * q;
+ const uint64_t lower_index = static_cast<uint64_t>(index);
+ const double fraction = index - lower_index;
+
+ if (lower_index != *last_index) {
+ DCHECK_LT(lower_index, *last_index);
+ std::nth_element(in.begin(), in.begin() + lower_index, in.begin() + *last_index);
+ }
+
+ const double lower_value = static_cast<double>(in[lower_index]);
+ if (fraction == 0) {
+ *last_index = lower_index;
+ return lower_value;
+ }
+
+ const uint64_t higher_index = lower_index + 1;
+ DCHECK_LT(higher_index, in.size());
+ if (lower_index != *last_index && higher_index != *last_index) {
+ DCHECK_LT(higher_index, *last_index);
+      // the higher value must be the minimum of the values after lower_index
+ auto min = std::min_element(in.begin() + higher_index, in.begin() + *last_index);
+ std::iter_swap(in.begin() + higher_index, min);
+ }
+ *last_index = lower_index;
+
+ const double higher_value = static_cast<double>(in[higher_index]);
+
+ if (interpolation == QuantileOptions::LINEAR) {
+ // more stable than naive linear interpolation
+ return fraction * higher_value + (1 - fraction) * lower_value;
+ } else if (interpolation == QuantileOptions::MIDPOINT) {
+ return lower_value / 2 + higher_value / 2;
+ } else {
+ DCHECK(false);
+ return NAN;
+ }
+ }
+};
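+
+// A minimal sketch of the partition-reuse trick above (values illustrative):
+// quantiles are visited in descending order, so each std::nth_element call
+// only re-partitions the prefix left of the previous pivot, e.g.
+//
+//   std::vector<double> v = {9, 1, 5, 3, 7};
+//   std::nth_element(v.begin(), v.begin() + 3, v.end());        // q=0.75 -> 7
+//   std::nth_element(v.begin(), v.begin() + 1, v.begin() + 3);  // q=0.25 -> 3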
+
+// histogram approach with constant memory, only for integers within limited value range
+template <typename InType>
+struct CountQuantiler {
+ using CType = typename InType::c_type;
+
+ CType min;
+  std::vector<uint64_t> counts;  // counts[i]: # of values equal to i + min
+
+ // indices to adjacent non-empty bins covering current quantile
+ struct AdjacentBins {
+ int left_index;
+ int right_index;
+    uint64_t total_count;  // accumulated counts up to left_index (inclusive)
+ };
+
+ CountQuantiler(CType min, CType max) {
+ uint32_t value_range = static_cast<uint32_t>(max - min) + 1;
+ DCHECK_LT(value_range, 1 << 30);
+ this->min = min;
+ this->counts.resize(value_range, 0);
+ }
+
+ Status Exec(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
+ const QuantileOptions& options = QuantileState::Get(ctx);
+
+ // count values in all chunks, ignore nulls
+ const Datum& datum = batch[0];
+ int64_t in_length = CountValues<CType>(this->counts.data(), datum, this->min);
+
+ // prepare out array
+ int64_t out_length = options.q.size();
+ if (in_length == 0) {
+      out_length = 0;  // input is empty or contains only nulls; return an empty array
+ }
+ // out type depends on options
+ const bool is_datapoint = IsDataPoint(options);
+ const std::shared_ptr<DataType> out_type =
+ is_datapoint ? TypeTraits<InType>::type_singleton() : float64();
+ auto out_data = ArrayData::Make(out_type, out_length, 0);
+ out_data->buffers.resize(2, nullptr);
+
+ // calculate quantiles
+ if (out_length > 0) {
+ ARROW_ASSIGN_OR_RAISE(out_data->buffers[1],
+ ctx->Allocate(out_length * GetBitWidth(*out_type) / 8));
+
+ // find quantiles in ascending order
+ std::vector<int64_t> q_indices(out_length);
+ std::iota(q_indices.begin(), q_indices.end(), 0);
+ std::sort(q_indices.begin(), q_indices.end(),
+ [&options](int64_t left_index, int64_t right_index) {
+ return options.q[left_index] < options.q[right_index];
+ });
+
+ AdjacentBins bins{0, 0, this->counts[0]};
+ if (is_datapoint) {
+ CType* out_buffer = out_data->template GetMutableValues<CType>(1);
+ for (int64_t i = 0; i < out_length; ++i) {
+ const int64_t q_index = q_indices[i];
+ out_buffer[q_index] = GetQuantileAtDataPoint(
+ in_length, &bins, options.q[q_index], options.interpolation);
+ }
+ } else {
+ double* out_buffer = out_data->template GetMutableValues<double>(1);
+ for (int64_t i = 0; i < out_length; ++i) {
+ const int64_t q_index = q_indices[i];
+ out_buffer[q_index] = GetQuantileByInterp(in_length, &bins, options.q[q_index],
+ options.interpolation);
+ }
+ }
+ }
+
+ *out = Datum(std::move(out_data));
+ return Status::OK();
+ }
+
+ // return quantile located exactly at some input data point
+ CType GetQuantileAtDataPoint(int64_t in_length, AdjacentBins* bins, double q,
+ enum QuantileOptions::Interpolation interpolation) {
+ const uint64_t datapoint_index = QuantileToDataPoint(in_length, q, interpolation);
+ while (datapoint_index >= bins->total_count &&
+ static_cast<size_t>(bins->left_index) < this->counts.size() - 1) {
+ ++bins->left_index;
+ bins->total_count += this->counts[bins->left_index];
+ }
+ DCHECK_LT(datapoint_index, bins->total_count);
+ return static_cast<CType>(bins->left_index + this->min);
+ }
+
+ // return quantile interpolated from adjacent input data points
+ double GetQuantileByInterp(int64_t in_length, AdjacentBins* bins, double q,
+ enum QuantileOptions::Interpolation interpolation) {
+ const double index = (in_length - 1) * q;
+ const uint64_t index_floor = static_cast<uint64_t>(index);
+ const double fraction = index - index_floor;
+
+ while (index_floor >= bins->total_count &&
+ static_cast<size_t>(bins->left_index) < this->counts.size() - 1) {
+ ++bins->left_index;
+ bins->total_count += this->counts[bins->left_index];
+ }
+ DCHECK_LT(index_floor, bins->total_count);
+ const double lower_value = static_cast<double>(bins->left_index + this->min);
+
+ // quantile lies in this bin, no interpolation needed
+ if (index <= bins->total_count - 1) {
+ return lower_value;
+ }
+
+ // quantile lies across two bins, locate next bin if not already done
+ DCHECK_EQ(index_floor, bins->total_count - 1);
+ if (bins->right_index <= bins->left_index) {
+ bins->right_index = bins->left_index + 1;
+ while (static_cast<size_t>(bins->right_index) < this->counts.size() - 1 &&
+ this->counts[bins->right_index] == 0) {
+ ++bins->right_index;
+ }
+ }
+ DCHECK_LT(static_cast<size_t>(bins->right_index), this->counts.size());
+ DCHECK_GT(this->counts[bins->right_index], 0);
+ const double higher_value = static_cast<double>(bins->right_index + this->min);
+
+ if (interpolation == QuantileOptions::LINEAR) {
+ return fraction * higher_value + (1 - fraction) * lower_value;
+ } else if (interpolation == QuantileOptions::MIDPOINT) {
+ return lower_value / 2 + higher_value / 2;
+ } else {
+ DCHECK(false);
+ return NAN;
+ }
+ }
+};
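+
+// Worked example (illustrative): input {3, 3, 4, 6} gives min = 3 and
+// counts = {2, 1, 0, 1}. With q = 1/3, index = 1.0 lies inside the first bin
+// (total_count = 2), so 3 is returned exactly; with q = 0.5, index = 1.5
+// straddles the bins for 3 and 4, and LINEAR yields 0.5*4 + 0.5*3 = 3.5.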
+
+// histogram or 'copy & nth_element' approach, chosen by value range and array size; integers only
+template <typename InType>
+struct CountOrSortQuantiler {
+ using CType = typename InType::c_type;
+
+ Status Exec(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
+    // crossover point at which the histogram approach starts to pay off;
+    // parameters estimated manually from ad-hoc benchmarks
+ static constexpr int kMinArraySize = 65536;
+ static constexpr int kMaxValueRange = 65536;
+
+ const Datum& datum = batch[0];
+ if (datum.length() - datum.null_count() >= kMinArraySize) {
+ CType min, max;
+ std::tie(min, max) = GetMinMax<CType>(datum);
+
+ if (static_cast<uint64_t>(max) - static_cast<uint64_t>(min) <= kMaxValueRange) {
+ return CountQuantiler<InType>(min, max).Exec(ctx, batch, out);
+ }
+ }
+
+ return SortQuantiler<InType>().Exec(ctx, batch, out);
+ }
+};
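+
+// For instance, 1M int32 values confined to [0, 1000] take the histogram path
+// (a counts vector of ~1001 entries), while the same array spread over
+// [0, 1e9] exceeds kMaxValueRange and falls back to copy & nth_element.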
+
+template <typename InType, typename Enable = void>
+struct ExactQuantiler;
+
+template <>
+struct ExactQuantiler<UInt8Type> {
+ CountQuantiler<UInt8Type> impl;
+ ExactQuantiler() : impl(0, 255) {}
+};
+
+template <>
+struct ExactQuantiler<Int8Type> {
+ CountQuantiler<Int8Type> impl;
+ ExactQuantiler() : impl(-128, 127) {}
+};
+
+template <typename InType>
+struct ExactQuantiler<InType, enable_if_t<(is_integer_type<InType>::value &&
+ (sizeof(typename InType::c_type) > 1))>> {
+ CountOrSortQuantiler<InType> impl;
+};
+
+template <typename InType>
+struct ExactQuantiler<InType, enable_if_t<is_floating_type<InType>::value>> {
+ SortQuantiler<InType> impl;
+};
+
+template <typename T>
+Status ScalarQuantile(KernelContext* ctx, const QuantileOptions& options,
+ const Scalar& scalar, Datum* out) {
+ using CType = typename T::c_type;
+ ArrayData* output = out->mutable_array();
+ if (!scalar.is_valid) {
+ output->length = 0;
+ output->null_count = 0;
+ return Status::OK();
+ }
+ auto out_type = IsDataPoint(options) ? scalar.type : float64();
+ output->length = options.q.size();
+ output->null_count = 0;
+ ARROW_ASSIGN_OR_RAISE(
+ output->buffers[1],
+ ctx->Allocate(output->length * BitUtil::BytesForBits(GetBitWidth(*out_type))));
+ if (IsDataPoint(options)) {
+ CType* out_buffer = output->template GetMutableValues<CType>(1);
+ for (int64_t i = 0; i < output->length; i++) {
+ out_buffer[i] = UnboxScalar<T>::Unbox(scalar);
+ }
+ } else {
+ double* out_buffer = output->template GetMutableValues<double>(1);
+ for (int64_t i = 0; i < output->length; i++) {
+ out_buffer[i] = static_cast<double>(UnboxScalar<T>::Unbox(scalar));
+ }
+ }
+ return Status::OK();
+}
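+
+// E.g., a valid Int64Scalar(7) with q = {0.25, 0.5, 0.75} yields a
+// three-element output: 7 repeated (input type retained) for data-point
+// interpolations, or 7.0 repeated (as float64) for LINEAR/MIDPOINT.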
+
+template <typename _, typename InType>
+struct QuantileExecutor {
+ static Status Exec(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
+ if (ctx->state() == nullptr) {
+ return Status::Invalid("Quantile requires QuantileOptions");
+ }
+
+ const QuantileOptions& options = QuantileState::Get(ctx);
+ if (options.q.empty()) {
+ return Status::Invalid("Requires quantile argument");
+ }
+ for (double q : options.q) {
+ if (q < 0 || q > 1) {
+ return Status::Invalid("Quantile must be between 0 and 1");
+ }
+ }
+
+ if (batch[0].is_scalar()) {
+ return ScalarQuantile<InType>(ctx, options, *batch[0].scalar(), out);
+ }
+
+ return ExactQuantiler<InType>().impl.Exec(ctx, batch, out);
+ }
+};
+
+Result<ValueDescr> ResolveOutput(KernelContext* ctx,
+ const std::vector<ValueDescr>& args) {
+ const QuantileOptions& options = QuantileState::Get(ctx);
+ if (IsDataPoint(options)) {
+ return ValueDescr::Array(args[0].type);
+ } else {
+ return ValueDescr::Array(float64());
+ }
+}
+
+void AddQuantileKernels(VectorFunction* func) {
+ VectorKernel base;
+ base.init = QuantileState::Init;
+ base.can_execute_chunkwise = false;
+ base.output_chunked = false;
+
+ for (const auto& ty : NumericTypes()) {
+ base.signature = KernelSignature::Make({InputType(ty)}, OutputType(ResolveOutput));
+    // the output type is determined at runtime, so set the template argument to NullType
+ base.exec = GenerateNumeric<QuantileExecutor, NullType>(*ty);
+ DCHECK_OK(func->AddKernel(base));
+ }
+}
+
+const FunctionDoc quantile_doc{
+ "Compute an array of quantiles of a numeric array or chunked array",
+ ("By default, 0.5 quantile (median) is returned.\n"
+ "If quantile lies between two data points, an interpolated value is\n"
+ "returned based on selected interpolation method.\n"
+ "Nulls and NaNs are ignored.\n"
+ "An empty array is returned if there is no valid data point."),
+ {"array"},
+ "QuantileOptions"};
+
+} // namespace
+
+void RegisterScalarAggregateQuantile(FunctionRegistry* registry) {
+ static QuantileOptions default_options;
+ auto func = std::make_shared<VectorFunction>("quantile", Arity::Unary(), &quantile_doc,
+ &default_options);
+ AddQuantileKernels(func.get());
+ DCHECK_OK(registry->AddFunction(std::move(func)));
+}
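+
+// A minimal usage sketch, assuming the public arrow::compute entry points
+// (QuantileOptions from api_aggregate.h, CallFunction from the function
+// registry API); array construction is omitted:
+//
+//   QuantileOptions options(/*q=*/{0.25, 0.5, 0.75}, QuantileOptions::LINEAR);
+//   ARROW_ASSIGN_OR_RAISE(Datum result,
+//                         CallFunction("quantile", {array}, &options));
+//   // LINEAR may interpolate, so `result` holds a float64 array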
+
+} // namespace internal
+} // namespace compute
+} // namespace arrow
diff --git a/contrib/libs/apache/arrow/cpp/src/arrow/compute/kernels/aggregate_tdigest.cc b/contrib/libs/apache/arrow/cpp/src/arrow/compute/kernels/aggregate_tdigest.cc
index 54f36ab9159..4c261604c85 100644
--- a/contrib/libs/apache/arrow/cpp/src/arrow/compute/kernels/aggregate_tdigest.cc
+++ b/contrib/libs/apache/arrow/cpp/src/arrow/compute/kernels/aggregate_tdigest.cc
@@ -1,164 +1,164 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-#include "arrow/compute/api_aggregate.h"
-#include "arrow/compute/kernels/aggregate_internal.h"
-#include "arrow/compute/kernels/common.h"
-#include "arrow/util/bit_run_reader.h"
-#include "arrow/util/tdigest.h"
-
-namespace arrow {
-namespace compute {
-namespace internal {
-
-namespace {
-
-using arrow::internal::TDigest;
-using arrow::internal::VisitSetBitRunsVoid;
-
-template <typename ArrowType>
-struct TDigestImpl : public ScalarAggregator {
- using ThisType = TDigestImpl<ArrowType>;
- using ArrayType = typename TypeTraits<ArrowType>::ArrayType;
- using CType = typename ArrowType::c_type;
-
- explicit TDigestImpl(const TDigestOptions& options)
- : q{options.q}, tdigest{options.delta, options.buffer_size} {}
-
- Status Consume(KernelContext*, const ExecBatch& batch) override {
- if (batch[0].is_array()) {
- const ArrayData& data = *batch[0].array();
- const CType* values = data.GetValues<CType>(1);
-
- if (data.length > data.GetNullCount()) {
- VisitSetBitRunsVoid(data.buffers[0], data.offset, data.length,
- [&](int64_t pos, int64_t len) {
- for (int64_t i = 0; i < len; ++i) {
- this->tdigest.NanAdd(values[pos + i]);
- }
- });
- }
- } else {
- const CType value = UnboxScalar<ArrowType>::Unbox(*batch[0].scalar());
- if (batch[0].scalar()->is_valid) {
- this->tdigest.NanAdd(value);
- }
- }
- return Status::OK();
- }
-
- Status MergeFrom(KernelContext*, KernelState&& src) override {
- auto& other = checked_cast<ThisType&>(src);
- std::vector<TDigest> other_tdigest;
- other_tdigest.push_back(std::move(other.tdigest));
- this->tdigest.Merge(&other_tdigest);
- return Status::OK();
- }
-
- Status Finalize(KernelContext* ctx, Datum* out) override {
- const int64_t out_length = this->tdigest.is_empty() ? 0 : this->q.size();
- auto out_data = ArrayData::Make(float64(), out_length, 0);
- out_data->buffers.resize(2, nullptr);
-
- if (out_length > 0) {
- ARROW_ASSIGN_OR_RAISE(out_data->buffers[1],
- ctx->Allocate(out_length * sizeof(double)));
- double* out_buffer = out_data->template GetMutableValues<double>(1);
- for (int64_t i = 0; i < out_length; ++i) {
- out_buffer[i] = this->tdigest.Quantile(this->q[i]);
- }
- }
-
- *out = Datum(std::move(out_data));
- return Status::OK();
- }
-
- const std::vector<double>& q;
- TDigest tdigest;
-};
-
-struct TDigestInitState {
- std::unique_ptr<KernelState> state;
- KernelContext* ctx;
- const DataType& in_type;
- const TDigestOptions& options;
-
- TDigestInitState(KernelContext* ctx, const DataType& in_type,
- const TDigestOptions& options)
- : ctx(ctx), in_type(in_type), options(options) {}
-
- Status Visit(const DataType&) {
- return Status::NotImplemented("No tdigest implemented");
- }
-
- Status Visit(const HalfFloatType&) {
- return Status::NotImplemented("No tdigest implemented");
- }
-
- template <typename Type>
- enable_if_t<is_number_type<Type>::value, Status> Visit(const Type&) {
- state.reset(new TDigestImpl<Type>(options));
- return Status::OK();
- }
-
- Result<std::unique_ptr<KernelState>> Create() {
- RETURN_NOT_OK(VisitTypeInline(in_type, this));
- return std::move(state);
- }
-};
-
-Result<std::unique_ptr<KernelState>> TDigestInit(KernelContext* ctx,
- const KernelInitArgs& args) {
- TDigestInitState visitor(ctx, *args.inputs[0].type,
- static_cast<const TDigestOptions&>(*args.options));
- return visitor.Create();
-}
-
-void AddTDigestKernels(KernelInit init,
- const std::vector<std::shared_ptr<DataType>>& types,
- ScalarAggregateFunction* func) {
- for (const auto& ty : types) {
- auto sig = KernelSignature::Make({InputType(ty)}, float64());
- AddAggKernel(std::move(sig), init, func);
- }
-}
-
-const FunctionDoc tdigest_doc{
- "Approximate quantiles of a numeric array with T-Digest algorithm",
- ("By default, 0.5 quantile (median) is returned.\n"
- "Nulls and NaNs are ignored.\n"
- "An empty array is returned if there is no valid data point."),
- {"array"},
- "TDigestOptions"};
-
-std::shared_ptr<ScalarAggregateFunction> AddTDigestAggKernels() {
- static auto default_tdigest_options = TDigestOptions::Defaults();
- auto func = std::make_shared<ScalarAggregateFunction>(
- "tdigest", Arity::Unary(), &tdigest_doc, &default_tdigest_options);
- AddTDigestKernels(TDigestInit, NumericTypes(), func.get());
- return func;
-}
-
-} // namespace
-
-void RegisterScalarAggregateTDigest(FunctionRegistry* registry) {
- DCHECK_OK(registry->AddFunction(AddTDigestAggKernels()));
-}
-
-} // namespace internal
-} // namespace compute
-} // namespace arrow
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "arrow/compute/api_aggregate.h"
+#include "arrow/compute/kernels/aggregate_internal.h"
+#include "arrow/compute/kernels/common.h"
+#include "arrow/util/bit_run_reader.h"
+#include "arrow/util/tdigest.h"
+
+namespace arrow {
+namespace compute {
+namespace internal {
+
+namespace {
+
+using arrow::internal::TDigest;
+using arrow::internal::VisitSetBitRunsVoid;
+
+template <typename ArrowType>
+struct TDigestImpl : public ScalarAggregator {
+ using ThisType = TDigestImpl<ArrowType>;
+ using ArrayType = typename TypeTraits<ArrowType>::ArrayType;
+ using CType = typename ArrowType::c_type;
+
+ explicit TDigestImpl(const TDigestOptions& options)
+ : q{options.q}, tdigest{options.delta, options.buffer_size} {}
+
+ Status Consume(KernelContext*, const ExecBatch& batch) override {
+ if (batch[0].is_array()) {
+ const ArrayData& data = *batch[0].array();
+ const CType* values = data.GetValues<CType>(1);
+
+ if (data.length > data.GetNullCount()) {
+ VisitSetBitRunsVoid(data.buffers[0], data.offset, data.length,
+ [&](int64_t pos, int64_t len) {
+ for (int64_t i = 0; i < len; ++i) {
+ this->tdigest.NanAdd(values[pos + i]);
+ }
+ });
+ }
+ } else {
+ const CType value = UnboxScalar<ArrowType>::Unbox(*batch[0].scalar());
+ if (batch[0].scalar()->is_valid) {
+ this->tdigest.NanAdd(value);
+ }
+ }
+ return Status::OK();
+ }
+
+ Status MergeFrom(KernelContext*, KernelState&& src) override {
+ auto& other = checked_cast<ThisType&>(src);
+ std::vector<TDigest> other_tdigest;
+ other_tdigest.push_back(std::move(other.tdigest));
+ this->tdigest.Merge(&other_tdigest);
+ return Status::OK();
+ }
+
+ Status Finalize(KernelContext* ctx, Datum* out) override {
+ const int64_t out_length = this->tdigest.is_empty() ? 0 : this->q.size();
+ auto out_data = ArrayData::Make(float64(), out_length, 0);
+ out_data->buffers.resize(2, nullptr);
+
+ if (out_length > 0) {
+ ARROW_ASSIGN_OR_RAISE(out_data->buffers[1],
+ ctx->Allocate(out_length * sizeof(double)));
+ double* out_buffer = out_data->template GetMutableValues<double>(1);
+ for (int64_t i = 0; i < out_length; ++i) {
+ out_buffer[i] = this->tdigest.Quantile(this->q[i]);
+ }
+ }
+
+ *out = Datum(std::move(out_data));
+ return Status::OK();
+ }
+
+ const std::vector<double>& q;
+ TDigest tdigest;
+};
+
+struct TDigestInitState {
+ std::unique_ptr<KernelState> state;
+ KernelContext* ctx;
+ const DataType& in_type;
+ const TDigestOptions& options;
+
+ TDigestInitState(KernelContext* ctx, const DataType& in_type,
+ const TDigestOptions& options)
+ : ctx(ctx), in_type(in_type), options(options) {}
+
+ Status Visit(const DataType&) {
+ return Status::NotImplemented("No tdigest implemented");
+ }
+
+ Status Visit(const HalfFloatType&) {
+ return Status::NotImplemented("No tdigest implemented");
+ }
+
+ template <typename Type>
+ enable_if_t<is_number_type<Type>::value, Status> Visit(const Type&) {
+ state.reset(new TDigestImpl<Type>(options));
+ return Status::OK();
+ }
+
+ Result<std::unique_ptr<KernelState>> Create() {
+ RETURN_NOT_OK(VisitTypeInline(in_type, this));
+ return std::move(state);
+ }
+};
+
+Result<std::unique_ptr<KernelState>> TDigestInit(KernelContext* ctx,
+ const KernelInitArgs& args) {
+ TDigestInitState visitor(ctx, *args.inputs[0].type,
+ static_cast<const TDigestOptions&>(*args.options));
+ return visitor.Create();
+}
+
+void AddTDigestKernels(KernelInit init,
+ const std::vector<std::shared_ptr<DataType>>& types,
+ ScalarAggregateFunction* func) {
+ for (const auto& ty : types) {
+ auto sig = KernelSignature::Make({InputType(ty)}, float64());
+ AddAggKernel(std::move(sig), init, func);
+ }
+}
+
+const FunctionDoc tdigest_doc{
+ "Approximate quantiles of a numeric array with T-Digest algorithm",
+ ("By default, 0.5 quantile (median) is returned.\n"
+ "Nulls and NaNs are ignored.\n"
+ "An empty array is returned if there is no valid data point."),
+ {"array"},
+ "TDigestOptions"};
+
+std::shared_ptr<ScalarAggregateFunction> AddTDigestAggKernels() {
+ static auto default_tdigest_options = TDigestOptions::Defaults();
+ auto func = std::make_shared<ScalarAggregateFunction>(
+ "tdigest", Arity::Unary(), &tdigest_doc, &default_tdigest_options);
+ AddTDigestKernels(TDigestInit, NumericTypes(), func.get());
+ return func;
+}
+
+} // namespace
+
+void RegisterScalarAggregateTDigest(FunctionRegistry* registry) {
+ DCHECK_OK(registry->AddFunction(AddTDigestAggKernels()));
+}
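+
+// Usage mirrors the exact "quantile" kernel (a sketch; delta and buffer_size
+// trade accuracy for memory):
+//
+//   TDigestOptions options(/*q=*/{0.99}, /*delta=*/100, /*buffer_size=*/500);
+//   ARROW_ASSIGN_OR_RAISE(Datum approx,
+//                         CallFunction("tdigest", {array}, &options));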
+
+} // namespace internal
+} // namespace compute
+} // namespace arrow
diff --git a/contrib/libs/apache/arrow/cpp/src/arrow/compute/kernels/aggregate_var_std.cc b/contrib/libs/apache/arrow/cpp/src/arrow/compute/kernels/aggregate_var_std.cc
index d879630e697..d6965fed4a3 100644
--- a/contrib/libs/apache/arrow/cpp/src/arrow/compute/kernels/aggregate_var_std.cc
+++ b/contrib/libs/apache/arrow/cpp/src/arrow/compute/kernels/aggregate_var_std.cc
@@ -15,130 +15,130 @@
// specific language governing permissions and limitations
// under the License.
-#include <cmath>
-
-#include "arrow/compute/api_aggregate.h"
-#include "arrow/compute/kernels/aggregate_internal.h"
-#include "arrow/compute/kernels/common.h"
-#include "arrow/util/bit_run_reader.h"
-#include "arrow/util/int128_internal.h"
-
+#include <cmath>
+
+#include "arrow/compute/api_aggregate.h"
+#include "arrow/compute/kernels/aggregate_internal.h"
+#include "arrow/compute/kernels/common.h"
+#include "arrow/util/bit_run_reader.h"
+#include "arrow/util/int128_internal.h"
+
namespace arrow {
namespace compute {
-namespace internal {
+namespace internal {
namespace {
-using arrow::internal::int128_t;
-using arrow::internal::VisitSetBitRunsVoid;
-
+using arrow::internal::int128_t;
+using arrow::internal::VisitSetBitRunsVoid;
+
template <typename ArrowType>
struct VarStdState {
using ArrayType = typename TypeTraits<ArrowType>::ArrayType;
- using CType = typename ArrowType::c_type;
+ using CType = typename ArrowType::c_type;
using ThisType = VarStdState<ArrowType>;
-  // float/double/int64: calculate `m2` (sum((X-mean)^2)) with the `two-pass algorithm`
+  // float/double/int64: calculate `m2` (sum((X-mean)^2)) with the `two-pass algorithm`
// https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance#Two-pass_algorithm
- template <typename T = ArrowType>
- enable_if_t<is_floating_type<T>::value || (sizeof(CType) > 4)> Consume(
- const ArrayType& array) {
+ template <typename T = ArrowType>
+ enable_if_t<is_floating_type<T>::value || (sizeof(CType) > 4)> Consume(
+ const ArrayType& array) {
int64_t count = array.length() - array.null_count();
if (count == 0) {
return;
}
- using SumType =
- typename std::conditional<is_floating_type<T>::value, double, int128_t>::type;
- SumType sum =
- arrow::compute::detail::SumArray<CType, SumType, SimdLevel::NONE>(*array.data());
+ using SumType =
+ typename std::conditional<is_floating_type<T>::value, double, int128_t>::type;
+ SumType sum =
+ arrow::compute::detail::SumArray<CType, SumType, SimdLevel::NONE>(*array.data());
- const double mean = static_cast<double>(sum) / count;
- const double m2 = arrow::compute::detail::SumArray<CType, double, SimdLevel::NONE>(
- *array.data(), [mean](CType value) {
- const double v = static_cast<double>(value);
- return (v - mean) * (v - mean);
- });
+ const double mean = static_cast<double>(sum) / count;
+ const double m2 = arrow::compute::detail::SumArray<CType, double, SimdLevel::NONE>(
+ *array.data(), [mean](CType value) {
+ const double v = static_cast<double>(value);
+ return (v - mean) * (v - mean);
+ });
this->count = count;
- this->mean = mean;
+ this->mean = mean;
this->m2 = m2;
}
-  // int32/16/8: textbook one-pass algorithm with integer arithmetic
- template <typename T = ArrowType>
- enable_if_t<is_integer_type<T>::value && (sizeof(CType) <= 4)> Consume(
- const ArrayType& array) {
-    // max number of elements such that the sum will not overflow int64 (2Gi int32 elements)
- // for uint32: 0 <= sum < 2^63 (int64 >= 0)
- // for int32: -2^62 <= sum < 2^62
- constexpr int64_t max_length = 1ULL << (63 - sizeof(CType) * 8);
-
- int64_t start_index = 0;
- int64_t valid_count = array.length() - array.null_count();
-
- while (valid_count > 0) {
-      // process in chunks so that overflow will never happen
- const auto slice = array.Slice(start_index, max_length);
- const int64_t count = slice->length() - slice->null_count();
- start_index += max_length;
- valid_count -= count;
-
- if (count > 0) {
- int64_t sum = 0;
- int128_t square_sum = 0;
- const ArrayData& data = *slice->data();
- const CType* values = data.GetValues<CType>(1);
- VisitSetBitRunsVoid(data.buffers[0], data.offset, data.length,
- [&](int64_t pos, int64_t len) {
- for (int64_t i = 0; i < len; ++i) {
- const auto value = values[pos + i];
- sum += value;
- square_sum += static_cast<uint64_t>(value) * value;
- }
- });
-
- const double mean = static_cast<double>(sum) / count;
- // calculate m2 = square_sum - sum * sum / count
- // decompose `sum * sum / count` into integers and fractions
- const int128_t sum_square = static_cast<int128_t>(sum) * sum;
- const int128_t integers = sum_square / count;
- const double fractions = static_cast<double>(sum_square % count) / count;
- const double m2 = static_cast<double>(square_sum - integers) - fractions;
-
- // merge variance
- ThisType state;
- state.count = count;
- state.mean = mean;
- state.m2 = m2;
- this->MergeFrom(state);
- }
- }
- }
-
- // Combine `m2` from two chunks (m2 = n*s2)
- // https://www.emathzone.com/tutorials/basic-statistics/combined-variance.html
+  // int32/16/8: textbook one-pass algorithm with integer arithmetic
+ template <typename T = ArrowType>
+ enable_if_t<is_integer_type<T>::value && (sizeof(CType) <= 4)> Consume(
+ const ArrayType& array) {
+    // max number of elements such that the sum will not overflow int64 (2Gi int32 elements)
+ // for uint32: 0 <= sum < 2^63 (int64 >= 0)
+ // for int32: -2^62 <= sum < 2^62
+ constexpr int64_t max_length = 1ULL << (63 - sizeof(CType) * 8);
+
+ int64_t start_index = 0;
+ int64_t valid_count = array.length() - array.null_count();
+
+ while (valid_count > 0) {
+      // process in chunks so that overflow will never happen
+ const auto slice = array.Slice(start_index, max_length);
+ const int64_t count = slice->length() - slice->null_count();
+ start_index += max_length;
+ valid_count -= count;
+
+ if (count > 0) {
+ int64_t sum = 0;
+ int128_t square_sum = 0;
+ const ArrayData& data = *slice->data();
+ const CType* values = data.GetValues<CType>(1);
+ VisitSetBitRunsVoid(data.buffers[0], data.offset, data.length,
+ [&](int64_t pos, int64_t len) {
+ for (int64_t i = 0; i < len; ++i) {
+ const auto value = values[pos + i];
+ sum += value;
+ square_sum += static_cast<uint64_t>(value) * value;
+ }
+ });
+
+ const double mean = static_cast<double>(sum) / count;
+ // calculate m2 = square_sum - sum * sum / count
+ // decompose `sum * sum / count` into integers and fractions
+ const int128_t sum_square = static_cast<int128_t>(sum) * sum;
+ const int128_t integers = sum_square / count;
+ const double fractions = static_cast<double>(sum_square % count) / count;
+ const double m2 = static_cast<double>(square_sum - integers) - fractions;
+
+ // merge variance
+ ThisType state;
+ state.count = count;
+ state.mean = mean;
+ state.m2 = m2;
+ this->MergeFrom(state);
+ }
+ }
+ }
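+
+  // Example of the bound above: for int32 (4-byte CType), max_length = 1 << 31;
+  // each |value| <= 2^31, so |sum| <= 2^31 * 2^31 = 2^62 < 2^63 and the int64
+  // accumulator cannot overflow, while square_sum stays within int128_t.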
+
+ // Combine `m2` from two chunks (m2 = n*s2)
+ // https://www.emathzone.com/tutorials/basic-statistics/combined-variance.html
void MergeFrom(const ThisType& state) {
if (state.count == 0) {
return;
}
if (this->count == 0) {
this->count = state.count;
- this->mean = state.mean;
+ this->mean = state.mean;
this->m2 = state.m2;
return;
}
- double mean = (this->mean * this->count + state.mean * state.count) /
- (this->count + state.count);
- this->m2 += state.m2 + this->count * (this->mean - mean) * (this->mean - mean) +
- state.count * (state.mean - mean) * (state.mean - mean);
+ double mean = (this->mean * this->count + state.mean * state.count) /
+ (this->count + state.count);
+ this->m2 += state.m2 + this->count * (this->mean - mean) * (this->mean - mean) +
+ state.count * (state.mean - mean) * (state.mean - mean);
this->count += state.count;
- this->mean = mean;
+ this->mean = mean;
}
int64_t count = 0;
- double mean = 0;
- double m2 = 0; // m2 = count*s2 = sum((X-mean)^2)
+ double mean = 0;
+ double m2 = 0; // m2 = count*s2 = sum((X-mean)^2)
};
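
// Worked example of the merge above: chunk A = {1, 2, 3} (count 3, mean 2,
// m2 = 2) and chunk B = {5, 7} (count 2, mean 6, m2 = 2) combine to
// mean = (2*3 + 6*2) / 5 = 3.6 and
// m2 = 2 + 2 + 3*(2 - 3.6)^2 + 2*(6 - 3.6)^2 = 23.2,
// which equals sum((x - 3.6)^2) over the concatenation {1, 2, 3, 5, 7}.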
enum class VarOrStd : bool { Var, Std };
@@ -152,27 +152,27 @@ struct VarStdImpl : public ScalarAggregator {
const VarianceOptions& options, VarOrStd return_type)
: out_type(out_type), options(options), return_type(return_type) {}
- Status Consume(KernelContext*, const ExecBatch& batch) override {
+ Status Consume(KernelContext*, const ExecBatch& batch) override {
ArrayType array(batch[0].array());
this->state.Consume(array);
- return Status::OK();
+ return Status::OK();
}
- Status MergeFrom(KernelContext*, KernelState&& src) override {
+ Status MergeFrom(KernelContext*, KernelState&& src) override {
const auto& other = checked_cast<const ThisType&>(src);
this->state.MergeFrom(other.state);
- return Status::OK();
+ return Status::OK();
}
- Status Finalize(KernelContext*, Datum* out) override {
+ Status Finalize(KernelContext*, Datum* out) override {
if (this->state.count <= options.ddof) {
- out->value = std::make_shared<DoubleScalar>();
+ out->value = std::make_shared<DoubleScalar>();
} else {
double var = this->state.m2 / (this->state.count - options.ddof);
out->value =
- std::make_shared<DoubleScalar>(return_type == VarOrStd::Var ? var : sqrt(var));
+ std::make_shared<DoubleScalar>(return_type == VarOrStd::Var ? var : sqrt(var));
}
- return Status::OK();
+ return Status::OK();
}
std::shared_ptr<DataType> out_type;
@@ -181,34 +181,34 @@ struct VarStdImpl : public ScalarAggregator {
VarOrStd return_type;
};
-struct ScalarVarStdImpl : public ScalarAggregator {
- explicit ScalarVarStdImpl(const VarianceOptions& options)
- : options(options), seen(false) {}
-
- Status Consume(KernelContext*, const ExecBatch& batch) override {
- seen = batch[0].scalar()->is_valid;
- return Status::OK();
- }
-
- Status MergeFrom(KernelContext*, KernelState&& src) override {
- const auto& other = checked_cast<const ScalarVarStdImpl&>(src);
- seen = seen || other.seen;
- return Status::OK();
- }
-
- Status Finalize(KernelContext*, Datum* out) override {
- if (!seen || options.ddof > 0) {
- out->value = std::make_shared<DoubleScalar>();
- } else {
- out->value = std::make_shared<DoubleScalar>(0.0);
- }
- return Status::OK();
- }
-
- const VarianceOptions options;
- bool seen;
-};
-
+struct ScalarVarStdImpl : public ScalarAggregator {
+ explicit ScalarVarStdImpl(const VarianceOptions& options)
+ : options(options), seen(false) {}
+
+ Status Consume(KernelContext*, const ExecBatch& batch) override {
+ seen = batch[0].scalar()->is_valid;
+ return Status::OK();
+ }
+
+ Status MergeFrom(KernelContext*, KernelState&& src) override {
+ const auto& other = checked_cast<const ScalarVarStdImpl&>(src);
+ seen = seen || other.seen;
+ return Status::OK();
+ }
+
+ Status Finalize(KernelContext*, Datum* out) override {
+ if (!seen || options.ddof > 0) {
+ out->value = std::make_shared<DoubleScalar>();
+ } else {
+ out->value = std::make_shared<DoubleScalar>(0.0);
+ }
+ return Status::OK();
+ }
+
+ const VarianceOptions options;
+ bool seen;
+};
+
struct VarStdInitState {
std::unique_ptr<KernelState> state;
KernelContext* ctx;
@@ -240,87 +240,87 @@ struct VarStdInitState {
return Status::OK();
}
- Result<std::unique_ptr<KernelState>> Create() {
- RETURN_NOT_OK(VisitTypeInline(in_type, this));
+ Result<std::unique_ptr<KernelState>> Create() {
+ RETURN_NOT_OK(VisitTypeInline(in_type, this));
return std::move(state);
}
};
-Result<std::unique_ptr<KernelState>> StddevInit(KernelContext* ctx,
- const KernelInitArgs& args) {
+Result<std::unique_ptr<KernelState>> StddevInit(KernelContext* ctx,
+ const KernelInitArgs& args) {
VarStdInitState visitor(
ctx, *args.inputs[0].type, args.kernel->signature->out_type().type(),
static_cast<const VarianceOptions&>(*args.options), VarOrStd::Std);
return visitor.Create();
}
-Result<std::unique_ptr<KernelState>> VarianceInit(KernelContext* ctx,
- const KernelInitArgs& args) {
+Result<std::unique_ptr<KernelState>> VarianceInit(KernelContext* ctx,
+ const KernelInitArgs& args) {
VarStdInitState visitor(
ctx, *args.inputs[0].type, args.kernel->signature->out_type().type(),
static_cast<const VarianceOptions&>(*args.options), VarOrStd::Var);
return visitor.Create();
}
-Result<std::unique_ptr<KernelState>> ScalarVarStdInit(KernelContext* ctx,
- const KernelInitArgs& args) {
- return arrow::internal::make_unique<ScalarVarStdImpl>(
- static_cast<const VarianceOptions&>(*args.options));
-}
-
+Result<std::unique_ptr<KernelState>> ScalarVarStdInit(KernelContext* ctx,
+ const KernelInitArgs& args) {
+ return arrow::internal::make_unique<ScalarVarStdImpl>(
+ static_cast<const VarianceOptions&>(*args.options));
+}
+
void AddVarStdKernels(KernelInit init,
const std::vector<std::shared_ptr<DataType>>& types,
ScalarAggregateFunction* func) {
for (const auto& ty : types) {
auto sig = KernelSignature::Make({InputType::Array(ty)}, float64());
AddAggKernel(std::move(sig), init, func);
-
- sig = KernelSignature::Make({InputType::Scalar(ty)}, float64());
- AddAggKernel(std::move(sig), ScalarVarStdInit, func);
+
+ sig = KernelSignature::Make({InputType::Scalar(ty)}, float64());
+ AddAggKernel(std::move(sig), ScalarVarStdInit, func);
}
}
-const FunctionDoc stddev_doc{
- "Calculate the standard deviation of a numeric array",
- ("The number of degrees of freedom can be controlled using VarianceOptions.\n"
- "By default (`ddof` = 0), the population standard deviation is calculated.\n"
- "Nulls are ignored. If there are not enough non-null values in the array\n"
- "to satisfy `ddof`, null is returned."),
- {"array"},
- "VarianceOptions"};
-
-const FunctionDoc variance_doc{
- "Calculate the variance of a numeric array",
- ("The number of degrees of freedom can be controlled using VarianceOptions.\n"
- "By default (`ddof` = 0), the population variance is calculated.\n"
- "Nulls are ignored. If there are not enough non-null values in the array\n"
- "to satisfy `ddof`, null is returned."),
- {"array"},
- "VarianceOptions"};
-
+const FunctionDoc stddev_doc{
+ "Calculate the standard deviation of a numeric array",
+ ("The number of degrees of freedom can be controlled using VarianceOptions.\n"
+ "By default (`ddof` = 0), the population standard deviation is calculated.\n"
+ "Nulls are ignored. If there are not enough non-null values in the array\n"
+ "to satisfy `ddof`, null is returned."),
+ {"array"},
+ "VarianceOptions"};
+
+const FunctionDoc variance_doc{
+ "Calculate the variance of a numeric array",
+ ("The number of degrees of freedom can be controlled using VarianceOptions.\n"
+ "By default (`ddof` = 0), the population variance is calculated.\n"
+ "Nulls are ignored. If there are not enough non-null values in the array\n"
+ "to satisfy `ddof`, null is returned."),
+ {"array"},
+ "VarianceOptions"};
+
std::shared_ptr<ScalarAggregateFunction> AddStddevAggKernels() {
static auto default_std_options = VarianceOptions::Defaults();
- auto func = std::make_shared<ScalarAggregateFunction>(
- "stddev", Arity::Unary(), &stddev_doc, &default_std_options);
- AddVarStdKernels(StddevInit, NumericTypes(), func.get());
+ auto func = std::make_shared<ScalarAggregateFunction>(
+ "stddev", Arity::Unary(), &stddev_doc, &default_std_options);
+ AddVarStdKernels(StddevInit, NumericTypes(), func.get());
return func;
}
std::shared_ptr<ScalarAggregateFunction> AddVarianceAggKernels() {
static auto default_var_options = VarianceOptions::Defaults();
- auto func = std::make_shared<ScalarAggregateFunction>(
- "variance", Arity::Unary(), &variance_doc, &default_var_options);
- AddVarStdKernels(VarianceInit, NumericTypes(), func.get());
+ auto func = std::make_shared<ScalarAggregateFunction>(
+ "variance", Arity::Unary(), &variance_doc, &default_var_options);
+ AddVarStdKernels(VarianceInit, NumericTypes(), func.get());
return func;
}
-} // namespace
-
-void RegisterScalarAggregateVariance(FunctionRegistry* registry) {
- DCHECK_OK(registry->AddFunction(AddVarianceAggKernels()));
- DCHECK_OK(registry->AddFunction(AddStddevAggKernels()));
-}
-
-} // namespace internal
+} // namespace
+
+void RegisterScalarAggregateVariance(FunctionRegistry* registry) {
+ DCHECK_OK(registry->AddFunction(AddVarianceAggKernels()));
+ DCHECK_OK(registry->AddFunction(AddStddevAggKernels()));
+}
+
+} // namespace internal
} // namespace compute
} // namespace arrow
diff --git a/contrib/libs/apache/arrow/cpp/src/arrow/compute/kernels/codegen_internal.cc b/contrib/libs/apache/arrow/cpp/src/arrow/compute/kernels/codegen_internal.cc
index 7133b175472..bab8e7000cd 100644
--- a/contrib/libs/apache/arrow/cpp/src/arrow/compute/kernels/codegen_internal.cc
+++ b/contrib/libs/apache/arrow/cpp/src/arrow/compute/kernels/codegen_internal.cc
@@ -28,15 +28,15 @@ namespace arrow {
namespace compute {
namespace internal {
-Status ExecFail(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
- return Status::NotImplemented("This kernel is malformed");
+Status ExecFail(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
+ return Status::NotImplemented("This kernel is malformed");
}
ArrayKernelExec MakeFlippedBinaryExec(ArrayKernelExec exec) {
return [exec](KernelContext* ctx, const ExecBatch& batch, Datum* out) {
ExecBatch flipped_batch = batch;
std::swap(flipped_batch.values[0], flipped_batch.values[1]);
- return exec(ctx, flipped_batch, out);
+ return exec(ctx, flipped_batch, out);
};
}
@@ -48,7 +48,7 @@ std::vector<std::shared_ptr<DataType>> g_numeric_types;
std::vector<std::shared_ptr<DataType>> g_base_binary_types;
std::vector<std::shared_ptr<DataType>> g_temporal_types;
std::vector<std::shared_ptr<DataType>> g_primitive_types;
-std::vector<Type::type> g_decimal_type_ids;
+std::vector<Type::type> g_decimal_type_ids;
static std::once_flag codegen_static_initialized;
template <typename T>
@@ -72,9 +72,9 @@ static void InitStaticData() {
// Floating point types
g_floating_types = {float32(), float64()};
- // Decimal types
- g_decimal_type_ids = {Type::DECIMAL128, Type::DECIMAL256};
-
+ // Decimal types
+ g_decimal_type_ids = {Type::DECIMAL128, Type::DECIMAL256};
+
// Numeric types
Extend(g_int_types, &g_numeric_types);
Extend(g_floating_types, &g_numeric_types);
@@ -136,11 +136,11 @@ const std::vector<std::shared_ptr<DataType>>& FloatingPointTypes() {
return g_floating_types;
}
-const std::vector<Type::type>& DecimalTypeIds() {
- std::call_once(codegen_static_initialized, InitStaticData);
- return g_decimal_type_ids;
-}
-
+const std::vector<Type::type>& DecimalTypeIds() {
+ std::call_once(codegen_static_initialized, InitStaticData);
+ return g_decimal_type_ids;
+}
+
const std::vector<TimeUnit::type>& AllTimeUnits() {
static std::vector<TimeUnit::type> units = {TimeUnit::SECOND, TimeUnit::MILLI,
TimeUnit::MICRO, TimeUnit::NANO};
@@ -164,7 +164,7 @@ const std::vector<std::shared_ptr<DataType>>& PrimitiveTypes() {
const std::vector<std::shared_ptr<DataType>>& ExampleParametricTypes() {
static DataTypeVector example_parametric_types = {
- decimal128(12, 2),
+ decimal128(12, 2),
duration(TimeUnit::SECOND),
timestamp(TimeUnit::SECOND),
time32(TimeUnit::SECOND),
@@ -185,153 +185,153 @@ const std::vector<std::shared_ptr<DataType>>& ExampleParametricTypes() {
// work above
Result<ValueDescr> FirstType(KernelContext*, const std::vector<ValueDescr>& descrs) {
- ValueDescr result = descrs.front();
- result.shape = GetBroadcastShape(descrs);
- return result;
+ ValueDescr result = descrs.front();
+ result.shape = GetBroadcastShape(descrs);
+ return result;
+}
+
+void EnsureDictionaryDecoded(std::vector<ValueDescr>* descrs) {
+ for (ValueDescr& descr : *descrs) {
+ if (descr.type->id() == Type::DICTIONARY) {
+ descr.type = checked_cast<const DictionaryType&>(*descr.type).value_type();
+ }
+ }
+}
+
+void ReplaceNullWithOtherType(std::vector<ValueDescr>* descrs) {
+ DCHECK_EQ(descrs->size(), 2);
+
+ if (descrs->at(0).type->id() == Type::NA) {
+ descrs->at(0).type = descrs->at(1).type;
+ return;
+ }
+
+ if (descrs->at(1).type->id() == Type::NA) {
+ descrs->at(1).type = descrs->at(0).type;
+ return;
+ }
+}
+
+void ReplaceTypes(const std::shared_ptr<DataType>& type,
+ std::vector<ValueDescr>* descrs) {
+ for (auto& descr : *descrs) {
+ descr.type = type;
+ }
+}
+
+std::shared_ptr<DataType> CommonNumeric(const std::vector<ValueDescr>& descrs) {
+ return CommonNumeric(descrs.data(), descrs.size());
+}
+
+std::shared_ptr<DataType> CommonNumeric(const ValueDescr* begin, size_t count) {
+ DCHECK_GT(count, 0) << "tried to find CommonNumeric type of an empty set";
+
+ for (size_t i = 0; i < count; i++) {
+ const auto& descr = *(begin + i);
+ auto id = descr.type->id();
+ if (!is_floating(id) && !is_integer(id)) {
+ // a common numeric type is only possible if all types are numeric
+ return nullptr;
+ }
+ if (id == Type::HALF_FLOAT) {
+ // float16 arithmetic is not currently supported
+ return nullptr;
+ }
+ }
+
+ for (size_t i = 0; i < count; i++) {
+ const auto& descr = *(begin + i);
+ if (descr.type->id() == Type::DOUBLE) return float64();
+ }
+
+ for (size_t i = 0; i < count; i++) {
+ const auto& descr = *(begin + i);
+ if (descr.type->id() == Type::FLOAT) return float32();
+ }
+
+ int max_width_signed = 0, max_width_unsigned = 0;
+
+ for (size_t i = 0; i < count; i++) {
+ const auto& descr = *(begin + i);
+ auto id = descr.type->id();
+ auto max_width = &(is_signed_integer(id) ? max_width_signed : max_width_unsigned);
+ *max_width = std::max(bit_width(id), *max_width);
+ }
+
+ if (max_width_signed == 0) {
+ if (max_width_unsigned >= 64) return uint64();
+ if (max_width_unsigned == 32) return uint32();
+ if (max_width_unsigned == 16) return uint16();
+ DCHECK_EQ(max_width_unsigned, 8);
+ return uint8();
+ }
+
+ if (max_width_signed <= max_width_unsigned) {
+ max_width_signed = static_cast<int>(BitUtil::NextPower2(max_width_unsigned + 1));
+ }
+
+ if (max_width_signed >= 64) return int64();
+ if (max_width_signed == 32) return int32();
+ if (max_width_signed == 16) return int16();
+ DCHECK_EQ(max_width_signed, 8);
+ return int8();
+}
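+
+// Examples of the promotion above: (int8, uint8) -> int16 and
+// (int32, uint32) -> int64 (the signed width is bumped past the unsigned
+// one), (uint8, uint16) -> uint16, and (int32, float32) -> float32.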
+
+std::shared_ptr<DataType> CommonTimestamp(const std::vector<ValueDescr>& descrs) {
+ TimeUnit::type finest_unit = TimeUnit::SECOND;
+
+ for (const auto& descr : descrs) {
+ auto id = descr.type->id();
+ // a common timestamp is only possible if all types are timestamp like
+ switch (id) {
+ case Type::DATE32:
+ case Type::DATE64:
+ continue;
+ case Type::TIMESTAMP:
+ finest_unit =
+ std::max(finest_unit, checked_cast<const TimestampType&>(*descr.type).unit());
+ continue;
+ default:
+ return nullptr;
+ }
+ }
+
+ return timestamp(finest_unit);
+}
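+
+// E.g., (date32, timestamp[ms]) -> timestamp[ms], and (timestamp[s],
+// timestamp[us]) -> timestamp[us]: the finest unit wins; any non-temporal
+// input yields nullptr (no common timestamp type).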
+
+std::shared_ptr<DataType> CommonBinary(const std::vector<ValueDescr>& descrs) {
+ bool all_utf8 = true, all_offset32 = true;
+
+ for (const auto& descr : descrs) {
+ auto id = descr.type->id();
+ // a common varbinary type is only possible if all types are binary like
+ switch (id) {
+ case Type::STRING:
+ continue;
+ case Type::BINARY:
+ all_utf8 = false;
+ continue;
+ case Type::LARGE_STRING:
+ all_offset32 = false;
+ continue;
+ case Type::LARGE_BINARY:
+ all_offset32 = false;
+ all_utf8 = false;
+ continue;
+ default:
+ return nullptr;
+ }
+ }
+
+ if (all_utf8) {
+ if (all_offset32) return utf8();
+ return large_utf8();
+ }
+
+ if (all_offset32) return binary();
+ return large_binary();
}
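
// Examples: (utf8, utf8) -> utf8; (utf8, binary) -> binary (utf8 demoted);
// (utf8, large_utf8) -> large_utf8 (32-bit offsets widened); any non-binary
// input yields nullptr.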
-void EnsureDictionaryDecoded(std::vector<ValueDescr>* descrs) {
- for (ValueDescr& descr : *descrs) {
- if (descr.type->id() == Type::DICTIONARY) {
- descr.type = checked_cast<const DictionaryType&>(*descr.type).value_type();
- }
- }
-}
-
-void ReplaceNullWithOtherType(std::vector<ValueDescr>* descrs) {
- DCHECK_EQ(descrs->size(), 2);
-
- if (descrs->at(0).type->id() == Type::NA) {
- descrs->at(0).type = descrs->at(1).type;
- return;
- }
-
- if (descrs->at(1).type->id() == Type::NA) {
- descrs->at(1).type = descrs->at(0).type;
- return;
- }
-}
-
-void ReplaceTypes(const std::shared_ptr<DataType>& type,
- std::vector<ValueDescr>* descrs) {
- for (auto& descr : *descrs) {
- descr.type = type;
- }
-}
-
-std::shared_ptr<DataType> CommonNumeric(const std::vector<ValueDescr>& descrs) {
- return CommonNumeric(descrs.data(), descrs.size());
-}
-
-std::shared_ptr<DataType> CommonNumeric(const ValueDescr* begin, size_t count) {
- DCHECK_GT(count, 0) << "tried to find CommonNumeric type of an empty set";
-
- for (size_t i = 0; i < count; i++) {
- const auto& descr = *(begin + i);
- auto id = descr.type->id();
- if (!is_floating(id) && !is_integer(id)) {
- // a common numeric type is only possible if all types are numeric
- return nullptr;
- }
- if (id == Type::HALF_FLOAT) {
- // float16 arithmetic is not currently supported
- return nullptr;
- }
- }
-
- for (size_t i = 0; i < count; i++) {
- const auto& descr = *(begin + i);
- if (descr.type->id() == Type::DOUBLE) return float64();
- }
-
- for (size_t i = 0; i < count; i++) {
- const auto& descr = *(begin + i);
- if (descr.type->id() == Type::FLOAT) return float32();
- }
-
- int max_width_signed = 0, max_width_unsigned = 0;
-
- for (size_t i = 0; i < count; i++) {
- const auto& descr = *(begin + i);
- auto id = descr.type->id();
- auto max_width = &(is_signed_integer(id) ? max_width_signed : max_width_unsigned);
- *max_width = std::max(bit_width(id), *max_width);
- }
-
- if (max_width_signed == 0) {
- if (max_width_unsigned >= 64) return uint64();
- if (max_width_unsigned == 32) return uint32();
- if (max_width_unsigned == 16) return uint16();
- DCHECK_EQ(max_width_unsigned, 8);
- return uint8();
- }
-
- if (max_width_signed <= max_width_unsigned) {
- max_width_signed = static_cast<int>(BitUtil::NextPower2(max_width_unsigned + 1));
- }
-
- if (max_width_signed >= 64) return int64();
- if (max_width_signed == 32) return int32();
- if (max_width_signed == 16) return int16();
- DCHECK_EQ(max_width_signed, 8);
- return int8();
-}
-
-std::shared_ptr<DataType> CommonTimestamp(const std::vector<ValueDescr>& descrs) {
- TimeUnit::type finest_unit = TimeUnit::SECOND;
-
- for (const auto& descr : descrs) {
- auto id = descr.type->id();
- // a common timestamp is only possible if all types are timestamp like
- switch (id) {
- case Type::DATE32:
- case Type::DATE64:
- continue;
- case Type::TIMESTAMP:
- finest_unit =
- std::max(finest_unit, checked_cast<const TimestampType&>(*descr.type).unit());
- continue;
- default:
- return nullptr;
- }
- }
-
- return timestamp(finest_unit);
-}
-
-std::shared_ptr<DataType> CommonBinary(const std::vector<ValueDescr>& descrs) {
- bool all_utf8 = true, all_offset32 = true;
-
- for (const auto& descr : descrs) {
- auto id = descr.type->id();
- // a common varbinary type is only possible if all types are binary like
- switch (id) {
- case Type::STRING:
- continue;
- case Type::BINARY:
- all_utf8 = false;
- continue;
- case Type::LARGE_STRING:
- all_offset32 = false;
- continue;
- case Type::LARGE_BINARY:
- all_offset32 = false;
- all_utf8 = false;
- continue;
- default:
- return nullptr;
- }
- }
-
- if (all_utf8) {
- if (all_offset32) return utf8();
- return large_utf8();
- }
-
- if (all_offset32) return binary();
- return large_binary();
-}
-
} // namespace internal
} // namespace compute
} // namespace arrow
diff --git a/contrib/libs/apache/arrow/cpp/src/arrow/compute/kernels/codegen_internal.h b/contrib/libs/apache/arrow/cpp/src/arrow/compute/kernels/codegen_internal.h
index c1950a2b11a..cb9b13bb3d7 100644
--- a/contrib/libs/apache/arrow/cpp/src/arrow/compute/kernels/codegen_internal.h
+++ b/contrib/libs/apache/arrow/cpp/src/arrow/compute/kernels/codegen_internal.h
@@ -18,7 +18,7 @@
#pragma once
#include <cstdint>
-#include <cstring>
+#include <cstring>
#include <memory>
#include <string>
#include <utility>
@@ -71,14 +71,14 @@ template <typename OptionsType>
struct OptionsWrapper : public KernelState {
explicit OptionsWrapper(OptionsType options) : options(std::move(options)) {}
- static Result<std::unique_ptr<KernelState>> Init(KernelContext* ctx,
- const KernelInitArgs& args) {
+ static Result<std::unique_ptr<KernelState>> Init(KernelContext* ctx,
+ const KernelInitArgs& args) {
if (auto options = static_cast<const OptionsType*>(args.options)) {
return ::arrow::internal::make_unique<OptionsWrapper>(*options);
}
- return Status::Invalid(
- "Attempted to initialize KernelState from null FunctionOptions");
+ return Status::Invalid(
+ "Attempted to initialize KernelState from null FunctionOptions");
}
static const OptionsType& Get(const KernelState& state) {
@@ -90,34 +90,34 @@ struct OptionsWrapper : public KernelState {
OptionsType options;
};
-/// KernelState adapter for when the state is an instance constructed with the
-/// KernelContext and the FunctionOptions as argument
-template <typename StateType, typename OptionsType>
-struct KernelStateFromFunctionOptions : public KernelState {
- explicit KernelStateFromFunctionOptions(KernelContext* ctx, OptionsType state)
- : state(StateType(ctx, std::move(state))) {}
-
- static Result<std::unique_ptr<KernelState>> Init(KernelContext* ctx,
- const KernelInitArgs& args) {
- if (auto options = static_cast<const OptionsType*>(args.options)) {
- return ::arrow::internal::make_unique<KernelStateFromFunctionOptions>(ctx,
- *options);
- }
-
- return Status::Invalid(
- "Attempted to initialize KernelState from null FunctionOptions");
- }
-
- static const StateType& Get(const KernelState& state) {
- return ::arrow::internal::checked_cast<const KernelStateFromFunctionOptions&>(state)
- .state;
- }
-
- static const StateType& Get(KernelContext* ctx) { return Get(*ctx->state()); }
-
- StateType state;
-};
-
+/// KernelState adapter for when the state is an instance constructed with the
+/// KernelContext and the FunctionOptions as argument
+template <typename StateType, typename OptionsType>
+struct KernelStateFromFunctionOptions : public KernelState {
+ explicit KernelStateFromFunctionOptions(KernelContext* ctx, OptionsType state)
+ : state(StateType(ctx, std::move(state))) {}
+
+ static Result<std::unique_ptr<KernelState>> Init(KernelContext* ctx,
+ const KernelInitArgs& args) {
+ if (auto options = static_cast<const OptionsType*>(args.options)) {
+ return ::arrow::internal::make_unique<KernelStateFromFunctionOptions>(ctx,
+ *options);
+ }
+
+ return Status::Invalid(
+ "Attempted to initialize KernelState from null FunctionOptions");
+ }
+
+ static const StateType& Get(const KernelState& state) {
+ return ::arrow::internal::checked_cast<const KernelStateFromFunctionOptions&>(state)
+ .state;
+ }
+
+ static const StateType& Get(KernelContext* ctx) { return Get(*ctx->state()); }
+
+ StateType state;
+};
+
// ----------------------------------------------------------------------
// Input and output value type definitions
@@ -149,22 +149,22 @@ struct GetViewType<Decimal128Type> {
static T LogicalValue(PhysicalType value) {
return Decimal128(reinterpret_cast<const uint8_t*>(value.data()));
}
-
- static T LogicalValue(T value) { return value; }
+
+ static T LogicalValue(T value) { return value; }
+};
+
+template <>
+struct GetViewType<Decimal256Type> {
+ using T = Decimal256;
+ using PhysicalType = util::string_view;
+
+ static T LogicalValue(PhysicalType value) {
+ return Decimal256(reinterpret_cast<const uint8_t*>(value.data()));
+ }
+
+ static T LogicalValue(T value) { return value; }
};
-template <>
-struct GetViewType<Decimal256Type> {
- using T = Decimal256;
- using PhysicalType = util::string_view;
-
- static T LogicalValue(PhysicalType value) {
- return Decimal256(reinterpret_cast<const uint8_t*>(value.data()));
- }
-
- static T LogicalValue(T value) { return value; }
-};
-
template <typename Type, typename Enable = void>
struct GetOutputType;
@@ -183,11 +183,11 @@ struct GetOutputType<Decimal128Type> {
using T = Decimal128;
};
-template <>
-struct GetOutputType<Decimal256Type> {
- using T = Decimal256;
-};
-
+template <>
+struct GetOutputType<Decimal256Type> {
+ using T = Decimal256;
+};
+
// ----------------------------------------------------------------------
// Iteration / value access utilities
@@ -247,18 +247,18 @@ struct ArrayIterator<Type, enable_if_base_binary<Type>> {
}
};
-template <typename Type>
-struct ArrayIterator<Type, enable_if_decimal<Type>> {
- using T = typename TypeTraits<Type>::ScalarType::ValueType;
- using endian_agnostic = std::array<uint8_t, sizeof(T)>;
- const endian_agnostic* values;
-
- explicit ArrayIterator(const ArrayData& data)
- : values(data.GetValues<endian_agnostic>(1)) {}
-
- T operator()() { return T{values++->data()}; }
-};
-
+template <typename Type>
+struct ArrayIterator<Type, enable_if_decimal<Type>> {
+ using T = typename TypeTraits<Type>::ScalarType::ValueType;
+ using endian_agnostic = std::array<uint8_t, sizeof(T)>;
+ const endian_agnostic* values;
+
+ explicit ArrayIterator(const ArrayData& data)
+ : values(data.GetValues<endian_agnostic>(1)) {}
+
+ T operator()() { return T{values++->data()}; }
+};
+
// Iterator over various output array types, taking a GetOutputType<Type>
template <typename Type, typename Enable = void>
@@ -276,26 +276,26 @@ struct OutputArrayWriter<Type, enable_if_has_c_type_not_boolean<Type>> {
// Note that this doesn't write the null bitmap, which should be consistent
// with Write / WriteNull calls
void WriteNull() { *values++ = T{}; }
-
- void WriteAllNull(int64_t length) { std::memset(values, 0, sizeof(T) * length); }
+
+ void WriteAllNull(int64_t length) { std::memset(values, 0, sizeof(T) * length); }
+};
+
+template <typename Type>
+struct OutputArrayWriter<Type, enable_if_decimal<Type>> {
+ using T = typename TypeTraits<Type>::ScalarType::ValueType;
+ using endian_agnostic = std::array<uint8_t, sizeof(T)>;
+ endian_agnostic* values;
+
+ explicit OutputArrayWriter(ArrayData* data)
+ : values(data->GetMutableValues<endian_agnostic>(1)) {}
+
+ void Write(T value) { value.ToBytes(values++->data()); }
+
+ void WriteNull() { T{}.ToBytes(values++->data()); }
+
+ void WriteAllNull(int64_t length) { std::memset(values, 0, sizeof(T) * length); }
};
-template <typename Type>
-struct OutputArrayWriter<Type, enable_if_decimal<Type>> {
- using T = typename TypeTraits<Type>::ScalarType::ValueType;
- using endian_agnostic = std::array<uint8_t, sizeof(T)>;
- endian_agnostic* values;
-
- explicit OutputArrayWriter(ArrayData* data)
- : values(data->GetMutableValues<endian_agnostic>(1)) {}
-
- void Write(T value) { value.ToBytes(values++->data()); }
-
- void WriteNull() { T{}.ToBytes(values++->data()); }
-
- void WriteAllNull(int64_t length) { std::memset(values, 0, sizeof(T) * length); }
-};
-
// (Un)box Scalar to / from C++ value
template <typename Type, typename Enable = void>
@@ -311,9 +311,9 @@ struct UnboxScalar<Type, enable_if_has_c_type<Type>> {
};
template <typename Type>
-struct UnboxScalar<Type, enable_if_has_string_view<Type>> {
+struct UnboxScalar<Type, enable_if_has_string_view<Type>> {
static util::string_view Unbox(const Scalar& val) {
- if (!val.is_valid) return util::string_view();
+ if (!val.is_valid) return util::string_view();
return util::string_view(*checked_cast<const BaseBinaryScalar&>(val).value);
}
};
@@ -325,25 +325,25 @@ struct UnboxScalar<Decimal128Type> {
}
};
-template <>
-struct UnboxScalar<Decimal256Type> {
- static Decimal256 Unbox(const Scalar& val) {
- return checked_cast<const Decimal256Scalar&>(val).value;
- }
-};
-
+template <>
+struct UnboxScalar<Decimal256Type> {
+ static Decimal256 Unbox(const Scalar& val) {
+ return checked_cast<const Decimal256Scalar&>(val).value;
+ }
+};
+
template <typename Type, typename Enable = void>
struct BoxScalar;
template <typename Type>
struct BoxScalar<Type, enable_if_has_c_type<Type>> {
using T = typename GetOutputType<Type>::T;
- static void Box(T val, Scalar* out) {
- // Enables BoxScalar<Int64Type> to work on a (for example) Time64Scalar
- T* mutable_data = reinterpret_cast<T*>(
- checked_cast<::arrow::internal::PrimitiveScalarBase*>(out)->mutable_data());
- *mutable_data = val;
- }
+ static void Box(T val, Scalar* out) {
+ // Enables BoxScalar<Int64Type> to work on a (for example) Time64Scalar
+ T* mutable_data = reinterpret_cast<T*>(
+ checked_cast<::arrow::internal::PrimitiveScalarBase*>(out)->mutable_data());
+ *mutable_data = val;
+ }
};
template <typename Type>
@@ -362,20 +362,20 @@ struct BoxScalar<Decimal128Type> {
static void Box(T val, Scalar* out) { checked_cast<ScalarType*>(out)->value = val; }
};
-template <>
-struct BoxScalar<Decimal256Type> {
- using T = Decimal256;
- using ScalarType = Decimal256Scalar;
- static void Box(T val, Scalar* out) { checked_cast<ScalarType*>(out)->value = val; }
-};
-
+template <>
+struct BoxScalar<Decimal256Type> {
+ using T = Decimal256;
+ using ScalarType = Decimal256Scalar;
+ static void Box(T val, Scalar* out) { checked_cast<ScalarType*>(out)->value = val; }
+};
+
// A VisitArrayDataInline variant that calls its visitor function with logical
// values, such as Decimal128 rather than util::string_view.
template <typename T, typename VisitFunc, typename NullFunc>
-static typename arrow::internal::call_traits::enable_if_return<VisitFunc, void>::type
-VisitArrayValuesInline(const ArrayData& arr, VisitFunc&& valid_func,
- NullFunc&& null_func) {
+static typename arrow::internal::call_traits::enable_if_return<VisitFunc, void>::type
+VisitArrayValuesInline(const ArrayData& arr, VisitFunc&& valid_func,
+ NullFunc&& null_func) {
VisitArrayDataInline<T>(
arr,
[&](typename GetViewType<T>::PhysicalType v) {
@@ -384,18 +384,18 @@ VisitArrayValuesInline(const ArrayData& arr, VisitFunc&& valid_func,
std::forward<NullFunc>(null_func));
}
-template <typename T, typename VisitFunc, typename NullFunc>
-static typename arrow::internal::call_traits::enable_if_return<VisitFunc, Status>::type
-VisitArrayValuesInline(const ArrayData& arr, VisitFunc&& valid_func,
- NullFunc&& null_func) {
- return VisitArrayDataInline<T>(
- arr,
- [&](typename GetViewType<T>::PhysicalType v) {
- return valid_func(GetViewType<T>::LogicalValue(std::move(v)));
- },
- std::forward<NullFunc>(null_func));
-}
-
+template <typename T, typename VisitFunc, typename NullFunc>
+static typename arrow::internal::call_traits::enable_if_return<VisitFunc, Status>::type
+VisitArrayValuesInline(const ArrayData& arr, VisitFunc&& valid_func,
+ NullFunc&& null_func) {
+ return VisitArrayDataInline<T>(
+ arr,
+ [&](typename GetViewType<T>::PhysicalType v) {
+ return valid_func(GetViewType<T>::LogicalValue(std::move(v)));
+ },
+ std::forward<NullFunc>(null_func));
+}
+
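// Editorial usage sketch for the Status-returning overload above: the visitor
// sees logical values (e.g. Decimal128) rather than the physical
// util::string_view representation. `arr` is assumed to hold Decimal128 data.
//
//   Decimal128 sum(0);
//   RETURN_NOT_OK(VisitArrayValuesInline<Decimal128Type>(
//       arr, [&](Decimal128 v) { sum += v; return Status::OK(); },
//       [] { return Status::OK(); }));  // nulls contribute nothing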
// Like VisitArrayValuesInline, but for binary functions.
template <typename Arg0Type, typename Arg1Type, typename VisitFunc, typename NullFunc>
@@ -425,7 +425,7 @@ Result<ValueDescr> FirstType(KernelContext*, const std::vector<ValueDescr>& desc
// ----------------------------------------------------------------------
// Generate an array kernel given template classes
-Status ExecFail(KernelContext* ctx, const ExecBatch& batch, Datum* out);
+Status ExecFail(KernelContext* ctx, const ExecBatch& batch, Datum* out);
ArrayKernelExec MakeFlippedBinaryExec(ArrayKernelExec exec);
@@ -439,7 +439,7 @@ const std::vector<std::shared_ptr<DataType>>& SignedIntTypes();
const std::vector<std::shared_ptr<DataType>>& UnsignedIntTypes();
const std::vector<std::shared_ptr<DataType>>& IntTypes();
const std::vector<std::shared_ptr<DataType>>& FloatingPointTypes();
-const std::vector<Type::type>& DecimalTypeIds();
+const std::vector<Type::type>& DecimalTypeIds();
ARROW_EXPORT
const std::vector<TimeUnit::type>& AllTimeUnits();
@@ -483,16 +483,16 @@ namespace applicator {
//
// Operator must implement
//
-// static Status Call(KernelContext*, const ArrayData& in, ArrayData* out)
-// static Status Call(KernelContext*, const Scalar& in, Scalar* out)
+// static Status Call(KernelContext*, const ArrayData& in, ArrayData* out)
+// static Status Call(KernelContext*, const Scalar& in, Scalar* out)
template <typename Operator>
-static Status SimpleUnary(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
+static Status SimpleUnary(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
if (batch[0].kind() == Datum::SCALAR) {
- return Operator::Call(ctx, *batch[0].scalar(), out->scalar().get());
+ return Operator::Call(ctx, *batch[0].scalar(), out->scalar().get());
} else if (batch.length > 0) {
- return Operator::Call(ctx, *batch[0].array(), out->mutable_array());
+ return Operator::Call(ctx, *batch[0].array(), out->mutable_array());
}
- return Status::OK();
+ return Status::OK();
}
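// Editorial sketch of an Operator satisfying the SimpleUnary contract above;
// the name IdentityOp is hypothetical. Both overloads are required because the
// batch may carry either an array or a scalar.
//
//   struct IdentityOp {
//     static Status Call(KernelContext*, const Scalar& in, Scalar* out) {
//       /* fill *out from in */ return Status::OK();
//     }
//     static Status Call(KernelContext*, const ArrayData& in, ArrayData* out) {
//       /* fill *out from in */ return Status::OK();
//     }
//   };
//   // used as: SimpleUnary<IdentityOp>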
// Generate an ArrayKernelExec given a functor that handles all of its own
@@ -500,34 +500,34 @@ static Status SimpleUnary(KernelContext* ctx, const ExecBatch& batch, Datum* out
//
// Operator must implement
//
-// static Status Call(KernelContext*, const ArrayData& arg0, const ArrayData& arg1,
-// ArrayData* out)
-// static Status Call(KernelContext*, const ArrayData& arg0, const Scalar& arg1,
-// ArrayData* out)
-// static Status Call(KernelContext*, const Scalar& arg0, const ArrayData& arg1,
-// ArrayData* out)
-// static Status Call(KernelContext*, const Scalar& arg0, const Scalar& arg1,
-// Scalar* out)
+// static Status Call(KernelContext*, const ArrayData& arg0, const ArrayData& arg1,
+// ArrayData* out)
+// static Status Call(KernelContext*, const ArrayData& arg0, const Scalar& arg1,
+// ArrayData* out)
+// static Status Call(KernelContext*, const Scalar& arg0, const ArrayData& arg1,
+// ArrayData* out)
+// static Status Call(KernelContext*, const Scalar& arg0, const Scalar& arg1,
+// Scalar* out)
template <typename Operator>
-static Status SimpleBinary(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
- if (batch.length == 0) return Status::OK();
-
- if (batch[0].kind() == Datum::ARRAY) {
- if (batch[1].kind() == Datum::ARRAY) {
- return Operator::Call(ctx, *batch[0].array(), *batch[1].array(),
- out->mutable_array());
- } else {
- return Operator::Call(ctx, *batch[0].array(), *batch[1].scalar(),
- out->mutable_array());
- }
- } else {
- if (batch[1].kind() == Datum::ARRAY) {
- return Operator::Call(ctx, *batch[0].scalar(), *batch[1].array(),
- out->mutable_array());
- } else {
- return Operator::Call(ctx, *batch[0].scalar(), *batch[1].scalar(),
- out->scalar().get());
- }
+static Status SimpleBinary(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
+ if (batch.length == 0) return Status::OK();
+
+ if (batch[0].kind() == Datum::ARRAY) {
+ if (batch[1].kind() == Datum::ARRAY) {
+ return Operator::Call(ctx, *batch[0].array(), *batch[1].array(),
+ out->mutable_array());
+ } else {
+ return Operator::Call(ctx, *batch[0].array(), *batch[1].scalar(),
+ out->mutable_array());
+ }
+ } else {
+ if (batch[1].kind() == Datum::ARRAY) {
+ return Operator::Call(ctx, *batch[0].scalar(), *batch[1].array(),
+ out->mutable_array());
+ } else {
+ return Operator::Call(ctx, *batch[0].scalar(), *batch[1].scalar(),
+ out->scalar().get());
+ }
}
}
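// Editorial note: the dispatch above maps the Datum kinds of the two
// arguments onto the four Operator::Call overloads listed in the contract:
//
//   (ARRAY,  ARRAY)  -> Call(ctx, array0,  array1,  out->mutable_array())
//   (ARRAY,  SCALAR) -> Call(ctx, array0,  scalar1, out->mutable_array())
//   (SCALAR, ARRAY)  -> Call(ctx, scalar0, array1,  out->mutable_array())
//   (SCALAR, SCALAR) -> Call(ctx, scalar0, scalar1, out->scalar().get())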
@@ -541,53 +541,53 @@ struct OutputAdapter;
template <typename Type>
struct OutputAdapter<Type, enable_if_boolean<Type>> {
template <typename Generator>
- static Status Write(KernelContext*, Datum* out, Generator&& generator) {
+ static Status Write(KernelContext*, Datum* out, Generator&& generator) {
ArrayData* out_arr = out->mutable_array();
auto out_bitmap = out_arr->buffers[1]->mutable_data();
GenerateBitsUnrolled(out_bitmap, out_arr->offset, out_arr->length,
std::forward<Generator>(generator));
- return Status::OK();
+ return Status::OK();
}
};
template <typename Type>
struct OutputAdapter<Type, enable_if_has_c_type_not_boolean<Type>> {
template <typename Generator>
- static Status Write(KernelContext*, Datum* out, Generator&& generator) {
+ static Status Write(KernelContext*, Datum* out, Generator&& generator) {
ArrayData* out_arr = out->mutable_array();
auto out_data = out_arr->GetMutableValues<typename Type::c_type>(1);
// TODO: Is this as fast as a more explicitly inlined function?
for (int64_t i = 0; i < out_arr->length; ++i) {
*out_data++ = generator();
}
- return Status::OK();
+ return Status::OK();
}
};
template <typename Type>
struct OutputAdapter<Type, enable_if_base_binary<Type>> {
template <typename Generator>
- static Status Write(KernelContext* ctx, Datum* out, Generator&& generator) {
- return Status::NotImplemented("NYI");
+ static Status Write(KernelContext* ctx, Datum* out, Generator&& generator) {
+ return Status::NotImplemented("NYI");
+ }
+};
+
+template <typename Type>
+struct OutputAdapter<Type, enable_if_decimal<Type>> {
+ using T = typename TypeTraits<Type>::ScalarType::ValueType;
+ using endian_agnostic = std::array<uint8_t, sizeof(T)>;
+
+ template <typename Generator>
+ static Status Write(KernelContext*, Datum* out, Generator&& generator) {
+ ArrayData* out_arr = out->mutable_array();
+ auto out_data = out_arr->GetMutableValues<endian_agnostic>(1);
+ for (int64_t i = 0; i < out_arr->length; ++i) {
+ generator().ToBytes(out_data++->data());
+ }
+ return Status::OK();
}
};
-template <typename Type>
-struct OutputAdapter<Type, enable_if_decimal<Type>> {
- using T = typename TypeTraits<Type>::ScalarType::ValueType;
- using endian_agnostic = std::array<uint8_t, sizeof(T)>;
-
- template <typename Generator>
- static Status Write(KernelContext*, Datum* out, Generator&& generator) {
- ArrayData* out_arr = out->mutable_array();
- auto out_data = out_arr->GetMutableValues<endian_agnostic>(1);
- for (int64_t i = 0; i < out_arr->length; ++i) {
- generator().ToBytes(out_data++->data());
- }
- return Status::OK();
- }
-};
-
// A kernel exec generator for unary functions that addresses both array and
// scalar inputs and dispatches input iteration and output writing to other
// templates
@@ -600,10 +600,10 @@ struct OutputAdapter<Type, enable_if_decimal<Type>> {
//
// struct Op {
// template <typename OutValue, typename Arg0Value>
-// static OutValue Call(KernelContext* ctx, Arg0Value val, Status* st) {
+// static OutValue Call(KernelContext* ctx, Arg0Value val, Status* st) {
// // implementation
-// // NOTE: "status" should only be populated with errors,
-// // leave it unmodified to indicate Status::OK()
+// // NOTE: "status" should only be populated with errors,
+// // leave it unmodified to indicate Status::OK()
// }
// };
template <typename OutType, typename Arg0Type, typename Op>
@@ -611,34 +611,34 @@ struct ScalarUnary {
using OutValue = typename GetOutputType<OutType>::T;
using Arg0Value = typename GetViewType<Arg0Type>::T;
- static Status ExecArray(KernelContext* ctx, const ArrayData& arg0, Datum* out) {
- Status st = Status::OK();
+ static Status ExecArray(KernelContext* ctx, const ArrayData& arg0, Datum* out) {
+ Status st = Status::OK();
ArrayIterator<Arg0Type> arg0_it(arg0);
- RETURN_NOT_OK(OutputAdapter<OutType>::Write(ctx, out, [&]() -> OutValue {
- return Op::template Call<OutValue, Arg0Value>(ctx, arg0_it(), &st);
- }));
- return st;
+ RETURN_NOT_OK(OutputAdapter<OutType>::Write(ctx, out, [&]() -> OutValue {
+ return Op::template Call<OutValue, Arg0Value>(ctx, arg0_it(), &st);
+ }));
+ return st;
}
- static Status ExecScalar(KernelContext* ctx, const Scalar& arg0, Datum* out) {
- Status st = Status::OK();
- Scalar* out_scalar = out->scalar().get();
+ static Status ExecScalar(KernelContext* ctx, const Scalar& arg0, Datum* out) {
+ Status st = Status::OK();
+ Scalar* out_scalar = out->scalar().get();
if (arg0.is_valid) {
Arg0Value arg0_val = UnboxScalar<Arg0Type>::Unbox(arg0);
- out_scalar->is_valid = true;
- BoxScalar<OutType>::Box(Op::template Call<OutValue, Arg0Value>(ctx, arg0_val, &st),
- out_scalar);
+ out_scalar->is_valid = true;
+ BoxScalar<OutType>::Box(Op::template Call<OutValue, Arg0Value>(ctx, arg0_val, &st),
+ out_scalar);
} else {
- out_scalar->is_valid = false;
+ out_scalar->is_valid = false;
}
- return st;
+ return st;
}
- static Status Exec(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
+ static Status Exec(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
if (batch[0].kind() == Datum::ARRAY) {
- return ExecArray(ctx, *batch[0].array(), out);
+ return ExecArray(ctx, *batch[0].array(), out);
} else {
- return ExecScalar(ctx, *batch[0].scalar(), out);
+ return ExecScalar(ctx, *batch[0].scalar(), out);
}
}
};
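// Editorial sketch of an Op usable with ScalarUnary; NegateOp is a
// hypothetical name. Errors are reported by assigning to *st; leaving it
// untouched signals success, per the contract comment above.
//
//   struct NegateOp {
//     template <typename OutValue, typename Arg0Value>
//     static OutValue Call(KernelContext*, Arg0Value val, Status*) {
//       return static_cast<OutValue>(-val);
//     }
//   };
//   // e.g. ScalarUnary<Int64Type, Int64Type, NegateOp>::Exec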
@@ -658,69 +658,69 @@ struct ScalarUnaryNotNullStateful {
template <typename Type, typename Enable = void>
struct ArrayExec {
- static Status Exec(const ThisType& functor, KernelContext* ctx,
- const ExecBatch& batch, Datum* out) {
+ static Status Exec(const ThisType& functor, KernelContext* ctx,
+ const ExecBatch& batch, Datum* out) {
ARROW_LOG(FATAL) << "Missing ArrayExec specialization for output type "
<< out->type();
- return Status::NotImplemented("NYI");
+ return Status::NotImplemented("NYI");
}
};
template <typename Type>
struct ArrayExec<
Type, enable_if_t<has_c_type<Type>::value && !is_boolean_type<Type>::value>> {
- static Status Exec(const ThisType& functor, KernelContext* ctx, const ArrayData& arg0,
- Datum* out) {
- Status st = Status::OK();
+ static Status Exec(const ThisType& functor, KernelContext* ctx, const ArrayData& arg0,
+ Datum* out) {
+ Status st = Status::OK();
ArrayData* out_arr = out->mutable_array();
auto out_data = out_arr->GetMutableValues<OutValue>(1);
VisitArrayValuesInline<Arg0Type>(
arg0,
[&](Arg0Value v) {
- *out_data++ = functor.op.template Call<OutValue, Arg0Value>(ctx, v, &st);
+ *out_data++ = functor.op.template Call<OutValue, Arg0Value>(ctx, v, &st);
},
[&]() {
// null
- *out_data++ = OutValue{};
+ *out_data++ = OutValue{};
});
- return st;
+ return st;
}
};
template <typename Type>
struct ArrayExec<Type, enable_if_base_binary<Type>> {
- static Status Exec(const ThisType& functor, KernelContext* ctx, const ArrayData& arg0,
- Datum* out) {
+ static Status Exec(const ThisType& functor, KernelContext* ctx, const ArrayData& arg0,
+ Datum* out) {
// NOTE: This code is not currently used by any kernels and has
// suboptimal performance because it's recomputing the validity bitmap
// that is already computed by the kernel execution layer. Consider
// writing a lower-level "output adapter" for base binary types.
typename TypeTraits<Type>::BuilderType builder;
- Status st = Status::OK();
- RETURN_NOT_OK(VisitArrayValuesInline<Arg0Type>(
- arg0, [&](Arg0Value v) { return builder.Append(functor.op.Call(ctx, v, &st)); },
- [&]() { return builder.AppendNull(); }));
- if (st.ok()) {
+ Status st = Status::OK();
+ RETURN_NOT_OK(VisitArrayValuesInline<Arg0Type>(
+ arg0, [&](Arg0Value v) { return builder.Append(functor.op.Call(ctx, v, &st)); },
+ [&]() { return builder.AppendNull(); }));
+ if (st.ok()) {
std::shared_ptr<ArrayData> result;
- RETURN_NOT_OK(builder.FinishInternal(&result));
+ RETURN_NOT_OK(builder.FinishInternal(&result));
out->value = std::move(result);
}
- return st;
+ return st;
}
};
template <typename Type>
struct ArrayExec<Type, enable_if_t<is_boolean_type<Type>::value>> {
- static Status Exec(const ThisType& functor, KernelContext* ctx, const ArrayData& arg0,
- Datum* out) {
- Status st = Status::OK();
+ static Status Exec(const ThisType& functor, KernelContext* ctx, const ArrayData& arg0,
+ Datum* out) {
+ Status st = Status::OK();
ArrayData* out_arr = out->mutable_array();
FirstTimeBitmapWriter out_writer(out_arr->buffers[1]->mutable_data(),
out_arr->offset, out_arr->length);
VisitArrayValuesInline<Arg0Type>(
arg0,
[&](Arg0Value v) {
- if (functor.op.template Call<OutValue, Arg0Value>(ctx, v, &st)) {
+ if (functor.op.template Call<OutValue, Arg0Value>(ctx, v, &st)) {
out_writer.Set();
}
out_writer.Next();
@@ -731,49 +731,49 @@ struct ScalarUnaryNotNullStateful {
out_writer.Next();
});
out_writer.Finish();
- return st;
+ return st;
}
};
template <typename Type>
- struct ArrayExec<Type, enable_if_decimal<Type>> {
- static Status Exec(const ThisType& functor, KernelContext* ctx, const ArrayData& arg0,
- Datum* out) {
- Status st = Status::OK();
+ struct ArrayExec<Type, enable_if_decimal<Type>> {
+ static Status Exec(const ThisType& functor, KernelContext* ctx, const ArrayData& arg0,
+ Datum* out) {
+ Status st = Status::OK();
ArrayData* out_arr = out->mutable_array();
- // Decimal128 data buffers are not safely reinterpret_cast-able on big-endian
- using endian_agnostic =
- std::array<uint8_t, sizeof(typename TypeTraits<Type>::ScalarType::ValueType)>;
- auto out_data = out_arr->GetMutableValues<endian_agnostic>(1);
+ // Decimal128 data buffers are not safely reinterpret_cast-able on big-endian
+ using endian_agnostic =
+ std::array<uint8_t, sizeof(typename TypeTraits<Type>::ScalarType::ValueType)>;
+ auto out_data = out_arr->GetMutableValues<endian_agnostic>(1);
VisitArrayValuesInline<Arg0Type>(
arg0,
[&](Arg0Value v) {
- functor.op.template Call<OutValue, Arg0Value>(ctx, v, &st)
- .ToBytes(out_data++->data());
+ functor.op.template Call<OutValue, Arg0Value>(ctx, v, &st)
+ .ToBytes(out_data++->data());
},
- [&]() {
- // null
- std::memset(out_data, 0, sizeof(*out_data));
- ++out_data;
- });
- return st;
+ [&]() {
+ // null
+ std::memset(out_data, 0, sizeof(*out_data));
+ ++out_data;
+ });
+ return st;
}
};
- Status Scalar(KernelContext* ctx, const Scalar& arg0, Datum* out) {
- Status st = Status::OK();
+ Status Scalar(KernelContext* ctx, const Scalar& arg0, Datum* out) {
+ Status st = Status::OK();
if (arg0.is_valid) {
Arg0Value arg0_val = UnboxScalar<Arg0Type>::Unbox(arg0);
- BoxScalar<OutType>::Box(
- this->op.template Call<OutValue, Arg0Value>(ctx, arg0_val, &st),
- out->scalar().get());
+ BoxScalar<OutType>::Box(
+ this->op.template Call<OutValue, Arg0Value>(ctx, arg0_val, &st),
+ out->scalar().get());
}
- return st;
+ return st;
}
- Status Exec(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
+ Status Exec(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
if (batch[0].kind() == Datum::ARRAY) {
- return ArrayExec<OutType>::Exec(*this, ctx, *batch[0].array(), out);
+ return ArrayExec<OutType>::Exec(*this, ctx, *batch[0].array(), out);
} else {
return Scalar(ctx, *batch[0].scalar(), out);
}
@@ -788,7 +788,7 @@ struct ScalarUnaryNotNull {
using OutValue = typename GetOutputType<OutType>::T;
using Arg0Value = typename GetViewType<Arg0Type>::T;
- static Status Exec(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
+ static Status Exec(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
// Seed kernel with dummy state
ScalarUnaryNotNullStateful<OutType, Arg0Type, Op> kernel({});
return kernel.Exec(ctx, batch, out);
@@ -807,11 +807,11 @@ struct ScalarUnaryNotNull {
//
// struct Op {
// template <typename OutValue, typename Arg0Value, typename Arg1Value>
-// static OutValue Call(KernelContext* ctx, Arg0Value arg0, Arg1Value arg1, Status* st)
-// {
+// static OutValue Call(KernelContext* ctx, Arg0Value arg0, Arg1Value arg1, Status* st)
+// {
// // implementation
-// // NOTE: "status" should only be populated with errors,
-// // leave it unmodified to indicate Status::OK()
+// // NOTE: "status" should only be populated with errors,
+// // leave it unmodified to indicate Status::OK()
// }
// };
template <typename OutType, typename Arg0Type, typename Arg1Type, typename Op>
@@ -820,56 +820,56 @@ struct ScalarBinary {
using Arg0Value = typename GetViewType<Arg0Type>::T;
using Arg1Value = typename GetViewType<Arg1Type>::T;
- static Status ArrayArray(KernelContext* ctx, const ArrayData& arg0,
- const ArrayData& arg1, Datum* out) {
- Status st = Status::OK();
+ static Status ArrayArray(KernelContext* ctx, const ArrayData& arg0,
+ const ArrayData& arg1, Datum* out) {
+ Status st = Status::OK();
ArrayIterator<Arg0Type> arg0_it(arg0);
ArrayIterator<Arg1Type> arg1_it(arg1);
- RETURN_NOT_OK(OutputAdapter<OutType>::Write(ctx, out, [&]() -> OutValue {
- return Op::template Call<OutValue, Arg0Value, Arg1Value>(ctx, arg0_it(), arg1_it(),
- &st);
- }));
- return st;
+ RETURN_NOT_OK(OutputAdapter<OutType>::Write(ctx, out, [&]() -> OutValue {
+ return Op::template Call<OutValue, Arg0Value, Arg1Value>(ctx, arg0_it(), arg1_it(),
+ &st);
+ }));
+ return st;
}
- static Status ArrayScalar(KernelContext* ctx, const ArrayData& arg0, const Scalar& arg1,
- Datum* out) {
- Status st = Status::OK();
+ static Status ArrayScalar(KernelContext* ctx, const ArrayData& arg0, const Scalar& arg1,
+ Datum* out) {
+ Status st = Status::OK();
ArrayIterator<Arg0Type> arg0_it(arg0);
auto arg1_val = UnboxScalar<Arg1Type>::Unbox(arg1);
- RETURN_NOT_OK(OutputAdapter<OutType>::Write(ctx, out, [&]() -> OutValue {
- return Op::template Call<OutValue, Arg0Value, Arg1Value>(ctx, arg0_it(), arg1_val,
- &st);
- }));
- return st;
+ RETURN_NOT_OK(OutputAdapter<OutType>::Write(ctx, out, [&]() -> OutValue {
+ return Op::template Call<OutValue, Arg0Value, Arg1Value>(ctx, arg0_it(), arg1_val,
+ &st);
+ }));
+ return st;
}
- static Status ScalarArray(KernelContext* ctx, const Scalar& arg0, const ArrayData& arg1,
- Datum* out) {
- Status st = Status::OK();
+ static Status ScalarArray(KernelContext* ctx, const Scalar& arg0, const ArrayData& arg1,
+ Datum* out) {
+ Status st = Status::OK();
auto arg0_val = UnboxScalar<Arg0Type>::Unbox(arg0);
ArrayIterator<Arg1Type> arg1_it(arg1);
- RETURN_NOT_OK(OutputAdapter<OutType>::Write(ctx, out, [&]() -> OutValue {
- return Op::template Call<OutValue, Arg0Value, Arg1Value>(ctx, arg0_val, arg1_it(),
- &st);
- }));
- return st;
+ RETURN_NOT_OK(OutputAdapter<OutType>::Write(ctx, out, [&]() -> OutValue {
+ return Op::template Call<OutValue, Arg0Value, Arg1Value>(ctx, arg0_val, arg1_it(),
+ &st);
+ }));
+ return st;
}
- static Status ScalarScalar(KernelContext* ctx, const Scalar& arg0, const Scalar& arg1,
- Datum* out) {
- Status st = Status::OK();
+ static Status ScalarScalar(KernelContext* ctx, const Scalar& arg0, const Scalar& arg1,
+ Datum* out) {
+ Status st = Status::OK();
if (out->scalar()->is_valid) {
auto arg0_val = UnboxScalar<Arg0Type>::Unbox(arg0);
auto arg1_val = UnboxScalar<Arg1Type>::Unbox(arg1);
- BoxScalar<OutType>::Box(
- Op::template Call<OutValue, Arg0Value, Arg1Value>(ctx, arg0_val, arg1_val, &st),
- out->scalar().get());
+ BoxScalar<OutType>::Box(
+ Op::template Call<OutValue, Arg0Value, Arg1Value>(ctx, arg0_val, arg1_val, &st),
+ out->scalar().get());
}
- return st;
+ return st;
}
- static Status Exec(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
+ static Status Exec(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
if (batch[0].kind() == Datum::ARRAY) {
if (batch[1].kind() == Datum::ARRAY) {
return ArrayArray(ctx, *batch[0].array(), *batch[1].array(), out);
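// Editorial sketch of an Op for ScalarBinary; AddOp is a hypothetical name.
// The same Call template serves all four array/scalar combinations above, so
// it must accept both iterated values and unboxed scalar values.
//
//   struct AddOp {
//     template <typename OutValue, typename Arg0Value, typename Arg1Value>
//     static OutValue Call(KernelContext*, Arg0Value l, Arg1Value r, Status*) {
//       return static_cast<OutValue>(l + r);
//     }
//   };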
@@ -900,22 +900,22 @@ struct ScalarBinaryNotNullStateful {
// NOTE: In ArrayExec<Type>, Type is really OutputType
- Status ArrayArray(KernelContext* ctx, const ArrayData& arg0, const ArrayData& arg1,
- Datum* out) {
- Status st = Status::OK();
+ Status ArrayArray(KernelContext* ctx, const ArrayData& arg0, const ArrayData& arg1,
+ Datum* out) {
+ Status st = Status::OK();
OutputArrayWriter<OutType> writer(out->mutable_array());
VisitTwoArrayValuesInline<Arg0Type, Arg1Type>(
arg0, arg1,
[&](Arg0Value u, Arg1Value v) {
- writer.Write(op.template Call<OutValue, Arg0Value, Arg1Value>(ctx, u, v, &st));
+ writer.Write(op.template Call<OutValue, Arg0Value, Arg1Value>(ctx, u, v, &st));
},
[&]() { writer.WriteNull(); });
- return st;
+ return st;
}
- Status ArrayScalar(KernelContext* ctx, const ArrayData& arg0, const Scalar& arg1,
- Datum* out) {
- Status st = Status::OK();
+ Status ArrayScalar(KernelContext* ctx, const ArrayData& arg0, const Scalar& arg1,
+ Datum* out) {
+ Status st = Status::OK();
OutputArrayWriter<OutType> writer(out->mutable_array());
if (arg1.is_valid) {
const auto arg1_val = UnboxScalar<Arg1Type>::Unbox(arg1);
@@ -923,18 +923,18 @@ struct ScalarBinaryNotNullStateful {
arg0,
[&](Arg0Value u) {
writer.Write(
- op.template Call<OutValue, Arg0Value, Arg1Value>(ctx, u, arg1_val, &st));
+ op.template Call<OutValue, Arg0Value, Arg1Value>(ctx, u, arg1_val, &st));
},
[&]() { writer.WriteNull(); });
- } else {
- writer.WriteAllNull(out->mutable_array()->length);
+ } else {
+ writer.WriteAllNull(out->mutable_array()->length);
}
- return st;
+ return st;
}
- Status ScalarArray(KernelContext* ctx, const Scalar& arg0, const ArrayData& arg1,
- Datum* out) {
- Status st = Status::OK();
+ Status ScalarArray(KernelContext* ctx, const Scalar& arg0, const ArrayData& arg1,
+ Datum* out) {
+ Status st = Status::OK();
OutputArrayWriter<OutType> writer(out->mutable_array());
if (arg0.is_valid) {
const auto arg0_val = UnboxScalar<Arg0Type>::Unbox(arg0);
@@ -942,29 +942,29 @@ struct ScalarBinaryNotNullStateful {
arg1,
[&](Arg1Value v) {
writer.Write(
- op.template Call<OutValue, Arg0Value, Arg1Value>(ctx, arg0_val, v, &st));
+ op.template Call<OutValue, Arg0Value, Arg1Value>(ctx, arg0_val, v, &st));
},
[&]() { writer.WriteNull(); });
- } else {
- writer.WriteAllNull(out->mutable_array()->length);
+ } else {
+ writer.WriteAllNull(out->mutable_array()->length);
}
- return st;
+ return st;
}
- Status ScalarScalar(KernelContext* ctx, const Scalar& arg0, const Scalar& arg1,
- Datum* out) {
- Status st = Status::OK();
+ Status ScalarScalar(KernelContext* ctx, const Scalar& arg0, const Scalar& arg1,
+ Datum* out) {
+ Status st = Status::OK();
if (arg0.is_valid && arg1.is_valid) {
const auto arg0_val = UnboxScalar<Arg0Type>::Unbox(arg0);
const auto arg1_val = UnboxScalar<Arg1Type>::Unbox(arg1);
BoxScalar<OutType>::Box(
- op.template Call<OutValue, Arg0Value, Arg1Value>(ctx, arg0_val, arg1_val, &st),
+ op.template Call<OutValue, Arg0Value, Arg1Value>(ctx, arg0_val, arg1_val, &st),
out->scalar().get());
}
- return st;
+ return st;
}
- Status Exec(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
+ Status Exec(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
if (batch[0].kind() == Datum::ARRAY) {
if (batch[1].kind() == Datum::ARRAY) {
return ArrayArray(ctx, *batch[0].array(), *batch[1].array(), out);
@@ -991,7 +991,7 @@ struct ScalarBinaryNotNull {
using Arg0Value = typename GetViewType<Arg0Type>::T;
using Arg1Value = typename GetViewType<Arg1Type>::T;
- static Status Exec(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
+ static Status Exec(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
// Seed kernel with dummy state
ScalarBinaryNotNullStateful<OutType, Arg0Type, Arg1Type, Op> kernel({});
return kernel.Exec(ctx, batch, out);
@@ -1160,41 +1160,41 @@ ArrayKernelExec GeneratePhysicalInteger(detail::GetTypeId get_id) {
}
}
-template <template <typename... Args> class Generator, typename... Args>
-ArrayKernelExec GeneratePhysicalNumeric(detail::GetTypeId get_id) {
- switch (get_id.id) {
- case Type::INT8:
- return Generator<Int8Type, Args...>::Exec;
- case Type::INT16:
- return Generator<Int16Type, Args...>::Exec;
- case Type::INT32:
- case Type::DATE32:
- case Type::TIME32:
- return Generator<Int32Type, Args...>::Exec;
- case Type::INT64:
- case Type::DATE64:
- case Type::TIMESTAMP:
- case Type::TIME64:
- case Type::DURATION:
- return Generator<Int64Type, Args...>::Exec;
- case Type::UINT8:
- return Generator<UInt8Type, Args...>::Exec;
- case Type::UINT16:
- return Generator<UInt16Type, Args...>::Exec;
- case Type::UINT32:
- return Generator<UInt32Type, Args...>::Exec;
- case Type::UINT64:
- return Generator<UInt64Type, Args...>::Exec;
- case Type::FLOAT:
- return Generator<FloatType, Args...>::Exec;
- case Type::DOUBLE:
- return Generator<DoubleType, Args...>::Exec;
- default:
- DCHECK(false);
- return ExecFail;
- }
-}
-
+template <template <typename... Args> class Generator, typename... Args>
+ArrayKernelExec GeneratePhysicalNumeric(detail::GetTypeId get_id) {
+ switch (get_id.id) {
+ case Type::INT8:
+ return Generator<Int8Type, Args...>::Exec;
+ case Type::INT16:
+ return Generator<Int16Type, Args...>::Exec;
+ case Type::INT32:
+ case Type::DATE32:
+ case Type::TIME32:
+ return Generator<Int32Type, Args...>::Exec;
+ case Type::INT64:
+ case Type::DATE64:
+ case Type::TIMESTAMP:
+ case Type::TIME64:
+ case Type::DURATION:
+ return Generator<Int64Type, Args...>::Exec;
+ case Type::UINT8:
+ return Generator<UInt8Type, Args...>::Exec;
+ case Type::UINT16:
+ return Generator<UInt16Type, Args...>::Exec;
+ case Type::UINT32:
+ return Generator<UInt32Type, Args...>::Exec;
+ case Type::UINT64:
+ return Generator<UInt64Type, Args...>::Exec;
+ case Type::FLOAT:
+ return Generator<FloatType, Args...>::Exec;
+ case Type::DOUBLE:
+ return Generator<DoubleType, Args...>::Exec;
+ default:
+ DCHECK(false);
+ return ExecFail;
+ }
+}
+
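// Editorial usage sketch: resolving an exec from a runtime type id. MyKernel
// is a hypothetical functor template; note the dispatcher collapses temporal
// types onto their physical integer representations (e.g. TIMESTAMP -> int64).
//
//   ArrayKernelExec exec = GeneratePhysicalNumeric<MyKernel>(ty->id());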
// Generate a kernel given a templated functor for integer types
//
// See "Numeric" above for description of the generator functor
@@ -1222,26 +1222,26 @@ ArrayKernelExec GenerateSignedInteger(detail::GetTypeId get_id) {
// bits).
//
// See "Numeric" above for description of the generator functor
-template <template <typename...> class Generator, typename... Args>
+template <template <typename...> class Generator, typename... Args>
ArrayKernelExec GenerateTypeAgnosticPrimitive(detail::GetTypeId get_id) {
switch (get_id.id) {
case Type::NA:
- return Generator<NullType, Args...>::Exec;
+ return Generator<NullType, Args...>::Exec;
case Type::BOOL:
- return Generator<BooleanType, Args...>::Exec;
+ return Generator<BooleanType, Args...>::Exec;
case Type::UINT8:
case Type::INT8:
- return Generator<UInt8Type, Args...>::Exec;
+ return Generator<UInt8Type, Args...>::Exec;
case Type::UINT16:
case Type::INT16:
- return Generator<UInt16Type, Args...>::Exec;
+ return Generator<UInt16Type, Args...>::Exec;
case Type::UINT32:
case Type::INT32:
case Type::FLOAT:
case Type::DATE32:
case Type::TIME32:
- case Type::INTERVAL_MONTHS:
- return Generator<UInt32Type, Args...>::Exec;
+ case Type::INTERVAL_MONTHS:
+ return Generator<UInt32Type, Args...>::Exec;
case Type::UINT64:
case Type::INT64:
case Type::DOUBLE:
@@ -1249,30 +1249,30 @@ ArrayKernelExec GenerateTypeAgnosticPrimitive(detail::GetTypeId get_id) {
case Type::TIMESTAMP:
case Type::TIME64:
case Type::DURATION:
- case Type::INTERVAL_DAY_TIME:
- return Generator<UInt64Type, Args...>::Exec;
+ case Type::INTERVAL_DAY_TIME:
+ return Generator<UInt64Type, Args...>::Exec;
+ default:
+ DCHECK(false);
+ return ExecFail;
+ }
+}
+
+// Similar to GenerateTypeAgnosticPrimitive, but for variable-width binary types
+template <template <typename...> class Generator, typename... Args>
+ArrayKernelExec GenerateTypeAgnosticVarBinaryBase(detail::GetTypeId get_id) {
+ switch (get_id.id) {
+ case Type::BINARY:
+ case Type::STRING:
+ return Generator<BinaryType, Args...>::Exec;
+ case Type::LARGE_BINARY:
+ case Type::LARGE_STRING:
+ return Generator<LargeBinaryType, Args...>::Exec;
default:
DCHECK(false);
return ExecFail;
}
}
-// Similar to GenerateTypeAgnosticPrimitive, but for variable-width binary types
-template <template <typename...> class Generator, typename... Args>
-ArrayKernelExec GenerateTypeAgnosticVarBinaryBase(detail::GetTypeId get_id) {
- switch (get_id.id) {
- case Type::BINARY:
- case Type::STRING:
- return Generator<BinaryType, Args...>::Exec;
- case Type::LARGE_BINARY:
- case Type::LARGE_STRING:
- return Generator<LargeBinaryType, Args...>::Exec;
- default:
- DCHECK(false);
- return ExecFail;
- }
-}
-
// Generate a kernel given a templated functor for base binary types. Generates
// a single kernel for binary/string and large binary / large string. If your
// kernel implementation needs access to the specific type at compile time,
@@ -1336,46 +1336,46 @@ ArrayKernelExec GenerateTemporal(detail::GetTypeId get_id) {
}
}
-// Generate a kernel given a templated functor for decimal types
-//
-// See "Numeric" above for description of the generator functor
-template <template <typename...> class Generator, typename Type0, typename... Args>
-ArrayKernelExec GenerateDecimal(detail::GetTypeId get_id) {
- switch (get_id.id) {
- case Type::DECIMAL128:
- return Generator<Type0, Decimal128Type, Args...>::Exec;
- case Type::DECIMAL256:
- return Generator<Type0, Decimal256Type, Args...>::Exec;
- default:
- DCHECK(false);
- return ExecFail;
- }
-}
-
+// Generate a kernel given a templated functor for decimal types
+//
+// See "Numeric" above for description of the generator functor
+template <template <typename...> class Generator, typename Type0, typename... Args>
+ArrayKernelExec GenerateDecimal(detail::GetTypeId get_id) {
+ switch (get_id.id) {
+ case Type::DECIMAL128:
+ return Generator<Type0, Decimal128Type, Args...>::Exec;
+ case Type::DECIMAL256:
+ return Generator<Type0, Decimal256Type, Args...>::Exec;
+ default:
+ DCHECK(false);
+ return ExecFail;
+ }
+}
+
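// Editorial usage sketch (DecimalCmp is hypothetical): Type0 is forwarded
// ahead of the decimal type, so a caller can pin, say, the output type while
// dispatching on decimal width:
//
//   // instantiates DecimalCmp<BooleanType, Decimal128Type>::Exec for DECIMAL128
//   ArrayKernelExec exec = GenerateDecimal<DecimalCmp, BooleanType>(ty->id());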
// END of kernel generator-dispatchers
// ----------------------------------------------------------------------
-ARROW_EXPORT
-void EnsureDictionaryDecoded(std::vector<ValueDescr>* descrs);
-
-ARROW_EXPORT
-void ReplaceNullWithOtherType(std::vector<ValueDescr>* descrs);
-
-ARROW_EXPORT
-void ReplaceTypes(const std::shared_ptr<DataType>&, std::vector<ValueDescr>* descrs);
-
-ARROW_EXPORT
-std::shared_ptr<DataType> CommonNumeric(const std::vector<ValueDescr>& descrs);
-
-ARROW_EXPORT
-std::shared_ptr<DataType> CommonNumeric(const ValueDescr* begin, size_t count);
-
-ARROW_EXPORT
-std::shared_ptr<DataType> CommonTimestamp(const std::vector<ValueDescr>& descrs);
-
-ARROW_EXPORT
-std::shared_ptr<DataType> CommonBinary(const std::vector<ValueDescr>& descrs);
-
+ARROW_EXPORT
+void EnsureDictionaryDecoded(std::vector<ValueDescr>* descrs);
+
+ARROW_EXPORT
+void ReplaceNullWithOtherType(std::vector<ValueDescr>* descrs);
+
+ARROW_EXPORT
+void ReplaceTypes(const std::shared_ptr<DataType>&, std::vector<ValueDescr>* descrs);
+
+ARROW_EXPORT
+std::shared_ptr<DataType> CommonNumeric(const std::vector<ValueDescr>& descrs);
+
+ARROW_EXPORT
+std::shared_ptr<DataType> CommonNumeric(const ValueDescr* begin, size_t count);
+
+ARROW_EXPORT
+std::shared_ptr<DataType> CommonTimestamp(const std::vector<ValueDescr>& descrs);
+
+ARROW_EXPORT
+std::shared_ptr<DataType> CommonBinary(const std::vector<ValueDescr>& descrs);
+
} // namespace internal
} // namespace compute
} // namespace arrow
diff --git a/contrib/libs/apache/arrow/cpp/src/arrow/compute/kernels/hash_aggregate.cc b/contrib/libs/apache/arrow/cpp/src/arrow/compute/kernels/hash_aggregate.cc
index 63d41392203..ed40a6b1b8c 100644
--- a/contrib/libs/apache/arrow/cpp/src/arrow/compute/kernels/hash_aggregate.cc
+++ b/contrib/libs/apache/arrow/cpp/src/arrow/compute/kernels/hash_aggregate.cc
@@ -1,1379 +1,1379 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-#include <functional>
-#include <memory>
-#include <string>
-#include <unordered_map>
-#include <vector>
-
-#include "arrow/buffer_builder.h"
-#include "arrow/compute/api_aggregate.h"
-#include "arrow/compute/api_vector.h"
-#include "arrow/compute/exec/key_compare.h"
-#include "arrow/compute/exec/key_encode.h"
-#include "arrow/compute/exec/key_hash.h"
-#include "arrow/compute/exec/key_map.h"
-#include "arrow/compute/exec/util.h"
-#include "arrow/compute/exec_internal.h"
-#include "arrow/compute/kernel.h"
-#include "arrow/compute/kernels/aggregate_internal.h"
-#include "arrow/compute/kernels/common.h"
-#include "arrow/util/bit_run_reader.h"
-#include "arrow/util/bitmap_ops.h"
-#include "arrow/util/bitmap_writer.h"
-#include "arrow/util/checked_cast.h"
-#include "arrow/util/cpu_info.h"
-#include "arrow/util/make_unique.h"
-#include "arrow/visitor_inline.h"
-
-namespace arrow {
-
-using internal::checked_cast;
-using internal::FirstTimeBitmapWriter;
-
-namespace compute {
-namespace internal {
-namespace {
-
-struct KeyEncoder {
- // the first byte of an encoded key is used to indicate nullity
- static constexpr bool kExtraByteForNull = true;
-
- static constexpr uint8_t kNullByte = 1;
- static constexpr uint8_t kValidByte = 0;
-
- virtual ~KeyEncoder() = default;
-
- virtual void AddLength(const ArrayData&, int32_t* lengths) = 0;
-
- virtual Status Encode(const ArrayData&, uint8_t** encoded_bytes) = 0;
-
- virtual Result<std::shared_ptr<ArrayData>> Decode(uint8_t** encoded_bytes,
- int32_t length, MemoryPool*) = 0;
-
- // extract the null bitmap from the leading nullity bytes of encoded keys
- static Status DecodeNulls(MemoryPool* pool, int32_t length, uint8_t** encoded_bytes,
- std::shared_ptr<Buffer>* null_bitmap, int32_t* null_count) {
- // first count nulls to determine if a null bitmap is necessary
- *null_count = 0;
- for (int32_t i = 0; i < length; ++i) {
- *null_count += (encoded_bytes[i][0] == kNullByte);
- }
-
- if (*null_count > 0) {
- ARROW_ASSIGN_OR_RAISE(*null_bitmap, AllocateBitmap(length, pool));
- uint8_t* validity = (*null_bitmap)->mutable_data();
-
- FirstTimeBitmapWriter writer(validity, 0, length);
- for (int32_t i = 0; i < length; ++i) {
- if (encoded_bytes[i][0] == kValidByte) {
- writer.Set();
- } else {
- writer.Clear();
- }
- writer.Next();
- encoded_bytes[i] += 1;
- }
- writer.Finish();
- } else {
- for (int32_t i = 0; i < length; ++i) {
- encoded_bytes[i] += 1;
- }
- }
-    return Status::OK();
- }
-};
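// Editorial sketch of the encoded-key layout shared by the encoders below.
// Every value is prefixed with one nullity byte (kValidByte / kNullByte);
// var-length values additionally carry an Offset-sized length prefix.
// Little-endian bytes shown; a row is the concatenation of all key columns.
//
//   int32 7, valid:      [0x00][07 00 00 00]
//   int32, null:         [0x01][00 00 00 00]        (payload zeroed)
//   "ab" (BinaryType):   [0x00][02 00 00 00][61 62]
//   "",  null (binary):  [0x01][00 00 00 00]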
-
-struct BooleanKeyEncoder : KeyEncoder {
- static constexpr int kByteWidth = 1;
-
- void AddLength(const ArrayData& data, int32_t* lengths) override {
- for (int64_t i = 0; i < data.length; ++i) {
- lengths[i] += kByteWidth + kExtraByteForNull;
- }
- }
-
- Status Encode(const ArrayData& data, uint8_t** encoded_bytes) override {
- VisitArrayDataInline<BooleanType>(
- data,
- [&](bool value) {
- auto& encoded_ptr = *encoded_bytes++;
- *encoded_ptr++ = kValidByte;
- *encoded_ptr++ = value;
- },
- [&] {
- auto& encoded_ptr = *encoded_bytes++;
- *encoded_ptr++ = kNullByte;
- *encoded_ptr++ = 0;
- });
- return Status::OK();
- }
-
- Result<std::shared_ptr<ArrayData>> Decode(uint8_t** encoded_bytes, int32_t length,
- MemoryPool* pool) override {
- std::shared_ptr<Buffer> null_buf;
- int32_t null_count;
- RETURN_NOT_OK(DecodeNulls(pool, length, encoded_bytes, &null_buf, &null_count));
-
- ARROW_ASSIGN_OR_RAISE(auto key_buf, AllocateBitmap(length, pool));
-
- uint8_t* raw_output = key_buf->mutable_data();
- for (int32_t i = 0; i < length; ++i) {
- auto& encoded_ptr = encoded_bytes[i];
- BitUtil::SetBitTo(raw_output, i, encoded_ptr[0] != 0);
- encoded_ptr += 1;
- }
-
- return ArrayData::Make(boolean(), length, {std::move(null_buf), std::move(key_buf)},
- null_count);
- }
-};
-
-struct FixedWidthKeyEncoder : KeyEncoder {
- explicit FixedWidthKeyEncoder(std::shared_ptr<DataType> type)
- : type_(std::move(type)),
- byte_width_(checked_cast<const FixedWidthType&>(*type_).bit_width() / 8) {}
-
- void AddLength(const ArrayData& data, int32_t* lengths) override {
- for (int64_t i = 0; i < data.length; ++i) {
- lengths[i] += byte_width_ + kExtraByteForNull;
- }
- }
-
- Status Encode(const ArrayData& data, uint8_t** encoded_bytes) override {
- ArrayData viewed(fixed_size_binary(byte_width_), data.length, data.buffers,
- data.null_count, data.offset);
-
- VisitArrayDataInline<FixedSizeBinaryType>(
- viewed,
- [&](util::string_view bytes) {
- auto& encoded_ptr = *encoded_bytes++;
- *encoded_ptr++ = kValidByte;
- memcpy(encoded_ptr, bytes.data(), byte_width_);
- encoded_ptr += byte_width_;
- },
- [&] {
- auto& encoded_ptr = *encoded_bytes++;
- *encoded_ptr++ = kNullByte;
- memset(encoded_ptr, 0, byte_width_);
- encoded_ptr += byte_width_;
- });
- return Status::OK();
- }
-
- Result<std::shared_ptr<ArrayData>> Decode(uint8_t** encoded_bytes, int32_t length,
- MemoryPool* pool) override {
- std::shared_ptr<Buffer> null_buf;
- int32_t null_count;
- RETURN_NOT_OK(DecodeNulls(pool, length, encoded_bytes, &null_buf, &null_count));
-
- ARROW_ASSIGN_OR_RAISE(auto key_buf, AllocateBuffer(length * byte_width_, pool));
-
- uint8_t* raw_output = key_buf->mutable_data();
- for (int32_t i = 0; i < length; ++i) {
- auto& encoded_ptr = encoded_bytes[i];
- std::memcpy(raw_output, encoded_ptr, byte_width_);
- encoded_ptr += byte_width_;
- raw_output += byte_width_;
- }
-
- return ArrayData::Make(type_, length, {std::move(null_buf), std::move(key_buf)},
- null_count);
- }
-
- std::shared_ptr<DataType> type_;
- int byte_width_;
-};
-
-struct DictionaryKeyEncoder : FixedWidthKeyEncoder {
- DictionaryKeyEncoder(std::shared_ptr<DataType> type, MemoryPool* pool)
- : FixedWidthKeyEncoder(std::move(type)), pool_(pool) {}
-
- Status Encode(const ArrayData& data, uint8_t** encoded_bytes) override {
- auto dict = MakeArray(data.dictionary);
- if (dictionary_) {
- if (!dictionary_->Equals(dict)) {
- // TODO(bkietz) unify if necessary. For now, just error if any batch's dictionary
- // differs from the first we saw for this key
- return Status::NotImplemented("Unifying differing dictionaries");
- }
- } else {
- dictionary_ = std::move(dict);
- }
- return FixedWidthKeyEncoder::Encode(data, encoded_bytes);
- }
-
- Result<std::shared_ptr<ArrayData>> Decode(uint8_t** encoded_bytes, int32_t length,
- MemoryPool* pool) override {
- ARROW_ASSIGN_OR_RAISE(auto data,
- FixedWidthKeyEncoder::Decode(encoded_bytes, length, pool));
-
- if (dictionary_) {
- data->dictionary = dictionary_->data();
- } else {
- ARROW_ASSIGN_OR_RAISE(auto dict, MakeArrayOfNull(type_, 0));
- data->dictionary = dict->data();
- }
-
- data->type = type_;
- return data;
- }
-
- MemoryPool* pool_;
- std::shared_ptr<Array> dictionary_;
-};
-
-template <typename T>
-struct VarLengthKeyEncoder : KeyEncoder {
- using Offset = typename T::offset_type;
-
- void AddLength(const ArrayData& data, int32_t* lengths) override {
- int64_t i = 0;
- VisitArrayDataInline<T>(
- data,
- [&](util::string_view bytes) {
- lengths[i++] +=
- kExtraByteForNull + sizeof(Offset) + static_cast<int32_t>(bytes.size());
- },
- [&] { lengths[i++] += kExtraByteForNull + sizeof(Offset); });
- }
-
- Status Encode(const ArrayData& data, uint8_t** encoded_bytes) override {
- VisitArrayDataInline<T>(
- data,
- [&](util::string_view bytes) {
- auto& encoded_ptr = *encoded_bytes++;
- *encoded_ptr++ = kValidByte;
- util::SafeStore(encoded_ptr, static_cast<Offset>(bytes.size()));
- encoded_ptr += sizeof(Offset);
- memcpy(encoded_ptr, bytes.data(), bytes.size());
- encoded_ptr += bytes.size();
- },
- [&] {
- auto& encoded_ptr = *encoded_bytes++;
- *encoded_ptr++ = kNullByte;
- util::SafeStore(encoded_ptr, static_cast<Offset>(0));
- encoded_ptr += sizeof(Offset);
- });
- return Status::OK();
- }
-
- Result<std::shared_ptr<ArrayData>> Decode(uint8_t** encoded_bytes, int32_t length,
- MemoryPool* pool) override {
- std::shared_ptr<Buffer> null_buf;
- int32_t null_count;
- RETURN_NOT_OK(DecodeNulls(pool, length, encoded_bytes, &null_buf, &null_count));
-
- Offset length_sum = 0;
- for (int32_t i = 0; i < length; ++i) {
- length_sum += util::SafeLoadAs<Offset>(encoded_bytes[i]);
- }
-
- ARROW_ASSIGN_OR_RAISE(auto offset_buf,
- AllocateBuffer(sizeof(Offset) * (1 + length), pool));
- ARROW_ASSIGN_OR_RAISE(auto key_buf, AllocateBuffer(length_sum));
-
- auto raw_offsets = reinterpret_cast<Offset*>(offset_buf->mutable_data());
- auto raw_keys = key_buf->mutable_data();
-
- Offset current_offset = 0;
- for (int32_t i = 0; i < length; ++i) {
- raw_offsets[i] = current_offset;
-
- auto key_length = util::SafeLoadAs<Offset>(encoded_bytes[i]);
- encoded_bytes[i] += sizeof(Offset);
-
- memcpy(raw_keys + current_offset, encoded_bytes[i], key_length);
- encoded_bytes[i] += key_length;
-
- current_offset += key_length;
- }
- raw_offsets[length] = current_offset;
-
- return ArrayData::Make(
- type_, length, {std::move(null_buf), std::move(offset_buf), std::move(key_buf)},
- null_count);
- }
-
- explicit VarLengthKeyEncoder(std::shared_ptr<DataType> type) : type_(std::move(type)) {}
-
- std::shared_ptr<DataType> type_;
-};
-
-struct GrouperImpl : Grouper {
- static Result<std::unique_ptr<GrouperImpl>> Make(const std::vector<ValueDescr>& keys,
- ExecContext* ctx) {
- auto impl = ::arrow::internal::make_unique<GrouperImpl>();
-
- impl->encoders_.resize(keys.size());
- impl->ctx_ = ctx;
-
- for (size_t i = 0; i < keys.size(); ++i) {
- const auto& key = keys[i].type;
-
- if (key->id() == Type::BOOL) {
- impl->encoders_[i] = ::arrow::internal::make_unique<BooleanKeyEncoder>();
- continue;
- }
-
- if (key->id() == Type::DICTIONARY) {
- impl->encoders_[i] =
- ::arrow::internal::make_unique<DictionaryKeyEncoder>(key, ctx->memory_pool());
- continue;
- }
-
- if (is_fixed_width(key->id())) {
- impl->encoders_[i] = ::arrow::internal::make_unique<FixedWidthKeyEncoder>(key);
- continue;
- }
-
- if (is_binary_like(key->id())) {
- impl->encoders_[i] =
- ::arrow::internal::make_unique<VarLengthKeyEncoder<BinaryType>>(key);
- continue;
- }
-
- if (is_large_binary_like(key->id())) {
- impl->encoders_[i] =
- ::arrow::internal::make_unique<VarLengthKeyEncoder<LargeBinaryType>>(key);
- continue;
- }
-
- return Status::NotImplemented("Keys of type ", *key);
- }
-
- return std::move(impl);
- }
-
- Result<Datum> Consume(const ExecBatch& batch) override {
- std::vector<int32_t> offsets_batch(batch.length + 1);
- for (int i = 0; i < batch.num_values(); ++i) {
- encoders_[i]->AddLength(*batch[i].array(), offsets_batch.data());
- }
-
- int32_t total_length = 0;
- for (int64_t i = 0; i < batch.length; ++i) {
- auto total_length_before = total_length;
- total_length += offsets_batch[i];
- offsets_batch[i] = total_length_before;
- }
- offsets_batch[batch.length] = total_length;
-
- std::vector<uint8_t> key_bytes_batch(total_length);
- std::vector<uint8_t*> key_buf_ptrs(batch.length);
- for (int64_t i = 0; i < batch.length; ++i) {
- key_buf_ptrs[i] = key_bytes_batch.data() + offsets_batch[i];
- }
-
- for (int i = 0; i < batch.num_values(); ++i) {
- RETURN_NOT_OK(encoders_[i]->Encode(*batch[i].array(), key_buf_ptrs.data()));
- }
-
- TypedBufferBuilder<uint32_t> group_ids_batch(ctx_->memory_pool());
- RETURN_NOT_OK(group_ids_batch.Resize(batch.length));
-
- for (int64_t i = 0; i < batch.length; ++i) {
- int32_t key_length = offsets_batch[i + 1] - offsets_batch[i];
- std::string key(
- reinterpret_cast<const char*>(key_bytes_batch.data() + offsets_batch[i]),
- key_length);
-
- auto it_success = map_.emplace(key, num_groups_);
- auto group_id = it_success.first->second;
-
- if (it_success.second) {
- // new key; update offsets and key_bytes
- ++num_groups_;
- auto next_key_offset = static_cast<int32_t>(key_bytes_.size());
- key_bytes_.resize(next_key_offset + key_length);
- offsets_.push_back(next_key_offset + key_length);
- memcpy(key_bytes_.data() + next_key_offset, key.c_str(), key_length);
- }
-
- group_ids_batch.UnsafeAppend(group_id);
- }
-
- ARROW_ASSIGN_OR_RAISE(auto group_ids, group_ids_batch.Finish());
- return Datum(UInt32Array(batch.length, std::move(group_ids)));
- }
-
- uint32_t num_groups() const override { return num_groups_; }
-
- Result<ExecBatch> GetUniques() override {
- ExecBatch out({}, num_groups_);
-
- std::vector<uint8_t*> key_buf_ptrs(num_groups_);
- for (int64_t i = 0; i < num_groups_; ++i) {
- key_buf_ptrs[i] = key_bytes_.data() + offsets_[i];
- }
-
- out.values.resize(encoders_.size());
- for (size_t i = 0; i < encoders_.size(); ++i) {
- ARROW_ASSIGN_OR_RAISE(
- out.values[i],
- encoders_[i]->Decode(key_buf_ptrs.data(), static_cast<int32_t>(num_groups_),
- ctx_->memory_pool()));
- }
-
- return out;
- }
-
- ExecContext* ctx_;
- std::unordered_map<std::string, uint32_t> map_;
- std::vector<int32_t> offsets_ = {0};
- std::vector<uint8_t> key_bytes_;
- uint32_t num_groups_ = 0;
- std::vector<std::unique_ptr<KeyEncoder>> encoders_;
-};
-
-struct GrouperFastImpl : Grouper {
- static constexpr int kBitmapPaddingForSIMD = 64; // bits
- static constexpr int kPaddingForSIMD = 32; // bytes
-
- static bool CanUse(const std::vector<ValueDescr>& keys) {
-#if ARROW_LITTLE_ENDIAN
- for (size_t i = 0; i < keys.size(); ++i) {
- const auto& key = keys[i].type;
- if (is_large_binary_like(key->id())) {
- return false;
- }
- }
- return true;
-#else
- return false;
-#endif
- }
-
- static Result<std::unique_ptr<GrouperFastImpl>> Make(
- const std::vector<ValueDescr>& keys, ExecContext* ctx) {
- auto impl = ::arrow::internal::make_unique<GrouperFastImpl>();
- impl->ctx_ = ctx;
-
- RETURN_NOT_OK(impl->temp_stack_.Init(ctx->memory_pool(), 64 * minibatch_size_max_));
- impl->encode_ctx_.hardware_flags =
- arrow::internal::CpuInfo::GetInstance()->hardware_flags();
- impl->encode_ctx_.stack = &impl->temp_stack_;
-
- auto num_columns = keys.size();
- impl->col_metadata_.resize(num_columns);
- impl->key_types_.resize(num_columns);
- impl->dictionaries_.resize(num_columns);
- for (size_t icol = 0; icol < num_columns; ++icol) {
- const auto& key = keys[icol].type;
- if (key->id() == Type::DICTIONARY) {
- auto bit_width = checked_cast<const FixedWidthType&>(*key).bit_width();
- ARROW_DCHECK(bit_width % 8 == 0);
- impl->col_metadata_[icol] =
- arrow::compute::KeyEncoder::KeyColumnMetadata(true, bit_width / 8);
- } else if (key->id() == Type::BOOL) {
- impl->col_metadata_[icol] =
- arrow::compute::KeyEncoder::KeyColumnMetadata(true, 0);
- } else if (is_fixed_width(key->id())) {
- impl->col_metadata_[icol] = arrow::compute::KeyEncoder::KeyColumnMetadata(
- true, checked_cast<const FixedWidthType&>(*key).bit_width() / 8);
- } else if (is_binary_like(key->id())) {
- impl->col_metadata_[icol] =
- arrow::compute::KeyEncoder::KeyColumnMetadata(false, sizeof(uint32_t));
- } else {
- return Status::NotImplemented("Keys of type ", *key);
- }
- impl->key_types_[icol] = key;
- }
-
- impl->encoder_.Init(impl->col_metadata_, &impl->encode_ctx_,
- /* row_alignment = */ sizeof(uint64_t),
- /* string_alignment = */ sizeof(uint64_t));
- RETURN_NOT_OK(impl->rows_.Init(ctx->memory_pool(), impl->encoder_.row_metadata()));
- RETURN_NOT_OK(
- impl->rows_minibatch_.Init(ctx->memory_pool(), impl->encoder_.row_metadata()));
- impl->minibatch_size_ = impl->minibatch_size_min_;
- GrouperFastImpl* impl_ptr = impl.get();
- auto equal_func = [impl_ptr](
- int num_keys_to_compare, const uint16_t* selection_may_be_null,
- const uint32_t* group_ids, uint32_t* out_num_keys_mismatch,
- uint16_t* out_selection_mismatch) {
- arrow::compute::KeyCompare::CompareRows(
- num_keys_to_compare, selection_may_be_null, group_ids, &impl_ptr->encode_ctx_,
- out_num_keys_mismatch, out_selection_mismatch, impl_ptr->rows_minibatch_,
- impl_ptr->rows_);
- };
- auto append_func = [impl_ptr](int num_keys, const uint16_t* selection) {
- return impl_ptr->rows_.AppendSelectionFrom(impl_ptr->rows_minibatch_, num_keys,
- selection);
- };
- RETURN_NOT_OK(impl->map_.init(impl->encode_ctx_.hardware_flags, ctx->memory_pool(),
- impl->encode_ctx_.stack, impl->log_minibatch_max_,
- equal_func, append_func));
- impl->cols_.resize(num_columns);
- impl->minibatch_hashes_.resize(impl->minibatch_size_max_ +
- kPaddingForSIMD / sizeof(uint32_t));
-
- return std::move(impl);
- }
-
- ~GrouperFastImpl() { map_.cleanup(); }
-
- Result<Datum> Consume(const ExecBatch& batch) override {
- int64_t num_rows = batch.length;
- int num_columns = batch.num_values();
-
- // Process dictionaries
- for (int icol = 0; icol < num_columns; ++icol) {
- if (key_types_[icol]->id() == Type::DICTIONARY) {
- auto data = batch[icol].array();
- auto dict = MakeArray(data->dictionary);
- if (dictionaries_[icol]) {
- if (!dictionaries_[icol]->Equals(dict)) {
- // TODO(bkietz) unify if necessary. For now, just error if any batch's
- // dictionary differs from the first we saw for this key
- return Status::NotImplemented("Unifying differing dictionaries");
- }
- } else {
- dictionaries_[icol] = std::move(dict);
- }
- }
- }
-
- std::shared_ptr<arrow::Buffer> group_ids;
- ARROW_ASSIGN_OR_RAISE(
- group_ids, AllocateBuffer(sizeof(uint32_t) * num_rows, ctx_->memory_pool()));
-
- for (int icol = 0; icol < num_columns; ++icol) {
- const uint8_t* non_nulls = nullptr;
- if (batch[icol].array()->buffers[0] != NULLPTR) {
- non_nulls = batch[icol].array()->buffers[0]->data();
- }
- const uint8_t* fixedlen = batch[icol].array()->buffers[1]->data();
- const uint8_t* varlen = nullptr;
- if (!col_metadata_[icol].is_fixed_length) {
- varlen = batch[icol].array()->buffers[2]->data();
- }
-
- int64_t offset = batch[icol].array()->offset;
-
- auto col_base = arrow::compute::KeyEncoder::KeyColumnArray(
- col_metadata_[icol], offset + num_rows, non_nulls, fixedlen, varlen);
-
- cols_[icol] =
- arrow::compute::KeyEncoder::KeyColumnArray(col_base, offset, num_rows);
- }
-
- // Split into smaller mini-batches
- for (uint32_t start_row = 0; start_row < num_rows;) {
- uint32_t batch_size_next = std::min(static_cast<uint32_t>(minibatch_size_),
- static_cast<uint32_t>(num_rows) - start_row);
-
- // Encode
- rows_minibatch_.Clean();
- RETURN_NOT_OK(encoder_.PrepareOutputForEncode(start_row, batch_size_next,
- &rows_minibatch_, cols_));
- encoder_.Encode(start_row, batch_size_next, &rows_minibatch_, cols_);
-
- // Compute hash
- if (encoder_.row_metadata().is_fixed_length) {
- Hashing::hash_fixed(encode_ctx_.hardware_flags, batch_size_next,
- encoder_.row_metadata().fixed_length, rows_minibatch_.data(1),
- minibatch_hashes_.data());
- } else {
- auto hash_temp_buf =
- util::TempVectorHolder<uint32_t>(&temp_stack_, 4 * batch_size_next);
- Hashing::hash_varlen(encode_ctx_.hardware_flags, batch_size_next,
- rows_minibatch_.offsets(), rows_minibatch_.data(2),
- hash_temp_buf.mutable_data(), minibatch_hashes_.data());
- }
-
- // Map
- RETURN_NOT_OK(
- map_.map(batch_size_next, minibatch_hashes_.data(),
- reinterpret_cast<uint32_t*>(group_ids->mutable_data()) + start_row));
-
- start_row += batch_size_next;
-
- if (minibatch_size_ * 2 <= minibatch_size_max_) {
- minibatch_size_ *= 2;
- }
- }
-
- return Datum(UInt32Array(batch.length, std::move(group_ids)));
- }
-
- uint32_t num_groups() const override { return static_cast<uint32_t>(rows_.length()); }
-
- // Make sure padded buffers end up with the right logical size
-
- Result<std::shared_ptr<Buffer>> AllocatePaddedBitmap(int64_t length) {
- ARROW_ASSIGN_OR_RAISE(
- std::shared_ptr<Buffer> buf,
- AllocateBitmap(length + kBitmapPaddingForSIMD, ctx_->memory_pool()));
- return SliceMutableBuffer(buf, 0, BitUtil::BytesForBits(length));
- }
-
- Result<std::shared_ptr<Buffer>> AllocatePaddedBuffer(int64_t size) {
- ARROW_ASSIGN_OR_RAISE(
- std::shared_ptr<Buffer> buf,
- AllocateBuffer(size + kBitmapPaddingForSIMD, ctx_->memory_pool()));
- return SliceMutableBuffer(buf, 0, size);
- }
-
- Result<ExecBatch> GetUniques() override {
- auto num_columns = static_cast<uint32_t>(col_metadata_.size());
- int64_t num_groups = rows_.length();
-
- std::vector<std::shared_ptr<Buffer>> non_null_bufs(num_columns);
- std::vector<std::shared_ptr<Buffer>> fixedlen_bufs(num_columns);
- std::vector<std::shared_ptr<Buffer>> varlen_bufs(num_columns);
-
- for (size_t i = 0; i < num_columns; ++i) {
- ARROW_ASSIGN_OR_RAISE(non_null_bufs[i], AllocatePaddedBitmap(num_groups));
- if (col_metadata_[i].is_fixed_length) {
- if (col_metadata_[i].fixed_length == 0) {
- ARROW_ASSIGN_OR_RAISE(fixedlen_bufs[i], AllocatePaddedBitmap(num_groups));
- } else {
- ARROW_ASSIGN_OR_RAISE(
- fixedlen_bufs[i],
- AllocatePaddedBuffer(num_groups * col_metadata_[i].fixed_length));
- }
- } else {
- ARROW_ASSIGN_OR_RAISE(fixedlen_bufs[i],
- AllocatePaddedBuffer((num_groups + 1) * sizeof(uint32_t)));
- }
- cols_[i] = arrow::compute::KeyEncoder::KeyColumnArray(
- col_metadata_[i], num_groups, non_null_bufs[i]->mutable_data(),
- fixedlen_bufs[i]->mutable_data(), nullptr);
- }
-
- for (int64_t start_row = 0; start_row < num_groups;) {
- int64_t batch_size_next =
- std::min(num_groups - start_row, static_cast<int64_t>(minibatch_size_max_));
- encoder_.DecodeFixedLengthBuffers(start_row, start_row, batch_size_next, rows_,
- &cols_);
- start_row += batch_size_next;
- }
-
- if (!rows_.metadata().is_fixed_length) {
- for (size_t i = 0; i < num_columns; ++i) {
- if (!col_metadata_[i].is_fixed_length) {
- auto varlen_size =
- reinterpret_cast<const uint32_t*>(fixedlen_bufs[i]->data())[num_groups];
- ARROW_ASSIGN_OR_RAISE(varlen_bufs[i], AllocatePaddedBuffer(varlen_size));
- cols_[i] = arrow::compute::KeyEncoder::KeyColumnArray(
- col_metadata_[i], num_groups, non_null_bufs[i]->mutable_data(),
- fixedlen_bufs[i]->mutable_data(), varlen_bufs[i]->mutable_data());
- }
- }
-
- for (int64_t start_row = 0; start_row < num_groups;) {
- int64_t batch_size_next =
- std::min(num_groups - start_row, static_cast<int64_t>(minibatch_size_max_));
- encoder_.DecodeVaryingLengthBuffers(start_row, start_row, batch_size_next, rows_,
- &cols_);
- start_row += batch_size_next;
- }
- }
-
- ExecBatch out({}, num_groups);
- out.values.resize(num_columns);
- for (size_t i = 0; i < num_columns; ++i) {
- auto valid_count = arrow::internal::CountSetBits(
- non_null_bufs[i]->data(), /*offset=*/0, static_cast<int64_t>(num_groups));
- int null_count = static_cast<int>(num_groups) - static_cast<int>(valid_count);
-
- if (col_metadata_[i].is_fixed_length) {
- out.values[i] = ArrayData::Make(
- key_types_[i], num_groups,
- {std::move(non_null_bufs[i]), std::move(fixedlen_bufs[i])}, null_count);
- } else {
- out.values[i] =
- ArrayData::Make(key_types_[i], num_groups,
- {std::move(non_null_bufs[i]), std::move(fixedlen_bufs[i]),
- std::move(varlen_bufs[i])},
- null_count);
- }
- }
-
- // Process dictionaries
- for (size_t icol = 0; icol < num_columns; ++icol) {
- if (key_types_[icol]->id() == Type::DICTIONARY) {
- if (dictionaries_[icol]) {
- out.values[icol].array()->dictionary = dictionaries_[icol]->data();
- } else {
- ARROW_ASSIGN_OR_RAISE(auto dict, MakeArrayOfNull(key_types_[icol], 0));
- out.values[icol].array()->dictionary = dict->data();
- }
- }
- }
-
- return out;
- }
-
- static constexpr int log_minibatch_max_ = 10;
- static constexpr int minibatch_size_max_ = 1 << log_minibatch_max_;
- static constexpr int minibatch_size_min_ = 128;
- int minibatch_size_;
-
- ExecContext* ctx_;
- arrow::util::TempVectorStack temp_stack_;
- arrow::compute::KeyEncoder::KeyEncoderContext encode_ctx_;
-
- std::vector<std::shared_ptr<arrow::DataType>> key_types_;
- std::vector<arrow::compute::KeyEncoder::KeyColumnMetadata> col_metadata_;
- std::vector<arrow::compute::KeyEncoder::KeyColumnArray> cols_;
- std::vector<uint32_t> minibatch_hashes_;
-
- std::vector<std::shared_ptr<Array>> dictionaries_;
-
- arrow::compute::KeyEncoder::KeyRowArray rows_;
- arrow::compute::KeyEncoder::KeyRowArray rows_minibatch_;
- arrow::compute::KeyEncoder encoder_;
- arrow::compute::SwissTable map_;
-};
-
-/// C++ abstract base class for the HashAggregateKernel interface.
-/// Implementations should be default constructible and perform initialization in
-/// Init().
-struct GroupedAggregator : KernelState {
- virtual Status Init(ExecContext*, const FunctionOptions*,
- const std::shared_ptr<DataType>&) = 0;
-
- virtual Status Consume(const ExecBatch& batch) = 0;
-
- virtual Result<Datum> Finalize() = 0;
-
- template <typename Reserve>
- Status MaybeReserve(int64_t old_num_groups, const ExecBatch& batch,
- const Reserve& reserve) {
- int64_t new_num_groups = batch[2].scalar_as<UInt32Scalar>().value;
- if (new_num_groups <= old_num_groups) {
- return Status::OK();
- }
- return reserve(new_num_groups - old_num_groups);
- }
-
- virtual std::shared_ptr<DataType> out_type() const = 0;
-};
-
-// ----------------------------------------------------------------------
-// Count implementation
-
-struct GroupedCountImpl : public GroupedAggregator {
- Status Init(ExecContext* ctx, const FunctionOptions* options,
- const std::shared_ptr<DataType>&) override {
- options_ = checked_cast<const ScalarAggregateOptions&>(*options);
- counts_ = BufferBuilder(ctx->memory_pool());
- return Status::OK();
- }
-
- Status Consume(const ExecBatch& batch) override {
- RETURN_NOT_OK(MaybeReserve(num_groups_, batch, [&](int64_t added_groups) {
- num_groups_ += added_groups;
- return counts_.Append(added_groups * sizeof(int64_t), 0);
- }));
-
- auto group_ids = batch[1].array()->GetValues<uint32_t>(1);
- auto raw_counts = reinterpret_cast<int64_t*>(counts_.mutable_data());
-
- const auto& input = batch[0].array();
-
- if (!options_.skip_nulls) {
- if (input->GetNullCount() != 0) {
- for (int64_t i = 0, input_i = input->offset; i < input->length; ++i, ++input_i) {
- auto g = group_ids[i];
- raw_counts[g] += !BitUtil::GetBit(input->buffers[0]->data(), input_i);
- }
- }
- return Status::OK();
- }
-
- arrow::internal::VisitSetBitRunsVoid(
- input->buffers[0], input->offset, input->length,
- [&](int64_t begin, int64_t length) {
- for (int64_t input_i = begin, i = begin - input->offset;
- input_i < begin + length; ++input_i, ++i) {
- auto g = group_ids[i];
- raw_counts[g] += 1;
- }
- });
- return Status::OK();
- }
-
- Result<Datum> Finalize() override {
- ARROW_ASSIGN_OR_RAISE(auto counts, counts_.Finish());
- return std::make_shared<Int64Array>(num_groups_, std::move(counts));
- }
-
- std::shared_ptr<DataType> out_type() const override { return int64(); }
-
- int64_t num_groups_ = 0;
- ScalarAggregateOptions options_;
- BufferBuilder counts_;
-};
-
-// ----------------------------------------------------------------------
-// Sum implementation
-
-struct GroupedSumImpl : public GroupedAggregator {
- // NB: whether we are accumulating into double, int64_t, or uint64_t
- // we always have 64 bits per group in the sums buffer.
- static constexpr size_t kSumSize = sizeof(int64_t);
-
- using ConsumeImpl = std::function<void(const std::shared_ptr<ArrayData>&,
- const uint32_t*, void*, int64_t*)>;
-
- struct GetConsumeImpl {
- template <typename T, typename AccType = typename FindAccumulatorType<T>::Type>
- Status Visit(const T&) {
- consume_impl = [](const std::shared_ptr<ArrayData>& input, const uint32_t* group,
- void* boxed_sums, int64_t* counts) {
- auto sums = reinterpret_cast<typename TypeTraits<AccType>::CType*>(boxed_sums);
-
- VisitArrayDataInline<T>(
- *input,
- [&](typename TypeTraits<T>::CType value) {
- sums[*group] += value;
- counts[*group] += 1;
- ++group;
- },
- [&] { ++group; });
- };
- out_type = TypeTraits<AccType>::type_singleton();
- return Status::OK();
- }
-
- Status Visit(const HalfFloatType& type) {
- return Status::NotImplemented("Summing data of type ", type);
- }
-
- Status Visit(const DataType& type) {
- return Status::NotImplemented("Summing data of type ", type);
- }
-
- ConsumeImpl consume_impl;
- std::shared_ptr<DataType> out_type;
- };
-
- Status Init(ExecContext* ctx, const FunctionOptions*,
- const std::shared_ptr<DataType>& input_type) override {
- pool_ = ctx->memory_pool();
- sums_ = BufferBuilder(pool_);
- counts_ = BufferBuilder(pool_);
-
- GetConsumeImpl get_consume_impl;
- RETURN_NOT_OK(VisitTypeInline(*input_type, &get_consume_impl));
-
- consume_impl_ = std::move(get_consume_impl.consume_impl);
- out_type_ = std::move(get_consume_impl.out_type);
-
- return Status::OK();
- }
-
- Status Consume(const ExecBatch& batch) override {
- RETURN_NOT_OK(MaybeReserve(num_groups_, batch, [&](int64_t added_groups) {
- num_groups_ += added_groups;
- RETURN_NOT_OK(sums_.Append(added_groups * kSumSize, 0));
- RETURN_NOT_OK(counts_.Append(added_groups * sizeof(int64_t), 0));
- return Status::OK();
- }));
-
- auto group_ids = batch[1].array()->GetValues<uint32_t>(1);
- consume_impl_(batch[0].array(), group_ids, sums_.mutable_data(),
- reinterpret_cast<int64_t*>(counts_.mutable_data()));
- return Status::OK();
- }
-
- Result<Datum> Finalize() override {
- std::shared_ptr<Buffer> null_bitmap;
- int64_t null_count = 0;
-
- for (int64_t i = 0; i < num_groups_; ++i) {
- if (reinterpret_cast<const int64_t*>(counts_.data())[i] > 0) continue;
-
- if (null_bitmap == nullptr) {
- ARROW_ASSIGN_OR_RAISE(null_bitmap, AllocateBitmap(num_groups_, pool_));
- BitUtil::SetBitsTo(null_bitmap->mutable_data(), 0, num_groups_, true);
- }
-
- null_count += 1;
- BitUtil::SetBitTo(null_bitmap->mutable_data(), i, false);
- }
-
- ARROW_ASSIGN_OR_RAISE(auto sums, sums_.Finish());
-
- return ArrayData::Make(std::move(out_type_), num_groups_,
- {std::move(null_bitmap), std::move(sums)}, null_count);
- }
-
- std::shared_ptr<DataType> out_type() const override { return out_type_; }
-
- // NB: counts are used here instead of a simple "has_values_" bitmap since
- // we expect to reuse this kernel to handle Mean
- int64_t num_groups_ = 0;
- BufferBuilder sums_, counts_;
- std::shared_ptr<DataType> out_type_;
- ConsumeImpl consume_impl_;
- MemoryPool* pool_;
-};
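-
-// Note: Finalize marks a group null only if its count is still zero, i.e. no
-// non-null value was ever consumed for it; a group that saw only nulls thus
-// sums to null rather than to 0.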
-
-// ----------------------------------------------------------------------
-// MinMax implementation
-
-template <typename CType>
-struct Extrema : std::numeric_limits<CType> {};
-
-template <>
-struct Extrema<float> {
- static constexpr float min() { return -std::numeric_limits<float>::infinity(); }
- static constexpr float max() { return std::numeric_limits<float>::infinity(); }
-};
-
-template <>
-struct Extrema<double> {
- static constexpr double min() { return -std::numeric_limits<double>::infinity(); }
- static constexpr double max() { return std::numeric_limits<double>::infinity(); }
-};
-
-struct GroupedMinMaxImpl : public GroupedAggregator {
- using ConsumeImpl =
- std::function<void(const std::shared_ptr<ArrayData>&, const uint32_t*, void*, void*,
- uint8_t*, uint8_t*)>;
-
- using ResizeImpl = std::function<Status(BufferBuilder*, int64_t)>;
-
- template <typename CType>
- static ResizeImpl MakeResizeImpl(CType anti_extreme) {
- // resize a min or max buffer, storing the correct anti-extreme
- return [anti_extreme](BufferBuilder* builder, int64_t added_groups) {
- TypedBufferBuilder<CType> typed_builder(std::move(*builder));
- RETURN_NOT_OK(typed_builder.Append(added_groups, anti_extreme));
- *builder = std::move(*typed_builder.bytes_builder());
- return Status::OK();
- };
- }
-
- struct GetImpl {
- template <typename T, typename CType = typename TypeTraits<T>::CType>
- enable_if_number<T, Status> Visit(const T&) {
- consume_impl = [](const std::shared_ptr<ArrayData>& input, const uint32_t* group,
- void* mins, void* maxes, uint8_t* has_values,
- uint8_t* has_nulls) {
- auto raw_mins = reinterpret_cast<CType*>(mins);
- auto raw_maxes = reinterpret_cast<CType*>(maxes);
-
- VisitArrayDataInline<T>(
- *input,
- [&](CType val) {
- raw_maxes[*group] = std::max(raw_maxes[*group], val);
- raw_mins[*group] = std::min(raw_mins[*group], val);
- BitUtil::SetBit(has_values, *group++);
- },
- [&] { BitUtil::SetBit(has_nulls, *group++); });
- };
-
- resize_min_impl = MakeResizeImpl(Extrema<CType>::max());
- resize_max_impl = MakeResizeImpl(Extrema<CType>::min());
- return Status::OK();
- }
-
- Status Visit(const BooleanType& type) {
- return Status::NotImplemented("Grouped MinMax data of type ", type);
- }
-
- Status Visit(const HalfFloatType& type) {
- return Status::NotImplemented("Grouped MinMax data of type ", type);
- }
-
- Status Visit(const DataType& type) {
- return Status::NotImplemented("Grouped MinMax data of type ", type);
- }
-
- ConsumeImpl consume_impl;
- ResizeImpl resize_min_impl, resize_max_impl;
- };
-
- Status Init(ExecContext* ctx, const FunctionOptions* options,
- const std::shared_ptr<DataType>& input_type) override {
- options_ = *checked_cast<const ScalarAggregateOptions*>(options);
- type_ = input_type;
-
- mins_ = BufferBuilder(ctx->memory_pool());
- maxes_ = BufferBuilder(ctx->memory_pool());
- has_values_ = TypedBufferBuilder<bool>(ctx->memory_pool());
- has_nulls_ = TypedBufferBuilder<bool>(ctx->memory_pool());
-
- GetImpl get_impl;
- RETURN_NOT_OK(VisitTypeInline(*input_type, &get_impl));
-
- consume_impl_ = std::move(get_impl.consume_impl);
- resize_min_impl_ = std::move(get_impl.resize_min_impl);
- resize_max_impl_ = std::move(get_impl.resize_max_impl);
-
- return Status::OK();
- }
-
- Status Consume(const ExecBatch& batch) override {
- RETURN_NOT_OK(MaybeReserve(num_groups_, batch, [&](int64_t added_groups) {
- num_groups_ += added_groups;
- RETURN_NOT_OK(resize_min_impl_(&mins_, added_groups));
- RETURN_NOT_OK(resize_max_impl_(&maxes_, added_groups));
- RETURN_NOT_OK(has_values_.Append(added_groups, false));
- RETURN_NOT_OK(has_nulls_.Append(added_groups, false));
- return Status::OK();
- }));
-
- auto group_ids = batch[1].array()->GetValues<uint32_t>(1);
- consume_impl_(batch[0].array(), group_ids, mins_.mutable_data(),
- maxes_.mutable_data(), has_values_.mutable_data(),
- has_nulls_.mutable_data());
- return Status::OK();
- }
-
- Result<Datum> Finalize() override {
- // a group's aggregate is valid if at least one value was consumed for that group
- ARROW_ASSIGN_OR_RAISE(auto null_bitmap, has_values_.Finish());
-
- if (!options_.skip_nulls) {
- // ... and there were no nulls in that group
- ARROW_ASSIGN_OR_RAISE(auto has_nulls, has_nulls_.Finish());
- arrow::internal::BitmapAndNot(null_bitmap->data(), 0, has_nulls->data(), 0,
- num_groups_, 0, null_bitmap->mutable_data());
- }
-
- auto mins = ArrayData::Make(type_, num_groups_, {null_bitmap, nullptr});
- auto maxes = ArrayData::Make(type_, num_groups_, {std::move(null_bitmap), nullptr});
- ARROW_ASSIGN_OR_RAISE(mins->buffers[1], mins_.Finish());
- ARROW_ASSIGN_OR_RAISE(maxes->buffers[1], maxes_.Finish());
-
- return ArrayData::Make(out_type(), num_groups_, {nullptr},
- {std::move(mins), std::move(maxes)});
- }
-
- std::shared_ptr<DataType> out_type() const override {
- return struct_({field("min", type_), field("max", type_)});
- }
-
- int64_t num_groups_;
- BufferBuilder mins_, maxes_;
- TypedBufferBuilder<bool> has_values_, has_nulls_;
- std::shared_ptr<DataType> type_;
- ConsumeImpl consume_impl_;
- ResizeImpl resize_min_impl_, resize_max_impl_;
- ScalarAggregateOptions options_;
-};
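-
-// Note: new min/max slots are seeded with their anti-extreme (numeric max for
-// mins_, numeric min for maxes_, and +/-infinity for floating point), so the
-// first value consumed for a group always replaces the seed. E.g. a fresh
-// int32 min slot starts at 2147483647 and a fresh max slot at -2147483648.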
-
-template <typename Impl>
-HashAggregateKernel MakeKernel(InputType argument_type) {
- HashAggregateKernel kernel;
-
- kernel.init = [](KernelContext* ctx,
- const KernelInitArgs& args) -> Result<std::unique_ptr<KernelState>> {
- auto impl = ::arrow::internal::make_unique<Impl>();
- // FIXME(bkietz) Init should not take a type. That should be an unboxed template arg
- // for the Impl. Otherwise we're not exposing dispatch as well as we should.
- RETURN_NOT_OK(impl->Init(ctx->exec_context(), args.options, args.inputs[0].type));
- return std::move(impl);
- };
-
- kernel.signature = KernelSignature::Make(
- {std::move(argument_type), InputType::Array(Type::UINT32),
- InputType::Scalar(Type::UINT32)},
- OutputType(
- [](KernelContext* ctx, const std::vector<ValueDescr>&) -> Result<ValueDescr> {
- return checked_cast<GroupedAggregator*>(ctx->state())->out_type();
- }));
-
- kernel.consume = [](KernelContext* ctx, const ExecBatch& batch) {
- return checked_cast<GroupedAggregator*>(ctx->state())->Consume(batch);
- };
-
- kernel.merge = [](KernelContext* ctx, KernelState&&, KernelState*) {
- // TODO(ARROW-11840) merge two hash tables
- return Status::NotImplemented("Merge hashed aggregations");
- };
-
- kernel.finalize = [](KernelContext* ctx, Datum* out) {
- ARROW_ASSIGN_OR_RAISE(*out,
- checked_cast<GroupedAggregator*>(ctx->state())->Finalize());
- return Status::OK();
- };
-
- return kernel;
-}
-
-Result<std::vector<const HashAggregateKernel*>> GetKernels(
- ExecContext* ctx, const std::vector<Aggregate>& aggregates,
- const std::vector<ValueDescr>& in_descrs) {
- if (aggregates.size() != in_descrs.size()) {
- return Status::Invalid(aggregates.size(), " aggregate functions were specified but ",
- in_descrs.size(), " arguments were provided.");
- }
-
- std::vector<const HashAggregateKernel*> kernels(in_descrs.size());
-
- for (size_t i = 0; i < aggregates.size(); ++i) {
- ARROW_ASSIGN_OR_RAISE(auto function,
- ctx->func_registry()->GetFunction(aggregates[i].function));
- ARROW_ASSIGN_OR_RAISE(
- const Kernel* kernel,
- function->DispatchExact(
- {in_descrs[i], ValueDescr::Array(uint32()), ValueDescr::Scalar(uint32())}));
- kernels[i] = static_cast<const HashAggregateKernel*>(kernel);
- }
- return kernels;
-}
-
-Result<std::vector<std::unique_ptr<KernelState>>> InitKernels(
- const std::vector<const HashAggregateKernel*>& kernels, ExecContext* ctx,
- const std::vector<Aggregate>& aggregates, const std::vector<ValueDescr>& in_descrs) {
- std::vector<std::unique_ptr<KernelState>> states(kernels.size());
-
- for (size_t i = 0; i < aggregates.size(); ++i) {
- auto options = aggregates[i].options;
-
- if (options == nullptr) {
- // use known default options for the named function if possible
- auto maybe_function = ctx->func_registry()->GetFunction(aggregates[i].function);
- if (maybe_function.ok()) {
- options = maybe_function.ValueOrDie()->default_options();
- }
- }
-
- KernelContext kernel_ctx{ctx};
- ARROW_ASSIGN_OR_RAISE(
- states[i], kernels[i]->init(&kernel_ctx, KernelInitArgs{kernels[i],
- {
- in_descrs[i].type,
- uint32(),
- uint32(),
- },
- options}));
- }
-
- return std::move(states);
-}
-
-Result<FieldVector> ResolveKernels(
- const std::vector<Aggregate>& aggregates,
- const std::vector<const HashAggregateKernel*>& kernels,
- const std::vector<std::unique_ptr<KernelState>>& states, ExecContext* ctx,
- const std::vector<ValueDescr>& descrs) {
- FieldVector fields(descrs.size());
-
- for (size_t i = 0; i < kernels.size(); ++i) {
- KernelContext kernel_ctx{ctx};
- kernel_ctx.SetState(states[i].get());
-
- ARROW_ASSIGN_OR_RAISE(auto descr, kernels[i]->signature->out_type().Resolve(
- &kernel_ctx, {
- descrs[i].type,
- uint32(),
- uint32(),
- }));
- fields[i] = field(aggregates[i].function, std::move(descr.type));
- }
- return fields;
-}
-
-} // namespace
-
-Result<std::unique_ptr<Grouper>> Grouper::Make(const std::vector<ValueDescr>& descrs,
- ExecContext* ctx) {
- if (GrouperFastImpl::CanUse(descrs)) {
- return GrouperFastImpl::Make(descrs, ctx);
- }
- return GrouperImpl::Make(descrs, ctx);
-}
-
-Result<Datum> GroupBy(const std::vector<Datum>& arguments, const std::vector<Datum>& keys,
- const std::vector<Aggregate>& aggregates, ExecContext* ctx) {
- // Construct and initialize HashAggregateKernels
- ARROW_ASSIGN_OR_RAISE(auto argument_descrs,
- ExecBatch::Make(arguments).Map(
- [](ExecBatch batch) { return batch.GetDescriptors(); }));
-
- ARROW_ASSIGN_OR_RAISE(auto kernels, GetKernels(ctx, aggregates, argument_descrs));
-
- ARROW_ASSIGN_OR_RAISE(auto states,
- InitKernels(kernels, ctx, aggregates, argument_descrs));
-
- ARROW_ASSIGN_OR_RAISE(
- FieldVector out_fields,
- ResolveKernels(aggregates, kernels, states, ctx, argument_descrs));
-
- using arrow::compute::detail::ExecBatchIterator;
-
- ARROW_ASSIGN_OR_RAISE(auto argument_batch_iterator,
- ExecBatchIterator::Make(arguments, ctx->exec_chunksize()));
-
- // Construct Grouper
- ARROW_ASSIGN_OR_RAISE(auto key_descrs, ExecBatch::Make(keys).Map([](ExecBatch batch) {
- return batch.GetDescriptors();
- }));
-
- ARROW_ASSIGN_OR_RAISE(auto grouper, Grouper::Make(key_descrs, ctx));
-
- int i = 0;
- for (ValueDescr& key_descr : key_descrs) {
- out_fields.push_back(field("key_" + std::to_string(i++), std::move(key_descr.type)));
- }
-
- ARROW_ASSIGN_OR_RAISE(auto key_batch_iterator,
- ExecBatchIterator::Make(keys, ctx->exec_chunksize()));
-
- // start "streaming" execution
- ExecBatch key_batch, argument_batch;
- while (argument_batch_iterator->Next(&argument_batch) &&
- key_batch_iterator->Next(&key_batch)) {
- if (key_batch.length == 0) continue;
-
- // compute a batch of group ids
- ARROW_ASSIGN_OR_RAISE(Datum id_batch, grouper->Consume(key_batch));
-
- // consume group ids with HashAggregateKernels
- for (size_t i = 0; i < kernels.size(); ++i) {
- KernelContext batch_ctx{ctx};
- batch_ctx.SetState(states[i].get());
- ARROW_ASSIGN_OR_RAISE(auto batch, ExecBatch::Make({argument_batch[i], id_batch,
- Datum(grouper->num_groups())}));
- RETURN_NOT_OK(kernels[i]->consume(&batch_ctx, batch));
- }
- }
-
- // Finalize output
- ArrayDataVector out_data(arguments.size() + keys.size());
- auto it = out_data.begin();
-
- for (size_t i = 0; i < kernels.size(); ++i) {
- KernelContext batch_ctx{ctx};
- batch_ctx.SetState(states[i].get());
- Datum out;
- RETURN_NOT_OK(kernels[i]->finalize(&batch_ctx, &out));
- *it++ = out.array();
- }
-
- ARROW_ASSIGN_OR_RAISE(ExecBatch out_keys, grouper->GetUniques());
- for (const auto& key : out_keys.values) {
- *it++ = key.array();
- }
-
- int64_t length = out_data[0]->length;
- return ArrayData::Make(struct_(std::move(out_fields)), length,
- {/*null_bitmap=*/nullptr}, std::move(out_data),
- /*null_count=*/0);
-}
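-
-// A minimal usage sketch (column names hypothetical): summing one argument
-// column grouped by one key column
-//
-//   ARROW_ASSIGN_OR_RAISE(
-//       Datum out, GroupBy({values}, {keys}, {{"hash_sum", nullptr}}, ctx));
-//   // "out" is a struct array with one field per aggregate ("hash_sum")
-//   // followed by one field per key ("key_0")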
-
-Result<std::shared_ptr<ListArray>> Grouper::ApplyGroupings(const ListArray& groupings,
- const Array& array,
- ExecContext* ctx) {
- ARROW_ASSIGN_OR_RAISE(Datum sorted,
- compute::Take(array, groupings.data()->child_data[0],
- TakeOptions::NoBoundsCheck(), ctx));
-
- return std::make_shared<ListArray>(list(array.type()), groupings.length(),
- groupings.value_offsets(), sorted.make_array());
-}
-
-Result<std::shared_ptr<ListArray>> Grouper::MakeGroupings(const UInt32Array& ids,
- uint32_t num_groups,
- ExecContext* ctx) {
- if (ids.null_count() != 0) {
- return Status::Invalid("MakeGroupings with null ids");
- }
-
- ARROW_ASSIGN_OR_RAISE(auto offsets, AllocateBuffer(sizeof(int32_t) * (num_groups + 1),
- ctx->memory_pool()));
- auto raw_offsets = reinterpret_cast<int32_t*>(offsets->mutable_data());
-
- std::memset(raw_offsets, 0, offsets->size());
- for (int i = 0; i < ids.length(); ++i) {
- DCHECK_LT(ids.Value(i), num_groups);
- raw_offsets[ids.Value(i)] += 1;
- }
- int32_t length = 0;
- for (uint32_t id = 0; id < num_groups; ++id) {
- auto offset = raw_offsets[id];
- raw_offsets[id] = length;
- length += offset;
- }
- raw_offsets[num_groups] = length;
- DCHECK_EQ(ids.length(), length);
-
- ARROW_ASSIGN_OR_RAISE(auto offsets_copy,
- offsets->CopySlice(0, offsets->size(), ctx->memory_pool()));
- raw_offsets = reinterpret_cast<int32_t*>(offsets_copy->mutable_data());
-
- ARROW_ASSIGN_OR_RAISE(auto sort_indices, AllocateBuffer(sizeof(int32_t) * ids.length(),
- ctx->memory_pool()));
- auto raw_sort_indices = reinterpret_cast<int32_t*>(sort_indices->mutable_data());
- for (int i = 0; i < ids.length(); ++i) {
- raw_sort_indices[raw_offsets[ids.Value(i)]++] = i;
- }
-
- return std::make_shared<ListArray>(
- list(int32()), num_groups, std::move(offsets),
- std::make_shared<Int32Array>(ids.length(), std::move(sort_indices)));
-}
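-
-// Worked example: ids = [0, 1, 0, 2] with num_groups = 3 yields offsets
-// [0, 2, 3, 4] and sort indices [0, 2, 1, 3], i.e. the groupings
-// [[0, 2], [1], [3]] mapping each group id to the rows that carry it.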
-
-namespace {
-const FunctionDoc hash_count_doc{"Count the number of null / non-null values",
- ("By default, non-null values are counted.\n"
- "This can be changed through ScalarAggregateOptions."),
- {"array", "group_id_array", "group_count"},
- "ScalarAggregateOptions"};
-
-const FunctionDoc hash_sum_doc{"Sum values of a numeric array",
- ("Null values are ignored."),
- {"array", "group_id_array", "group_count"}};
-
-const FunctionDoc hash_min_max_doc{
- "Compute the minimum and maximum values of a numeric array",
- ("Null values are ignored by default.\n"
- "This can be changed through ScalarAggregateOptions."),
- {"array", "group_id_array", "group_count"},
- "ScalarAggregateOptions"};
-} // namespace
-
-void RegisterHashAggregateBasic(FunctionRegistry* registry) {
- {
- static auto default_scalar_aggregate_options = ScalarAggregateOptions::Defaults();
- auto func = std::make_shared<HashAggregateFunction>(
- "hash_count", Arity::Ternary(), &hash_count_doc,
- &default_scalar_aggregate_options);
- DCHECK_OK(func->AddKernel(MakeKernel<GroupedCountImpl>(ValueDescr::ARRAY)));
- DCHECK_OK(registry->AddFunction(std::move(func)));
- }
-
- {
- auto func = std::make_shared<HashAggregateFunction>("hash_sum", Arity::Ternary(),
- &hash_sum_doc);
- DCHECK_OK(func->AddKernel(MakeKernel<GroupedSumImpl>(ValueDescr::ARRAY)));
- DCHECK_OK(registry->AddFunction(std::move(func)));
- }
-
- {
- static auto default_scalar_aggregate_options = ScalarAggregateOptions::Defaults();
- auto func = std::make_shared<HashAggregateFunction>(
- "hash_min_max", Arity::Ternary(), &hash_min_max_doc,
- &default_scalar_aggregate_options);
- DCHECK_OK(func->AddKernel(MakeKernel<GroupedMinMaxImpl>(ValueDescr::ARRAY)));
- DCHECK_OK(registry->AddFunction(std::move(func)));
- }
-}
-
-} // namespace internal
-} // namespace compute
-} // namespace arrow
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include <functional>
+#include <memory>
+#include <string>
+#include <unordered_map>
+#include <vector>
+
+#include "arrow/buffer_builder.h"
+#include "arrow/compute/api_aggregate.h"
+#include "arrow/compute/api_vector.h"
+#include "arrow/compute/exec/key_compare.h"
+#include "arrow/compute/exec/key_encode.h"
+#include "arrow/compute/exec/key_hash.h"
+#include "arrow/compute/exec/key_map.h"
+#include "arrow/compute/exec/util.h"
+#include "arrow/compute/exec_internal.h"
+#include "arrow/compute/kernel.h"
+#include "arrow/compute/kernels/aggregate_internal.h"
+#include "arrow/compute/kernels/common.h"
+#include "arrow/util/bit_run_reader.h"
+#include "arrow/util/bitmap_ops.h"
+#include "arrow/util/bitmap_writer.h"
+#include "arrow/util/checked_cast.h"
+#include "arrow/util/cpu_info.h"
+#include "arrow/util/make_unique.h"
+#include "arrow/visitor_inline.h"
+
+namespace arrow {
+
+using internal::checked_cast;
+using internal::FirstTimeBitmapWriter;
+
+namespace compute {
+namespace internal {
+namespace {
+
+struct KeyEncoder {
+ // the first byte of an encoded key is used to indicate nullity
+ static constexpr bool kExtraByteForNull = true;
+
+ static constexpr uint8_t kNullByte = 1;
+ static constexpr uint8_t kValidByte = 0;
+
+ virtual ~KeyEncoder() = default;
+
+ virtual void AddLength(const ArrayData&, int32_t* lengths) = 0;
+
+ virtual Status Encode(const ArrayData&, uint8_t** encoded_bytes) = 0;
+
+ virtual Result<std::shared_ptr<ArrayData>> Decode(uint8_t** encoded_bytes,
+ int32_t length, MemoryPool*) = 0;
+
+ // extract the null bitmap from the leading nullity bytes of encoded keys
+ static Status DecodeNulls(MemoryPool* pool, int32_t length, uint8_t** encoded_bytes,
+ std::shared_ptr<Buffer>* null_bitmap, int32_t* null_count) {
+ // first count nulls to determine if a null bitmap is necessary
+ *null_count = 0;
+ for (int32_t i = 0; i < length; ++i) {
+ *null_count += (encoded_bytes[i][0] == kNullByte);
+ }
+
+ if (*null_count > 0) {
+ ARROW_ASSIGN_OR_RAISE(*null_bitmap, AllocateBitmap(length, pool));
+ uint8_t* validity = (*null_bitmap)->mutable_data();
+
+ FirstTimeBitmapWriter writer(validity, 0, length);
+ for (int32_t i = 0; i < length; ++i) {
+ if (encoded_bytes[i][0] == kValidByte) {
+ writer.Set();
+ } else {
+ writer.Clear();
+ }
+ writer.Next();
+ encoded_bytes[i] += 1;
+ }
+ writer.Finish();
+ } else {
+ for (int32_t i = 0; i < length; ++i) {
+ encoded_bytes[i] += 1;
+ }
+ }
+ return Status::OK();
+ }
+};
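+
+// The concrete encoders below share one wire format: an encoded key is the
+// nullity byte (kValidByte or kNullByte) followed by the payload. Fixed-width
+// payloads are the raw value bytes; var-length payloads are prefixed with
+// their length, so rows can be concatenated and compared as flat byte strings.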
+
+struct BooleanKeyEncoder : KeyEncoder {
+ static constexpr int kByteWidth = 1;
+
+ void AddLength(const ArrayData& data, int32_t* lengths) override {
+ for (int64_t i = 0; i < data.length; ++i) {
+ lengths[i] += kByteWidth + kExtraByteForNull;
+ }
+ }
+
+ Status Encode(const ArrayData& data, uint8_t** encoded_bytes) override {
+ VisitArrayDataInline<BooleanType>(
+ data,
+ [&](bool value) {
+ auto& encoded_ptr = *encoded_bytes++;
+ *encoded_ptr++ = kValidByte;
+ *encoded_ptr++ = value;
+ },
+ [&] {
+ auto& encoded_ptr = *encoded_bytes++;
+ *encoded_ptr++ = kNullByte;
+ *encoded_ptr++ = 0;
+ });
+ return Status::OK();
+ }
+
+ Result<std::shared_ptr<ArrayData>> Decode(uint8_t** encoded_bytes, int32_t length,
+ MemoryPool* pool) override {
+ std::shared_ptr<Buffer> null_buf;
+ int32_t null_count;
+ RETURN_NOT_OK(DecodeNulls(pool, length, encoded_bytes, &null_buf, &null_count));
+
+ ARROW_ASSIGN_OR_RAISE(auto key_buf, AllocateBitmap(length, pool));
+
+ uint8_t* raw_output = key_buf->mutable_data();
+ for (int32_t i = 0; i < length; ++i) {
+ auto& encoded_ptr = encoded_bytes[i];
+ BitUtil::SetBitTo(raw_output, i, encoded_ptr[0] != 0);
+ encoded_ptr += 1;
+ }
+
+ return ArrayData::Make(boolean(), length, {std::move(null_buf), std::move(key_buf)},
+ null_count);
+ }
+};
+
+struct FixedWidthKeyEncoder : KeyEncoder {
+ explicit FixedWidthKeyEncoder(std::shared_ptr<DataType> type)
+ : type_(std::move(type)),
+ byte_width_(checked_cast<const FixedWidthType&>(*type_).bit_width() / 8) {}
+
+ void AddLength(const ArrayData& data, int32_t* lengths) override {
+ for (int64_t i = 0; i < data.length; ++i) {
+ lengths[i] += byte_width_ + kExtraByteForNull;
+ }
+ }
+
+ Status Encode(const ArrayData& data, uint8_t** encoded_bytes) override {
+ ArrayData viewed(fixed_size_binary(byte_width_), data.length, data.buffers,
+ data.null_count, data.offset);
+
+ VisitArrayDataInline<FixedSizeBinaryType>(
+ viewed,
+ [&](util::string_view bytes) {
+ auto& encoded_ptr = *encoded_bytes++;
+ *encoded_ptr++ = kValidByte;
+ memcpy(encoded_ptr, bytes.data(), byte_width_);
+ encoded_ptr += byte_width_;
+ },
+ [&] {
+ auto& encoded_ptr = *encoded_bytes++;
+ *encoded_ptr++ = kNullByte;
+ memset(encoded_ptr, 0, byte_width_);
+ encoded_ptr += byte_width_;
+ });
+ return Status::OK();
+ }
+
+ Result<std::shared_ptr<ArrayData>> Decode(uint8_t** encoded_bytes, int32_t length,
+ MemoryPool* pool) override {
+ std::shared_ptr<Buffer> null_buf;
+ int32_t null_count;
+ RETURN_NOT_OK(DecodeNulls(pool, length, encoded_bytes, &null_buf, &null_count));
+
+ ARROW_ASSIGN_OR_RAISE(auto key_buf, AllocateBuffer(length * byte_width_, pool));
+
+ uint8_t* raw_output = key_buf->mutable_data();
+ for (int32_t i = 0; i < length; ++i) {
+ auto& encoded_ptr = encoded_bytes[i];
+ std::memcpy(raw_output, encoded_ptr, byte_width_);
+ encoded_ptr += byte_width_;
+ raw_output += byte_width_;
+ }
+
+ return ArrayData::Make(type_, length, {std::move(null_buf), std::move(key_buf)},
+ null_count);
+ }
+
+ std::shared_ptr<DataType> type_;
+ int byte_width_;
+};
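+
+// Worked example (little-endian): a non-null int32 key holding 7 encodes to
+// the five bytes 00 07 00 00 00 (kValidByte, then the value), while a null
+// int32 key encodes to 01 00 00 00 00 (kNullByte, then a zeroed payload).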
+
+struct DictionaryKeyEncoder : FixedWidthKeyEncoder {
+ DictionaryKeyEncoder(std::shared_ptr<DataType> type, MemoryPool* pool)
+ : FixedWidthKeyEncoder(std::move(type)), pool_(pool) {}
+
+ Status Encode(const ArrayData& data, uint8_t** encoded_bytes) override {
+ auto dict = MakeArray(data.dictionary);
+ if (dictionary_) {
+ if (!dictionary_->Equals(dict)) {
+ // TODO(bkietz) unify if necessary. For now, just error if any batch's dictionary
+ // differs from the first we saw for this key
+ return Status::NotImplemented("Unifying differing dictionaries");
+ }
+ } else {
+ dictionary_ = std::move(dict);
+ }
+ return FixedWidthKeyEncoder::Encode(data, encoded_bytes);
+ }
+
+ Result<std::shared_ptr<ArrayData>> Decode(uint8_t** encoded_bytes, int32_t length,
+ MemoryPool* pool) override {
+ ARROW_ASSIGN_OR_RAISE(auto data,
+ FixedWidthKeyEncoder::Decode(encoded_bytes, length, pool));
+
+ if (dictionary_) {
+ data->dictionary = dictionary_->data();
+ } else {
+ ARROW_ASSIGN_OR_RAISE(auto dict, MakeArrayOfNull(type_, 0));
+ data->dictionary = dict->data();
+ }
+
+ data->type = type_;
+ return data;
+ }
+
+ MemoryPool* pool_;
+ std::shared_ptr<Array> dictionary_;
+};
+
+template <typename T>
+struct VarLengthKeyEncoder : KeyEncoder {
+ using Offset = typename T::offset_type;
+
+ void AddLength(const ArrayData& data, int32_t* lengths) override {
+ int64_t i = 0;
+ VisitArrayDataInline<T>(
+ data,
+ [&](util::string_view bytes) {
+ lengths[i++] +=
+ kExtraByteForNull + sizeof(Offset) + static_cast<int32_t>(bytes.size());
+ },
+ [&] { lengths[i++] += kExtraByteForNull + sizeof(Offset); });
+ }
+
+ Status Encode(const ArrayData& data, uint8_t** encoded_bytes) override {
+ VisitArrayDataInline<T>(
+ data,
+ [&](util::string_view bytes) {
+ auto& encoded_ptr = *encoded_bytes++;
+ *encoded_ptr++ = kValidByte;
+ util::SafeStore(encoded_ptr, static_cast<Offset>(bytes.size()));
+ encoded_ptr += sizeof(Offset);
+ memcpy(encoded_ptr, bytes.data(), bytes.size());
+ encoded_ptr += bytes.size();
+ },
+ [&] {
+ auto& encoded_ptr = *encoded_bytes++;
+ *encoded_ptr++ = kNullByte;
+ util::SafeStore(encoded_ptr, static_cast<Offset>(0));
+ encoded_ptr += sizeof(Offset);
+ });
+ return Status::OK();
+ }
+
+ Result<std::shared_ptr<ArrayData>> Decode(uint8_t** encoded_bytes, int32_t length,
+ MemoryPool* pool) override {
+ std::shared_ptr<Buffer> null_buf;
+ int32_t null_count;
+ RETURN_NOT_OK(DecodeNulls(pool, length, encoded_bytes, &null_buf, &null_count));
+
+ Offset length_sum = 0;
+ for (int32_t i = 0; i < length; ++i) {
+ length_sum += util::SafeLoadAs<Offset>(encoded_bytes[i]);
+ }
+
+ ARROW_ASSIGN_OR_RAISE(auto offset_buf,
+ AllocateBuffer(sizeof(Offset) * (1 + length), pool));
+ ARROW_ASSIGN_OR_RAISE(auto key_buf, AllocateBuffer(length_sum));
+
+ auto raw_offsets = reinterpret_cast<Offset*>(offset_buf->mutable_data());
+ auto raw_keys = key_buf->mutable_data();
+
+ Offset current_offset = 0;
+ for (int32_t i = 0; i < length; ++i) {
+ raw_offsets[i] = current_offset;
+
+ auto key_length = util::SafeLoadAs<Offset>(encoded_bytes[i]);
+ encoded_bytes[i] += sizeof(Offset);
+
+ memcpy(raw_keys + current_offset, encoded_bytes[i], key_length);
+ encoded_bytes[i] += key_length;
+
+ current_offset += key_length;
+ }
+ raw_offsets[length] = current_offset;
+
+ return ArrayData::Make(
+ type_, length, {std::move(null_buf), std::move(offset_buf), std::move(key_buf)},
+ null_count);
+ }
+
+ explicit VarLengthKeyEncoder(std::shared_ptr<DataType> type) : type_(std::move(type)) {}
+
+ std::shared_ptr<DataType> type_;
+};
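+
+// Worked example (BinaryType, Offset = int32_t, little-endian): the value
+// "ab" encodes to 00 02 00 00 00 61 62 (kValidByte, the length 2, then the
+// bytes), and a null encodes to 01 00 00 00 00 (kNullByte, then length 0).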
+
+struct GrouperImpl : Grouper {
+ static Result<std::unique_ptr<GrouperImpl>> Make(const std::vector<ValueDescr>& keys,
+ ExecContext* ctx) {
+ auto impl = ::arrow::internal::make_unique<GrouperImpl>();
+
+ impl->encoders_.resize(keys.size());
+ impl->ctx_ = ctx;
+
+ for (size_t i = 0; i < keys.size(); ++i) {
+ const auto& key = keys[i].type;
+
+ if (key->id() == Type::BOOL) {
+ impl->encoders_[i] = ::arrow::internal::make_unique<BooleanKeyEncoder>();
+ continue;
+ }
+
+ if (key->id() == Type::DICTIONARY) {
+ impl->encoders_[i] =
+ ::arrow::internal::make_unique<DictionaryKeyEncoder>(key, ctx->memory_pool());
+ continue;
+ }
+
+ if (is_fixed_width(key->id())) {
+ impl->encoders_[i] = ::arrow::internal::make_unique<FixedWidthKeyEncoder>(key);
+ continue;
+ }
+
+ if (is_binary_like(key->id())) {
+ impl->encoders_[i] =
+ ::arrow::internal::make_unique<VarLengthKeyEncoder<BinaryType>>(key);
+ continue;
+ }
+
+ if (is_large_binary_like(key->id())) {
+ impl->encoders_[i] =
+ ::arrow::internal::make_unique<VarLengthKeyEncoder<LargeBinaryType>>(key);
+ continue;
+ }
+
+ return Status::NotImplemented("Keys of type ", *key);
+ }
+
+ return std::move(impl);
+ }
+
+ Result<Datum> Consume(const ExecBatch& batch) override {
+ std::vector<int32_t> offsets_batch(batch.length + 1);
+ for (int i = 0; i < batch.num_values(); ++i) {
+ encoders_[i]->AddLength(*batch[i].array(), offsets_batch.data());
+ }
+
+ int32_t total_length = 0;
+ for (int64_t i = 0; i < batch.length; ++i) {
+ auto total_length_before = total_length;
+ total_length += offsets_batch[i];
+ offsets_batch[i] = total_length_before;
+ }
+ offsets_batch[batch.length] = total_length;
+
+ std::vector<uint8_t> key_bytes_batch(total_length);
+ std::vector<uint8_t*> key_buf_ptrs(batch.length);
+ for (int64_t i = 0; i < batch.length; ++i) {
+ key_buf_ptrs[i] = key_bytes_batch.data() + offsets_batch[i];
+ }
+
+ for (int i = 0; i < batch.num_values(); ++i) {
+ RETURN_NOT_OK(encoders_[i]->Encode(*batch[i].array(), key_buf_ptrs.data()));
+ }
+
+ TypedBufferBuilder<uint32_t> group_ids_batch(ctx_->memory_pool());
+ RETURN_NOT_OK(group_ids_batch.Resize(batch.length));
+
+ for (int64_t i = 0; i < batch.length; ++i) {
+ int32_t key_length = offsets_batch[i + 1] - offsets_batch[i];
+ std::string key(
+ reinterpret_cast<const char*>(key_bytes_batch.data() + offsets_batch[i]),
+ key_length);
+
+ auto it_success = map_.emplace(key, num_groups_);
+ auto group_id = it_success.first->second;
+
+ if (it_success.second) {
+ // new key; update offsets and key_bytes
+ ++num_groups_;
+ auto next_key_offset = static_cast<int32_t>(key_bytes_.size());
+ key_bytes_.resize(next_key_offset + key_length);
+ offsets_.push_back(next_key_offset + key_length);
+ memcpy(key_bytes_.data() + next_key_offset, key.c_str(), key_length);
+ }
+
+ group_ids_batch.UnsafeAppend(group_id);
+ }
+
+ ARROW_ASSIGN_OR_RAISE(auto group_ids, group_ids_batch.Finish());
+ return Datum(UInt32Array(batch.length, std::move(group_ids)));
+ }
+
+ uint32_t num_groups() const override { return num_groups_; }
+
+ Result<ExecBatch> GetUniques() override {
+ ExecBatch out({}, num_groups_);
+
+ std::vector<uint8_t*> key_buf_ptrs(num_groups_);
+ for (int64_t i = 0; i < num_groups_; ++i) {
+ key_buf_ptrs[i] = key_bytes_.data() + offsets_[i];
+ }
+
+ out.values.resize(encoders_.size());
+ for (size_t i = 0; i < encoders_.size(); ++i) {
+ ARROW_ASSIGN_OR_RAISE(
+ out.values[i],
+ encoders_[i]->Decode(key_buf_ptrs.data(), static_cast<int32_t>(num_groups_),
+ ctx_->memory_pool()));
+ }
+
+ return out;
+ }
+
+ ExecContext* ctx_;
+ std::unordered_map<std::string, uint32_t> map_;
+ std::vector<int32_t> offsets_ = {0};
+ std::vector<uint8_t> key_bytes_;
+ uint32_t num_groups_ = 0;
+ std::vector<std::unique_ptr<KeyEncoder>> encoders_;
+};
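+
+// A minimal sketch of driving a Grouper, mirroring the GroupBy entry point
+// later in this file:
+//
+//   ARROW_ASSIGN_OR_RAISE(auto grouper, Grouper::Make(key_descrs, ctx));
+//   ARROW_ASSIGN_OR_RAISE(Datum id_batch, grouper->Consume(key_batch));
+//   // ... feed id_batch and grouper->num_groups() to the aggregators ...
+//   ARROW_ASSIGN_OR_RAISE(ExecBatch uniques, grouper->GetUniques());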
+
+struct GrouperFastImpl : Grouper {
+ static constexpr int kBitmapPaddingForSIMD = 64; // bits
+ static constexpr int kPaddingForSIMD = 32; // bytes
+
+ static bool CanUse(const std::vector<ValueDescr>& keys) {
+#if ARROW_LITTLE_ENDIAN
+ for (size_t i = 0; i < keys.size(); ++i) {
+ const auto& key = keys[i].type;
+ if (is_large_binary_like(key->id())) {
+ return false;
+ }
+ }
+ return true;
+#else
+ return false;
+#endif
+ }
+
+ static Result<std::unique_ptr<GrouperFastImpl>> Make(
+ const std::vector<ValueDescr>& keys, ExecContext* ctx) {
+ auto impl = ::arrow::internal::make_unique<GrouperFastImpl>();
+ impl->ctx_ = ctx;
+
+ RETURN_NOT_OK(impl->temp_stack_.Init(ctx->memory_pool(), 64 * minibatch_size_max_));
+ impl->encode_ctx_.hardware_flags =
+ arrow::internal::CpuInfo::GetInstance()->hardware_flags();
+ impl->encode_ctx_.stack = &impl->temp_stack_;
+
+ auto num_columns = keys.size();
+ impl->col_metadata_.resize(num_columns);
+ impl->key_types_.resize(num_columns);
+ impl->dictionaries_.resize(num_columns);
+ for (size_t icol = 0; icol < num_columns; ++icol) {
+ const auto& key = keys[icol].type;
+ if (key->id() == Type::DICTIONARY) {
+ auto bit_width = checked_cast<const FixedWidthType&>(*key).bit_width();
+ ARROW_DCHECK(bit_width % 8 == 0);
+ impl->col_metadata_[icol] =
+ arrow::compute::KeyEncoder::KeyColumnMetadata(true, bit_width / 8);
+ } else if (key->id() == Type::BOOL) {
+ impl->col_metadata_[icol] =
+ arrow::compute::KeyEncoder::KeyColumnMetadata(true, 0);
+ } else if (is_fixed_width(key->id())) {
+ impl->col_metadata_[icol] = arrow::compute::KeyEncoder::KeyColumnMetadata(
+ true, checked_cast<const FixedWidthType&>(*key).bit_width() / 8);
+ } else if (is_binary_like(key->id())) {
+ impl->col_metadata_[icol] =
+ arrow::compute::KeyEncoder::KeyColumnMetadata(false, sizeof(uint32_t));
+ } else {
+ return Status::NotImplemented("Keys of type ", *key);
+ }
+ impl->key_types_[icol] = key;
+ }
+
+ impl->encoder_.Init(impl->col_metadata_, &impl->encode_ctx_,
+ /* row_alignment = */ sizeof(uint64_t),
+ /* string_alignment = */ sizeof(uint64_t));
+ RETURN_NOT_OK(impl->rows_.Init(ctx->memory_pool(), impl->encoder_.row_metadata()));
+ RETURN_NOT_OK(
+ impl->rows_minibatch_.Init(ctx->memory_pool(), impl->encoder_.row_metadata()));
+ impl->minibatch_size_ = impl->minibatch_size_min_;
+ GrouperFastImpl* impl_ptr = impl.get();
+ auto equal_func = [impl_ptr](
+ int num_keys_to_compare, const uint16_t* selection_may_be_null,
+ const uint32_t* group_ids, uint32_t* out_num_keys_mismatch,
+ uint16_t* out_selection_mismatch) {
+ arrow::compute::KeyCompare::CompareRows(
+ num_keys_to_compare, selection_may_be_null, group_ids, &impl_ptr->encode_ctx_,
+ out_num_keys_mismatch, out_selection_mismatch, impl_ptr->rows_minibatch_,
+ impl_ptr->rows_);
+ };
+ auto append_func = [impl_ptr](int num_keys, const uint16_t* selection) {
+ return impl_ptr->rows_.AppendSelectionFrom(impl_ptr->rows_minibatch_, num_keys,
+ selection);
+ };
+ RETURN_NOT_OK(impl->map_.init(impl->encode_ctx_.hardware_flags, ctx->memory_pool(),
+ impl->encode_ctx_.stack, impl->log_minibatch_max_,
+ equal_func, append_func));
+ impl->cols_.resize(num_columns);
+ impl->minibatch_hashes_.resize(impl->minibatch_size_max_ +
+ kPaddingForSIMD / sizeof(uint32_t));
+
+ return std::move(impl);
+ }
+
+ ~GrouperFastImpl() { map_.cleanup(); }
+
+ Result<Datum> Consume(const ExecBatch& batch) override {
+ int64_t num_rows = batch.length;
+ int num_columns = batch.num_values();
+
+ // Process dictionaries
+ for (int icol = 0; icol < num_columns; ++icol) {
+ if (key_types_[icol]->id() == Type::DICTIONARY) {
+ auto data = batch[icol].array();
+ auto dict = MakeArray(data->dictionary);
+ if (dictionaries_[icol]) {
+ if (!dictionaries_[icol]->Equals(dict)) {
+ // TODO(bkietz) unify if necessary. For now, just error if any batch's
+ // dictionary differs from the first we saw for this key
+ return Status::NotImplemented("Unifying differing dictionaries");
+ }
+ } else {
+ dictionaries_[icol] = std::move(dict);
+ }
+ }
+ }
+
+ std::shared_ptr<arrow::Buffer> group_ids;
+ ARROW_ASSIGN_OR_RAISE(
+ group_ids, AllocateBuffer(sizeof(uint32_t) * num_rows, ctx_->memory_pool()));
+
+ for (int icol = 0; icol < num_columns; ++icol) {
+ const uint8_t* non_nulls = nullptr;
+ if (batch[icol].array()->buffers[0] != NULLPTR) {
+ non_nulls = batch[icol].array()->buffers[0]->data();
+ }
+ const uint8_t* fixedlen = batch[icol].array()->buffers[1]->data();
+ const uint8_t* varlen = nullptr;
+ if (!col_metadata_[icol].is_fixed_length) {
+ varlen = batch[icol].array()->buffers[2]->data();
+ }
+
+ int64_t offset = batch[icol].array()->offset;
+
+ auto col_base = arrow::compute::KeyEncoder::KeyColumnArray(
+ col_metadata_[icol], offset + num_rows, non_nulls, fixedlen, varlen);
+
+ cols_[icol] =
+ arrow::compute::KeyEncoder::KeyColumnArray(col_base, offset, num_rows);
+ }
+
+ // Split into smaller mini-batches
+ for (uint32_t start_row = 0; start_row < num_rows;) {
+ uint32_t batch_size_next = std::min(static_cast<uint32_t>(minibatch_size_),
+ static_cast<uint32_t>(num_rows) - start_row);
+
+ // Encode
+ rows_minibatch_.Clean();
+ RETURN_NOT_OK(encoder_.PrepareOutputForEncode(start_row, batch_size_next,
+ &rows_minibatch_, cols_));
+ encoder_.Encode(start_row, batch_size_next, &rows_minibatch_, cols_);
+
+ // Compute hash
+ if (encoder_.row_metadata().is_fixed_length) {
+ Hashing::hash_fixed(encode_ctx_.hardware_flags, batch_size_next,
+ encoder_.row_metadata().fixed_length, rows_minibatch_.data(1),
+ minibatch_hashes_.data());
+ } else {
+ auto hash_temp_buf =
+ util::TempVectorHolder<uint32_t>(&temp_stack_, 4 * batch_size_next);
+ Hashing::hash_varlen(encode_ctx_.hardware_flags, batch_size_next,
+ rows_minibatch_.offsets(), rows_minibatch_.data(2),
+ hash_temp_buf.mutable_data(), minibatch_hashes_.data());
+ }
+
+ // Map
+ RETURN_NOT_OK(
+ map_.map(batch_size_next, minibatch_hashes_.data(),
+ reinterpret_cast<uint32_t*>(group_ids->mutable_data()) + start_row));
+
+ start_row += batch_size_next;
+
+ if (minibatch_size_ * 2 <= minibatch_size_max_) {
+ minibatch_size_ *= 2;
+ }
+ }
+
+ return Datum(UInt32Array(batch.length, std::move(group_ids)));
+ }
+
+ uint32_t num_groups() const override { return static_cast<uint32_t>(rows_.length()); }
+
+ // Make sure padded buffers end up with the right logical size
+
+ Result<std::shared_ptr<Buffer>> AllocatePaddedBitmap(int64_t length) {
+ ARROW_ASSIGN_OR_RAISE(
+ std::shared_ptr<Buffer> buf,
+ AllocateBitmap(length + kBitmapPaddingForSIMD, ctx_->memory_pool()));
+ return SliceMutableBuffer(buf, 0, BitUtil::BytesForBits(length));
+ }
+
+ Result<std::shared_ptr<Buffer>> AllocatePaddedBuffer(int64_t size) {
+ ARROW_ASSIGN_OR_RAISE(
+ std::shared_ptr<Buffer> buf,
+ AllocateBuffer(size + kBitmapPaddingForSIMD, ctx_->memory_pool()));
+ return SliceMutableBuffer(buf, 0, size);
+ }
+
+ Result<ExecBatch> GetUniques() override {
+ auto num_columns = static_cast<uint32_t>(col_metadata_.size());
+ int64_t num_groups = rows_.length();
+
+ std::vector<std::shared_ptr<Buffer>> non_null_bufs(num_columns);
+ std::vector<std::shared_ptr<Buffer>> fixedlen_bufs(num_columns);
+ std::vector<std::shared_ptr<Buffer>> varlen_bufs(num_columns);
+
+ for (size_t i = 0; i < num_columns; ++i) {
+ ARROW_ASSIGN_OR_RAISE(non_null_bufs[i], AllocatePaddedBitmap(num_groups));
+ if (col_metadata_[i].is_fixed_length) {
+ if (col_metadata_[i].fixed_length == 0) {
+ ARROW_ASSIGN_OR_RAISE(fixedlen_bufs[i], AllocatePaddedBitmap(num_groups));
+ } else {
+ ARROW_ASSIGN_OR_RAISE(
+ fixedlen_bufs[i],
+ AllocatePaddedBuffer(num_groups * col_metadata_[i].fixed_length));
+ }
+ } else {
+ ARROW_ASSIGN_OR_RAISE(fixedlen_bufs[i],
+ AllocatePaddedBuffer((num_groups + 1) * sizeof(uint32_t)));
+ }
+ cols_[i] = arrow::compute::KeyEncoder::KeyColumnArray(
+ col_metadata_[i], num_groups, non_null_bufs[i]->mutable_data(),
+ fixedlen_bufs[i]->mutable_data(), nullptr);
+ }
+
+ for (int64_t start_row = 0; start_row < num_groups;) {
+ int64_t batch_size_next =
+ std::min(num_groups - start_row, static_cast<int64_t>(minibatch_size_max_));
+ encoder_.DecodeFixedLengthBuffers(start_row, start_row, batch_size_next, rows_,
+ &cols_);
+ start_row += batch_size_next;
+ }
+
+ if (!rows_.metadata().is_fixed_length) {
+ for (size_t i = 0; i < num_columns; ++i) {
+ if (!col_metadata_[i].is_fixed_length) {
+ auto varlen_size =
+ reinterpret_cast<const uint32_t*>(fixedlen_bufs[i]->data())[num_groups];
+ ARROW_ASSIGN_OR_RAISE(varlen_bufs[i], AllocatePaddedBuffer(varlen_size));
+ cols_[i] = arrow::compute::KeyEncoder::KeyColumnArray(
+ col_metadata_[i], num_groups, non_null_bufs[i]->mutable_data(),
+ fixedlen_bufs[i]->mutable_data(), varlen_bufs[i]->mutable_data());
+ }
+ }
+
+ for (int64_t start_row = 0; start_row < num_groups;) {
+ int64_t batch_size_next =
+ std::min(num_groups - start_row, static_cast<int64_t>(minibatch_size_max_));
+ encoder_.DecodeVaryingLengthBuffers(start_row, start_row, batch_size_next, rows_,
+ &cols_);
+ start_row += batch_size_next;
+ }
+ }
+
+ ExecBatch out({}, num_groups);
+ out.values.resize(num_columns);
+ for (size_t i = 0; i < num_columns; ++i) {
+ auto valid_count = arrow::internal::CountSetBits(
+ non_null_bufs[i]->data(), /*offset=*/0, static_cast<int64_t>(num_groups));
+ int null_count = static_cast<int>(num_groups) - static_cast<int>(valid_count);
+
+ if (col_metadata_[i].is_fixed_length) {
+ out.values[i] = ArrayData::Make(
+ key_types_[i], num_groups,
+ {std::move(non_null_bufs[i]), std::move(fixedlen_bufs[i])}, null_count);
+ } else {
+ out.values[i] =
+ ArrayData::Make(key_types_[i], num_groups,
+ {std::move(non_null_bufs[i]), std::move(fixedlen_bufs[i]),
+ std::move(varlen_bufs[i])},
+ null_count);
+ }
+ }
+
+ // Process dictionaries
+ for (size_t icol = 0; icol < num_columns; ++icol) {
+ if (key_types_[icol]->id() == Type::DICTIONARY) {
+ if (dictionaries_[icol]) {
+ out.values[icol].array()->dictionary = dictionaries_[icol]->data();
+ } else {
+ ARROW_ASSIGN_OR_RAISE(auto dict, MakeArrayOfNull(key_types_[icol], 0));
+ out.values[icol].array()->dictionary = dict->data();
+ }
+ }
+ }
+
+ return out;
+ }
+
+ static constexpr int log_minibatch_max_ = 10;
+ static constexpr int minibatch_size_max_ = 1 << log_minibatch_max_;
+ static constexpr int minibatch_size_min_ = 128;
+ int minibatch_size_;
+
+ ExecContext* ctx_;
+ arrow::util::TempVectorStack temp_stack_;
+ arrow::compute::KeyEncoder::KeyEncoderContext encode_ctx_;
+
+ std::vector<std::shared_ptr<arrow::DataType>> key_types_;
+ std::vector<arrow::compute::KeyEncoder::KeyColumnMetadata> col_metadata_;
+ std::vector<arrow::compute::KeyEncoder::KeyColumnArray> cols_;
+ std::vector<uint32_t> minibatch_hashes_;
+
+ std::vector<std::shared_ptr<Array>> dictionaries_;
+
+ arrow::compute::KeyEncoder::KeyRowArray rows_;
+ arrow::compute::KeyEncoder::KeyRowArray rows_minibatch_;
+ arrow::compute::KeyEncoder encoder_;
+ arrow::compute::SwissTable map_;
+};
+
+/// C++ abstract base class for the HashAggregateKernel interface.
+/// Implementations should be default constructible and perform initialization in
+/// Init().
+struct GroupedAggregator : KernelState {
+ virtual Status Init(ExecContext*, const FunctionOptions*,
+ const std::shared_ptr<DataType>&) = 0;
+
+ virtual Status Consume(const ExecBatch& batch) = 0;
+
+ virtual Result<Datum> Finalize() = 0;
+
+ template <typename Reserve>
+ Status MaybeReserve(int64_t old_num_groups, const ExecBatch& batch,
+ const Reserve& reserve) {
+ int64_t new_num_groups = batch[2].scalar_as<UInt32Scalar>().value;
+ if (new_num_groups <= old_num_groups) {
+ return Status::OK();
+ }
+ return reserve(new_num_groups - old_num_groups);
+ }
+
+ virtual std::shared_ptr<DataType> out_type() const = 0;
+};
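+
+// A minimal usage sketch (illustrative only, not part of this file): the
+// hash-aggregate machinery below drives a GroupedAggregator through
+//
+//   auto agg = ::arrow::internal::make_unique<GroupedCountImpl>();
+//   RETURN_NOT_OK(agg->Init(ctx->exec_context(), options, input_type));
+//   RETURN_NOT_OK(agg->Consume(batch));  // batch = {values, group_ids, group_count}
+//   ARROW_ASSIGN_OR_RAISE(Datum result, agg->Finalize());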
+
+// ----------------------------------------------------------------------
+// Count implementation
+
+struct GroupedCountImpl : public GroupedAggregator {
+ Status Init(ExecContext* ctx, const FunctionOptions* options,
+ const std::shared_ptr<DataType>&) override {
+ options_ = checked_cast<const ScalarAggregateOptions&>(*options);
+ counts_ = BufferBuilder(ctx->memory_pool());
+ return Status::OK();
+ }
+
+ Status Consume(const ExecBatch& batch) override {
+ RETURN_NOT_OK(MaybeReserve(num_groups_, batch, [&](int64_t added_groups) {
+ num_groups_ += added_groups;
+ return counts_.Append(added_groups * sizeof(int64_t), 0);
+ }));
+
+ auto group_ids = batch[1].array()->GetValues<uint32_t>(1);
+ auto raw_counts = reinterpret_cast<int64_t*>(counts_.mutable_data());
+
+ const auto& input = batch[0].array();
+
+ if (!options_.skip_nulls) {
+ if (input->GetNullCount() != 0) {
+ for (int64_t i = 0, input_i = input->offset; i < input->length; ++i, ++input_i) {
+ auto g = group_ids[i];
+ raw_counts[g] += !BitUtil::GetBit(input->buffers[0]->data(), input_i);
+ }
+ }
+ return Status::OK();
+ }
+
+ arrow::internal::VisitSetBitRunsVoid(
+ input->buffers[0], input->offset, input->length,
+ [&](int64_t begin, int64_t length) {
+ for (int64_t input_i = begin, i = begin - input->offset;
+ input_i < begin + length; ++input_i, ++i) {
+ auto g = group_ids[i];
+ raw_counts[g] += 1;
+ }
+ });
+ return Status::OK();
+ }
+
+ Result<Datum> Finalize() override {
+ ARROW_ASSIGN_OR_RAISE(auto counts, counts_.Finish());
+ return std::make_shared<Int64Array>(num_groups_, std::move(counts));
+ }
+
+ std::shared_ptr<DataType> out_type() const override { return int64(); }
+
+ int64_t num_groups_ = 0;
+ ScalarAggregateOptions options_;
+ BufferBuilder counts_;
+};
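+
+// Worked example (illustrative): values [1, null, 2, null] with group ids
+// [0, 0, 0, 1] and skip_nulls=false count only the nulls, so Finalize()
+// yields [1, 1]; with skip_nulls=true (the default) the non-null values are
+// counted instead, yielding [2, 0].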
+
+// ----------------------------------------------------------------------
+// Sum implementation
+
+struct GroupedSumImpl : public GroupedAggregator {
+ // NB: whether we are accumulating into double, int64_t, or uint64_t
+ // we always have 64 bits per group in the sums buffer.
+ static constexpr size_t kSumSize = sizeof(int64_t);
+
+ using ConsumeImpl = std::function<void(const std::shared_ptr<ArrayData>&,
+ const uint32_t*, void*, int64_t*)>;
+
+ struct GetConsumeImpl {
+ template <typename T, typename AccType = typename FindAccumulatorType<T>::Type>
+ Status Visit(const T&) {
+ consume_impl = [](const std::shared_ptr<ArrayData>& input, const uint32_t* group,
+ void* boxed_sums, int64_t* counts) {
+ auto sums = reinterpret_cast<typename TypeTraits<AccType>::CType*>(boxed_sums);
+
+ VisitArrayDataInline<T>(
+ *input,
+ [&](typename TypeTraits<T>::CType value) {
+ sums[*group] += value;
+ counts[*group] += 1;
+ ++group;
+ },
+ [&] { ++group; });
+ };
+ out_type = TypeTraits<AccType>::type_singleton();
+ return Status::OK();
+ }
+
+ Status Visit(const HalfFloatType& type) {
+ return Status::NotImplemented("Summing data of type ", type);
+ }
+
+ Status Visit(const DataType& type) {
+ return Status::NotImplemented("Summing data of type ", type);
+ }
+
+ ConsumeImpl consume_impl;
+ std::shared_ptr<DataType> out_type;
+ };
+
+ Status Init(ExecContext* ctx, const FunctionOptions*,
+ const std::shared_ptr<DataType>& input_type) override {
+ pool_ = ctx->memory_pool();
+ sums_ = BufferBuilder(pool_);
+ counts_ = BufferBuilder(pool_);
+
+ GetConsumeImpl get_consume_impl;
+ RETURN_NOT_OK(VisitTypeInline(*input_type, &get_consume_impl));
+
+ consume_impl_ = std::move(get_consume_impl.consume_impl);
+ out_type_ = std::move(get_consume_impl.out_type);
+
+ return Status::OK();
+ }
+
+ Status Consume(const ExecBatch& batch) override {
+ RETURN_NOT_OK(MaybeReserve(num_groups_, batch, [&](int64_t added_groups) {
+ num_groups_ += added_groups;
+ RETURN_NOT_OK(sums_.Append(added_groups * kSumSize, 0));
+ RETURN_NOT_OK(counts_.Append(added_groups * sizeof(int64_t), 0));
+ return Status::OK();
+ }));
+
+ auto group_ids = batch[1].array()->GetValues<uint32_t>(1);
+ consume_impl_(batch[0].array(), group_ids, sums_.mutable_data(),
+ reinterpret_cast<int64_t*>(counts_.mutable_data()));
+ return Status::OK();
+ }
+
+ Result<Datum> Finalize() override {
+ std::shared_ptr<Buffer> null_bitmap;
+ int64_t null_count = 0;
+
+ for (int64_t i = 0; i < num_groups_; ++i) {
+ if (reinterpret_cast<const int64_t*>(counts_.data())[i] > 0) continue;
+
+ if (null_bitmap == nullptr) {
+ ARROW_ASSIGN_OR_RAISE(null_bitmap, AllocateBitmap(num_groups_, pool_));
+ BitUtil::SetBitsTo(null_bitmap->mutable_data(), 0, num_groups_, true);
+ }
+
+ null_count += 1;
+ BitUtil::SetBitTo(null_bitmap->mutable_data(), i, false);
+ }
+
+ ARROW_ASSIGN_OR_RAISE(auto sums, sums_.Finish());
+
+ return ArrayData::Make(std::move(out_type_), num_groups_,
+ {std::move(null_bitmap), std::move(sums)}, null_count);
+ }
+
+ std::shared_ptr<DataType> out_type() const override { return out_type_; }
+
+ // NB: counts are used here instead of a simple "has_values_" bitmap since
+ // we expect to reuse this kernel to handle Mean
+ int64_t num_groups_ = 0;
+ BufferBuilder sums_, counts_;
+ std::shared_ptr<DataType> out_type_;
+ ConsumeImpl consume_impl_;
+ MemoryPool* pool_;
+};
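+
+// Illustrative note: FindAccumulatorType widens the input type, so e.g.
+// summing an int8 column accumulates into (and returns) int64, matching the
+// kSumSize invariant above that every group slot is 64 bits wide.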
+
+// ----------------------------------------------------------------------
+// MinMax implementation
+
+template <typename CType>
+struct Extrema : std::numeric_limits<CType> {};
+
+template <>
+struct Extrema<float> {
+ static constexpr float min() { return -std::numeric_limits<float>::infinity(); }
+ static constexpr float max() { return std::numeric_limits<float>::infinity(); }
+};
+
+template <>
+struct Extrema<double> {
+ static constexpr double min() { return -std::numeric_limits<double>::infinity(); }
+ static constexpr double max() { return std::numeric_limits<double>::infinity(); }
+};
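+
+// Why infinities (illustrative): min slots are seeded with Extrema<T>::max()
+// and max slots with Extrema<T>::min(), so the first value consumed replaces
+// both. The float/double specializations are needed because
+// std::numeric_limits<float>::min() is the smallest *positive* normal value,
+// not the most negative one.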
+
+struct GroupedMinMaxImpl : public GroupedAggregator {
+ using ConsumeImpl =
+ std::function<void(const std::shared_ptr<ArrayData>&, const uint32_t*, void*, void*,
+ uint8_t*, uint8_t*)>;
+
+ using ResizeImpl = std::function<Status(BufferBuilder*, int64_t)>;
+
+ template <typename CType>
+ static ResizeImpl MakeResizeImpl(CType anti_extreme) {
+ // Resize a min or max buffer, appending the correct anti-extreme for each added group
+ return [anti_extreme](BufferBuilder* builder, int64_t added_groups) {
+ TypedBufferBuilder<CType> typed_builder(std::move(*builder));
+ RETURN_NOT_OK(typed_builder.Append(added_groups, anti_extreme));
+ *builder = std::move(*typed_builder.bytes_builder());
+ return Status::OK();
+ };
+ }
+
+ struct GetImpl {
+ template <typename T, typename CType = typename TypeTraits<T>::CType>
+ enable_if_number<T, Status> Visit(const T&) {
+ consume_impl = [](const std::shared_ptr<ArrayData>& input, const uint32_t* group,
+ void* mins, void* maxes, uint8_t* has_values,
+ uint8_t* has_nulls) {
+ auto raw_mins = reinterpret_cast<CType*>(mins);
+ auto raw_maxes = reinterpret_cast<CType*>(maxes);
+
+ VisitArrayDataInline<T>(
+ *input,
+ [&](CType val) {
+ raw_maxes[*group] = std::max(raw_maxes[*group], val);
+ raw_mins[*group] = std::min(raw_mins[*group], val);
+ BitUtil::SetBit(has_values, *group++);
+ },
+ [&] { BitUtil::SetBit(has_nulls, *group++); });
+ };
+
+ resize_min_impl = MakeResizeImpl(Extrema<CType>::max());
+ resize_max_impl = MakeResizeImpl(Extrema<CType>::min());
+ return Status::OK();
+ }
+
+ Status Visit(const BooleanType& type) {
+ return Status::NotImplemented("Grouped MinMax data of type ", type);
+ }
+
+ Status Visit(const HalfFloatType& type) {
+ return Status::NotImplemented("Grouped MinMax data of type ", type);
+ }
+
+ Status Visit(const DataType& type) {
+ return Status::NotImplemented("Grouped MinMax data of type ", type);
+ }
+
+ ConsumeImpl consume_impl;
+ ResizeImpl resize_min_impl, resize_max_impl;
+ };
+
+ Status Init(ExecContext* ctx, const FunctionOptions* options,
+ const std::shared_ptr<DataType>& input_type) override {
+ options_ = *checked_cast<const ScalarAggregateOptions*>(options);
+ type_ = input_type;
+
+ mins_ = BufferBuilder(ctx->memory_pool());
+ maxes_ = BufferBuilder(ctx->memory_pool());
+ has_values_ = TypedBufferBuilder<bool>(ctx->memory_pool());
+ has_nulls_ = TypedBufferBuilder<bool>(ctx->memory_pool());
+
+ GetImpl get_impl;
+ RETURN_NOT_OK(VisitTypeInline(*input_type, &get_impl));
+
+ consume_impl_ = std::move(get_impl.consume_impl);
+ resize_min_impl_ = std::move(get_impl.resize_min_impl);
+ resize_max_impl_ = std::move(get_impl.resize_max_impl);
+
+ return Status::OK();
+ }
+
+ Status Consume(const ExecBatch& batch) override {
+ RETURN_NOT_OK(MaybeReserve(num_groups_, batch, [&](int64_t added_groups) {
+ num_groups_ += added_groups;
+ RETURN_NOT_OK(resize_min_impl_(&mins_, added_groups));
+ RETURN_NOT_OK(resize_max_impl_(&maxes_, added_groups));
+ RETURN_NOT_OK(has_values_.Append(added_groups, false));
+ RETURN_NOT_OK(has_nulls_.Append(added_groups, false));
+ return Status::OK();
+ }));
+
+ auto group_ids = batch[1].array()->GetValues<uint32_t>(1);
+ consume_impl_(batch[0].array(), group_ids, mins_.mutable_data(),
+ maxes_.mutable_data(), has_values_.mutable_data(),
+ has_nulls_.mutable_data());
+ return Status::OK();
+ }
+
+ Result<Datum> Finalize() override {
+ // The aggregate for a group is valid only if at least one value was seen in that group
+ ARROW_ASSIGN_OR_RAISE(auto null_bitmap, has_values_.Finish());
+
+ if (!options_.skip_nulls) {
+ // ... and there were no nulls in that group
+ ARROW_ASSIGN_OR_RAISE(auto has_nulls, has_nulls_.Finish());
+ arrow::internal::BitmapAndNot(null_bitmap->data(), 0, has_nulls->data(), 0,
+ num_groups_, 0, null_bitmap->mutable_data());
+ }
+
+ auto mins = ArrayData::Make(type_, num_groups_, {null_bitmap, nullptr});
+ auto maxes = ArrayData::Make(type_, num_groups_, {std::move(null_bitmap), nullptr});
+ ARROW_ASSIGN_OR_RAISE(mins->buffers[1], mins_.Finish());
+ ARROW_ASSIGN_OR_RAISE(maxes->buffers[1], maxes_.Finish());
+
+ return ArrayData::Make(out_type(), num_groups_, {nullptr},
+ {std::move(mins), std::move(maxes)});
+ }
+
+ std::shared_ptr<DataType> out_type() const override {
+ return struct_({field("min", type_), field("max", type_)});
+ }
+
+ int64_t num_groups_;
+ BufferBuilder mins_, maxes_;
+ TypedBufferBuilder<bool> has_values_, has_nulls_;
+ std::shared_ptr<DataType> type_;
+ ConsumeImpl consume_impl_;
+ ResizeImpl resize_min_impl_, resize_max_impl_;
+ ScalarAggregateOptions options_;
+};
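+
+// Output shape sketch (illustrative): "hash_min_max" finalizes to one
+// struct<min: T, max: T> row per group. With skip_nulls=false a group that
+// saw any null becomes a null row (BitmapAndNot clears its validity bit);
+// with skip_nulls=true only empty groups are null.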
+
+template <typename Impl>
+HashAggregateKernel MakeKernel(InputType argument_type) {
+ HashAggregateKernel kernel;
+
+ kernel.init = [](KernelContext* ctx,
+ const KernelInitArgs& args) -> Result<std::unique_ptr<KernelState>> {
+ auto impl = ::arrow::internal::make_unique<Impl>();
+ // FIXME(bkietz) Init should not take a type. That should be an unboxed template arg
+ // for the Impl. Otherwise we're not exposing dispatch as well as we should.
+ RETURN_NOT_OK(impl->Init(ctx->exec_context(), args.options, args.inputs[0].type));
+ return std::move(impl);
+ };
+
+ kernel.signature = KernelSignature::Make(
+ {std::move(argument_type), InputType::Array(Type::UINT32),
+ InputType::Scalar(Type::UINT32)},
+ OutputType(
+ [](KernelContext* ctx, const std::vector<ValueDescr>&) -> Result<ValueDescr> {
+ return checked_cast<GroupedAggregator*>(ctx->state())->out_type();
+ }));
+
+ kernel.consume = [](KernelContext* ctx, const ExecBatch& batch) {
+ return checked_cast<GroupedAggregator*>(ctx->state())->Consume(batch);
+ };
+
+ kernel.merge = [](KernelContext* ctx, KernelState&&, KernelState*) {
+ // TODO(ARROW-11840) merge two hash tables
+ return Status::NotImplemented("Merge hashed aggregations");
+ };
+
+ kernel.finalize = [](KernelContext* ctx, Datum* out) {
+ ARROW_ASSIGN_OR_RAISE(*out,
+ checked_cast<GroupedAggregator*>(ctx->state())->Finalize());
+ return Status::OK();
+ };
+
+ return kernel;
+}
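+
+// Illustrative wiring: the kernels built here use the ternary signature
+// (argument, group_id_array, group_count), e.g.
+//
+//   auto kernel = MakeKernel<GroupedSumImpl>(ValueDescr::ARRAY);
+//   // kernel.init allocates a GroupedSumImpl; kernel.consume forwards each
+//   // ExecBatch {values, group_ids, num_groups} to GroupedAggregator::Consume.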
+
+Result<std::vector<const HashAggregateKernel*>> GetKernels(
+ ExecContext* ctx, const std::vector<Aggregate>& aggregates,
+ const std::vector<ValueDescr>& in_descrs) {
+ if (aggregates.size() != in_descrs.size()) {
+ return Status::Invalid(aggregates.size(), " aggregate functions were specified but ",
+ in_descrs.size(), " arguments were provided.");
+ }
+
+ std::vector<const HashAggregateKernel*> kernels(in_descrs.size());
+
+ for (size_t i = 0; i < aggregates.size(); ++i) {
+ ARROW_ASSIGN_OR_RAISE(auto function,
+ ctx->func_registry()->GetFunction(aggregates[i].function));
+ ARROW_ASSIGN_OR_RAISE(
+ const Kernel* kernel,
+ function->DispatchExact(
+ {in_descrs[i], ValueDescr::Array(uint32()), ValueDescr::Scalar(uint32())}));
+ kernels[i] = static_cast<const HashAggregateKernel*>(kernel);
+ }
+ return kernels;
+}
+
+Result<std::vector<std::unique_ptr<KernelState>>> InitKernels(
+ const std::vector<const HashAggregateKernel*>& kernels, ExecContext* ctx,
+ const std::vector<Aggregate>& aggregates, const std::vector<ValueDescr>& in_descrs) {
+ std::vector<std::unique_ptr<KernelState>> states(kernels.size());
+
+ for (size_t i = 0; i < aggregates.size(); ++i) {
+ auto options = aggregates[i].options;
+
+ if (options == nullptr) {
+ // use known default options for the named function if possible
+ auto maybe_function = ctx->func_registry()->GetFunction(aggregates[i].function);
+ if (maybe_function.ok()) {
+ options = maybe_function.ValueOrDie()->default_options();
+ }
+ }
+
+ KernelContext kernel_ctx{ctx};
+ ARROW_ASSIGN_OR_RAISE(
+ states[i], kernels[i]->init(&kernel_ctx, KernelInitArgs{kernels[i],
+ {
+ in_descrs[i].type,
+ uint32(),
+ uint32(),
+ },
+ options}));
+ }
+
+ return std::move(states);
+}
+
+Result<FieldVector> ResolveKernels(
+ const std::vector<Aggregate>& aggregates,
+ const std::vector<const HashAggregateKernel*>& kernels,
+ const std::vector<std::unique_ptr<KernelState>>& states, ExecContext* ctx,
+ const std::vector<ValueDescr>& descrs) {
+ FieldVector fields(descrs.size());
+
+ for (size_t i = 0; i < kernels.size(); ++i) {
+ KernelContext kernel_ctx{ctx};
+ kernel_ctx.SetState(states[i].get());
+
+ ARROW_ASSIGN_OR_RAISE(auto descr, kernels[i]->signature->out_type().Resolve(
+ &kernel_ctx, {
+ descrs[i].type,
+ uint32(),
+ uint32(),
+ }));
+ fields[i] = field(aggregates[i].function, std::move(descr.type));
+ }
+ return fields;
+}
+
+} // namespace
+
+Result<std::unique_ptr<Grouper>> Grouper::Make(const std::vector<ValueDescr>& descrs,
+ ExecContext* ctx) {
+ if (GrouperFastImpl::CanUse(descrs)) {
+ return GrouperFastImpl::Make(descrs, ctx);
+ }
+ return GrouperImpl::Make(descrs, ctx);
+}
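+
+// Minimal Grouper sketch (illustrative; error handling elided):
+//
+//   ARROW_ASSIGN_OR_RAISE(auto grouper,
+//                         Grouper::Make({ValueDescr::Array(utf8())}, ctx));
+//   ARROW_ASSIGN_OR_RAISE(Datum ids, grouper->Consume(key_batch));
+//   // ids is a uint32 array mapping each row to a group id in
+//   // [0, grouper->num_groups()); GetUniques() returns one row per group.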
+
+Result<Datum> GroupBy(const std::vector<Datum>& arguments, const std::vector<Datum>& keys,
+ const std::vector<Aggregate>& aggregates, ExecContext* ctx) {
+ // Construct and initialize HashAggregateKernels
+ ARROW_ASSIGN_OR_RAISE(auto argument_descrs,
+ ExecBatch::Make(arguments).Map(
+ [](ExecBatch batch) { return batch.GetDescriptors(); }));
+
+ ARROW_ASSIGN_OR_RAISE(auto kernels, GetKernels(ctx, aggregates, argument_descrs));
+
+ ARROW_ASSIGN_OR_RAISE(auto states,
+ InitKernels(kernels, ctx, aggregates, argument_descrs));
+
+ ARROW_ASSIGN_OR_RAISE(
+ FieldVector out_fields,
+ ResolveKernels(aggregates, kernels, states, ctx, argument_descrs));
+
+ using arrow::compute::detail::ExecBatchIterator;
+
+ ARROW_ASSIGN_OR_RAISE(auto argument_batch_iterator,
+ ExecBatchIterator::Make(arguments, ctx->exec_chunksize()));
+
+ // Construct Grouper
+ ARROW_ASSIGN_OR_RAISE(auto key_descrs, ExecBatch::Make(keys).Map([](ExecBatch batch) {
+ return batch.GetDescriptors();
+ }));
+
+ ARROW_ASSIGN_OR_RAISE(auto grouper, Grouper::Make(key_descrs, ctx));
+
+ int i = 0;
+ for (ValueDescr& key_descr : key_descrs) {
+ out_fields.push_back(field("key_" + std::to_string(i++), std::move(key_descr.type)));
+ }
+
+ ARROW_ASSIGN_OR_RAISE(auto key_batch_iterator,
+ ExecBatchIterator::Make(keys, ctx->exec_chunksize()));
+
+ // start "streaming" execution
+ ExecBatch key_batch, argument_batch;
+ while (argument_batch_iterator->Next(&argument_batch) &&
+ key_batch_iterator->Next(&key_batch)) {
+ if (key_batch.length == 0) continue;
+
+ // compute a batch of group ids
+ ARROW_ASSIGN_OR_RAISE(Datum id_batch, grouper->Consume(key_batch));
+
+ // consume group ids with HashAggregateKernels
+ for (size_t i = 0; i < kernels.size(); ++i) {
+ KernelContext batch_ctx{ctx};
+ batch_ctx.SetState(states[i].get());
+ ARROW_ASSIGN_OR_RAISE(auto batch, ExecBatch::Make({argument_batch[i], id_batch,
+ Datum(grouper->num_groups())}));
+ RETURN_NOT_OK(kernels[i]->consume(&batch_ctx, batch));
+ }
+ }
+
+ // Finalize output
+ ArrayDataVector out_data(arguments.size() + keys.size());
+ auto it = out_data.begin();
+
+ for (size_t i = 0; i < kernels.size(); ++i) {
+ KernelContext batch_ctx{ctx};
+ batch_ctx.SetState(states[i].get());
+ Datum out;
+ RETURN_NOT_OK(kernels[i]->finalize(&batch_ctx, &out));
+ *it++ = out.array();
+ }
+
+ ARROW_ASSIGN_OR_RAISE(ExecBatch out_keys, grouper->GetUniques());
+ for (const auto& key : out_keys.values) {
+ *it++ = key.array();
+ }
+
+ int64_t length = out_data[0]->length;
+ return ArrayData::Make(struct_(std::move(out_fields)), length,
+ {/*null_bitmap=*/nullptr}, std::move(out_data),
+ /*null_count=*/0);
+}
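+
+// Illustrative call (assuming Aggregate aggregate-initializes as
+// {function_name, options}):
+//
+//   // roughly: SELECT count(v), sum(v), key FROM t GROUP BY key
+//   ARROW_ASSIGN_OR_RAISE(
+//       Datum out, GroupBy({v, v}, {key},
+//                          {{"hash_count", nullptr}, {"hash_sum", nullptr}}, ctx));
+//   // out is a struct array with one field per aggregate plus key_0, key_1, ...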
+
+Result<std::shared_ptr<ListArray>> Grouper::ApplyGroupings(const ListArray& groupings,
+ const Array& array,
+ ExecContext* ctx) {
+ ARROW_ASSIGN_OR_RAISE(Datum sorted,
+ compute::Take(array, groupings.data()->child_data[0],
+ TakeOptions::NoBoundsCheck(), ctx));
+
+ return std::make_shared<ListArray>(list(array.type()), groupings.length(),
+ groupings.value_offsets(), sorted.make_array());
+}
+
+Result<std::shared_ptr<ListArray>> Grouper::MakeGroupings(const UInt32Array& ids,
+ uint32_t num_groups,
+ ExecContext* ctx) {
+ if (ids.null_count() != 0) {
+ return Status::Invalid("MakeGroupings with null ids");
+ }
+
+ ARROW_ASSIGN_OR_RAISE(auto offsets, AllocateBuffer(sizeof(int32_t) * (num_groups + 1),
+ ctx->memory_pool()));
+ auto raw_offsets = reinterpret_cast<int32_t*>(offsets->mutable_data());
+
+ std::memset(raw_offsets, 0, offsets->size());
+ for (int i = 0; i < ids.length(); ++i) {
+ DCHECK_LT(ids.Value(i), num_groups);
+ raw_offsets[ids.Value(i)] += 1;
+ }
+ int32_t length = 0;
+ for (uint32_t id = 0; id < num_groups; ++id) {
+ auto offset = raw_offsets[id];
+ raw_offsets[id] = length;
+ length += offset;
+ }
+ raw_offsets[num_groups] = length;
+ DCHECK_EQ(ids.length(), length);
+
+ ARROW_ASSIGN_OR_RAISE(auto offsets_copy,
+ offsets->CopySlice(0, offsets->size(), ctx->memory_pool()));
+ raw_offsets = reinterpret_cast<int32_t*>(offsets_copy->mutable_data());
+
+ ARROW_ASSIGN_OR_RAISE(auto sort_indices, AllocateBuffer(sizeof(int32_t) * ids.length(),
+ ctx->memory_pool()));
+ auto raw_sort_indices = reinterpret_cast<int32_t*>(sort_indices->mutable_data());
+ for (int i = 0; i < ids.length(); ++i) {
+ raw_sort_indices[raw_offsets[ids.Value(i)]++] = i;
+ }
+
+ return std::make_shared<ListArray>(
+ list(int32()), num_groups, std::move(offsets),
+ std::make_shared<Int32Array>(ids.length(), std::move(sort_indices)));
+}
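+
+// Worked example (illustrative): for ids [0, 1, 0, 2] with num_groups 3, the
+// counting pass produces offsets [0, 2, 3, 4] and the scatter pass fills sort
+// indices [0, 2, 1, 3], i.e. groupings [[0, 2], [1], [3]]: row positions
+// bucketed by group id, ready for ApplyGroupings to Take() against.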
+
+namespace {
+const FunctionDoc hash_count_doc{"Count the number of null / non-null values",
+ ("By default, non-null values are counted.\n"
+ "This can be changed through ScalarAggregateOptions."),
+ {"array", "group_id_array", "group_count"},
+ "ScalarAggregateOptions"};
+
+const FunctionDoc hash_sum_doc{"Sum values of a numeric array",
+ ("Null values are ignored."),
+ {"array", "group_id_array", "group_count"}};
+
+const FunctionDoc hash_min_max_doc{
+ "Compute the minimum and maximum values of a numeric array",
+ ("Null values are ignored by default.\n"
+ "This can be changed through ScalarAggregateOptions."),
+ {"array", "group_id_array", "group_count"},
+ "ScalarAggregateOptions"};
+} // namespace
+
+void RegisterHashAggregateBasic(FunctionRegistry* registry) {
+ {
+ static auto default_scalar_aggregate_options = ScalarAggregateOptions::Defaults();
+ auto func = std::make_shared<HashAggregateFunction>(
+ "hash_count", Arity::Ternary(), &hash_count_doc,
+ &default_scalar_aggregate_options);
+ DCHECK_OK(func->AddKernel(MakeKernel<GroupedCountImpl>(ValueDescr::ARRAY)));
+ DCHECK_OK(registry->AddFunction(std::move(func)));
+ }
+
+ {
+ auto func = std::make_shared<HashAggregateFunction>("hash_sum", Arity::Ternary(),
+ &hash_sum_doc);
+ DCHECK_OK(func->AddKernel(MakeKernel<GroupedSumImpl>(ValueDescr::ARRAY)));
+ DCHECK_OK(registry->AddFunction(std::move(func)));
+ }
+
+ {
+ static auto default_scalar_aggregate_options = ScalarAggregateOptions::Defaults();
+ auto func = std::make_shared<HashAggregateFunction>(
+ "hash_min_max", Arity::Ternary(), &hash_min_max_doc,
+ &default_scalar_aggregate_options);
+ DCHECK_OK(func->AddKernel(MakeKernel<GroupedMinMaxImpl>(ValueDescr::ARRAY)));
+ DCHECK_OK(registry->AddFunction(std::move(func)));
+ }
+}
+
+} // namespace internal
+} // namespace compute
+} // namespace arrow
diff --git a/contrib/libs/apache/arrow/cpp/src/arrow/compute/kernels/scalar_arithmetic.cc b/contrib/libs/apache/arrow/cpp/src/arrow/compute/kernels/scalar_arithmetic.cc
index a8f1f82771b..a5d4a557740 100644
--- a/contrib/libs/apache/arrow/cpp/src/arrow/compute/kernels/scalar_arithmetic.cc
+++ b/contrib/libs/apache/arrow/cpp/src/arrow/compute/kernels/scalar_arithmetic.cc
@@ -15,17 +15,17 @@
// specific language governing permissions and limitations
// under the License.
-#include <algorithm>
-#include <cmath>
-#include <limits>
-#include <utility>
-
-#include "arrow/compute/kernels/codegen_internal.h"
+#include <algorithm>
+#include <cmath>
+#include <limits>
+#include <utility>
+
+#include "arrow/compute/kernels/codegen_internal.h"
#include "arrow/compute/kernels/common.h"
-#include "arrow/compute/kernels/util_internal.h"
-#include "arrow/type.h"
-#include "arrow/type_traits.h"
-#include "arrow/util/decimal.h"
+#include "arrow/compute/kernels/util_internal.h"
+#include "arrow/type.h"
+#include "arrow/type_traits.h"
+#include "arrow/util/decimal.h"
#include "arrow/util/int_util_internal.h"
#include "arrow/util/macros.h"
@@ -34,7 +34,7 @@ namespace arrow {
using internal::AddWithOverflow;
using internal::DivideWithOverflow;
using internal::MultiplyWithOverflow;
-using internal::NegateWithOverflow;
+using internal::NegateWithOverflow;
using internal::SubtractWithOverflow;
namespace compute {
@@ -42,8 +42,8 @@ namespace internal {
using applicator::ScalarBinaryEqualTypes;
using applicator::ScalarBinaryNotNullEqualTypes;
-using applicator::ScalarUnary;
-using applicator::ScalarUnaryNotNull;
+using applicator::ScalarUnary;
+using applicator::ScalarUnaryNotNull;
namespace {
@@ -55,169 +55,169 @@ template <typename T>
using is_signed_integer =
std::integral_constant<bool, std::is_integral<T>::value && std::is_signed<T>::value>;
-template <typename T, typename R = T>
-using enable_if_signed_integer = enable_if_t<is_signed_integer<T>::value, R>;
+template <typename T, typename R = T>
+using enable_if_signed_integer = enable_if_t<is_signed_integer<T>::value, R>;
-template <typename T, typename R = T>
-using enable_if_unsigned_integer = enable_if_t<is_unsigned_integer<T>::value, R>;
+template <typename T, typename R = T>
+using enable_if_unsigned_integer = enable_if_t<is_unsigned_integer<T>::value, R>;
-template <typename T, typename R = T>
+template <typename T, typename R = T>
using enable_if_integer =
- enable_if_t<is_signed_integer<T>::value || is_unsigned_integer<T>::value, R>;
+ enable_if_t<is_signed_integer<T>::value || is_unsigned_integer<T>::value, R>;
+
+template <typename T, typename R = T>
+using enable_if_floating_point = enable_if_t<std::is_floating_point<T>::value, R>;
-template <typename T, typename R = T>
-using enable_if_floating_point = enable_if_t<std::is_floating_point<T>::value, R>;
-
template <typename T>
-using enable_if_decimal =
- enable_if_t<std::is_same<Decimal128, T>::value || std::is_same<Decimal256, T>::value,
- T>;
+using enable_if_decimal =
+ enable_if_t<std::is_same<Decimal128, T>::value || std::is_same<Decimal256, T>::value,
+ T>;
template <typename T, typename Unsigned = typename std::make_unsigned<T>::type>
constexpr Unsigned to_unsigned(T signed_) {
return static_cast<Unsigned>(signed_);
}
-struct AbsoluteValue {
- template <typename T, typename Arg>
- static constexpr enable_if_floating_point<T> Call(KernelContext*, T arg, Status*) {
- return std::fabs(arg);
- }
-
- template <typename T, typename Arg>
- static constexpr enable_if_unsigned_integer<T> Call(KernelContext*, T arg, Status*) {
- return arg;
- }
-
- template <typename T, typename Arg>
- static constexpr enable_if_signed_integer<T> Call(KernelContext*, T arg, Status* st) {
- return (arg < 0) ? arrow::internal::SafeSignedNegate(arg) : arg;
- }
-};
-
-struct AbsoluteValueChecked {
- template <typename T, typename Arg>
- static enable_if_signed_integer<T> Call(KernelContext*, Arg arg, Status* st) {
- static_assert(std::is_same<T, Arg>::value, "");
- if (arg == std::numeric_limits<Arg>::min()) {
- *st = Status::Invalid("overflow");
- return arg;
- }
- return std::abs(arg);
- }
-
- template <typename T, typename Arg>
- static enable_if_unsigned_integer<T> Call(KernelContext* ctx, Arg arg, Status* st) {
- static_assert(std::is_same<T, Arg>::value, "");
- return arg;
- }
-
- template <typename T, typename Arg>
- static constexpr enable_if_floating_point<T> Call(KernelContext*, Arg arg, Status* st) {
- static_assert(std::is_same<T, Arg>::value, "");
- return std::fabs(arg);
- }
-};
-
+struct AbsoluteValue {
+ template <typename T, typename Arg>
+ static constexpr enable_if_floating_point<T> Call(KernelContext*, T arg, Status*) {
+ return std::fabs(arg);
+ }
+
+ template <typename T, typename Arg>
+ static constexpr enable_if_unsigned_integer<T> Call(KernelContext*, T arg, Status*) {
+ return arg;
+ }
+
+ template <typename T, typename Arg>
+ static constexpr enable_if_signed_integer<T> Call(KernelContext*, T arg, Status* st) {
+ return (arg < 0) ? arrow::internal::SafeSignedNegate(arg) : arg;
+ }
+};
+
+struct AbsoluteValueChecked {
+ template <typename T, typename Arg>
+ static enable_if_signed_integer<T> Call(KernelContext*, Arg arg, Status* st) {
+ static_assert(std::is_same<T, Arg>::value, "");
+ if (arg == std::numeric_limits<Arg>::min()) {
+ *st = Status::Invalid("overflow");
+ return arg;
+ }
+ return std::abs(arg);
+ }
+
+ template <typename T, typename Arg>
+ static enable_if_unsigned_integer<T> Call(KernelContext* ctx, Arg arg, Status* st) {
+ static_assert(std::is_same<T, Arg>::value, "");
+ return arg;
+ }
+
+ template <typename T, typename Arg>
+ static constexpr enable_if_floating_point<T> Call(KernelContext*, Arg arg, Status* st) {
+ static_assert(std::is_same<T, Arg>::value, "");
+ return std::fabs(arg);
+ }
+};
+
struct Add {
- template <typename T, typename Arg0, typename Arg1>
- static constexpr enable_if_floating_point<T> Call(KernelContext*, Arg0 left, Arg1 right,
- Status*) {
+ template <typename T, typename Arg0, typename Arg1>
+ static constexpr enable_if_floating_point<T> Call(KernelContext*, Arg0 left, Arg1 right,
+ Status*) {
return left + right;
}
- template <typename T, typename Arg0, typename Arg1>
- static constexpr enable_if_unsigned_integer<T> Call(KernelContext*, Arg0 left,
- Arg1 right, Status*) {
+ template <typename T, typename Arg0, typename Arg1>
+ static constexpr enable_if_unsigned_integer<T> Call(KernelContext*, Arg0 left,
+ Arg1 right, Status*) {
return left + right;
}
- template <typename T, typename Arg0, typename Arg1>
- static constexpr enable_if_signed_integer<T> Call(KernelContext*, Arg0 left, Arg1 right,
- Status*) {
+ template <typename T, typename Arg0, typename Arg1>
+ static constexpr enable_if_signed_integer<T> Call(KernelContext*, Arg0 left, Arg1 right,
+ Status*) {
return arrow::internal::SafeSignedAdd(left, right);
}
-
- template <typename T, typename Arg0, typename Arg1>
- static enable_if_decimal<T> Call(KernelContext*, Arg0 left, Arg1 right, Status*) {
- return left + right;
- }
+
+ template <typename T, typename Arg0, typename Arg1>
+ static enable_if_decimal<T> Call(KernelContext*, Arg0 left, Arg1 right, Status*) {
+ return left + right;
+ }
};
struct AddChecked {
template <typename T, typename Arg0, typename Arg1>
- static enable_if_integer<T> Call(KernelContext*, Arg0 left, Arg1 right, Status* st) {
+ static enable_if_integer<T> Call(KernelContext*, Arg0 left, Arg1 right, Status* st) {
static_assert(std::is_same<T, Arg0>::value && std::is_same<T, Arg1>::value, "");
T result = 0;
if (ARROW_PREDICT_FALSE(AddWithOverflow(left, right, &result))) {
- *st = Status::Invalid("overflow");
+ *st = Status::Invalid("overflow");
}
return result;
}
template <typename T, typename Arg0, typename Arg1>
- static enable_if_floating_point<T> Call(KernelContext*, Arg0 left, Arg1 right,
- Status*) {
+ static enable_if_floating_point<T> Call(KernelContext*, Arg0 left, Arg1 right,
+ Status*) {
static_assert(std::is_same<T, Arg0>::value && std::is_same<T, Arg1>::value, "");
return left + right;
}
-
- template <typename T, typename Arg0, typename Arg1>
- static enable_if_decimal<T> Call(KernelContext*, Arg0 left, Arg1 right, Status*) {
- return left + right;
- }
+
+ template <typename T, typename Arg0, typename Arg1>
+ static enable_if_decimal<T> Call(KernelContext*, Arg0 left, Arg1 right, Status*) {
+ return left + right;
+ }
};
struct Subtract {
- template <typename T, typename Arg0, typename Arg1>
- static constexpr enable_if_floating_point<T> Call(KernelContext*, Arg0 left, Arg1 right,
- Status*) {
- static_assert(std::is_same<T, Arg0>::value && std::is_same<T, Arg1>::value, "");
+ template <typename T, typename Arg0, typename Arg1>
+ static constexpr enable_if_floating_point<T> Call(KernelContext*, Arg0 left, Arg1 right,
+ Status*) {
+ static_assert(std::is_same<T, Arg0>::value && std::is_same<T, Arg1>::value, "");
return left - right;
}
- template <typename T, typename Arg0, typename Arg1>
- static constexpr enable_if_unsigned_integer<T> Call(KernelContext*, Arg0 left,
- Arg1 right, Status*) {
- static_assert(std::is_same<T, Arg0>::value && std::is_same<T, Arg1>::value, "");
+ template <typename T, typename Arg0, typename Arg1>
+ static constexpr enable_if_unsigned_integer<T> Call(KernelContext*, Arg0 left,
+ Arg1 right, Status*) {
+ static_assert(std::is_same<T, Arg0>::value && std::is_same<T, Arg1>::value, "");
return left - right;
}
- template <typename T, typename Arg0, typename Arg1>
- static constexpr enable_if_signed_integer<T> Call(KernelContext*, Arg0 left, Arg1 right,
- Status*) {
- static_assert(std::is_same<T, Arg0>::value && std::is_same<T, Arg1>::value, "");
+ template <typename T, typename Arg0, typename Arg1>
+ static constexpr enable_if_signed_integer<T> Call(KernelContext*, Arg0 left, Arg1 right,
+ Status*) {
+ static_assert(std::is_same<T, Arg0>::value && std::is_same<T, Arg1>::value, "");
return arrow::internal::SafeSignedSubtract(left, right);
}
-
- template <typename T, typename Arg0, typename Arg1>
- static enable_if_decimal<T> Call(KernelContext*, Arg0 left, Arg1 right, Status*) {
- return left + (-right);
- }
+
+ template <typename T, typename Arg0, typename Arg1>
+ static enable_if_decimal<T> Call(KernelContext*, Arg0 left, Arg1 right, Status*) {
+ return left + (-right);
+ }
};
struct SubtractChecked {
template <typename T, typename Arg0, typename Arg1>
- static enable_if_integer<T> Call(KernelContext*, Arg0 left, Arg1 right, Status* st) {
+ static enable_if_integer<T> Call(KernelContext*, Arg0 left, Arg1 right, Status* st) {
static_assert(std::is_same<T, Arg0>::value && std::is_same<T, Arg1>::value, "");
T result = 0;
if (ARROW_PREDICT_FALSE(SubtractWithOverflow(left, right, &result))) {
- *st = Status::Invalid("overflow");
+ *st = Status::Invalid("overflow");
}
return result;
}
template <typename T, typename Arg0, typename Arg1>
- static enable_if_floating_point<T> Call(KernelContext*, Arg0 left, Arg1 right,
- Status*) {
+ static enable_if_floating_point<T> Call(KernelContext*, Arg0 left, Arg1 right,
+ Status*) {
static_assert(std::is_same<T, Arg0>::value && std::is_same<T, Arg1>::value, "");
return left - right;
}
-
- template <typename T, typename Arg0, typename Arg1>
- static enable_if_decimal<T> Call(KernelContext*, Arg0 left, Arg1 right, Status*) {
- return left + (-right);
- }
+
+ template <typename T, typename Arg0, typename Arg1>
+ static enable_if_decimal<T> Call(KernelContext*, Arg0 left, Arg1 right, Status*) {
+ return left + (-right);
+ }
};
struct Multiply {
@@ -230,23 +230,23 @@ struct Multiply {
static_assert(std::is_same<decltype(int64_t() * int64_t()), int64_t>::value, "");
static_assert(std::is_same<decltype(uint64_t() * uint64_t()), uint64_t>::value, "");
- template <typename T, typename Arg0, typename Arg1>
- static constexpr enable_if_floating_point<T> Call(KernelContext*, T left, T right,
- Status*) {
+ template <typename T, typename Arg0, typename Arg1>
+ static constexpr enable_if_floating_point<T> Call(KernelContext*, T left, T right,
+ Status*) {
return left * right;
}
- template <typename T, typename Arg0, typename Arg1>
- static constexpr enable_if_t<
- is_unsigned_integer<T>::value && !std::is_same<T, uint16_t>::value, T>
- Call(KernelContext*, T left, T right, Status*) {
+ template <typename T, typename Arg0, typename Arg1>
+ static constexpr enable_if_t<
+ is_unsigned_integer<T>::value && !std::is_same<T, uint16_t>::value, T>
+ Call(KernelContext*, T left, T right, Status*) {
return left * right;
}
- template <typename T, typename Arg0, typename Arg1>
- static constexpr enable_if_t<
- is_signed_integer<T>::value && !std::is_same<T, int16_t>::value, T>
- Call(KernelContext*, T left, T right, Status*) {
+ template <typename T, typename Arg0, typename Arg1>
+ static constexpr enable_if_t<
+ is_signed_integer<T>::value && !std::is_same<T, int16_t>::value, T>
+ Call(KernelContext*, T left, T right, Status*) {
return to_unsigned(left) * to_unsigned(right);
}
@@ -254,593 +254,593 @@ struct Multiply {
// integer. However, some inputs may nevertheless overflow (which triggers undefined
// behaviour). Therefore we first cast to 32 bit unsigned integers where overflow is
// well defined.
- template <typename T, typename Arg0, typename Arg1>
- static constexpr enable_if_same<T, int16_t, T> Call(KernelContext*, int16_t left,
- int16_t right, Status*) {
+ template <typename T, typename Arg0, typename Arg1>
+ static constexpr enable_if_same<T, int16_t, T> Call(KernelContext*, int16_t left,
+ int16_t right, Status*) {
return static_cast<uint32_t>(left) * static_cast<uint32_t>(right);
}
- template <typename T, typename Arg0, typename Arg1>
- static constexpr enable_if_same<T, uint16_t, T> Call(KernelContext*, uint16_t left,
- uint16_t right, Status*) {
+ template <typename T, typename Arg0, typename Arg1>
+ static constexpr enable_if_same<T, uint16_t, T> Call(KernelContext*, uint16_t left,
+ uint16_t right, Status*) {
return static_cast<uint32_t>(left) * static_cast<uint32_t>(right);
}
-
- template <typename T, typename Arg0, typename Arg1>
- static enable_if_decimal<T> Call(KernelContext*, Arg0 left, Arg1 right, Status*) {
- return left * right;
- }
+
+ template <typename T, typename Arg0, typename Arg1>
+ static enable_if_decimal<T> Call(KernelContext*, Arg0 left, Arg1 right, Status*) {
+ return left * right;
+ }
};
struct MultiplyChecked {
template <typename T, typename Arg0, typename Arg1>
- static enable_if_integer<T> Call(KernelContext*, Arg0 left, Arg1 right, Status* st) {
+ static enable_if_integer<T> Call(KernelContext*, Arg0 left, Arg1 right, Status* st) {
static_assert(std::is_same<T, Arg0>::value && std::is_same<T, Arg1>::value, "");
T result = 0;
if (ARROW_PREDICT_FALSE(MultiplyWithOverflow(left, right, &result))) {
- *st = Status::Invalid("overflow");
+ *st = Status::Invalid("overflow");
}
return result;
}
template <typename T, typename Arg0, typename Arg1>
- static enable_if_floating_point<T> Call(KernelContext*, Arg0 left, Arg1 right,
- Status*) {
+ static enable_if_floating_point<T> Call(KernelContext*, Arg0 left, Arg1 right,
+ Status*) {
static_assert(std::is_same<T, Arg0>::value && std::is_same<T, Arg1>::value, "");
return left * right;
}
-
- template <typename T, typename Arg0, typename Arg1>
- static enable_if_decimal<T> Call(KernelContext*, Arg0 left, Arg1 right, Status*) {
- return left * right;
- }
+
+ template <typename T, typename Arg0, typename Arg1>
+ static enable_if_decimal<T> Call(KernelContext*, Arg0 left, Arg1 right, Status*) {
+ return left * right;
+ }
};
struct Divide {
template <typename T, typename Arg0, typename Arg1>
- static enable_if_floating_point<T> Call(KernelContext*, Arg0 left, Arg1 right,
- Status*) {
+ static enable_if_floating_point<T> Call(KernelContext*, Arg0 left, Arg1 right,
+ Status*) {
return left / right;
}
template <typename T, typename Arg0, typename Arg1>
- static enable_if_integer<T> Call(KernelContext*, Arg0 left, Arg1 right, Status* st) {
+ static enable_if_integer<T> Call(KernelContext*, Arg0 left, Arg1 right, Status* st) {
T result;
if (ARROW_PREDICT_FALSE(DivideWithOverflow(left, right, &result))) {
if (right == 0) {
- *st = Status::Invalid("divide by zero");
+ *st = Status::Invalid("divide by zero");
} else {
result = 0;
}
}
return result;
}
-
- template <typename T, typename Arg0, typename Arg1>
- static enable_if_decimal<T> Call(KernelContext*, Arg0 left, Arg1 right, Status* st) {
- if (right == Arg1()) {
- *st = Status::Invalid("Divide by zero");
- return T();
- } else {
- return left / right;
- }
- }
+
+ template <typename T, typename Arg0, typename Arg1>
+ static enable_if_decimal<T> Call(KernelContext*, Arg0 left, Arg1 right, Status* st) {
+ if (right == Arg1()) {
+ *st = Status::Invalid("Divide by zero");
+ return T();
+ } else {
+ return left / right;
+ }
+ }
};
struct DivideChecked {
template <typename T, typename Arg0, typename Arg1>
- static enable_if_integer<T> Call(KernelContext*, Arg0 left, Arg1 right, Status* st) {
+ static enable_if_integer<T> Call(KernelContext*, Arg0 left, Arg1 right, Status* st) {
static_assert(std::is_same<T, Arg0>::value && std::is_same<T, Arg1>::value, "");
T result;
if (ARROW_PREDICT_FALSE(DivideWithOverflow(left, right, &result))) {
if (right == 0) {
- *st = Status::Invalid("divide by zero");
+ *st = Status::Invalid("divide by zero");
} else {
- *st = Status::Invalid("overflow");
+ *st = Status::Invalid("overflow");
}
}
return result;
}
template <typename T, typename Arg0, typename Arg1>
- static enable_if_floating_point<T> Call(KernelContext*, Arg0 left, Arg1 right,
- Status* st) {
+ static enable_if_floating_point<T> Call(KernelContext*, Arg0 left, Arg1 right,
+ Status* st) {
static_assert(std::is_same<T, Arg0>::value && std::is_same<T, Arg1>::value, "");
if (ARROW_PREDICT_FALSE(right == 0)) {
- *st = Status::Invalid("divide by zero");
+ *st = Status::Invalid("divide by zero");
return 0;
}
return left / right;
}
-
- template <typename T, typename Arg0, typename Arg1>
- static enable_if_decimal<T> Call(KernelContext* ctx, Arg0 left, Arg1 right,
- Status* st) {
- return Divide::Call<T>(ctx, left, right, st);
- }
+
+ template <typename T, typename Arg0, typename Arg1>
+ static enable_if_decimal<T> Call(KernelContext* ctx, Arg0 left, Arg1 right,
+ Status* st) {
+ return Divide::Call<T>(ctx, left, right, st);
+ }
+};
+
+struct Negate {
+ template <typename T, typename Arg>
+ static constexpr enable_if_floating_point<T> Call(KernelContext*, Arg arg, Status*) {
+ return -arg;
+ }
+
+ template <typename T, typename Arg>
+ static constexpr enable_if_unsigned_integer<T> Call(KernelContext*, Arg arg, Status*) {
+ return ~arg + 1;
+ }
+
+ template <typename T, typename Arg>
+ static constexpr enable_if_signed_integer<T> Call(KernelContext*, Arg arg, Status*) {
+ return arrow::internal::SafeSignedNegate(arg);
+ }
+};
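+
+// Note (illustrative): for unsigned integers ~arg + 1 is two's-complement
+// negation modulo 2^N, e.g. negating uint8_t{5} wraps to 251.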
+
+struct NegateChecked {
+ template <typename T, typename Arg>
+ static enable_if_signed_integer<T> Call(KernelContext*, Arg arg, Status* st) {
+ static_assert(std::is_same<T, Arg>::value, "");
+ T result = 0;
+ if (ARROW_PREDICT_FALSE(NegateWithOverflow(arg, &result))) {
+ *st = Status::Invalid("overflow");
+ }
+ return result;
+ }
+
+ template <typename T, typename Arg>
+ static enable_if_unsigned_integer<T> Call(KernelContext* ctx, Arg arg, Status* st) {
+ static_assert(std::is_same<T, Arg>::value, "");
+ DCHECK(false) << "This is included only for the purposes of instantiability from the "
+ "arithmetic kernel generator";
+ return 0;
+ }
+
+ template <typename T, typename Arg>
+ static constexpr enable_if_floating_point<T> Call(KernelContext*, Arg arg, Status* st) {
+ static_assert(std::is_same<T, Arg>::value, "");
+ return -arg;
+ }
+};
+
+struct Power {
+ ARROW_NOINLINE
+ static uint64_t IntegerPower(uint64_t base, uint64_t exp) {
+ // right-to-left O(log n) exponentiation by squaring
+ uint64_t pow = 1;
+ while (exp) {
+ pow *= (exp & 1) ? base : 1;
+ base *= base;
+ exp >>= 1;
+ }
+ return pow;
+ }
+
+ template <typename T, typename Arg0, typename Arg1>
+ static enable_if_integer<T> Call(KernelContext*, T base, T exp, Status* st) {
+ if (exp < 0) {
+ *st = Status::Invalid("integers to negative integer powers are not allowed");
+ return 0;
+ }
+ return static_cast<T>(IntegerPower(base, exp));
+ }
+
+ template <typename T, typename Arg0, typename Arg1>
+ static enable_if_floating_point<T> Call(KernelContext*, T base, T exp, Status*) {
+ return std::pow(base, exp);
+ }
+};
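+
+// Worked trace of IntegerPower (illustrative): base=3, exp=5 (binary 101):
+//   bit 0 set   -> pow = 3;   base = 9
+//   bit 1 clear -> pow = 3;   base = 81
+//   bit 2 set   -> pow = 243; base = 6561 (unused)
+// giving 3^5 = 243 in O(log exp) multiplications.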
+
+struct PowerChecked {
+ template <typename T, typename Arg0, typename Arg1>
+ static enable_if_integer<T> Call(KernelContext*, Arg0 base, Arg1 exp, Status* st) {
+ if (exp < 0) {
+ *st = Status::Invalid("integers to negative integer powers are not allowed");
+ return 0;
+ } else if (exp == 0) {
+ return 1;
+ }
+ // left-to-right O(log n) exponentiation by squaring, with overflow checks
+ bool overflow = false;
+ uint64_t bitmask =
+ 1ULL << (63 - BitUtil::CountLeadingZeros(static_cast<uint64_t>(exp)));
+ T pow = 1;
+ while (bitmask) {
+ overflow |= MultiplyWithOverflow(pow, pow, &pow);
+ if (exp & bitmask) {
+ overflow |= MultiplyWithOverflow(pow, base, &pow);
+ }
+ bitmask >>= 1;
+ }
+ if (overflow) {
+ *st = Status::Invalid("overflow");
+ }
+ return pow;
+ }
+
+ template <typename T, typename Arg0, typename Arg1>
+ static enable_if_floating_point<T> Call(KernelContext*, Arg0 base, Arg1 exp, Status*) {
+ static_assert(std::is_same<T, Arg0>::value && std::is_same<T, Arg1>::value, "");
+ return std::pow(base, exp);
+ }
+};
+
+struct Sign {
+ template <typename T, typename Arg>
+ static constexpr enable_if_floating_point<T> Call(KernelContext*, Arg arg, Status*) {
+ return std::isnan(arg) ? arg : ((arg == 0) ? 0 : (std::signbit(arg) ? -1 : 1));
+ }
+
+ template <typename T, typename Arg>
+ static constexpr enable_if_unsigned_integer<T> Call(KernelContext*, Arg arg, Status*) {
+ return arg > 0;
+ }
+
+ template <typename T, typename Arg>
+ static constexpr enable_if_signed_integer<T> Call(KernelContext*, Arg arg, Status*) {
+ return (arg > 0) ? 1 : ((arg == 0) ? 0 : -1);
+ }
+};
+
+// Bitwise operations
+
+struct BitWiseNot {
+ template <typename T, typename Arg>
+ static T Call(KernelContext*, Arg arg, Status*) {
+ return ~arg;
+ }
+};
+
+struct BitWiseAnd {
+ template <typename T, typename Arg0, typename Arg1>
+ static T Call(KernelContext*, Arg0 lhs, Arg1 rhs, Status*) {
+ return lhs & rhs;
+ }
+};
+
+struct BitWiseOr {
+ template <typename T, typename Arg0, typename Arg1>
+ static T Call(KernelContext*, Arg0 lhs, Arg1 rhs, Status*) {
+ return lhs | rhs;
+ }
+};
+
+struct BitWiseXor {
+ template <typename T, typename Arg0, typename Arg1>
+ static T Call(KernelContext*, Arg0 lhs, Arg1 rhs, Status*) {
+ return lhs ^ rhs;
+ }
+};
+
+struct ShiftLeft {
+ template <typename T, typename Arg0, typename Arg1>
+ static T Call(KernelContext*, Arg0 lhs, Arg1 rhs, Status*) {
+ using Unsigned = typename std::make_unsigned<Arg0>::type;
+ static_assert(std::is_same<T, Arg0>::value, "");
+ if (ARROW_PREDICT_FALSE(rhs < 0 || rhs >= std::numeric_limits<Arg0>::digits)) {
+ return lhs;
+ }
+ return static_cast<T>(static_cast<Unsigned>(lhs) << static_cast<Unsigned>(rhs));
+ }
+};
+
+// See SEI CERT C Coding Standard rule INT34-C
+struct ShiftLeftChecked {
+ template <typename T, typename Arg0, typename Arg1>
+ static enable_if_unsigned_integer<T> Call(KernelContext*, Arg0 lhs, Arg1 rhs,
+ Status* st) {
+ static_assert(std::is_same<T, Arg0>::value, "");
+ if (ARROW_PREDICT_FALSE(rhs < 0 || rhs >= std::numeric_limits<Arg0>::digits)) {
+ *st = Status::Invalid("shift amount must be >= 0 and less than precision of type");
+ return lhs;
+ }
+ return lhs << rhs;
+ }
+
+ template <typename T, typename Arg0, typename Arg1>
+ static enable_if_signed_integer<T> Call(KernelContext*, Arg0 lhs, Arg1 rhs,
+ Status* st) {
+ using Unsigned = typename std::make_unsigned<Arg0>::type;
+ static_assert(std::is_same<T, Arg0>::value, "");
+ if (ARROW_PREDICT_FALSE(rhs < 0 || rhs >= std::numeric_limits<Arg0>::digits)) {
+ *st = Status::Invalid("shift amount must be >= 0 and less than precision of type");
+ return lhs;
+ }
+ // In C/C++ left shift of a negative number is undefined (C++11 standard 5.8.2)
+ // Mimic Java/etc. and treat left shift as based on two's complement representation
+ // Assumes two's complement machine
+ return static_cast<T>(static_cast<Unsigned>(lhs) << static_cast<Unsigned>(rhs));
+ }
+};
+
+struct ShiftRight {
+ template <typename T, typename Arg0, typename Arg1>
+ static T Call(KernelContext*, Arg0 lhs, Arg1 rhs, Status*) {
+ static_assert(std::is_same<T, Arg0>::value, "");
+ // Logical right shift when Arg0 is unsigned
+ // Arithmetic otherwise (this is implementation-defined but GCC and MSVC document this
+ // as arithmetic right shift)
+ // https://gcc.gnu.org/onlinedocs/gcc/Integers-implementation.html#Integers-implementation
+ // https://docs.microsoft.com/en-us/cpp/cpp/left-shift-and-right-shift-operators-input-and-output?view=msvc-160
+ // Clang doesn't document their behavior.
+ if (ARROW_PREDICT_FALSE(rhs < 0 || rhs >= std::numeric_limits<Arg0>::digits)) {
+ return lhs;
+ }
+ return lhs >> rhs;
+ }
+};
+
+struct ShiftRightChecked {
+ template <typename T, typename Arg0, typename Arg1>
+ static T Call(KernelContext*, Arg0 lhs, Arg1 rhs, Status* st) {
+ static_assert(std::is_same<T, Arg0>::value, "");
+ if (ARROW_PREDICT_FALSE(rhs < 0 || rhs >= std::numeric_limits<Arg0>::digits)) {
+ *st = Status::Invalid("shift amount must be >= 0 and less than precision of type");
+ return lhs;
+ }
+ return lhs >> rhs;
+ }
+};
+
+struct Sin {
+ template <typename T, typename Arg0>
+ static enable_if_floating_point<Arg0, T> Call(KernelContext*, Arg0 val, Status*) {
+ static_assert(std::is_same<T, Arg0>::value, "");
+ return std::sin(val);
+ }
+};
+
+struct SinChecked {
+ template <typename T, typename Arg0>
+ static enable_if_floating_point<Arg0, T> Call(KernelContext*, Arg0 val, Status* st) {
+ static_assert(std::is_same<T, Arg0>::value, "");
+ if (ARROW_PREDICT_FALSE(std::isinf(val))) {
+ *st = Status::Invalid("domain error");
+ return val;
+ }
+ return std::sin(val);
+ }
+};
+
+struct Cos {
+ template <typename T, typename Arg0>
+ static enable_if_floating_point<Arg0, T> Call(KernelContext*, Arg0 val, Status*) {
+ static_assert(std::is_same<T, Arg0>::value, "");
+ return std::cos(val);
+ }
+};
+
+struct CosChecked {
+ template <typename T, typename Arg0>
+ static enable_if_floating_point<Arg0, T> Call(KernelContext*, Arg0 val, Status* st) {
+ static_assert(std::is_same<T, Arg0>::value, "");
+ if (ARROW_PREDICT_FALSE(std::isinf(val))) {
+ *st = Status::Invalid("domain error");
+ return val;
+ }
+ return std::cos(val);
+ }
+};
+
+struct Tan {
+ template <typename T, typename Arg0>
+ static enable_if_floating_point<Arg0, T> Call(KernelContext*, Arg0 val, Status*) {
+ static_assert(std::is_same<T, Arg0>::value, "");
+ return std::tan(val);
+ }
+};
+
+struct TanChecked {
+ template <typename T, typename Arg0>
+ static enable_if_floating_point<Arg0, T> Call(KernelContext*, Arg0 val, Status* st) {
+ static_assert(std::is_same<T, Arg0>::value, "");
+ if (ARROW_PREDICT_FALSE(std::isinf(val))) {
+ *st = Status::Invalid("domain error");
+ return val;
+ }
+ // Cannot raise range errors (overflow) since PI/2 is not exactly representable
+ return std::tan(val);
+ }
+};
+
+struct Asin {
+ template <typename T, typename Arg0>
+ static enable_if_floating_point<Arg0, T> Call(KernelContext*, Arg0 val, Status*) {
+ static_assert(std::is_same<T, Arg0>::value, "");
+ if (ARROW_PREDICT_FALSE(val < -1.0 || val > 1.0)) {
+ return std::numeric_limits<T>::quiet_NaN();
+ }
+ return std::asin(val);
+ }
+};
+
+struct AsinChecked {
+ template <typename T, typename Arg0>
+ static enable_if_floating_point<Arg0, T> Call(KernelContext*, Arg0 val, Status* st) {
+ static_assert(std::is_same<T, Arg0>::value, "");
+ if (ARROW_PREDICT_FALSE(val < -1.0 || val > 1.0)) {
+ *st = Status::Invalid("domain error");
+ return val;
+ }
+ return std::asin(val);
+ }
+};
+
+struct Acos {
+ template <typename T, typename Arg0>
+ static enable_if_floating_point<Arg0, T> Call(KernelContext*, Arg0 val, Status*) {
+ static_assert(std::is_same<T, Arg0>::value, "");
+ if (ARROW_PREDICT_FALSE((val < -1.0 || val > 1.0))) {
+ return std::numeric_limits<T>::quiet_NaN();
+ }
+ return std::acos(val);
+ }
+};
+
+struct AcosChecked {
+ template <typename T, typename Arg0>
+ static enable_if_floating_point<Arg0, T> Call(KernelContext*, Arg0 val, Status* st) {
+ static_assert(std::is_same<T, Arg0>::value, "");
+ if (ARROW_PREDICT_FALSE((val < -1.0 || val > 1.0))) {
+ *st = Status::Invalid("domain error");
+ return val;
+ }
+ return std::acos(val);
+ }
+};
+
+struct Atan {
+ template <typename T, typename Arg0>
+ static enable_if_floating_point<Arg0, T> Call(KernelContext*, Arg0 val, Status*) {
+ static_assert(std::is_same<T, Arg0>::value, "");
+ return std::atan(val);
+ }
+};
+
+struct Atan2 {
+ template <typename T, typename Arg0, typename Arg1>
+ static enable_if_floating_point<Arg0, T> Call(KernelContext*, Arg0 y, Arg1 x, Status*) {
+ static_assert(std::is_same<T, Arg0>::value, "");
+ static_assert(std::is_same<Arg0, Arg1>::value, "");
+ return std::atan2(y, x);
+ }
+};
+
+struct LogNatural {
+ template <typename T, typename Arg>
+ static enable_if_floating_point<Arg, T> Call(KernelContext*, Arg arg, Status*) {
+ static_assert(std::is_same<T, Arg>::value, "");
+ if (arg == 0.0) {
+ return -std::numeric_limits<T>::infinity();
+ } else if (arg < 0.0) {
+ return std::numeric_limits<T>::quiet_NaN();
+ }
+ return std::log(arg);
+ }
+};
+
+struct LogNaturalChecked {
+ template <typename T, typename Arg>
+ static enable_if_floating_point<Arg, T> Call(KernelContext*, Arg arg, Status* st) {
+ static_assert(std::is_same<T, Arg>::value, "");
+ if (arg == 0.0) {
+ *st = Status::Invalid("logarithm of zero");
+ return arg;
+ } else if (arg < 0.0) {
+ *st = Status::Invalid("logarithm of negative number");
+ return arg;
+ }
+ return std::log(arg);
+ }
+};
+
+struct Log10 {
+ template <typename T, typename Arg>
+ static enable_if_floating_point<Arg, T> Call(KernelContext*, Arg arg, Status*) {
+ static_assert(std::is_same<T, Arg>::value, "");
+ if (arg == 0.0) {
+ return -std::numeric_limits<T>::infinity();
+ } else if (arg < 0.0) {
+ return std::numeric_limits<T>::quiet_NaN();
+ }
+ return std::log10(arg);
+ }
+};
+
+struct Log10Checked {
+ template <typename T, typename Arg>
+ static enable_if_floating_point<Arg, T> Call(KernelContext*, Arg arg, Status* st) {
+ static_assert(std::is_same<T, Arg>::value, "");
+ if (arg == 0) {
+ *st = Status::Invalid("logarithm of zero");
+ return arg;
+ } else if (arg < 0) {
+ *st = Status::Invalid("logarithm of negative number");
+ return arg;
+ }
+ return std::log10(arg);
+ }
+};
+
+struct Log2 {
+ template <typename T, typename Arg>
+ static enable_if_floating_point<Arg, T> Call(KernelContext*, Arg arg, Status*) {
+ static_assert(std::is_same<T, Arg>::value, "");
+ if (arg == 0.0) {
+ return -std::numeric_limits<T>::infinity();
+ } else if (arg < 0.0) {
+ return std::numeric_limits<T>::quiet_NaN();
+ }
+ return std::log2(arg);
+ }
+};
+
+struct Log2Checked {
+ template <typename T, typename Arg>
+ static enable_if_floating_point<Arg, T> Call(KernelContext*, Arg arg, Status* st) {
+ static_assert(std::is_same<T, Arg>::value, "");
+ if (arg == 0.0) {
+ *st = Status::Invalid("logarithm of zero");
+ return arg;
+ } else if (arg < 0.0) {
+ *st = Status::Invalid("logarithm of negative number");
+ return arg;
+ }
+ return std::log2(arg);
+ }
+};
+
+struct Log1p {
+ template <typename T, typename Arg>
+ static enable_if_floating_point<Arg, T> Call(KernelContext*, Arg arg, Status*) {
+ static_assert(std::is_same<T, Arg>::value, "");
+ if (arg == -1) {
+ return -std::numeric_limits<T>::infinity();
+ } else if (arg < -1) {
+ return std::numeric_limits<T>::quiet_NaN();
+ }
+ return std::log1p(arg);
+ }
+};
+
+struct Log1pChecked {
+ template <typename T, typename Arg>
+ static enable_if_floating_point<Arg, T> Call(KernelContext*, Arg arg, Status* st) {
+ static_assert(std::is_same<T, Arg>::value, "");
+ if (arg == -1) {
+ *st = Status::Invalid("logarithm of zero");
+ return arg;
+ } else if (arg < -1) {
+ *st = Status::Invalid("logarithm of negative number");
+ return arg;
+ }
+ return std::log1p(arg);
+ }
+};
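+
+// Note (illustrative): log1p(arg) computes log(1 + arg), so arg == -1
+// corresponds to log(0) (hence "logarithm of zero") and arg < -1 to the
+// logarithm of a negative number.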
+
+struct Floor {
+ template <typename T, typename Arg>
+ static constexpr enable_if_floating_point<T> Call(KernelContext*, Arg arg, Status*) {
+ return std::floor(arg);
+ }
+};
+
+struct Ceil {
+ template <typename T, typename Arg>
+ static constexpr enable_if_floating_point<T> Call(KernelContext*, Arg arg, Status*) {
+ return std::ceil(arg);
+ }
+};
+
+struct Trunc {
+ template <typename T, typename Arg>
+ static constexpr enable_if_floating_point<T> Call(KernelContext*, Arg arg, Status*) {
+ return std::trunc(arg);
+ }
};
-struct Negate {
- template <typename T, typename Arg>
- static constexpr enable_if_floating_point<T> Call(KernelContext*, Arg arg, Status*) {
- return -arg;
- }
-
- template <typename T, typename Arg>
- static constexpr enable_if_unsigned_integer<T> Call(KernelContext*, Arg arg, Status*) {
- return ~arg + 1;
- }
-
- template <typename T, typename Arg>
- static constexpr enable_if_signed_integer<T> Call(KernelContext*, Arg arg, Status*) {
- return arrow::internal::SafeSignedNegate(arg);
- }
-};
-
-struct NegateChecked {
- template <typename T, typename Arg>
- static enable_if_signed_integer<T> Call(KernelContext*, Arg arg, Status* st) {
- static_assert(std::is_same<T, Arg>::value, "");
- T result = 0;
- if (ARROW_PREDICT_FALSE(NegateWithOverflow(arg, &result))) {
- *st = Status::Invalid("overflow");
- }
- return result;
- }
-
- template <typename T, typename Arg>
- static enable_if_unsigned_integer<T> Call(KernelContext* ctx, Arg arg, Status* st) {
- static_assert(std::is_same<T, Arg>::value, "");
- DCHECK(false) << "This is included only for the purposes of instantiability from the "
- "arithmetic kernel generator";
- return 0;
- }
-
- template <typename T, typename Arg>
- static constexpr enable_if_floating_point<T> Call(KernelContext*, Arg arg, Status* st) {
- static_assert(std::is_same<T, Arg>::value, "");
- return -arg;
- }
-};
-
-struct Power {
- ARROW_NOINLINE
- static uint64_t IntegerPower(uint64_t base, uint64_t exp) {
- // right-to-left O(log n) exponentiation by squaring
- uint64_t pow = 1;
- while (exp) {
- pow *= (exp & 1) ? base : 1;
- base *= base;
- exp >>= 1;
- }
- return pow;
- }
-
- template <typename T, typename Arg0, typename Arg1>
- static enable_if_integer<T> Call(KernelContext*, T base, T exp, Status* st) {
- if (exp < 0) {
- *st = Status::Invalid("integers to negative integer powers are not allowed");
- return 0;
- }
- return static_cast<T>(IntegerPower(base, exp));
- }
-
- template <typename T, typename Arg0, typename Arg1>
- static enable_if_floating_point<T> Call(KernelContext*, T base, T exp, Status*) {
- return std::pow(base, exp);
- }
-};
-
-struct PowerChecked {
- template <typename T, typename Arg0, typename Arg1>
- static enable_if_integer<T> Call(KernelContext*, Arg0 base, Arg1 exp, Status* st) {
- if (exp < 0) {
- *st = Status::Invalid("integers to negative integer powers are not allowed");
- return 0;
- } else if (exp == 0) {
- return 1;
- }
-    // left-to-right O(log n) power with overflow checks
- bool overflow = false;
- uint64_t bitmask =
- 1ULL << (63 - BitUtil::CountLeadingZeros(static_cast<uint64_t>(exp)));
- T pow = 1;
- while (bitmask) {
- overflow |= MultiplyWithOverflow(pow, pow, &pow);
- if (exp & bitmask) {
- overflow |= MultiplyWithOverflow(pow, base, &pow);
- }
- bitmask >>= 1;
- }
- if (overflow) {
- *st = Status::Invalid("overflow");
- }
- return pow;
- }
-
- template <typename T, typename Arg0, typename Arg1>
- static enable_if_floating_point<T> Call(KernelContext*, Arg0 base, Arg1 exp, Status*) {
- static_assert(std::is_same<T, Arg0>::value && std::is_same<T, Arg1>::value, "");
- return std::pow(base, exp);
- }
-};
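-
-// Worked example (illustrative) of the left-to-right scan above: starting at
-// the most significant exponent bit keeps the invariant pow == base^(bits
-// consumed so far), so every square and multiply goes through
-// MultiplyWithOverflow and overflow is detected at the step it occurs.
-// For base = 2, exp = 10 = 0b1010:
-//   bitmask 0b1000 (set):   pow = 1*1 * 2 = 2
-//   bitmask 0b0100 (clear): pow = 2*2     = 4
-//   bitmask 0b0010 (set):   pow = 4*4 * 2 = 32
-//   bitmask 0b0001 (clear): pow = 32*32   = 1024 -> 2^10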
-
-struct Sign {
- template <typename T, typename Arg>
- static constexpr enable_if_floating_point<T> Call(KernelContext*, Arg arg, Status*) {
- return std::isnan(arg) ? arg : ((arg == 0) ? 0 : (std::signbit(arg) ? -1 : 1));
- }
-
- template <typename T, typename Arg>
- static constexpr enable_if_unsigned_integer<T> Call(KernelContext*, Arg arg, Status*) {
- return arg > 0;
- }
-
- template <typename T, typename Arg>
- static constexpr enable_if_signed_integer<T> Call(KernelContext*, Arg arg, Status*) {
- return (arg > 0) ? 1 : ((arg == 0) ? 0 : -1);
- }
-};
-
-// Bitwise operations
-
-struct BitWiseNot {
- template <typename T, typename Arg>
- static T Call(KernelContext*, Arg arg, Status*) {
- return ~arg;
- }
-};
-
-struct BitWiseAnd {
- template <typename T, typename Arg0, typename Arg1>
- static T Call(KernelContext*, Arg0 lhs, Arg1 rhs, Status*) {
- return lhs & rhs;
- }
-};
-
-struct BitWiseOr {
- template <typename T, typename Arg0, typename Arg1>
- static T Call(KernelContext*, Arg0 lhs, Arg1 rhs, Status*) {
- return lhs | rhs;
- }
-};
-
-struct BitWiseXor {
- template <typename T, typename Arg0, typename Arg1>
- static T Call(KernelContext*, Arg0 lhs, Arg1 rhs, Status*) {
- return lhs ^ rhs;
- }
-};
-
-struct ShiftLeft {
- template <typename T, typename Arg0, typename Arg1>
- static T Call(KernelContext*, Arg0 lhs, Arg1 rhs, Status*) {
- using Unsigned = typename std::make_unsigned<Arg0>::type;
- static_assert(std::is_same<T, Arg0>::value, "");
- if (ARROW_PREDICT_FALSE(rhs < 0 || rhs >= std::numeric_limits<Arg0>::digits)) {
- return lhs;
- }
- return static_cast<T>(static_cast<Unsigned>(lhs) << static_cast<Unsigned>(rhs));
- }
-};
-
-// See SEI CERT C Coding Standard rule INT34-C
-struct ShiftLeftChecked {
- template <typename T, typename Arg0, typename Arg1>
- static enable_if_unsigned_integer<T> Call(KernelContext*, Arg0 lhs, Arg1 rhs,
- Status* st) {
- static_assert(std::is_same<T, Arg0>::value, "");
- if (ARROW_PREDICT_FALSE(rhs < 0 || rhs >= std::numeric_limits<Arg0>::digits)) {
- *st = Status::Invalid("shift amount must be >= 0 and less than precision of type");
- return lhs;
- }
- return lhs << rhs;
- }
-
- template <typename T, typename Arg0, typename Arg1>
- static enable_if_signed_integer<T> Call(KernelContext*, Arg0 lhs, Arg1 rhs,
- Status* st) {
- using Unsigned = typename std::make_unsigned<Arg0>::type;
- static_assert(std::is_same<T, Arg0>::value, "");
- if (ARROW_PREDICT_FALSE(rhs < 0 || rhs >= std::numeric_limits<Arg0>::digits)) {
- *st = Status::Invalid("shift amount must be >= 0 and less than precision of type");
- return lhs;
- }
- // In C/C++ left shift of a negative number is undefined (C++11 standard 5.8.2)
- // Mimic Java/etc. and treat left shift as based on two's complement representation
- // Assumes two's complement machine
- return static_cast<T>(static_cast<Unsigned>(lhs) << static_cast<Unsigned>(rhs));
- }
-};
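-
-// Illustrative example (assumes a two's complement machine, as above):
-// shifting the signed value directly would be undefined behavior per
-// INT34-C, so the kernel shifts the unsigned bit pattern and casts back:
-//   int8_t lhs = -1;  // bits 0xFF
-//   static_cast<int8_t>(static_cast<uint8_t>(lhs) << 1)  // bits 0xFE -> -2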
-
-struct ShiftRight {
- template <typename T, typename Arg0, typename Arg1>
- static T Call(KernelContext*, Arg0 lhs, Arg1 rhs, Status*) {
- static_assert(std::is_same<T, Arg0>::value, "");
- // Logical right shift when Arg0 is unsigned
- // Arithmetic otherwise (this is implementation-defined but GCC and MSVC document this
- // as arithmetic right shift)
- // https://gcc.gnu.org/onlinedocs/gcc/Integers-implementation.html#Integers-implementation
- // https://docs.microsoft.com/en-us/cpp/cpp/left-shift-and-right-shift-operators-input-and-output?view=msvc-160
-    // Clang doesn't document its behavior.
- if (ARROW_PREDICT_FALSE(rhs < 0 || rhs >= std::numeric_limits<Arg0>::digits)) {
- return lhs;
- }
- return lhs >> rhs;
- }
-};
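-
-// Illustrative example of the logical/arithmetic distinction noted above:
-//   int8_t(-8) >> 1     // arithmetic: bits 0xF8 -> 0xFC, i.e. -4
-//   uint8_t(0xF8) >> 1  // logical:    bits 0xF8 -> 0x7C, i.e. 124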
-
-struct ShiftRightChecked {
- template <typename T, typename Arg0, typename Arg1>
- static T Call(KernelContext*, Arg0 lhs, Arg1 rhs, Status* st) {
- static_assert(std::is_same<T, Arg0>::value, "");
- if (ARROW_PREDICT_FALSE(rhs < 0 || rhs >= std::numeric_limits<Arg0>::digits)) {
- *st = Status::Invalid("shift amount must be >= 0 and less than precision of type");
- return lhs;
- }
- return lhs >> rhs;
- }
-};
-
-struct Sin {
- template <typename T, typename Arg0>
- static enable_if_floating_point<Arg0, T> Call(KernelContext*, Arg0 val, Status*) {
- static_assert(std::is_same<T, Arg0>::value, "");
- return std::sin(val);
- }
-};
-
-struct SinChecked {
- template <typename T, typename Arg0>
- static enable_if_floating_point<Arg0, T> Call(KernelContext*, Arg0 val, Status* st) {
- static_assert(std::is_same<T, Arg0>::value, "");
- if (ARROW_PREDICT_FALSE(std::isinf(val))) {
- *st = Status::Invalid("domain error");
- return val;
- }
- return std::sin(val);
- }
-};
-
-struct Cos {
- template <typename T, typename Arg0>
- static enable_if_floating_point<Arg0, T> Call(KernelContext*, Arg0 val, Status*) {
- static_assert(std::is_same<T, Arg0>::value, "");
- return std::cos(val);
- }
-};
-
-struct CosChecked {
- template <typename T, typename Arg0>
- static enable_if_floating_point<Arg0, T> Call(KernelContext*, Arg0 val, Status* st) {
- static_assert(std::is_same<T, Arg0>::value, "");
- if (ARROW_PREDICT_FALSE(std::isinf(val))) {
- *st = Status::Invalid("domain error");
- return val;
- }
- return std::cos(val);
- }
-};
-
-struct Tan {
- template <typename T, typename Arg0>
- static enable_if_floating_point<Arg0, T> Call(KernelContext*, Arg0 val, Status*) {
- static_assert(std::is_same<T, Arg0>::value, "");
- return std::tan(val);
- }
-};
-
-struct TanChecked {
- template <typename T, typename Arg0>
- static enable_if_floating_point<Arg0, T> Call(KernelContext*, Arg0 val, Status* st) {
- static_assert(std::is_same<T, Arg0>::value, "");
- if (ARROW_PREDICT_FALSE(std::isinf(val))) {
- *st = Status::Invalid("domain error");
- return val;
- }
- // Cannot raise range errors (overflow) since PI/2 is not exactly representable
- return std::tan(val);
- }
-};
-
-struct Asin {
- template <typename T, typename Arg0>
- static enable_if_floating_point<Arg0, T> Call(KernelContext*, Arg0 val, Status*) {
- static_assert(std::is_same<T, Arg0>::value, "");
- if (ARROW_PREDICT_FALSE(val < -1.0 || val > 1.0)) {
- return std::numeric_limits<T>::quiet_NaN();
- }
- return std::asin(val);
- }
-};
-
-struct AsinChecked {
- template <typename T, typename Arg0>
- static enable_if_floating_point<Arg0, T> Call(KernelContext*, Arg0 val, Status* st) {
- static_assert(std::is_same<T, Arg0>::value, "");
- if (ARROW_PREDICT_FALSE(val < -1.0 || val > 1.0)) {
- *st = Status::Invalid("domain error");
- return val;
- }
- return std::asin(val);
- }
-};
-
-struct Acos {
- template <typename T, typename Arg0>
- static enable_if_floating_point<Arg0, T> Call(KernelContext*, Arg0 val, Status*) {
- static_assert(std::is_same<T, Arg0>::value, "");
- if (ARROW_PREDICT_FALSE((val < -1.0 || val > 1.0))) {
- return std::numeric_limits<T>::quiet_NaN();
- }
- return std::acos(val);
- }
-};
-
-struct AcosChecked {
- template <typename T, typename Arg0>
- static enable_if_floating_point<Arg0, T> Call(KernelContext*, Arg0 val, Status* st) {
- static_assert(std::is_same<T, Arg0>::value, "");
- if (ARROW_PREDICT_FALSE((val < -1.0 || val > 1.0))) {
- *st = Status::Invalid("domain error");
- return val;
- }
- return std::acos(val);
- }
-};
-
-struct Atan {
- template <typename T, typename Arg0>
- static enable_if_floating_point<Arg0, T> Call(KernelContext*, Arg0 val, Status*) {
- static_assert(std::is_same<T, Arg0>::value, "");
- return std::atan(val);
- }
-};
-
-struct Atan2 {
- template <typename T, typename Arg0, typename Arg1>
- static enable_if_floating_point<Arg0, T> Call(KernelContext*, Arg0 y, Arg1 x, Status*) {
- static_assert(std::is_same<T, Arg0>::value, "");
- static_assert(std::is_same<Arg0, Arg1>::value, "");
- return std::atan2(y, x);
- }
-};
-
-struct LogNatural {
- template <typename T, typename Arg>
- static enable_if_floating_point<Arg, T> Call(KernelContext*, Arg arg, Status*) {
- static_assert(std::is_same<T, Arg>::value, "");
- if (arg == 0.0) {
- return -std::numeric_limits<T>::infinity();
- } else if (arg < 0.0) {
- return std::numeric_limits<T>::quiet_NaN();
- }
- return std::log(arg);
- }
-};
-
-struct LogNaturalChecked {
- template <typename T, typename Arg>
- static enable_if_floating_point<Arg, T> Call(KernelContext*, Arg arg, Status* st) {
- static_assert(std::is_same<T, Arg>::value, "");
- if (arg == 0.0) {
- *st = Status::Invalid("logarithm of zero");
- return arg;
- } else if (arg < 0.0) {
- *st = Status::Invalid("logarithm of negative number");
- return arg;
- }
- return std::log(arg);
- }
-};
-
-struct Log10 {
- template <typename T, typename Arg>
- static enable_if_floating_point<Arg, T> Call(KernelContext*, Arg arg, Status*) {
- static_assert(std::is_same<T, Arg>::value, "");
- if (arg == 0.0) {
- return -std::numeric_limits<T>::infinity();
- } else if (arg < 0.0) {
- return std::numeric_limits<T>::quiet_NaN();
- }
- return std::log10(arg);
- }
-};
-
-struct Log10Checked {
- template <typename T, typename Arg>
- static enable_if_floating_point<Arg, T> Call(KernelContext*, Arg arg, Status* st) {
- static_assert(std::is_same<T, Arg>::value, "");
- if (arg == 0) {
- *st = Status::Invalid("logarithm of zero");
- return arg;
- } else if (arg < 0) {
- *st = Status::Invalid("logarithm of negative number");
- return arg;
- }
- return std::log10(arg);
- }
-};
-
-struct Log2 {
- template <typename T, typename Arg>
- static enable_if_floating_point<Arg, T> Call(KernelContext*, Arg arg, Status*) {
- static_assert(std::is_same<T, Arg>::value, "");
- if (arg == 0.0) {
- return -std::numeric_limits<T>::infinity();
- } else if (arg < 0.0) {
- return std::numeric_limits<T>::quiet_NaN();
- }
- return std::log2(arg);
- }
-};
-
-struct Log2Checked {
- template <typename T, typename Arg>
- static enable_if_floating_point<Arg, T> Call(KernelContext*, Arg arg, Status* st) {
- static_assert(std::is_same<T, Arg>::value, "");
- if (arg == 0.0) {
- *st = Status::Invalid("logarithm of zero");
- return arg;
- } else if (arg < 0.0) {
- *st = Status::Invalid("logarithm of negative number");
- return arg;
- }
- return std::log2(arg);
- }
-};
-
-struct Log1p {
- template <typename T, typename Arg>
- static enable_if_floating_point<Arg, T> Call(KernelContext*, Arg arg, Status*) {
- static_assert(std::is_same<T, Arg>::value, "");
- if (arg == -1) {
- return -std::numeric_limits<T>::infinity();
- } else if (arg < -1) {
- return std::numeric_limits<T>::quiet_NaN();
- }
- return std::log1p(arg);
- }
-};
-
-struct Log1pChecked {
- template <typename T, typename Arg>
- static enable_if_floating_point<Arg, T> Call(KernelContext*, Arg arg, Status* st) {
- static_assert(std::is_same<T, Arg>::value, "");
- if (arg == -1) {
- *st = Status::Invalid("logarithm of zero");
- return arg;
- } else if (arg < -1) {
- *st = Status::Invalid("logarithm of negative number");
- return arg;
- }
- return std::log1p(arg);
- }
-};
-
-struct Floor {
- template <typename T, typename Arg>
- static constexpr enable_if_floating_point<T> Call(KernelContext*, Arg arg, Status*) {
- return std::floor(arg);
- }
-};
-
-struct Ceil {
- template <typename T, typename Arg>
- static constexpr enable_if_floating_point<T> Call(KernelContext*, Arg arg, Status*) {
- return std::ceil(arg);
- }
-};
-
-struct Trunc {
- template <typename T, typename Arg>
- static constexpr enable_if_floating_point<T> Call(KernelContext*, Arg arg, Status*) {
- return std::trunc(arg);
- }
-};
-
// Generate a kernel given an arithmetic functor
template <template <typename... Args> class KernelGenerator, typename Op>
-ArrayKernelExec ArithmeticExecFromOp(detail::GetTypeId get_id) {
+ArrayKernelExec ArithmeticExecFromOp(detail::GetTypeId get_id) {
switch (get_id.id) {
case Type::INT8:
return KernelGenerator<Int8Type, Int8Type, Op>::Exec;
@@ -869,321 +869,321 @@ ArrayKernelExec ArithmeticExecFromOp(detail::GetTypeId get_id) {
}
}
-// Generate a kernel given a bitwise arithmetic functor. Assumes the
-// functor treats all integer types of equal width identically
-template <template <typename... Args> class KernelGenerator, typename Op>
-ArrayKernelExec TypeAgnosticBitWiseExecFromOp(detail::GetTypeId get_id) {
- switch (get_id.id) {
- case Type::INT8:
- case Type::UINT8:
- return KernelGenerator<UInt8Type, UInt8Type, Op>::Exec;
- case Type::INT16:
- case Type::UINT16:
- return KernelGenerator<UInt16Type, UInt16Type, Op>::Exec;
- case Type::INT32:
- case Type::UINT32:
- return KernelGenerator<UInt32Type, UInt32Type, Op>::Exec;
- case Type::INT64:
- case Type::UINT64:
- return KernelGenerator<UInt64Type, UInt64Type, Op>::Exec;
- default:
- DCHECK(false);
- return ExecFail;
- }
-}
-
-template <template <typename... Args> class KernelGenerator, typename Op>
-ArrayKernelExec ShiftExecFromOp(detail::GetTypeId get_id) {
- switch (get_id.id) {
- case Type::INT8:
- return KernelGenerator<Int8Type, Int8Type, Op>::Exec;
- case Type::UINT8:
- return KernelGenerator<UInt8Type, UInt8Type, Op>::Exec;
- case Type::INT16:
- return KernelGenerator<Int16Type, Int16Type, Op>::Exec;
- case Type::UINT16:
- return KernelGenerator<UInt16Type, UInt16Type, Op>::Exec;
- case Type::INT32:
- return KernelGenerator<Int32Type, Int32Type, Op>::Exec;
- case Type::UINT32:
- return KernelGenerator<UInt32Type, UInt32Type, Op>::Exec;
- case Type::INT64:
- return KernelGenerator<Int64Type, Int64Type, Op>::Exec;
- case Type::UINT64:
- return KernelGenerator<UInt64Type, UInt64Type, Op>::Exec;
- default:
- DCHECK(false);
- return ExecFail;
- }
-}
-
-template <template <typename... Args> class KernelGenerator, typename Op>
-ArrayKernelExec GenerateArithmeticFloatingPoint(detail::GetTypeId get_id) {
- switch (get_id.id) {
- case Type::FLOAT:
- return KernelGenerator<FloatType, FloatType, Op>::Exec;
- case Type::DOUBLE:
- return KernelGenerator<DoubleType, DoubleType, Op>::Exec;
- default:
- DCHECK(false);
- return ExecFail;
- }
-}
-
-Status CastBinaryDecimalArgs(const std::string& func_name,
- std::vector<ValueDescr>* values) {
- auto& left_type = (*values)[0].type;
- auto& right_type = (*values)[1].type;
- DCHECK(is_decimal(left_type->id()) || is_decimal(right_type->id()));
-
- // decimal + float = float
- if (is_floating(left_type->id())) {
- right_type = left_type;
- return Status::OK();
- } else if (is_floating(right_type->id())) {
- left_type = right_type;
- return Status::OK();
- }
-
- // precision, scale of left and right args
- int32_t p1, s1, p2, s2;
-
- // decimal + integer = decimal
- if (is_decimal(left_type->id())) {
- auto decimal = checked_cast<const DecimalType*>(left_type.get());
- p1 = decimal->precision();
- s1 = decimal->scale();
- } else {
- DCHECK(is_integer(left_type->id()));
- p1 = static_cast<int32_t>(std::ceil(std::log10(bit_width(left_type->id()))));
- s1 = 0;
- }
- if (is_decimal(right_type->id())) {
- auto decimal = checked_cast<const DecimalType*>(right_type.get());
- p2 = decimal->precision();
- s2 = decimal->scale();
- } else {
- DCHECK(is_integer(right_type->id()));
- p2 = static_cast<int32_t>(std::ceil(std::log10(bit_width(right_type->id()))));
- s2 = 0;
- }
- if (s1 < 0 || s2 < 0) {
- return Status::NotImplemented("Decimals with negative scales not supported");
- }
-
- // decimal128 + decimal256 = decimal256
- Type::type casted_type_id = Type::DECIMAL128;
- if (left_type->id() == Type::DECIMAL256 || right_type->id() == Type::DECIMAL256) {
- casted_type_id = Type::DECIMAL256;
- }
-
-  // decimal promotion rules compatible with Amazon Redshift
- // https://docs.aws.amazon.com/redshift/latest/dg/r_numeric_computations201.html
- int32_t left_scaleup, right_scaleup;
-
- // "add_checked" -> "add"
- const std::string op = func_name.substr(0, func_name.find("_"));
- if (op == "add" || op == "subtract") {
- left_scaleup = std::max(s1, s2) - s1;
- right_scaleup = std::max(s1, s2) - s2;
- } else if (op == "multiply") {
- left_scaleup = right_scaleup = 0;
- } else if (op == "divide") {
- left_scaleup = std::max(4, s1 + p2 - s2 + 1) + s2 - s1;
- right_scaleup = 0;
- } else {
- return Status::Invalid("Invalid decimal function: ", func_name);
- }
-
- ARROW_ASSIGN_OR_RAISE(
- left_type, DecimalType::Make(casted_type_id, p1 + left_scaleup, s1 + left_scaleup));
- ARROW_ASSIGN_OR_RAISE(right_type, DecimalType::Make(casted_type_id, p2 + right_scaleup,
- s2 + right_scaleup));
- return Status::OK();
-}
-
-// resolve decimal binary operation output type per *casted* args
-template <typename OutputGetter>
-Result<ValueDescr> ResolveDecimalBinaryOperationOutput(
- const std::vector<ValueDescr>& args, OutputGetter&& getter) {
- // casted args should be same size decimals
- auto left_type = checked_cast<const DecimalType*>(args[0].type.get());
- auto right_type = checked_cast<const DecimalType*>(args[1].type.get());
- DCHECK_EQ(left_type->id(), right_type->id());
-
- int32_t precision, scale;
- std::tie(precision, scale) = getter(left_type->precision(), left_type->scale(),
- right_type->precision(), right_type->scale());
- ARROW_ASSIGN_OR_RAISE(auto type, DecimalType::Make(left_type->id(), precision, scale));
- return ValueDescr(std::move(type), GetBroadcastShape(args));
-}
-
-Result<ValueDescr> ResolveDecimalAdditionOrSubtractionOutput(
- KernelContext*, const std::vector<ValueDescr>& args) {
- return ResolveDecimalBinaryOperationOutput(
- args, [](int32_t p1, int32_t s1, int32_t p2, int32_t s2) {
- DCHECK_EQ(s1, s2);
- const int32_t scale = s1;
- const int32_t precision = std::max(p1 - s1, p2 - s2) + scale + 1;
- return std::make_pair(precision, scale);
- });
-}
-
-Result<ValueDescr> ResolveDecimalMultiplicationOutput(
- KernelContext*, const std::vector<ValueDescr>& args) {
- return ResolveDecimalBinaryOperationOutput(
- args, [](int32_t p1, int32_t s1, int32_t p2, int32_t s2) {
- const int32_t scale = s1 + s2;
- const int32_t precision = p1 + p2 + 1;
- return std::make_pair(precision, scale);
- });
-}
-
-Result<ValueDescr> ResolveDecimalDivisionOutput(KernelContext*,
- const std::vector<ValueDescr>& args) {
- return ResolveDecimalBinaryOperationOutput(
- args, [](int32_t p1, int32_t s1, int32_t p2, int32_t s2) {
- DCHECK_GE(s1, s2);
- const int32_t scale = s1 - s2;
- const int32_t precision = p1;
- return std::make_pair(precision, scale);
- });
-}
-
+// Generate a kernel given a bitwise arithmetic functor. Assumes the
+// functor treats all integer types of equal width identically
+template <template <typename... Args> class KernelGenerator, typename Op>
+ArrayKernelExec TypeAgnosticBitWiseExecFromOp(detail::GetTypeId get_id) {
+ switch (get_id.id) {
+ case Type::INT8:
+ case Type::UINT8:
+ return KernelGenerator<UInt8Type, UInt8Type, Op>::Exec;
+ case Type::INT16:
+ case Type::UINT16:
+ return KernelGenerator<UInt16Type, UInt16Type, Op>::Exec;
+ case Type::INT32:
+ case Type::UINT32:
+ return KernelGenerator<UInt32Type, UInt32Type, Op>::Exec;
+ case Type::INT64:
+ case Type::UINT64:
+ return KernelGenerator<UInt64Type, UInt64Type, Op>::Exec;
+ default:
+ DCHECK(false);
+ return ExecFail;
+ }
+}
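+
+// Illustrative consequence: "bit_wise_and" on two int8 arrays executes the
+// UInt8Type kernel. This is sound because &, |, ^ and ~ operate on the raw
+// bit pattern only, and reinterpreting int8 as uint8 (two's complement) is
+// lossless in both directions.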
+
+template <template <typename... Args> class KernelGenerator, typename Op>
+ArrayKernelExec ShiftExecFromOp(detail::GetTypeId get_id) {
+ switch (get_id.id) {
+ case Type::INT8:
+ return KernelGenerator<Int8Type, Int8Type, Op>::Exec;
+ case Type::UINT8:
+ return KernelGenerator<UInt8Type, UInt8Type, Op>::Exec;
+ case Type::INT16:
+ return KernelGenerator<Int16Type, Int16Type, Op>::Exec;
+ case Type::UINT16:
+ return KernelGenerator<UInt16Type, UInt16Type, Op>::Exec;
+ case Type::INT32:
+ return KernelGenerator<Int32Type, Int32Type, Op>::Exec;
+ case Type::UINT32:
+ return KernelGenerator<UInt32Type, UInt32Type, Op>::Exec;
+ case Type::INT64:
+ return KernelGenerator<Int64Type, Int64Type, Op>::Exec;
+ case Type::UINT64:
+ return KernelGenerator<UInt64Type, UInt64Type, Op>::Exec;
+ default:
+ DCHECK(false);
+ return ExecFail;
+ }
+}
+
+template <template <typename... Args> class KernelGenerator, typename Op>
+ArrayKernelExec GenerateArithmeticFloatingPoint(detail::GetTypeId get_id) {
+ switch (get_id.id) {
+ case Type::FLOAT:
+ return KernelGenerator<FloatType, FloatType, Op>::Exec;
+ case Type::DOUBLE:
+ return KernelGenerator<DoubleType, DoubleType, Op>::Exec;
+ default:
+ DCHECK(false);
+ return ExecFail;
+ }
+}
+
+Status CastBinaryDecimalArgs(const std::string& func_name,
+ std::vector<ValueDescr>* values) {
+ auto& left_type = (*values)[0].type;
+ auto& right_type = (*values)[1].type;
+ DCHECK(is_decimal(left_type->id()) || is_decimal(right_type->id()));
+
+ // decimal + float = float
+ if (is_floating(left_type->id())) {
+ right_type = left_type;
+ return Status::OK();
+ } else if (is_floating(right_type->id())) {
+ left_type = right_type;
+ return Status::OK();
+ }
+
+ // precision, scale of left and right args
+ int32_t p1, s1, p2, s2;
+
+ // decimal + integer = decimal
+ if (is_decimal(left_type->id())) {
+ auto decimal = checked_cast<const DecimalType*>(left_type.get());
+ p1 = decimal->precision();
+ s1 = decimal->scale();
+ } else {
+ DCHECK(is_integer(left_type->id()));
+ p1 = static_cast<int32_t>(std::ceil(std::log10(bit_width(left_type->id()))));
+ s1 = 0;
+ }
+ if (is_decimal(right_type->id())) {
+ auto decimal = checked_cast<const DecimalType*>(right_type.get());
+ p2 = decimal->precision();
+ s2 = decimal->scale();
+ } else {
+ DCHECK(is_integer(right_type->id()));
+ p2 = static_cast<int32_t>(std::ceil(std::log10(bit_width(right_type->id()))));
+ s2 = 0;
+ }
+ if (s1 < 0 || s2 < 0) {
+ return Status::NotImplemented("Decimals with negative scales not supported");
+ }
+
+ // decimal128 + decimal256 = decimal256
+ Type::type casted_type_id = Type::DECIMAL128;
+ if (left_type->id() == Type::DECIMAL256 || right_type->id() == Type::DECIMAL256) {
+ casted_type_id = Type::DECIMAL256;
+ }
+
+  // decimal promotion rules compatible with Amazon Redshift
+ // https://docs.aws.amazon.com/redshift/latest/dg/r_numeric_computations201.html
+ int32_t left_scaleup, right_scaleup;
+
+ // "add_checked" -> "add"
+ const std::string op = func_name.substr(0, func_name.find("_"));
+ if (op == "add" || op == "subtract") {
+ left_scaleup = std::max(s1, s2) - s1;
+ right_scaleup = std::max(s1, s2) - s2;
+ } else if (op == "multiply") {
+ left_scaleup = right_scaleup = 0;
+ } else if (op == "divide") {
+ left_scaleup = std::max(4, s1 + p2 - s2 + 1) + s2 - s1;
+ right_scaleup = 0;
+ } else {
+ return Status::Invalid("Invalid decimal function: ", func_name);
+ }
+
+ ARROW_ASSIGN_OR_RAISE(
+ left_type, DecimalType::Make(casted_type_id, p1 + left_scaleup, s1 + left_scaleup));
+ ARROW_ASSIGN_OR_RAISE(right_type, DecimalType::Make(casted_type_id, p2 + right_scaleup,
+ s2 + right_scaleup));
+ return Status::OK();
+}
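+
+// Worked example (illustrative): for "add" with decimal128(5, 2) and
+// decimal128(7, 4), max(s1, s2) = 4, so the left argument is rescaled by
+// 4 - 2 = 2 to decimal128(7, 4) and the right argument is unchanged; both
+// operands then share scale 4, as the decimal addition kernels require.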
+
+// resolve decimal binary operation output type per *casted* args
+template <typename OutputGetter>
+Result<ValueDescr> ResolveDecimalBinaryOperationOutput(
+ const std::vector<ValueDescr>& args, OutputGetter&& getter) {
+ // casted args should be same size decimals
+ auto left_type = checked_cast<const DecimalType*>(args[0].type.get());
+ auto right_type = checked_cast<const DecimalType*>(args[1].type.get());
+ DCHECK_EQ(left_type->id(), right_type->id());
+
+ int32_t precision, scale;
+ std::tie(precision, scale) = getter(left_type->precision(), left_type->scale(),
+ right_type->precision(), right_type->scale());
+ ARROW_ASSIGN_OR_RAISE(auto type, DecimalType::Make(left_type->id(), precision, scale));
+ return ValueDescr(std::move(type), GetBroadcastShape(args));
+}
+
+Result<ValueDescr> ResolveDecimalAdditionOrSubtractionOutput(
+ KernelContext*, const std::vector<ValueDescr>& args) {
+ return ResolveDecimalBinaryOperationOutput(
+ args, [](int32_t p1, int32_t s1, int32_t p2, int32_t s2) {
+ DCHECK_EQ(s1, s2);
+ const int32_t scale = s1;
+ const int32_t precision = std::max(p1 - s1, p2 - s2) + scale + 1;
+ return std::make_pair(precision, scale);
+ });
+}
+
+Result<ValueDescr> ResolveDecimalMultiplicationOutput(
+ KernelContext*, const std::vector<ValueDescr>& args) {
+ return ResolveDecimalBinaryOperationOutput(
+ args, [](int32_t p1, int32_t s1, int32_t p2, int32_t s2) {
+ const int32_t scale = s1 + s2;
+ const int32_t precision = p1 + p2 + 1;
+ return std::make_pair(precision, scale);
+ });
+}
+
+Result<ValueDescr> ResolveDecimalDivisionOutput(KernelContext*,
+ const std::vector<ValueDescr>& args) {
+ return ResolveDecimalBinaryOperationOutput(
+ args, [](int32_t p1, int32_t s1, int32_t p2, int32_t s2) {
+ DCHECK_GE(s1, s2);
+ const int32_t scale = s1 - s2;
+ const int32_t precision = p1;
+ return std::make_pair(precision, scale);
+ });
+}
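+
+// Worked examples for the resolvers above (illustrative), continuing with
+// decimal128(5, 2) and decimal128(7, 4) operands:
+//   add/subtract: args cast to (7, 4) first; scale = 4,
+//                 precision = max(7 - 4, 7 - 4) + 4 + 1 = 8 -> decimal128(8, 4)
+//   multiply:     args uncast; scale = 2 + 4 = 6,
+//                 precision = 5 + 7 + 1 = 13               -> decimal128(13, 6)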
+
+template <typename Op>
+void AddDecimalBinaryKernels(const std::string& name,
+ std::shared_ptr<ScalarFunction>* func) {
+ OutputType out_type(null());
+ const std::string op = name.substr(0, name.find("_"));
+ if (op == "add" || op == "subtract") {
+ out_type = OutputType(ResolveDecimalAdditionOrSubtractionOutput);
+ } else if (op == "multiply") {
+ out_type = OutputType(ResolveDecimalMultiplicationOutput);
+ } else if (op == "divide") {
+ out_type = OutputType(ResolveDecimalDivisionOutput);
+ } else {
+ DCHECK(false);
+ }
+
+ auto in_type128 = InputType(Type::DECIMAL128);
+ auto in_type256 = InputType(Type::DECIMAL256);
+ auto exec128 = ScalarBinaryNotNullEqualTypes<Decimal128Type, Decimal128Type, Op>::Exec;
+ auto exec256 = ScalarBinaryNotNullEqualTypes<Decimal256Type, Decimal256Type, Op>::Exec;
+ DCHECK_OK((*func)->AddKernel({in_type128, in_type128}, out_type, exec128));
+ DCHECK_OK((*func)->AddKernel({in_type256, in_type256}, out_type, exec256));
+}
+
+// Generate a kernel given an arithmetic functor
+template <template <typename...> class KernelGenerator, typename OutType, typename Op>
+ArrayKernelExec GenerateArithmeticWithFixedIntOutType(detail::GetTypeId get_id) {
+ switch (get_id.id) {
+ case Type::INT8:
+ return KernelGenerator<OutType, Int8Type, Op>::Exec;
+ case Type::UINT8:
+ return KernelGenerator<OutType, UInt8Type, Op>::Exec;
+ case Type::INT16:
+ return KernelGenerator<OutType, Int16Type, Op>::Exec;
+ case Type::UINT16:
+ return KernelGenerator<OutType, UInt16Type, Op>::Exec;
+ case Type::INT32:
+ return KernelGenerator<OutType, Int32Type, Op>::Exec;
+ case Type::UINT32:
+ return KernelGenerator<OutType, UInt32Type, Op>::Exec;
+ case Type::INT64:
+ case Type::TIMESTAMP:
+ return KernelGenerator<OutType, Int64Type, Op>::Exec;
+ case Type::UINT64:
+ return KernelGenerator<OutType, UInt64Type, Op>::Exec;
+ case Type::FLOAT:
+ return KernelGenerator<FloatType, FloatType, Op>::Exec;
+ case Type::DOUBLE:
+ return KernelGenerator<DoubleType, DoubleType, Op>::Exec;
+ default:
+ DCHECK(false);
+ return ExecFail;
+ }
+}
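+
+// Note: TIMESTAMP shares the Int64Type case above because timestamps are
+// stored as 64-bit integers, so the int64 kernel body can be reused while the
+// fixed integer OutType is still emitted; float and double inputs instead
+// keep their own type as the output.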
+
+struct ArithmeticFunction : ScalarFunction {
+ using ScalarFunction::ScalarFunction;
+
+ Result<const Kernel*> DispatchBest(std::vector<ValueDescr>* values) const override {
+ RETURN_NOT_OK(CheckArity(*values));
+
+ RETURN_NOT_OK(CheckDecimals(values));
+
+ using arrow::compute::detail::DispatchExactImpl;
+ if (auto kernel = DispatchExactImpl(this, *values)) return kernel;
+
+ EnsureDictionaryDecoded(values);
+
+ // Only promote types for binary functions
+ if (values->size() == 2) {
+ ReplaceNullWithOtherType(values);
+
+ if (auto type = CommonNumeric(*values)) {
+ ReplaceTypes(type, values);
+ }
+ }
+
+ if (auto kernel = DispatchExactImpl(this, *values)) return kernel;
+ return arrow::compute::detail::NoMatchingKernel(this, *values);
+ }
+
+ Status CheckDecimals(std::vector<ValueDescr>* values) const {
+ bool has_decimal = false;
+ for (const auto& value : *values) {
+ if (is_decimal(value.type->id())) {
+ has_decimal = true;
+ break;
+ }
+ }
+ if (!has_decimal) return Status::OK();
+
+ if (values->size() == 2) {
+ return CastBinaryDecimalArgs(name(), values);
+ }
+ return Status::OK();
+ }
+};
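+
+// Illustrative dispatch trace (hypothetical call): for add(int8, float64)
+// there is no exact (int8, float64) kernel, so after dictionary decoding and
+// null replacement CommonNumeric() yields float64, both descriptors are
+// replaced with float64, and the (float64, float64) kernel is found on the
+// second DispatchExactImpl pass.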
+
+/// An ArithmeticFunction that promotes integer arguments to double.
+struct ArithmeticFloatingPointFunction : public ArithmeticFunction {
+ using ArithmeticFunction::ArithmeticFunction;
+
+ Result<const Kernel*> DispatchBest(std::vector<ValueDescr>* values) const override {
+ RETURN_NOT_OK(CheckArity(*values));
+ RETURN_NOT_OK(CheckDecimals(values));
+
+ using arrow::compute::detail::DispatchExactImpl;
+ if (auto kernel = DispatchExactImpl(this, *values)) return kernel;
+
+ EnsureDictionaryDecoded(values);
+
+ if (values->size() == 2) {
+ ReplaceNullWithOtherType(values);
+ }
+
+ for (auto& descr : *values) {
+ if (is_integer(descr.type->id())) {
+ descr.type = float64();
+ }
+ }
+ if (auto type = CommonNumeric(*values)) {
+ ReplaceTypes(type, values);
+ }
+
+ if (auto kernel = DispatchExactImpl(this, *values)) return kernel;
+ return arrow::compute::detail::NoMatchingKernel(this, *values);
+ }
+};
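+
+// Illustrative consequence: for a unary function such as "ln", an int32
+// argument is rewritten to float64 before dispatch, so ln(int32) runs the
+// float64 kernel, while ln(float32) keeps float32 because only integer
+// descriptors are replaced.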
+
template <typename Op>
-void AddDecimalBinaryKernels(const std::string& name,
- std::shared_ptr<ScalarFunction>* func) {
- OutputType out_type(null());
- const std::string op = name.substr(0, name.find("_"));
- if (op == "add" || op == "subtract") {
- out_type = OutputType(ResolveDecimalAdditionOrSubtractionOutput);
- } else if (op == "multiply") {
- out_type = OutputType(ResolveDecimalMultiplicationOutput);
- } else if (op == "divide") {
- out_type = OutputType(ResolveDecimalDivisionOutput);
- } else {
- DCHECK(false);
- }
-
- auto in_type128 = InputType(Type::DECIMAL128);
- auto in_type256 = InputType(Type::DECIMAL256);
- auto exec128 = ScalarBinaryNotNullEqualTypes<Decimal128Type, Decimal128Type, Op>::Exec;
- auto exec256 = ScalarBinaryNotNullEqualTypes<Decimal256Type, Decimal256Type, Op>::Exec;
- DCHECK_OK((*func)->AddKernel({in_type128, in_type128}, out_type, exec128));
- DCHECK_OK((*func)->AddKernel({in_type256, in_type256}, out_type, exec256));
-}
-
-// Generate a kernel given an arithmetic functor
-template <template <typename...> class KernelGenerator, typename OutType, typename Op>
-ArrayKernelExec GenerateArithmeticWithFixedIntOutType(detail::GetTypeId get_id) {
- switch (get_id.id) {
- case Type::INT8:
- return KernelGenerator<OutType, Int8Type, Op>::Exec;
- case Type::UINT8:
- return KernelGenerator<OutType, UInt8Type, Op>::Exec;
- case Type::INT16:
- return KernelGenerator<OutType, Int16Type, Op>::Exec;
- case Type::UINT16:
- return KernelGenerator<OutType, UInt16Type, Op>::Exec;
- case Type::INT32:
- return KernelGenerator<OutType, Int32Type, Op>::Exec;
- case Type::UINT32:
- return KernelGenerator<OutType, UInt32Type, Op>::Exec;
- case Type::INT64:
- case Type::TIMESTAMP:
- return KernelGenerator<OutType, Int64Type, Op>::Exec;
- case Type::UINT64:
- return KernelGenerator<OutType, UInt64Type, Op>::Exec;
- case Type::FLOAT:
- return KernelGenerator<FloatType, FloatType, Op>::Exec;
- case Type::DOUBLE:
- return KernelGenerator<DoubleType, DoubleType, Op>::Exec;
- default:
- DCHECK(false);
- return ExecFail;
- }
-}
-
-struct ArithmeticFunction : ScalarFunction {
- using ScalarFunction::ScalarFunction;
-
- Result<const Kernel*> DispatchBest(std::vector<ValueDescr>* values) const override {
- RETURN_NOT_OK(CheckArity(*values));
-
- RETURN_NOT_OK(CheckDecimals(values));
-
- using arrow::compute::detail::DispatchExactImpl;
- if (auto kernel = DispatchExactImpl(this, *values)) return kernel;
-
- EnsureDictionaryDecoded(values);
-
- // Only promote types for binary functions
- if (values->size() == 2) {
- ReplaceNullWithOtherType(values);
-
- if (auto type = CommonNumeric(*values)) {
- ReplaceTypes(type, values);
- }
- }
-
- if (auto kernel = DispatchExactImpl(this, *values)) return kernel;
- return arrow::compute::detail::NoMatchingKernel(this, *values);
- }
-
- Status CheckDecimals(std::vector<ValueDescr>* values) const {
- bool has_decimal = false;
- for (const auto& value : *values) {
- if (is_decimal(value.type->id())) {
- has_decimal = true;
- break;
- }
- }
- if (!has_decimal) return Status::OK();
-
- if (values->size() == 2) {
- return CastBinaryDecimalArgs(name(), values);
- }
- return Status::OK();
- }
-};
-
-/// An ArithmeticFunction that promotes integer arguments to double.
-struct ArithmeticFloatingPointFunction : public ArithmeticFunction {
- using ArithmeticFunction::ArithmeticFunction;
-
- Result<const Kernel*> DispatchBest(std::vector<ValueDescr>* values) const override {
- RETURN_NOT_OK(CheckArity(*values));
- RETURN_NOT_OK(CheckDecimals(values));
-
- using arrow::compute::detail::DispatchExactImpl;
- if (auto kernel = DispatchExactImpl(this, *values)) return kernel;
-
- EnsureDictionaryDecoded(values);
-
- if (values->size() == 2) {
- ReplaceNullWithOtherType(values);
- }
-
- for (auto& descr : *values) {
- if (is_integer(descr.type->id())) {
- descr.type = float64();
- }
- }
- if (auto type = CommonNumeric(*values)) {
- ReplaceTypes(type, values);
- }
-
- if (auto kernel = DispatchExactImpl(this, *values)) return kernel;
- return arrow::compute::detail::NoMatchingKernel(this, *values);
- }
-};
-
-template <typename Op>
-std::shared_ptr<ScalarFunction> MakeArithmeticFunction(std::string name,
- const FunctionDoc* doc) {
- auto func = std::make_shared<ArithmeticFunction>(name, Arity::Binary(), doc);
+std::shared_ptr<ScalarFunction> MakeArithmeticFunction(std::string name,
+ const FunctionDoc* doc) {
+ auto func = std::make_shared<ArithmeticFunction>(name, Arity::Binary(), doc);
for (const auto& ty : NumericTypes()) {
- auto exec = ArithmeticExecFromOp<ScalarBinaryEqualTypes, Op>(ty);
+ auto exec = ArithmeticExecFromOp<ScalarBinaryEqualTypes, Op>(ty);
DCHECK_OK(func->AddKernel({ty, ty}, ty, exec));
}
return func;
@@ -1192,630 +1192,630 @@ std::shared_ptr<ScalarFunction> MakeArithmeticFunction(std::string name,
// Like MakeArithmeticFunction, but for arithmetic ops that need to run
// only on non-null output.
template <typename Op>
-std::shared_ptr<ScalarFunction> MakeArithmeticFunctionNotNull(std::string name,
- const FunctionDoc* doc) {
- auto func = std::make_shared<ArithmeticFunction>(name, Arity::Binary(), doc);
+std::shared_ptr<ScalarFunction> MakeArithmeticFunctionNotNull(std::string name,
+ const FunctionDoc* doc) {
+ auto func = std::make_shared<ArithmeticFunction>(name, Arity::Binary(), doc);
+ for (const auto& ty : NumericTypes()) {
+ auto exec = ArithmeticExecFromOp<ScalarBinaryNotNullEqualTypes, Op>(ty);
+ DCHECK_OK(func->AddKernel({ty, ty}, ty, exec));
+ }
+ return func;
+}
+
+template <typename Op>
+std::shared_ptr<ScalarFunction> MakeUnaryArithmeticFunction(std::string name,
+ const FunctionDoc* doc) {
+ auto func = std::make_shared<ArithmeticFunction>(name, Arity::Unary(), doc);
+ for (const auto& ty : NumericTypes()) {
+ auto exec = ArithmeticExecFromOp<ScalarUnary, Op>(ty);
+ DCHECK_OK(func->AddKernel({ty}, ty, exec));
+ }
+ return func;
+}
+
+// Like MakeUnaryArithmeticFunction, but for unary arithmetic ops with a fixed
+// output type for integral inputs.
+template <typename Op, typename IntOutType>
+std::shared_ptr<ScalarFunction> MakeUnaryArithmeticFunctionWithFixedIntOutType(
+ std::string name, const FunctionDoc* doc) {
+ auto int_out_ty = TypeTraits<IntOutType>::type_singleton();
+ auto func = std::make_shared<ArithmeticFunction>(name, Arity::Unary(), doc);
+ for (const auto& ty : NumericTypes()) {
+ auto out_ty = arrow::is_floating(ty->id()) ? ty : int_out_ty;
+ auto exec = GenerateArithmeticWithFixedIntOutType<ScalarUnary, IntOutType, Op>(ty);
+ DCHECK_OK(func->AddKernel({ty}, out_ty, exec));
+ }
+ return func;
+}
+
+// Like MakeUnaryArithmeticFunction, but for arithmetic ops that need to run
+// only on non-null output.
+template <typename Op>
+std::shared_ptr<ScalarFunction> MakeUnaryArithmeticFunctionNotNull(
+ std::string name, const FunctionDoc* doc) {
+ auto func = std::make_shared<ArithmeticFunction>(name, Arity::Unary(), doc);
for (const auto& ty : NumericTypes()) {
- auto exec = ArithmeticExecFromOp<ScalarBinaryNotNullEqualTypes, Op>(ty);
+ auto exec = ArithmeticExecFromOp<ScalarUnaryNotNull, Op>(ty);
+ DCHECK_OK(func->AddKernel({ty}, ty, exec));
+ }
+ return func;
+}
+
+// Like MakeUnaryArithmeticFunction, but for signed arithmetic ops that need to run
+// only on non-null output.
+template <typename Op>
+std::shared_ptr<ScalarFunction> MakeUnarySignedArithmeticFunctionNotNull(
+ std::string name, const FunctionDoc* doc) {
+ auto func = std::make_shared<ArithmeticFunction>(name, Arity::Unary(), doc);
+ for (const auto& ty : NumericTypes()) {
+ if (!arrow::is_unsigned_integer(ty->id())) {
+ auto exec = ArithmeticExecFromOp<ScalarUnaryNotNull, Op>(ty);
+ DCHECK_OK(func->AddKernel({ty}, ty, exec));
+ }
+ }
+ return func;
+}
+
+template <typename Op>
+std::shared_ptr<ScalarFunction> MakeBitWiseFunctionNotNull(std::string name,
+ const FunctionDoc* doc) {
+ auto func = std::make_shared<ArithmeticFunction>(name, Arity::Binary(), doc);
+ for (const auto& ty : IntTypes()) {
+ auto exec = TypeAgnosticBitWiseExecFromOp<ScalarBinaryNotNullEqualTypes, Op>(ty);
DCHECK_OK(func->AddKernel({ty, ty}, ty, exec));
}
return func;
}
-template <typename Op>
-std::shared_ptr<ScalarFunction> MakeUnaryArithmeticFunction(std::string name,
- const FunctionDoc* doc) {
- auto func = std::make_shared<ArithmeticFunction>(name, Arity::Unary(), doc);
- for (const auto& ty : NumericTypes()) {
- auto exec = ArithmeticExecFromOp<ScalarUnary, Op>(ty);
- DCHECK_OK(func->AddKernel({ty}, ty, exec));
- }
- return func;
-}
-
-// Like MakeUnaryArithmeticFunction, but for unary arithmetic ops with a fixed
-// output type for integral inputs.
-template <typename Op, typename IntOutType>
-std::shared_ptr<ScalarFunction> MakeUnaryArithmeticFunctionWithFixedIntOutType(
- std::string name, const FunctionDoc* doc) {
- auto int_out_ty = TypeTraits<IntOutType>::type_singleton();
- auto func = std::make_shared<ArithmeticFunction>(name, Arity::Unary(), doc);
- for (const auto& ty : NumericTypes()) {
- auto out_ty = arrow::is_floating(ty->id()) ? ty : int_out_ty;
- auto exec = GenerateArithmeticWithFixedIntOutType<ScalarUnary, IntOutType, Op>(ty);
- DCHECK_OK(func->AddKernel({ty}, out_ty, exec));
- }
- return func;
-}
-
-// Like MakeUnaryArithmeticFunction, but for arithmetic ops that need to run
-// only on non-null output.
-template <typename Op>
-std::shared_ptr<ScalarFunction> MakeUnaryArithmeticFunctionNotNull(
- std::string name, const FunctionDoc* doc) {
- auto func = std::make_shared<ArithmeticFunction>(name, Arity::Unary(), doc);
- for (const auto& ty : NumericTypes()) {
- auto exec = ArithmeticExecFromOp<ScalarUnaryNotNull, Op>(ty);
- DCHECK_OK(func->AddKernel({ty}, ty, exec));
- }
- return func;
-}
-
-// Like MakeUnaryArithmeticFunction, but for signed arithmetic ops that need to run
-// only on non-null output.
-template <typename Op>
-std::shared_ptr<ScalarFunction> MakeUnarySignedArithmeticFunctionNotNull(
- std::string name, const FunctionDoc* doc) {
- auto func = std::make_shared<ArithmeticFunction>(name, Arity::Unary(), doc);
- for (const auto& ty : NumericTypes()) {
- if (!arrow::is_unsigned_integer(ty->id())) {
- auto exec = ArithmeticExecFromOp<ScalarUnaryNotNull, Op>(ty);
- DCHECK_OK(func->AddKernel({ty}, ty, exec));
- }
- }
- return func;
-}
-
-template <typename Op>
-std::shared_ptr<ScalarFunction> MakeBitWiseFunctionNotNull(std::string name,
- const FunctionDoc* doc) {
- auto func = std::make_shared<ArithmeticFunction>(name, Arity::Binary(), doc);
- for (const auto& ty : IntTypes()) {
- auto exec = TypeAgnosticBitWiseExecFromOp<ScalarBinaryNotNullEqualTypes, Op>(ty);
- DCHECK_OK(func->AddKernel({ty, ty}, ty, exec));
- }
- return func;
-}
-
-template <typename Op>
-std::shared_ptr<ScalarFunction> MakeShiftFunctionNotNull(std::string name,
- const FunctionDoc* doc) {
- auto func = std::make_shared<ArithmeticFunction>(name, Arity::Binary(), doc);
- for (const auto& ty : IntTypes()) {
- auto exec = ShiftExecFromOp<ScalarBinaryNotNullEqualTypes, Op>(ty);
- DCHECK_OK(func->AddKernel({ty, ty}, ty, exec));
- }
- return func;
-}
-
-template <typename Op>
-std::shared_ptr<ScalarFunction> MakeUnaryArithmeticFunctionFloatingPoint(
- std::string name, const FunctionDoc* doc) {
- auto func =
- std::make_shared<ArithmeticFloatingPointFunction>(name, Arity::Unary(), doc);
- for (const auto& ty : FloatingPointTypes()) {
- auto output = is_integer(ty->id()) ? float64() : ty;
- auto exec = GenerateArithmeticFloatingPoint<ScalarUnary, Op>(ty);
- DCHECK_OK(func->AddKernel({ty}, output, exec));
- }
- return func;
-}
-
-template <typename Op>
-std::shared_ptr<ScalarFunction> MakeUnaryArithmeticFunctionFloatingPointNotNull(
- std::string name, const FunctionDoc* doc) {
- auto func =
- std::make_shared<ArithmeticFloatingPointFunction>(name, Arity::Unary(), doc);
- for (const auto& ty : FloatingPointTypes()) {
- auto output = is_integer(ty->id()) ? float64() : ty;
- auto exec = GenerateArithmeticFloatingPoint<ScalarUnaryNotNull, Op>(ty);
- DCHECK_OK(func->AddKernel({ty}, output, exec));
- }
- return func;
-}
-
-template <typename Op>
-std::shared_ptr<ScalarFunction> MakeArithmeticFunctionFloatingPoint(
- std::string name, const FunctionDoc* doc) {
- auto func =
- std::make_shared<ArithmeticFloatingPointFunction>(name, Arity::Binary(), doc);
- for (const auto& ty : FloatingPointTypes()) {
- auto output = is_integer(ty->id()) ? float64() : ty;
- auto exec = GenerateArithmeticFloatingPoint<ScalarBinaryEqualTypes, Op>(ty);
- DCHECK_OK(func->AddKernel({ty, ty}, output, exec));
- }
- return func;
-}
-
-const FunctionDoc absolute_value_doc{
- "Calculate the absolute value of the argument element-wise",
- ("Results will wrap around on integer overflow.\n"
- "Use function \"abs_checked\" if you want overflow\n"
- "to return an error."),
- {"x"}};
-
-const FunctionDoc absolute_value_checked_doc{
- "Calculate the absolute value of the argument element-wise",
- ("This function returns an error on overflow. For a variant that\n"
- "doesn't fail on overflow, use function \"abs\"."),
- {"x"}};
-
-const FunctionDoc add_doc{"Add the arguments element-wise",
- ("Results will wrap around on integer overflow.\n"
- "Use function \"add_checked\" if you want overflow\n"
- "to return an error."),
- {"x", "y"}};
-
-const FunctionDoc add_checked_doc{
- "Add the arguments element-wise",
- ("This function returns an error on overflow. For a variant that\n"
- "doesn't fail on overflow, use function \"add\"."),
- {"x", "y"}};
-
-const FunctionDoc sub_doc{"Subtract the arguments element-wise",
- ("Results will wrap around on integer overflow.\n"
- "Use function \"subtract_checked\" if you want overflow\n"
- "to return an error."),
- {"x", "y"}};
-
-const FunctionDoc sub_checked_doc{
- "Subtract the arguments element-wise",
- ("This function returns an error on overflow. For a variant that\n"
- "doesn't fail on overflow, use function \"subtract\"."),
- {"x", "y"}};
-
-const FunctionDoc mul_doc{"Multiply the arguments element-wise",
- ("Results will wrap around on integer overflow.\n"
- "Use function \"multiply_checked\" if you want overflow\n"
- "to return an error."),
- {"x", "y"}};
-
-const FunctionDoc mul_checked_doc{
- "Multiply the arguments element-wise",
- ("This function returns an error on overflow. For a variant that\n"
- "doesn't fail on overflow, use function \"multiply\"."),
- {"x", "y"}};
-
-const FunctionDoc div_doc{
- "Divide the arguments element-wise",
- ("Integer division by zero returns an error. However, integer overflow\n"
- "wraps around, and floating-point division by zero returns an infinite.\n"
- "Use function \"divide_checked\" if you want to get an error\n"
- "in all the aforementioned cases."),
- {"dividend", "divisor"}};
-
-const FunctionDoc div_checked_doc{
- "Divide the arguments element-wise",
- ("An error is returned when trying to divide by zero, or when\n"
- "integer overflow is encountered."),
- {"dividend", "divisor"}};
-
-const FunctionDoc negate_doc{"Negate the argument element-wise",
- ("Results will wrap around on integer overflow.\n"
- "Use function \"negate_checked\" if you want overflow\n"
- "to return an error."),
- {"x"}};
-
-const FunctionDoc negate_checked_doc{
- "Negate the arguments element-wise",
- ("This function returns an error on overflow. For a variant that\n"
- "doesn't fail on overflow, use function \"negate\"."),
- {"x"}};
-
-const FunctionDoc pow_doc{
- "Raise arguments to power element-wise",
- ("Integer to negative integer power returns an error. However, integer overflow\n"
- "wraps around. If either base or exponent is null the result will be null."),
- {"base", "exponent"}};
-
-const FunctionDoc pow_checked_doc{
- "Raise arguments to power element-wise",
- ("An error is returned when integer to negative integer power is encountered,\n"
- "or integer overflow is encountered."),
- {"base", "exponent"}};
-
-const FunctionDoc sign_doc{
- "Get the signedness of the arguments element-wise",
- ("Output is any of (-1,1) for nonzero inputs and 0 for zero input.\n"
- "NaN values return NaN. Integral values return signedness as Int8 and\n"
- "floating-point values return it with the same type as the input values."),
- {"x"}};
-
-const FunctionDoc bit_wise_not_doc{
- "Bit-wise negate the arguments element-wise", "Null values return null.", {"x"}};
-
-const FunctionDoc bit_wise_and_doc{
- "Bit-wise AND the arguments element-wise", "Null values return null.", {"x", "y"}};
-
-const FunctionDoc bit_wise_or_doc{
- "Bit-wise OR the arguments element-wise", "Null values return null.", {"x", "y"}};
-
-const FunctionDoc bit_wise_xor_doc{
- "Bit-wise XOR the arguments element-wise", "Null values return null.", {"x", "y"}};
-
-const FunctionDoc shift_left_doc{
- "Left shift `x` by `y`",
- ("This function will return `x` if `y` (the amount to shift by) is: "
- "(1) negative or (2) greater than or equal to the precision of `x`.\n"
- "The shift operates as if on the two's complement representation of the number. "
- "In other words, this is equivalent to multiplying `x` by 2 to the power `y`, "
- "even if overflow occurs.\n"
- "Use function \"shift_left_checked\" if you want an invalid shift amount to "
- "return an error."),
- {"x", "y"}};
-
-const FunctionDoc shift_left_checked_doc{
- "Left shift `x` by `y` with invalid shift check",
- ("This function will raise an error if `y` (the amount to shift by) is: "
- "(1) negative or (2) greater than or equal to the precision of `x`. "
- "The shift operates as if on the two's complement representation of the number. "
- "In other words, this is equivalent to multiplying `x` by 2 to the power `y`, "
- "even if overflow occurs.\n"
- "See \"shift_left\" for a variant that doesn't fail for an invalid shift amount."),
- {"x", "y"}};
-
-const FunctionDoc shift_right_doc{
- "Right shift `x` by `y`",
- ("Perform a logical shift for unsigned `x` and an arithmetic shift for signed `x`.\n"
- "This function will return `x` if `y` (the amount to shift by) is: "
- "(1) negative or (2) greater than or equal to the precision of `x`.\n"
- "Use function \"shift_right_checked\" if you want an invalid shift amount to return "
- "an error."),
- {"x", "y"}};
-
-const FunctionDoc shift_right_checked_doc{
- "Right shift `x` by `y` with invalid shift check",
- ("Perform a logical shift for unsigned `x` and an arithmetic shift for signed `x`.\n"
- "This function will raise an error if `y` (the amount to shift by) is: "
- "(1) negative or (2) greater than or equal to the precision of `x`.\n"
- "See \"shift_right\" for a variant that doesn't fail for an invalid shift amount"),
- {"x", "y"}};
-
-const FunctionDoc sin_doc{"Compute the sine of the argument element-wise",
- ("Integer arguments return double values. "
- "This function returns NaN on values outside its domain. "
- "To raise an error instead, see \"sin_checked\"."),
- {"x"}};
-
-const FunctionDoc sin_checked_doc{
- "Compute the sine of the elements argument-wise",
- ("Integer arguments return double values. "
- "This function raises an error on values outside its domain. "
- "To return NaN instead, see \"sin\"."),
- {"x"}};
-
-const FunctionDoc cos_doc{"Compute the cosine of the argument element-wise",
- ("Integer arguments return double values. "
- "This function returns NaN on values outside its domain. "
- "To raise an error instead, see \"cos_checked\"."),
- {"x"}};
-
-const FunctionDoc cos_checked_doc{
- "Compute the cosine of the elements argument-wise",
- ("Integer arguments return double values. "
- "This function raises an error on values outside its domain. "
- "To return NaN instead, see \"cos\"."),
- {"x"}};
-
-const FunctionDoc tan_doc{"Compute the tangent of the argument element-wise",
- ("Integer arguments return double values. "
- "This function returns NaN on values outside its domain. "
- "To raise an error instead, see \"tan_checked\"."),
- {"x"}};
-
-const FunctionDoc tan_checked_doc{
- "Compute the tangent of the elements argument-wise",
- ("Integer arguments return double values. "
- "This function raises an error on values outside its domain. "
- "To return NaN instead, see \"tan\"."),
- {"x"}};
-
-const FunctionDoc asin_doc{"Compute the inverse sine of the argument element-wise",
- ("Integer arguments return double values. "
- "This function returns NaN on values outside its domain. "
- "To raise an error instead, see \"asin_checked\"."),
- {"x"}};
-
-const FunctionDoc asin_checked_doc{
- "Compute the inverse sine of the elements argument-wise",
- ("Integer arguments return double values. "
- "This function raises an error on values outside its domain. "
- "To return NaN instead, see \"asin\"."),
- {"x"}};
-
-const FunctionDoc acos_doc{"Compute the inverse cosine of the argument element-wise",
- ("Integer arguments return double values. "
- "This function returns NaN on values outside its domain. "
- "To raise an error instead, see \"acos_checked\"."),
- {"x"}};
-
-const FunctionDoc acos_checked_doc{
- "Compute the inverse cosine of the elements argument-wise",
- ("Integer arguments return double values. "
- "This function raises an error on values outside its domain. "
- "To return NaN instead, see \"acos\"."),
- {"x"}};
-
-const FunctionDoc atan_doc{"Compute the principal value of the inverse tangent",
- "Integer arguments return double values.",
- {"x"}};
-
-const FunctionDoc atan2_doc{
- "Compute the inverse tangent using argument signs to determine the quadrant",
- "Integer arguments return double values.",
- {"y", "x"}};
-
-const FunctionDoc ln_doc{
- "Compute natural log of arguments element-wise",
- ("Non-positive values return -inf or NaN. Null values return null.\n"
- "Use function \"ln_checked\" if you want non-positive values to raise an error."),
- {"x"}};
-
-const FunctionDoc ln_checked_doc{
- "Compute natural log of arguments element-wise",
- ("Non-positive values return -inf or NaN. Null values return null.\n"
- "Use function \"ln\" if you want non-positive values to return "
- "-inf or NaN."),
- {"x"}};
-
-const FunctionDoc log10_doc{
- "Compute log base 10 of arguments element-wise",
- ("Non-positive values return -inf or NaN. Null values return null.\n"
- "Use function \"log10_checked\" if you want non-positive values to raise an error."),
- {"x"}};
-
-const FunctionDoc log10_checked_doc{
- "Compute log base 10 of arguments element-wise",
- ("Non-positive values return -inf or NaN. Null values return null.\n"
- "Use function \"log10\" if you want non-positive values to return "
- "-inf or NaN."),
- {"x"}};
-
-const FunctionDoc log2_doc{
- "Compute log base 2 of arguments element-wise",
- ("Non-positive values return -inf or NaN. Null values return null.\n"
- "Use function \"log2_checked\" if you want non-positive values to raise an error."),
- {"x"}};
-
-const FunctionDoc log2_checked_doc{
- "Compute log base 2 of arguments element-wise",
- ("Non-positive values return -inf or NaN. Null values return null.\n"
- "Use function \"log2\" if you want non-positive values to return "
- "-inf or NaN."),
- {"x"}};
-
-const FunctionDoc log1p_doc{
- "Compute natural log of (1+x) element-wise",
- ("Values <= -1 return -inf or NaN. Null values return null.\n"
- "This function may be more precise than log(1 + x) for x close to zero."
- "Use function \"log1p_checked\" if you want non-positive values to raise an error."),
- {"x"}};
-
-const FunctionDoc log1p_checked_doc{
- "Compute natural log of (1+x) element-wise",
- ("Values <= -1 return -inf or NaN. Null values return null.\n"
- "This function may be more precise than log(1 + x) for x close to zero."
- "Use function \"log1p\" if you want non-positive values to return "
- "-inf or NaN."),
- {"x"}};
-
-const FunctionDoc floor_doc{
- "Round down to the nearest integer",
- ("Calculate the nearest integer less than or equal in magnitude to the "
- "argument element-wise"),
- {"x"}};
-
-const FunctionDoc ceil_doc{
- "Round up to the nearest integer",
- ("Calculate the nearest integer greater than or equal in magnitude to the "
- "argument element-wise"),
- {"x"}};
-
-const FunctionDoc trunc_doc{
- "Get the integral part without fractional digits",
- ("Calculate the nearest integer not greater in magnitude than to the "
- "argument element-wise."),
- {"x"}};
+template <typename Op>
+std::shared_ptr<ScalarFunction> MakeShiftFunctionNotNull(std::string name,
+ const FunctionDoc* doc) {
+ auto func = std::make_shared<ArithmeticFunction>(name, Arity::Binary(), doc);
+ for (const auto& ty : IntTypes()) {
+ auto exec = ShiftExecFromOp<ScalarBinaryNotNullEqualTypes, Op>(ty);
+ DCHECK_OK(func->AddKernel({ty, ty}, ty, exec));
+ }
+ return func;
+}
+
+template <typename Op>
+std::shared_ptr<ScalarFunction> MakeUnaryArithmeticFunctionFloatingPoint(
+ std::string name, const FunctionDoc* doc) {
+ auto func =
+ std::make_shared<ArithmeticFloatingPointFunction>(name, Arity::Unary(), doc);
+ for (const auto& ty : FloatingPointTypes()) {
+ auto output = is_integer(ty->id()) ? float64() : ty;
+ auto exec = GenerateArithmeticFloatingPoint<ScalarUnary, Op>(ty);
+ DCHECK_OK(func->AddKernel({ty}, output, exec));
+ }
+ return func;
+}
+
+template <typename Op>
+std::shared_ptr<ScalarFunction> MakeUnaryArithmeticFunctionFloatingPointNotNull(
+ std::string name, const FunctionDoc* doc) {
+ auto func =
+ std::make_shared<ArithmeticFloatingPointFunction>(name, Arity::Unary(), doc);
+ for (const auto& ty : FloatingPointTypes()) {
+ auto output = is_integer(ty->id()) ? float64() : ty;
+ auto exec = GenerateArithmeticFloatingPoint<ScalarUnaryNotNull, Op>(ty);
+ DCHECK_OK(func->AddKernel({ty}, output, exec));
+ }
+ return func;
+}
+
+template <typename Op>
+std::shared_ptr<ScalarFunction> MakeArithmeticFunctionFloatingPoint(
+ std::string name, const FunctionDoc* doc) {
+ auto func =
+ std::make_shared<ArithmeticFloatingPointFunction>(name, Arity::Binary(), doc);
+ for (const auto& ty : FloatingPointTypes()) {
+ auto output = is_integer(ty->id()) ? float64() : ty;
+ auto exec = GenerateArithmeticFloatingPoint<ScalarBinaryEqualTypes, Op>(ty);
+ DCHECK_OK(func->AddKernel({ty, ty}, output, exec));
+ }
+ return func;
+}
+
+const FunctionDoc absolute_value_doc{
+ "Calculate the absolute value of the argument element-wise",
+ ("Results will wrap around on integer overflow.\n"
+ "Use function \"abs_checked\" if you want overflow\n"
+ "to return an error."),
+ {"x"}};
+
+const FunctionDoc absolute_value_checked_doc{
+ "Calculate the absolute value of the argument element-wise",
+ ("This function returns an error on overflow. For a variant that\n"
+ "doesn't fail on overflow, use function \"abs\"."),
+ {"x"}};
+
+const FunctionDoc add_doc{"Add the arguments element-wise",
+ ("Results will wrap around on integer overflow.\n"
+ "Use function \"add_checked\" if you want overflow\n"
+ "to return an error."),
+ {"x", "y"}};
+
+const FunctionDoc add_checked_doc{
+ "Add the arguments element-wise",
+ ("This function returns an error on overflow. For a variant that\n"
+ "doesn't fail on overflow, use function \"add\"."),
+ {"x", "y"}};
+
+const FunctionDoc sub_doc{"Subtract the arguments element-wise",
+ ("Results will wrap around on integer overflow.\n"
+ "Use function \"subtract_checked\" if you want overflow\n"
+ "to return an error."),
+ {"x", "y"}};
+
+const FunctionDoc sub_checked_doc{
+ "Subtract the arguments element-wise",
+ ("This function returns an error on overflow. For a variant that\n"
+ "doesn't fail on overflow, use function \"subtract\"."),
+ {"x", "y"}};
+
+const FunctionDoc mul_doc{"Multiply the arguments element-wise",
+ ("Results will wrap around on integer overflow.\n"
+ "Use function \"multiply_checked\" if you want overflow\n"
+ "to return an error."),
+ {"x", "y"}};
+
+const FunctionDoc mul_checked_doc{
+ "Multiply the arguments element-wise",
+ ("This function returns an error on overflow. For a variant that\n"
+ "doesn't fail on overflow, use function \"multiply\"."),
+ {"x", "y"}};
+
+const FunctionDoc div_doc{
+ "Divide the arguments element-wise",
+ ("Integer division by zero returns an error. However, integer overflow\n"
+    "wraps around, and floating-point division by zero returns infinity.\n"
+ "Use function \"divide_checked\" if you want to get an error\n"
+ "in all the aforementioned cases."),
+ {"dividend", "divisor"}};
+
+const FunctionDoc div_checked_doc{
+ "Divide the arguments element-wise",
+ ("An error is returned when trying to divide by zero, or when\n"
+ "integer overflow is encountered."),
+ {"dividend", "divisor"}};
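+// Worked example: divide(1, 0) errors for integer inputs, divide(1.0, 0.0)
+// yields +inf, and divide_checked reports an error in both cases.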
+
+const FunctionDoc negate_doc{"Negate the argument element-wise",
+ ("Results will wrap around on integer overflow.\n"
+ "Use function \"negate_checked\" if you want overflow\n"
+ "to return an error."),
+ {"x"}};
+
+const FunctionDoc negate_checked_doc{
+    "Negate the argument element-wise",
+ ("This function returns an error on overflow. For a variant that\n"
+ "doesn't fail on overflow, use function \"negate\"."),
+ {"x"}};
+
+const FunctionDoc pow_doc{
+ "Raise arguments to power element-wise",
+ ("Integer to negative integer power returns an error. However, integer overflow\n"
+ "wraps around. If either base or exponent is null the result will be null."),
+ {"base", "exponent"}};
+
+const FunctionDoc pow_checked_doc{
+ "Raise arguments to power element-wise",
+ ("An error is returned when integer to negative integer power is encountered,\n"
+ "or integer overflow is encountered."),
+ {"base", "exponent"}};
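+// Worked example: power(2, -2) errors for integer inputs (negative
+// exponent), while power(2.0, -2.0) == 0.25 for floating-point inputs.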
+
+const FunctionDoc sign_doc{
+    "Get the signedness of the argument element-wise",
+    ("Output is any of (-1, 1) for nonzero inputs and 0 for zero input.\n"
+ "NaN values return NaN. Integral values return signedness as Int8 and\n"
+ "floating-point values return it with the same type as the input values."),
+ {"x"}};
+
+const FunctionDoc bit_wise_not_doc{
+    "Bit-wise negate the argument element-wise", "Null values return null.", {"x"}};
+
+const FunctionDoc bit_wise_and_doc{
+ "Bit-wise AND the arguments element-wise", "Null values return null.", {"x", "y"}};
+
+const FunctionDoc bit_wise_or_doc{
+ "Bit-wise OR the arguments element-wise", "Null values return null.", {"x", "y"}};
+
+const FunctionDoc bit_wise_xor_doc{
+ "Bit-wise XOR the arguments element-wise", "Null values return null.", {"x", "y"}};
+
+const FunctionDoc shift_left_doc{
+ "Left shift `x` by `y`",
+ ("This function will return `x` if `y` (the amount to shift by) is: "
+ "(1) negative or (2) greater than or equal to the precision of `x`.\n"
+ "The shift operates as if on the two's complement representation of the number. "
+ "In other words, this is equivalent to multiplying `x` by 2 to the power `y`, "
+ "even if overflow occurs.\n"
+ "Use function \"shift_left_checked\" if you want an invalid shift amount to "
+ "return an error."),
+ {"x", "y"}};
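+// Worked example: shift_left(1, 3) == 8, while shift_left(x, 64) on int64
+// returns `x` unchanged since the shift amount equals the precision of `x`.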
+
+const FunctionDoc shift_left_checked_doc{
+ "Left shift `x` by `y` with invalid shift check",
+ ("This function will raise an error if `y` (the amount to shift by) is: "
+ "(1) negative or (2) greater than or equal to the precision of `x`. "
+ "The shift operates as if on the two's complement representation of the number. "
+ "In other words, this is equivalent to multiplying `x` by 2 to the power `y`, "
+ "even if overflow occurs.\n"
+ "See \"shift_left\" for a variant that doesn't fail for an invalid shift amount."),
+ {"x", "y"}};
+
+const FunctionDoc shift_right_doc{
+ "Right shift `x` by `y`",
+ ("Perform a logical shift for unsigned `x` and an arithmetic shift for signed `x`.\n"
+ "This function will return `x` if `y` (the amount to shift by) is: "
+ "(1) negative or (2) greater than or equal to the precision of `x`.\n"
+ "Use function \"shift_right_checked\" if you want an invalid shift amount to return "
+ "an error."),
+ {"x", "y"}};
+
+const FunctionDoc shift_right_checked_doc{
+ "Right shift `x` by `y` with invalid shift check",
+ ("Perform a logical shift for unsigned `x` and an arithmetic shift for signed `x`.\n"
+ "This function will raise an error if `y` (the amount to shift by) is: "
+ "(1) negative or (2) greater than or equal to the precision of `x`.\n"
+    "See \"shift_right\" for a variant that doesn't fail for an invalid shift amount."),
+ {"x", "y"}};
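+// Worked example: shift_right(int8_t{-8}, 1) == -4 (arithmetic shift for
+// signed input), while shift_right(uint8_t{248}, 1) == 124 (logical shift).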
+
+const FunctionDoc sin_doc{"Compute the sine of the argument element-wise",
+ ("Integer arguments return double values. "
+ "This function returns NaN on values outside its domain. "
+ "To raise an error instead, see \"sin_checked\"."),
+ {"x"}};
+
+const FunctionDoc sin_checked_doc{
+    "Compute the sine of the argument element-wise",
+ ("Integer arguments return double values. "
+ "This function raises an error on values outside its domain. "
+ "To return NaN instead, see \"sin\"."),
+ {"x"}};
+
+const FunctionDoc cos_doc{"Compute the cosine of the argument element-wise",
+ ("Integer arguments return double values. "
+ "This function returns NaN on values outside its domain. "
+ "To raise an error instead, see \"cos_checked\"."),
+ {"x"}};
+
+const FunctionDoc cos_checked_doc{
+    "Compute the cosine of the argument element-wise",
+ ("Integer arguments return double values. "
+ "This function raises an error on values outside its domain. "
+ "To return NaN instead, see \"cos\"."),
+ {"x"}};
+
+const FunctionDoc tan_doc{"Compute the tangent of the argument element-wise",
+ ("Integer arguments return double values. "
+ "This function returns NaN on values outside its domain. "
+ "To raise an error instead, see \"tan_checked\"."),
+ {"x"}};
+
+const FunctionDoc tan_checked_doc{
+    "Compute the tangent of the argument element-wise",
+ ("Integer arguments return double values. "
+ "This function raises an error on values outside its domain. "
+ "To return NaN instead, see \"tan\"."),
+ {"x"}};
+
+const FunctionDoc asin_doc{"Compute the inverse sine of the argument element-wise",
+ ("Integer arguments return double values. "
+ "This function returns NaN on values outside its domain. "
+ "To raise an error instead, see \"asin_checked\"."),
+ {"x"}};
+
+const FunctionDoc asin_checked_doc{
+    "Compute the inverse sine of the argument element-wise",
+ ("Integer arguments return double values. "
+ "This function raises an error on values outside its domain. "
+ "To return NaN instead, see \"asin\"."),
+ {"x"}};
+
+const FunctionDoc acos_doc{"Compute the inverse cosine of the argument element-wise",
+ ("Integer arguments return double values. "
+ "This function returns NaN on values outside its domain. "
+ "To raise an error instead, see \"acos_checked\"."),
+ {"x"}};
+
+const FunctionDoc acos_checked_doc{
+    "Compute the inverse cosine of the argument element-wise",
+ ("Integer arguments return double values. "
+ "This function raises an error on values outside its domain. "
+ "To return NaN instead, see \"acos\"."),
+ {"x"}};
+
+const FunctionDoc atan_doc{"Compute the principal value of the inverse tangent",
+ "Integer arguments return double values.",
+ {"x"}};
+
+const FunctionDoc atan2_doc{
+ "Compute the inverse tangent using argument signs to determine the quadrant",
+ "Integer arguments return double values.",
+ {"y", "x"}};
+
+const FunctionDoc ln_doc{
+ "Compute natural log of arguments element-wise",
+ ("Non-positive values return -inf or NaN. Null values return null.\n"
+ "Use function \"ln_checked\" if you want non-positive values to raise an error."),
+ {"x"}};
+
+const FunctionDoc ln_checked_doc{
+ "Compute natural log of arguments element-wise",
+ ("Non-positive values return -inf or NaN. Null values return null.\n"
+ "Use function \"ln\" if you want non-positive values to return "
+ "-inf or NaN."),
+ {"x"}};
+
+const FunctionDoc log10_doc{
+ "Compute log base 10 of arguments element-wise",
+ ("Non-positive values return -inf or NaN. Null values return null.\n"
+ "Use function \"log10_checked\" if you want non-positive values to raise an error."),
+ {"x"}};
+
+const FunctionDoc log10_checked_doc{
+ "Compute log base 10 of arguments element-wise",
+ ("Non-positive values return -inf or NaN. Null values return null.\n"
+ "Use function \"log10\" if you want non-positive values to return "
+ "-inf or NaN."),
+ {"x"}};
+
+const FunctionDoc log2_doc{
+ "Compute log base 2 of arguments element-wise",
+ ("Non-positive values return -inf or NaN. Null values return null.\n"
+ "Use function \"log2_checked\" if you want non-positive values to raise an error."),
+ {"x"}};
+
+const FunctionDoc log2_checked_doc{
+ "Compute log base 2 of arguments element-wise",
+ ("Non-positive values return -inf or NaN. Null values return null.\n"
+ "Use function \"log2\" if you want non-positive values to return "
+ "-inf or NaN."),
+ {"x"}};
+
+const FunctionDoc log1p_doc{
+ "Compute natural log of (1+x) element-wise",
+ ("Values <= -1 return -inf or NaN. Null values return null.\n"
+    "This function may be more precise than log(1 + x) for x close to zero.\n"
+    "Use function \"log1p_checked\" if you want values <= -1 to raise an error."),
+ {"x"}};
+
+const FunctionDoc log1p_checked_doc{
+ "Compute natural log of (1+x) element-wise",
+ ("Values <= -1 return -inf or NaN. Null values return null.\n"
+    "This function may be more precise than log(1 + x) for x close to zero.\n"
+    "Use function \"log1p\" if you want values <= -1 to return "
+ "-inf or NaN."),
+ {"x"}};
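+// Worked example of the precision note above: log1p(1e-10) keeps full
+// precision, whereas computing ln(1 + 1e-10) directly rounds 1 + 1e-10
+// and loses most significant digits of the result.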
+
+const FunctionDoc floor_doc{
+ "Round down to the nearest integer",
+    ("Calculate the nearest integer less than or equal to the "
+     "argument element-wise."),
+ {"x"}};
+
+const FunctionDoc ceil_doc{
+ "Round up to the nearest integer",
+    ("Calculate the nearest integer greater than or equal to the "
+     "argument element-wise."),
+ {"x"}};
+
+const FunctionDoc trunc_doc{
+ "Get the integral part without fractional digits",
+    ("Calculate the nearest integer not greater in magnitude than the "
+ "argument element-wise."),
+ {"x"}};
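+// Worked example contrasting the three rounding modes on -1.5:
+// floor(-1.5) == -2, ceil(-1.5) == -1, trunc(-1.5) == -1 (truncation
+// drops the fractional part, rounding toward zero).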
} // namespace
void RegisterScalarArithmetic(FunctionRegistry* registry) {
// ----------------------------------------------------------------------
- auto absolute_value =
- MakeUnaryArithmeticFunction<AbsoluteValue>("abs", &absolute_value_doc);
- DCHECK_OK(registry->AddFunction(std::move(absolute_value)));
-
- // ----------------------------------------------------------------------
- auto absolute_value_checked = MakeUnaryArithmeticFunctionNotNull<AbsoluteValueChecked>(
- "abs_checked", &absolute_value_checked_doc);
- DCHECK_OK(registry->AddFunction(std::move(absolute_value_checked)));
-
- // ----------------------------------------------------------------------
- auto add = MakeArithmeticFunction<Add>("add", &add_doc);
- AddDecimalBinaryKernels<Add>("add", &add);
+ auto absolute_value =
+ MakeUnaryArithmeticFunction<AbsoluteValue>("abs", &absolute_value_doc);
+ DCHECK_OK(registry->AddFunction(std::move(absolute_value)));
+
+ // ----------------------------------------------------------------------
+ auto absolute_value_checked = MakeUnaryArithmeticFunctionNotNull<AbsoluteValueChecked>(
+ "abs_checked", &absolute_value_checked_doc);
+ DCHECK_OK(registry->AddFunction(std::move(absolute_value_checked)));
+
+ // ----------------------------------------------------------------------
+ auto add = MakeArithmeticFunction<Add>("add", &add_doc);
+ AddDecimalBinaryKernels<Add>("add", &add);
DCHECK_OK(registry->AddFunction(std::move(add)));
// ----------------------------------------------------------------------
- auto add_checked =
- MakeArithmeticFunctionNotNull<AddChecked>("add_checked", &add_checked_doc);
- AddDecimalBinaryKernels<AddChecked>("add_checked", &add_checked);
+ auto add_checked =
+ MakeArithmeticFunctionNotNull<AddChecked>("add_checked", &add_checked_doc);
+ AddDecimalBinaryKernels<AddChecked>("add_checked", &add_checked);
DCHECK_OK(registry->AddFunction(std::move(add_checked)));
// ----------------------------------------------------------------------
- auto subtract = MakeArithmeticFunction<Subtract>("subtract", &sub_doc);
- AddDecimalBinaryKernels<Subtract>("subtract", &subtract);
+ auto subtract = MakeArithmeticFunction<Subtract>("subtract", &sub_doc);
+ AddDecimalBinaryKernels<Subtract>("subtract", &subtract);
// Add subtract(timestamp, timestamp) -> duration
for (auto unit : AllTimeUnits()) {
InputType in_type(match::TimestampTypeUnit(unit));
- auto exec = ArithmeticExecFromOp<ScalarBinaryEqualTypes, Subtract>(Type::TIMESTAMP);
+ auto exec = ArithmeticExecFromOp<ScalarBinaryEqualTypes, Subtract>(Type::TIMESTAMP);
DCHECK_OK(subtract->AddKernel({in_type, in_type}, duration(unit), std::move(exec)));
}
DCHECK_OK(registry->AddFunction(std::move(subtract)));
// ----------------------------------------------------------------------
- auto subtract_checked = MakeArithmeticFunctionNotNull<SubtractChecked>(
- "subtract_checked", &sub_checked_doc);
- AddDecimalBinaryKernels<SubtractChecked>("subtract_checked", &subtract_checked);
+ auto subtract_checked = MakeArithmeticFunctionNotNull<SubtractChecked>(
+ "subtract_checked", &sub_checked_doc);
+ AddDecimalBinaryKernels<SubtractChecked>("subtract_checked", &subtract_checked);
DCHECK_OK(registry->AddFunction(std::move(subtract_checked)));
// ----------------------------------------------------------------------
- auto multiply = MakeArithmeticFunction<Multiply>("multiply", &mul_doc);
- AddDecimalBinaryKernels<Multiply>("multiply", &multiply);
+ auto multiply = MakeArithmeticFunction<Multiply>("multiply", &mul_doc);
+ AddDecimalBinaryKernels<Multiply>("multiply", &multiply);
DCHECK_OK(registry->AddFunction(std::move(multiply)));
// ----------------------------------------------------------------------
- auto multiply_checked = MakeArithmeticFunctionNotNull<MultiplyChecked>(
- "multiply_checked", &mul_checked_doc);
- AddDecimalBinaryKernels<MultiplyChecked>("multiply_checked", &multiply_checked);
+ auto multiply_checked = MakeArithmeticFunctionNotNull<MultiplyChecked>(
+ "multiply_checked", &mul_checked_doc);
+ AddDecimalBinaryKernels<MultiplyChecked>("multiply_checked", &multiply_checked);
DCHECK_OK(registry->AddFunction(std::move(multiply_checked)));
// ----------------------------------------------------------------------
- auto divide = MakeArithmeticFunctionNotNull<Divide>("divide", &div_doc);
- AddDecimalBinaryKernels<Divide>("divide", &divide);
+ auto divide = MakeArithmeticFunctionNotNull<Divide>("divide", &div_doc);
+ AddDecimalBinaryKernels<Divide>("divide", &divide);
DCHECK_OK(registry->AddFunction(std::move(divide)));
// ----------------------------------------------------------------------
- auto divide_checked =
- MakeArithmeticFunctionNotNull<DivideChecked>("divide_checked", &div_checked_doc);
- AddDecimalBinaryKernels<DivideChecked>("divide_checked", &divide_checked);
+ auto divide_checked =
+ MakeArithmeticFunctionNotNull<DivideChecked>("divide_checked", &div_checked_doc);
+ AddDecimalBinaryKernels<DivideChecked>("divide_checked", &divide_checked);
DCHECK_OK(registry->AddFunction(std::move(divide_checked)));
-
- // ----------------------------------------------------------------------
- auto negate = MakeUnaryArithmeticFunction<Negate>("negate", &negate_doc);
- DCHECK_OK(registry->AddFunction(std::move(negate)));
-
- // ----------------------------------------------------------------------
- auto negate_checked = MakeUnarySignedArithmeticFunctionNotNull<NegateChecked>(
- "negate_checked", &negate_checked_doc);
- DCHECK_OK(registry->AddFunction(std::move(negate_checked)));
-
- // ----------------------------------------------------------------------
- auto power = MakeArithmeticFunction<Power>("power", &pow_doc);
- DCHECK_OK(registry->AddFunction(std::move(power)));
-
- // ----------------------------------------------------------------------
- auto power_checked =
- MakeArithmeticFunctionNotNull<PowerChecked>("power_checked", &pow_checked_doc);
- DCHECK_OK(registry->AddFunction(std::move(power_checked)));
-
- // ----------------------------------------------------------------------
- auto sign =
- MakeUnaryArithmeticFunctionWithFixedIntOutType<Sign, Int8Type>("sign", &sign_doc);
- DCHECK_OK(registry->AddFunction(std::move(sign)));
-
- // ----------------------------------------------------------------------
- // Bitwise functions
- {
- auto bit_wise_not = std::make_shared<ArithmeticFunction>(
- "bit_wise_not", Arity::Unary(), &bit_wise_not_doc);
- for (const auto& ty : IntTypes()) {
- auto exec = TypeAgnosticBitWiseExecFromOp<ScalarUnaryNotNull, BitWiseNot>(ty);
- DCHECK_OK(bit_wise_not->AddKernel({ty}, ty, exec));
- }
- DCHECK_OK(registry->AddFunction(std::move(bit_wise_not)));
- }
-
- auto bit_wise_and =
- MakeBitWiseFunctionNotNull<BitWiseAnd>("bit_wise_and", &bit_wise_and_doc);
- DCHECK_OK(registry->AddFunction(std::move(bit_wise_and)));
-
- auto bit_wise_or =
- MakeBitWiseFunctionNotNull<BitWiseOr>("bit_wise_or", &bit_wise_or_doc);
- DCHECK_OK(registry->AddFunction(std::move(bit_wise_or)));
-
- auto bit_wise_xor =
- MakeBitWiseFunctionNotNull<BitWiseXor>("bit_wise_xor", &bit_wise_xor_doc);
- DCHECK_OK(registry->AddFunction(std::move(bit_wise_xor)));
-
- auto shift_left = MakeShiftFunctionNotNull<ShiftLeft>("shift_left", &shift_left_doc);
- DCHECK_OK(registry->AddFunction(std::move(shift_left)));
-
- auto shift_left_checked = MakeShiftFunctionNotNull<ShiftLeftChecked>(
- "shift_left_checked", &shift_left_checked_doc);
- DCHECK_OK(registry->AddFunction(std::move(shift_left_checked)));
-
- auto shift_right =
- MakeShiftFunctionNotNull<ShiftRight>("shift_right", &shift_right_doc);
- DCHECK_OK(registry->AddFunction(std::move(shift_right)));
-
- auto shift_right_checked = MakeShiftFunctionNotNull<ShiftRightChecked>(
- "shift_right_checked", &shift_right_checked_doc);
- DCHECK_OK(registry->AddFunction(std::move(shift_right_checked)));
-
- // ----------------------------------------------------------------------
- // Trig functions
- auto sin = MakeUnaryArithmeticFunctionFloatingPoint<Sin>("sin", &sin_doc);
- DCHECK_OK(registry->AddFunction(std::move(sin)));
-
- auto sin_checked = MakeUnaryArithmeticFunctionFloatingPointNotNull<SinChecked>(
- "sin_checked", &sin_checked_doc);
- DCHECK_OK(registry->AddFunction(std::move(sin_checked)));
-
- auto cos = MakeUnaryArithmeticFunctionFloatingPoint<Cos>("cos", &cos_doc);
- DCHECK_OK(registry->AddFunction(std::move(cos)));
-
- auto cos_checked = MakeUnaryArithmeticFunctionFloatingPointNotNull<CosChecked>(
- "cos_checked", &cos_checked_doc);
- DCHECK_OK(registry->AddFunction(std::move(cos_checked)));
-
- auto tan = MakeUnaryArithmeticFunctionFloatingPoint<Tan>("tan", &tan_doc);
- DCHECK_OK(registry->AddFunction(std::move(tan)));
-
- auto tan_checked = MakeUnaryArithmeticFunctionFloatingPointNotNull<TanChecked>(
- "tan_checked", &tan_checked_doc);
- DCHECK_OK(registry->AddFunction(std::move(tan_checked)));
-
- auto asin = MakeUnaryArithmeticFunctionFloatingPoint<Asin>("asin", &asin_doc);
- DCHECK_OK(registry->AddFunction(std::move(asin)));
-
- auto asin_checked = MakeUnaryArithmeticFunctionFloatingPointNotNull<AsinChecked>(
- "asin_checked", &asin_checked_doc);
- DCHECK_OK(registry->AddFunction(std::move(asin_checked)));
-
- auto acos = MakeUnaryArithmeticFunctionFloatingPoint<Acos>("acos", &acos_doc);
- DCHECK_OK(registry->AddFunction(std::move(acos)));
-
- auto acos_checked = MakeUnaryArithmeticFunctionFloatingPointNotNull<AcosChecked>(
- "acos_checked", &acos_checked_doc);
- DCHECK_OK(registry->AddFunction(std::move(acos_checked)));
-
- auto atan = MakeUnaryArithmeticFunctionFloatingPoint<Atan>("atan", &atan_doc);
- DCHECK_OK(registry->AddFunction(std::move(atan)));
-
- auto atan2 = MakeArithmeticFunctionFloatingPoint<Atan2>("atan2", &atan2_doc);
- DCHECK_OK(registry->AddFunction(std::move(atan2)));
-
- // ----------------------------------------------------------------------
- // Logarithms
- auto ln = MakeUnaryArithmeticFunctionFloatingPoint<LogNatural>("ln", &ln_doc);
- DCHECK_OK(registry->AddFunction(std::move(ln)));
-
- auto ln_checked = MakeUnaryArithmeticFunctionFloatingPointNotNull<LogNaturalChecked>(
- "ln_checked", &ln_checked_doc);
- DCHECK_OK(registry->AddFunction(std::move(ln_checked)));
-
- auto log10 = MakeUnaryArithmeticFunctionFloatingPoint<Log10>("log10", &log10_doc);
- DCHECK_OK(registry->AddFunction(std::move(log10)));
-
- auto log10_checked = MakeUnaryArithmeticFunctionFloatingPointNotNull<Log10Checked>(
- "log10_checked", &log10_checked_doc);
- DCHECK_OK(registry->AddFunction(std::move(log10_checked)));
-
- auto log2 = MakeUnaryArithmeticFunctionFloatingPoint<Log2>("log2", &log2_doc);
- DCHECK_OK(registry->AddFunction(std::move(log2)));
-
- auto log2_checked = MakeUnaryArithmeticFunctionFloatingPointNotNull<Log2Checked>(
- "log2_checked", &log2_checked_doc);
- DCHECK_OK(registry->AddFunction(std::move(log2_checked)));
-
- auto log1p = MakeUnaryArithmeticFunctionFloatingPoint<Log1p>("log1p", &log1p_doc);
- DCHECK_OK(registry->AddFunction(std::move(log1p)));
-
- auto log1p_checked = MakeUnaryArithmeticFunctionFloatingPointNotNull<Log1pChecked>(
- "log1p_checked", &log1p_checked_doc);
- DCHECK_OK(registry->AddFunction(std::move(log1p_checked)));
-
- // ----------------------------------------------------------------------
- // Rounding functions
- auto floor = MakeUnaryArithmeticFunctionFloatingPoint<Floor>("floor", &floor_doc);
- DCHECK_OK(registry->AddFunction(std::move(floor)));
-
- auto ceil = MakeUnaryArithmeticFunctionFloatingPoint<Ceil>("ceil", &ceil_doc);
- DCHECK_OK(registry->AddFunction(std::move(ceil)));
-
- auto trunc = MakeUnaryArithmeticFunctionFloatingPoint<Trunc>("trunc", &trunc_doc);
- DCHECK_OK(registry->AddFunction(std::move(trunc)));
+
+ // ----------------------------------------------------------------------
+ auto negate = MakeUnaryArithmeticFunction<Negate>("negate", &negate_doc);
+ DCHECK_OK(registry->AddFunction(std::move(negate)));
+
+ // ----------------------------------------------------------------------
+ auto negate_checked = MakeUnarySignedArithmeticFunctionNotNull<NegateChecked>(
+ "negate_checked", &negate_checked_doc);
+ DCHECK_OK(registry->AddFunction(std::move(negate_checked)));
+
+ // ----------------------------------------------------------------------
+ auto power = MakeArithmeticFunction<Power>("power", &pow_doc);
+ DCHECK_OK(registry->AddFunction(std::move(power)));
+
+ // ----------------------------------------------------------------------
+ auto power_checked =
+ MakeArithmeticFunctionNotNull<PowerChecked>("power_checked", &pow_checked_doc);
+ DCHECK_OK(registry->AddFunction(std::move(power_checked)));
+
+ // ----------------------------------------------------------------------
+ auto sign =
+ MakeUnaryArithmeticFunctionWithFixedIntOutType<Sign, Int8Type>("sign", &sign_doc);
+ DCHECK_OK(registry->AddFunction(std::move(sign)));
+
+ // ----------------------------------------------------------------------
+ // Bitwise functions
+ {
+ auto bit_wise_not = std::make_shared<ArithmeticFunction>(
+ "bit_wise_not", Arity::Unary(), &bit_wise_not_doc);
+ for (const auto& ty : IntTypes()) {
+ auto exec = TypeAgnosticBitWiseExecFromOp<ScalarUnaryNotNull, BitWiseNot>(ty);
+ DCHECK_OK(bit_wise_not->AddKernel({ty}, ty, exec));
+ }
+ DCHECK_OK(registry->AddFunction(std::move(bit_wise_not)));
+ }
+
+ auto bit_wise_and =
+ MakeBitWiseFunctionNotNull<BitWiseAnd>("bit_wise_and", &bit_wise_and_doc);
+ DCHECK_OK(registry->AddFunction(std::move(bit_wise_and)));
+
+ auto bit_wise_or =
+ MakeBitWiseFunctionNotNull<BitWiseOr>("bit_wise_or", &bit_wise_or_doc);
+ DCHECK_OK(registry->AddFunction(std::move(bit_wise_or)));
+
+ auto bit_wise_xor =
+ MakeBitWiseFunctionNotNull<BitWiseXor>("bit_wise_xor", &bit_wise_xor_doc);
+ DCHECK_OK(registry->AddFunction(std::move(bit_wise_xor)));
+
+ auto shift_left = MakeShiftFunctionNotNull<ShiftLeft>("shift_left", &shift_left_doc);
+ DCHECK_OK(registry->AddFunction(std::move(shift_left)));
+
+ auto shift_left_checked = MakeShiftFunctionNotNull<ShiftLeftChecked>(
+ "shift_left_checked", &shift_left_checked_doc);
+ DCHECK_OK(registry->AddFunction(std::move(shift_left_checked)));
+
+ auto shift_right =
+ MakeShiftFunctionNotNull<ShiftRight>("shift_right", &shift_right_doc);
+ DCHECK_OK(registry->AddFunction(std::move(shift_right)));
+
+ auto shift_right_checked = MakeShiftFunctionNotNull<ShiftRightChecked>(
+ "shift_right_checked", &shift_right_checked_doc);
+ DCHECK_OK(registry->AddFunction(std::move(shift_right_checked)));
+
+ // ----------------------------------------------------------------------
+ // Trig functions
+ auto sin = MakeUnaryArithmeticFunctionFloatingPoint<Sin>("sin", &sin_doc);
+ DCHECK_OK(registry->AddFunction(std::move(sin)));
+
+ auto sin_checked = MakeUnaryArithmeticFunctionFloatingPointNotNull<SinChecked>(
+ "sin_checked", &sin_checked_doc);
+ DCHECK_OK(registry->AddFunction(std::move(sin_checked)));
+
+ auto cos = MakeUnaryArithmeticFunctionFloatingPoint<Cos>("cos", &cos_doc);
+ DCHECK_OK(registry->AddFunction(std::move(cos)));
+
+ auto cos_checked = MakeUnaryArithmeticFunctionFloatingPointNotNull<CosChecked>(
+ "cos_checked", &cos_checked_doc);
+ DCHECK_OK(registry->AddFunction(std::move(cos_checked)));
+
+ auto tan = MakeUnaryArithmeticFunctionFloatingPoint<Tan>("tan", &tan_doc);
+ DCHECK_OK(registry->AddFunction(std::move(tan)));
+
+ auto tan_checked = MakeUnaryArithmeticFunctionFloatingPointNotNull<TanChecked>(
+ "tan_checked", &tan_checked_doc);
+ DCHECK_OK(registry->AddFunction(std::move(tan_checked)));
+
+ auto asin = MakeUnaryArithmeticFunctionFloatingPoint<Asin>("asin", &asin_doc);
+ DCHECK_OK(registry->AddFunction(std::move(asin)));
+
+ auto asin_checked = MakeUnaryArithmeticFunctionFloatingPointNotNull<AsinChecked>(
+ "asin_checked", &asin_checked_doc);
+ DCHECK_OK(registry->AddFunction(std::move(asin_checked)));
+
+ auto acos = MakeUnaryArithmeticFunctionFloatingPoint<Acos>("acos", &acos_doc);
+ DCHECK_OK(registry->AddFunction(std::move(acos)));
+
+ auto acos_checked = MakeUnaryArithmeticFunctionFloatingPointNotNull<AcosChecked>(
+ "acos_checked", &acos_checked_doc);
+ DCHECK_OK(registry->AddFunction(std::move(acos_checked)));
+
+ auto atan = MakeUnaryArithmeticFunctionFloatingPoint<Atan>("atan", &atan_doc);
+ DCHECK_OK(registry->AddFunction(std::move(atan)));
+
+ auto atan2 = MakeArithmeticFunctionFloatingPoint<Atan2>("atan2", &atan2_doc);
+ DCHECK_OK(registry->AddFunction(std::move(atan2)));
+
+ // ----------------------------------------------------------------------
+ // Logarithms
+ auto ln = MakeUnaryArithmeticFunctionFloatingPoint<LogNatural>("ln", &ln_doc);
+ DCHECK_OK(registry->AddFunction(std::move(ln)));
+
+ auto ln_checked = MakeUnaryArithmeticFunctionFloatingPointNotNull<LogNaturalChecked>(
+ "ln_checked", &ln_checked_doc);
+ DCHECK_OK(registry->AddFunction(std::move(ln_checked)));
+
+ auto log10 = MakeUnaryArithmeticFunctionFloatingPoint<Log10>("log10", &log10_doc);
+ DCHECK_OK(registry->AddFunction(std::move(log10)));
+
+ auto log10_checked = MakeUnaryArithmeticFunctionFloatingPointNotNull<Log10Checked>(
+ "log10_checked", &log10_checked_doc);
+ DCHECK_OK(registry->AddFunction(std::move(log10_checked)));
+
+ auto log2 = MakeUnaryArithmeticFunctionFloatingPoint<Log2>("log2", &log2_doc);
+ DCHECK_OK(registry->AddFunction(std::move(log2)));
+
+ auto log2_checked = MakeUnaryArithmeticFunctionFloatingPointNotNull<Log2Checked>(
+ "log2_checked", &log2_checked_doc);
+ DCHECK_OK(registry->AddFunction(std::move(log2_checked)));
+
+ auto log1p = MakeUnaryArithmeticFunctionFloatingPoint<Log1p>("log1p", &log1p_doc);
+ DCHECK_OK(registry->AddFunction(std::move(log1p)));
+
+ auto log1p_checked = MakeUnaryArithmeticFunctionFloatingPointNotNull<Log1pChecked>(
+ "log1p_checked", &log1p_checked_doc);
+ DCHECK_OK(registry->AddFunction(std::move(log1p_checked)));
+
+ // ----------------------------------------------------------------------
+ // Rounding functions
+ auto floor = MakeUnaryArithmeticFunctionFloatingPoint<Floor>("floor", &floor_doc);
+ DCHECK_OK(registry->AddFunction(std::move(floor)));
+
+ auto ceil = MakeUnaryArithmeticFunctionFloatingPoint<Ceil>("ceil", &ceil_doc);
+ DCHECK_OK(registry->AddFunction(std::move(ceil)));
+
+ auto trunc = MakeUnaryArithmeticFunctionFloatingPoint<Trunc>("trunc", &trunc_doc);
+ DCHECK_OK(registry->AddFunction(std::move(trunc)));
}
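// Minimal usage sketch (assuming the public compute API, which this diff
// does not show): once registered, the kernels above are callable by name:
//
//   #include <arrow/compute/api.h>
//   arrow::Datum sum =
//       arrow::compute::CallFunction("add", {lhs, rhs}).ValueOrDie();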
} // namespace internal
diff --git a/contrib/libs/apache/arrow/cpp/src/arrow/compute/kernels/scalar_boolean.cc b/contrib/libs/apache/arrow/cpp/src/arrow/compute/kernels/scalar_boolean.cc
index 3a99c87dd99..7a0e3654edb 100644
--- a/contrib/libs/apache/arrow/cpp/src/arrow/compute/kernels/scalar_boolean.cc
+++ b/contrib/libs/apache/arrow/cpp/src/arrow/compute/kernels/scalar_boolean.cc
@@ -33,180 +33,180 @@ namespace {
template <typename ComputeWord>
void ComputeKleene(ComputeWord&& compute_word, KernelContext* ctx, const ArrayData& left,
const ArrayData& right, ArrayData* out) {
- DCHECK(left.null_count != 0 || right.null_count != 0)
- << "ComputeKleene is unnecessarily expensive for the non-null case";
+ DCHECK(left.null_count != 0 || right.null_count != 0)
+ << "ComputeKleene is unnecessarily expensive for the non-null case";
- Bitmap left_valid_bm{left.buffers[0], left.offset, left.length};
- Bitmap left_data_bm{left.buffers[1], left.offset, left.length};
+ Bitmap left_valid_bm{left.buffers[0], left.offset, left.length};
+ Bitmap left_data_bm{left.buffers[1], left.offset, left.length};
- Bitmap right_valid_bm{right.buffers[0], right.offset, right.length};
- Bitmap right_data_bm{right.buffers[1], right.offset, right.length};
+ Bitmap right_valid_bm{right.buffers[0], right.offset, right.length};
+ Bitmap right_data_bm{right.buffers[1], right.offset, right.length};
- std::array<Bitmap, 2> out_bms{Bitmap(out->buffers[0], out->offset, out->length),
- Bitmap(out->buffers[1], out->offset, out->length)};
+ std::array<Bitmap, 2> out_bms{Bitmap(out->buffers[0], out->offset, out->length),
+ Bitmap(out->buffers[1], out->offset, out->length)};
auto apply = [&](uint64_t left_valid, uint64_t left_data, uint64_t right_valid,
- uint64_t right_data, uint64_t* out_validity, uint64_t* out_data) {
+ uint64_t right_data, uint64_t* out_validity, uint64_t* out_data) {
auto left_true = left_valid & left_data;
auto left_false = left_valid & ~left_data;
auto right_true = right_valid & right_data;
auto right_false = right_valid & ~right_data;
- compute_word(left_true, left_false, right_true, right_false, out_validity, out_data);
+ compute_word(left_true, left_false, right_true, right_false, out_validity, out_data);
};
- if (right.null_count == 0) {
- std::array<Bitmap, 3> in_bms{left_valid_bm, left_data_bm, right_data_bm};
- Bitmap::VisitWordsAndWrite(
- in_bms, &out_bms,
- [&](const std::array<uint64_t, 3>& in, std::array<uint64_t, 2>* out) {
- apply(in[0], in[1], ~uint64_t(0), in[2], &(out->at(0)), &(out->at(1)));
- });
- return;
- }
-
- if (left.null_count == 0) {
- std::array<Bitmap, 3> in_bms{left_data_bm, right_valid_bm, right_data_bm};
- Bitmap::VisitWordsAndWrite(
- in_bms, &out_bms,
- [&](const std::array<uint64_t, 3>& in, std::array<uint64_t, 2>* out) {
- apply(~uint64_t(0), in[0], in[1], in[2], &(out->at(0)), &(out->at(1)));
- });
- return;
+ if (right.null_count == 0) {
+ std::array<Bitmap, 3> in_bms{left_valid_bm, left_data_bm, right_data_bm};
+ Bitmap::VisitWordsAndWrite(
+ in_bms, &out_bms,
+ [&](const std::array<uint64_t, 3>& in, std::array<uint64_t, 2>* out) {
+ apply(in[0], in[1], ~uint64_t(0), in[2], &(out->at(0)), &(out->at(1)));
+ });
+ return;
}
-
- DCHECK(left.null_count != 0 && right.null_count != 0);
- std::array<Bitmap, 4> in_bms{left_valid_bm, left_data_bm, right_valid_bm,
- right_data_bm};
- Bitmap::VisitWordsAndWrite(
- in_bms, &out_bms,
- [&](const std::array<uint64_t, 4>& in, std::array<uint64_t, 2>* out) {
- apply(in[0], in[1], in[2], in[3], &(out->at(0)), &(out->at(1)));
- });
+
+ if (left.null_count == 0) {
+ std::array<Bitmap, 3> in_bms{left_data_bm, right_valid_bm, right_data_bm};
+ Bitmap::VisitWordsAndWrite(
+ in_bms, &out_bms,
+ [&](const std::array<uint64_t, 3>& in, std::array<uint64_t, 2>* out) {
+ apply(~uint64_t(0), in[0], in[1], in[2], &(out->at(0)), &(out->at(1)));
+ });
+ return;
+ }
+
+ DCHECK(left.null_count != 0 && right.null_count != 0);
+ std::array<Bitmap, 4> in_bms{left_valid_bm, left_data_bm, right_valid_bm,
+ right_data_bm};
+ Bitmap::VisitWordsAndWrite(
+ in_bms, &out_bms,
+ [&](const std::array<uint64_t, 4>& in, std::array<uint64_t, 2>* out) {
+ apply(in[0], in[1], in[2], in[3], &(out->at(0)), &(out->at(1)));
+ });
+}
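+// ComputeKleene packs each input into (valid, data) bitmap pairs so that
+// compute_word can derive 64 output validity/data bits per iteration using
+// only bitwise ops; the early-return branches specialize for inputs whose
+// validity bitmap is known to be all-set.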
+
+inline BooleanScalar InvertScalar(const Scalar& in) {
+ return in.is_valid ? BooleanScalar(!checked_cast<const BooleanScalar&>(in).value)
+ : BooleanScalar();
+}
+
+inline Bitmap GetBitmap(const ArrayData& arr, int index) {
+ return Bitmap{arr.buffers[index], arr.offset, arr.length};
}
-inline BooleanScalar InvertScalar(const Scalar& in) {
- return in.is_valid ? BooleanScalar(!checked_cast<const BooleanScalar&>(in).value)
- : BooleanScalar();
-}
-
-inline Bitmap GetBitmap(const ArrayData& arr, int index) {
- return Bitmap{arr.buffers[index], arr.offset, arr.length};
-}
-
-struct InvertOp {
- static Status Call(KernelContext* ctx, const Scalar& in, Scalar* out) {
- *checked_cast<BooleanScalar*>(out) = InvertScalar(in);
- return Status::OK();
- }
-
- static Status Call(KernelContext* ctx, const ArrayData& in, ArrayData* out) {
- GetBitmap(*out, 1).CopyFromInverted(GetBitmap(in, 1));
- return Status::OK();
- }
-};
-
-template <typename Op>
-struct Commutative {
- static Status Call(KernelContext* ctx, const Scalar& left, const ArrayData& right,
- ArrayData* out) {
- return Op::Call(ctx, right, left, out);
- }
-};
-
-struct AndOp : Commutative<AndOp> {
- using Commutative<AndOp>::Call;
-
- static Status Call(KernelContext* ctx, const Scalar& left, const Scalar& right,
- Scalar* out) {
- if (left.is_valid && right.is_valid) {
+struct InvertOp {
+ static Status Call(KernelContext* ctx, const Scalar& in, Scalar* out) {
+ *checked_cast<BooleanScalar*>(out) = InvertScalar(in);
+ return Status::OK();
+ }
+
+ static Status Call(KernelContext* ctx, const ArrayData& in, ArrayData* out) {
+ GetBitmap(*out, 1).CopyFromInverted(GetBitmap(in, 1));
+ return Status::OK();
+ }
+};
+
+template <typename Op>
+struct Commutative {
+ static Status Call(KernelContext* ctx, const Scalar& left, const ArrayData& right,
+ ArrayData* out) {
+ return Op::Call(ctx, right, left, out);
+ }
+};
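+// Commutative<Op> is a small CRTP helper: an op implements only the
+// (ArrayData, Scalar) overload and this forwarding Call supplies the
+// mirrored (Scalar, ArrayData) case by swapping the arguments.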
+
+struct AndOp : Commutative<AndOp> {
+ using Commutative<AndOp>::Call;
+
+ static Status Call(KernelContext* ctx, const Scalar& left, const Scalar& right,
+ Scalar* out) {
+ if (left.is_valid && right.is_valid) {
checked_cast<BooleanScalar*>(out)->value =
- checked_cast<const BooleanScalar&>(left).value &&
- checked_cast<const BooleanScalar&>(right).value;
+ checked_cast<const BooleanScalar&>(left).value &&
+ checked_cast<const BooleanScalar&>(right).value;
}
- return Status::OK();
+ return Status::OK();
}
- static Status Call(KernelContext* ctx, const ArrayData& left, const Scalar& right,
- ArrayData* out) {
- if (right.is_valid) {
- checked_cast<const BooleanScalar&>(right).value
- ? GetBitmap(*out, 1).CopyFrom(GetBitmap(left, 1))
- : GetBitmap(*out, 1).SetBitsTo(false);
- }
- return Status::OK();
+ static Status Call(KernelContext* ctx, const ArrayData& left, const Scalar& right,
+ ArrayData* out) {
+ if (right.is_valid) {
+ checked_cast<const BooleanScalar&>(right).value
+ ? GetBitmap(*out, 1).CopyFrom(GetBitmap(left, 1))
+ : GetBitmap(*out, 1).SetBitsTo(false);
+ }
+ return Status::OK();
}
- static Status Call(KernelContext* ctx, const ArrayData& left, const ArrayData& right,
- ArrayData* out) {
+ static Status Call(KernelContext* ctx, const ArrayData& left, const ArrayData& right,
+ ArrayData* out) {
::arrow::internal::BitmapAnd(left.buffers[1]->data(), left.offset,
right.buffers[1]->data(), right.offset, right.length,
out->offset, out->buffers[1]->mutable_data());
- return Status::OK();
+ return Status::OK();
}
};
-struct KleeneAndOp : Commutative<KleeneAndOp> {
- using Commutative<KleeneAndOp>::Call;
-
- static Status Call(KernelContext* ctx, const Scalar& left, const Scalar& right,
- Scalar* out) {
- bool left_true = left.is_valid && checked_cast<const BooleanScalar&>(left).value;
- bool left_false = left.is_valid && !checked_cast<const BooleanScalar&>(left).value;
-
- bool right_true = right.is_valid && checked_cast<const BooleanScalar&>(right).value;
- bool right_false = right.is_valid && !checked_cast<const BooleanScalar&>(right).value;
-
- checked_cast<BooleanScalar*>(out)->value = left_true && right_true;
- out->is_valid = left_false || right_false || (left_true && right_true);
- return Status::OK();
- }
-
- static Status Call(KernelContext* ctx, const ArrayData& left, const Scalar& right,
- ArrayData* out) {
- bool right_true = right.is_valid && checked_cast<const BooleanScalar&>(right).value;
- bool right_false = right.is_valid && !checked_cast<const BooleanScalar&>(right).value;
-
- if (right_false) {
- out->null_count = 0;
- out->buffers[0] = nullptr;
- GetBitmap(*out, 1).SetBitsTo(false); // all false case
- return Status::OK();
- }
-
- if (right_true) {
- if (left.GetNullCount() == 0) {
- out->null_count = 0;
- out->buffers[0] = nullptr;
- } else {
- GetBitmap(*out, 0).CopyFrom(GetBitmap(left, 0));
- }
- GetBitmap(*out, 1).CopyFrom(GetBitmap(left, 1));
- return Status::OK();
- }
-
- // scalar was null: out[i] is valid iff left[i] was false
- if (left.GetNullCount() == 0) {
- ::arrow::internal::InvertBitmap(left.buffers[1]->data(), left.offset, left.length,
- out->buffers[0]->mutable_data(), out->offset);
- } else {
- ::arrow::internal::BitmapAndNot(left.buffers[0]->data(), left.offset,
- left.buffers[1]->data(), left.offset, left.length,
- out->offset, out->buffers[0]->mutable_data());
- }
- ::arrow::internal::CopyBitmap(left.buffers[1]->data(), left.offset, left.length,
- out->buffers[1]->mutable_data(), out->offset);
- return Status::OK();
- }
-
- static Status Call(KernelContext* ctx, const ArrayData& left, const ArrayData& right,
- ArrayData* out) {
+struct KleeneAndOp : Commutative<KleeneAndOp> {
+ using Commutative<KleeneAndOp>::Call;
+
+ static Status Call(KernelContext* ctx, const Scalar& left, const Scalar& right,
+ Scalar* out) {
+ bool left_true = left.is_valid && checked_cast<const BooleanScalar&>(left).value;
+ bool left_false = left.is_valid && !checked_cast<const BooleanScalar&>(left).value;
+
+ bool right_true = right.is_valid && checked_cast<const BooleanScalar&>(right).value;
+ bool right_false = right.is_valid && !checked_cast<const BooleanScalar&>(right).value;
+
+ checked_cast<BooleanScalar*>(out)->value = left_true && right_true;
+ out->is_valid = left_false || right_false || (left_true && right_true);
+ return Status::OK();
+ }
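+  // For example: and_kleene(null, false) == false, but
+  // and_kleene(null, true) == null, matching the truth table in
+  // and_kleene_doc.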
+
+ static Status Call(KernelContext* ctx, const ArrayData& left, const Scalar& right,
+ ArrayData* out) {
+ bool right_true = right.is_valid && checked_cast<const BooleanScalar&>(right).value;
+ bool right_false = right.is_valid && !checked_cast<const BooleanScalar&>(right).value;
+
+ if (right_false) {
+ out->null_count = 0;
+ out->buffers[0] = nullptr;
+ GetBitmap(*out, 1).SetBitsTo(false); // all false case
+ return Status::OK();
+ }
+
+ if (right_true) {
+ if (left.GetNullCount() == 0) {
+ out->null_count = 0;
+ out->buffers[0] = nullptr;
+ } else {
+ GetBitmap(*out, 0).CopyFrom(GetBitmap(left, 0));
+ }
+ GetBitmap(*out, 1).CopyFrom(GetBitmap(left, 1));
+ return Status::OK();
+ }
+
+ // scalar was null: out[i] is valid iff left[i] was false
+ if (left.GetNullCount() == 0) {
+ ::arrow::internal::InvertBitmap(left.buffers[1]->data(), left.offset, left.length,
+ out->buffers[0]->mutable_data(), out->offset);
+ } else {
+ ::arrow::internal::BitmapAndNot(left.buffers[0]->data(), left.offset,
+ left.buffers[1]->data(), left.offset, left.length,
+ out->offset, out->buffers[0]->mutable_data());
+ }
+ ::arrow::internal::CopyBitmap(left.buffers[1]->data(), left.offset, left.length,
+ out->buffers[1]->mutable_data(), out->offset);
+ return Status::OK();
+ }
+
+ static Status Call(KernelContext* ctx, const ArrayData& left, const ArrayData& right,
+ ArrayData* out) {
if (left.GetNullCount() == 0 && right.GetNullCount() == 0) {
- out->null_count = 0;
- // Kleene kernels have validity bitmap pre-allocated. Therefore, set it to 1
- BitUtil::SetBitmap(out->buffers[0]->mutable_data(), out->offset, out->length);
- return AndOp::Call(ctx, left, right, out);
+ out->null_count = 0;
+ // Kleene kernels have validity bitmap pre-allocated. Therefore, set it to 1
+ BitUtil::SetBitmap(out->buffers[0]->mutable_data(), out->offset, out->length);
+ return AndOp::Call(ctx, left, right, out);
}
auto compute_word = [](uint64_t left_true, uint64_t left_false, uint64_t right_true,
uint64_t right_false, uint64_t* out_valid,
@@ -215,104 +215,104 @@ struct KleeneAndOp : Commutative<KleeneAndOp> {
*out_valid = left_false | right_false | (left_true & right_true);
};
ComputeKleene(compute_word, ctx, left, right, out);
- return Status::OK();
+ return Status::OK();
}
};
-struct OrOp : Commutative<OrOp> {
- using Commutative<OrOp>::Call;
-
- static Status Call(KernelContext* ctx, const Scalar& left, const Scalar& right,
- Scalar* out) {
- if (left.is_valid && right.is_valid) {
- checked_cast<BooleanScalar*>(out)->value =
- checked_cast<const BooleanScalar&>(left).value ||
- checked_cast<const BooleanScalar&>(right).value;
- }
- return Status::OK();
- }
-
- static Status Call(KernelContext* ctx, const ArrayData& left, const Scalar& right,
- ArrayData* out) {
- if (right.is_valid) {
- checked_cast<const BooleanScalar&>(right).value
- ? GetBitmap(*out, 1).SetBitsTo(true)
- : GetBitmap(*out, 1).CopyFrom(GetBitmap(left, 1));
- }
- return Status::OK();
- }
-
- static Status Call(KernelContext* ctx, const ArrayData& left, const ArrayData& right,
- ArrayData* out) {
+struct OrOp : Commutative<OrOp> {
+ using Commutative<OrOp>::Call;
+
+ static Status Call(KernelContext* ctx, const Scalar& left, const Scalar& right,
+ Scalar* out) {
+ if (left.is_valid && right.is_valid) {
+ checked_cast<BooleanScalar*>(out)->value =
+ checked_cast<const BooleanScalar&>(left).value ||
+ checked_cast<const BooleanScalar&>(right).value;
+ }
+ return Status::OK();
+ }
+
+ static Status Call(KernelContext* ctx, const ArrayData& left, const Scalar& right,
+ ArrayData* out) {
+ if (right.is_valid) {
+ checked_cast<const BooleanScalar&>(right).value
+ ? GetBitmap(*out, 1).SetBitsTo(true)
+ : GetBitmap(*out, 1).CopyFrom(GetBitmap(left, 1));
+ }
+ return Status::OK();
+ }
+
+ static Status Call(KernelContext* ctx, const ArrayData& left, const ArrayData& right,
+ ArrayData* out) {
::arrow::internal::BitmapOr(left.buffers[1]->data(), left.offset,
right.buffers[1]->data(), right.offset, right.length,
out->offset, out->buffers[1]->mutable_data());
- return Status::OK();
+ return Status::OK();
}
};
-struct KleeneOrOp : Commutative<KleeneOrOp> {
- using Commutative<KleeneOrOp>::Call;
-
- static Status Call(KernelContext* ctx, const Scalar& left, const Scalar& right,
- Scalar* out) {
- bool left_true = left.is_valid && checked_cast<const BooleanScalar&>(left).value;
- bool left_false = left.is_valid && !checked_cast<const BooleanScalar&>(left).value;
-
- bool right_true = right.is_valid && checked_cast<const BooleanScalar&>(right).value;
- bool right_false = right.is_valid && !checked_cast<const BooleanScalar&>(right).value;
-
- checked_cast<BooleanScalar*>(out)->value = left_true || right_true;
- out->is_valid = left_true || right_true || (left_false && right_false);
- return Status::OK();
- }
-
- static Status Call(KernelContext* ctx, const ArrayData& left, const Scalar& right,
- ArrayData* out) {
- bool right_true = right.is_valid && checked_cast<const BooleanScalar&>(right).value;
- bool right_false = right.is_valid && !checked_cast<const BooleanScalar&>(right).value;
-
- if (right_true) {
- out->null_count = 0;
- out->buffers[0] = nullptr;
- GetBitmap(*out, 1).SetBitsTo(true); // all true case
- return Status::OK();
- }
-
- if (right_false) {
- if (left.GetNullCount() == 0) {
- out->null_count = 0;
- out->buffers[0] = nullptr;
- } else {
- GetBitmap(*out, 0).CopyFrom(GetBitmap(left, 0));
- }
- GetBitmap(*out, 1).CopyFrom(GetBitmap(left, 1));
- return Status::OK();
- }
-
- // scalar was null: out[i] is valid iff left[i] was true
- if (left.GetNullCount() == 0) {
- ::arrow::internal::CopyBitmap(left.buffers[1]->data(), left.offset, left.length,
- out->buffers[0]->mutable_data(), out->offset);
- } else {
- ::arrow::internal::BitmapAnd(left.buffers[0]->data(), left.offset,
- left.buffers[1]->data(), left.offset, left.length,
- out->offset, out->buffers[0]->mutable_data());
- }
- ::arrow::internal::CopyBitmap(left.buffers[1]->data(), left.offset, left.length,
- out->buffers[1]->mutable_data(), out->offset);
- return Status::OK();
- }
-
- static Status Call(KernelContext* ctx, const ArrayData& left, const ArrayData& right,
- ArrayData* out) {
+struct KleeneOrOp : Commutative<KleeneOrOp> {
+ using Commutative<KleeneOrOp>::Call;
+
+ static Status Call(KernelContext* ctx, const Scalar& left, const Scalar& right,
+ Scalar* out) {
+ bool left_true = left.is_valid && checked_cast<const BooleanScalar&>(left).value;
+ bool left_false = left.is_valid && !checked_cast<const BooleanScalar&>(left).value;
+
+ bool right_true = right.is_valid && checked_cast<const BooleanScalar&>(right).value;
+ bool right_false = right.is_valid && !checked_cast<const BooleanScalar&>(right).value;
+
+ checked_cast<BooleanScalar*>(out)->value = left_true || right_true;
+ out->is_valid = left_true || right_true || (left_false && right_false);
+ return Status::OK();
+ }
+
+ static Status Call(KernelContext* ctx, const ArrayData& left, const Scalar& right,
+ ArrayData* out) {
+ bool right_true = right.is_valid && checked_cast<const BooleanScalar&>(right).value;
+ bool right_false = right.is_valid && !checked_cast<const BooleanScalar&>(right).value;
+
+ if (right_true) {
+ out->null_count = 0;
+ out->buffers[0] = nullptr;
+ GetBitmap(*out, 1).SetBitsTo(true); // all true case
+ return Status::OK();
+ }
+
+ if (right_false) {
+ if (left.GetNullCount() == 0) {
+ out->null_count = 0;
+ out->buffers[0] = nullptr;
+ } else {
+ GetBitmap(*out, 0).CopyFrom(GetBitmap(left, 0));
+ }
+ GetBitmap(*out, 1).CopyFrom(GetBitmap(left, 1));
+ return Status::OK();
+ }
+
+ // scalar was null: out[i] is valid iff left[i] was true
+ if (left.GetNullCount() == 0) {
+ ::arrow::internal::CopyBitmap(left.buffers[1]->data(), left.offset, left.length,
+ out->buffers[0]->mutable_data(), out->offset);
+ } else {
+ ::arrow::internal::BitmapAnd(left.buffers[0]->data(), left.offset,
+ left.buffers[1]->data(), left.offset, left.length,
+ out->offset, out->buffers[0]->mutable_data());
+ }
+ ::arrow::internal::CopyBitmap(left.buffers[1]->data(), left.offset, left.length,
+ out->buffers[1]->mutable_data(), out->offset);
+ return Status::OK();
+ }
+
+ static Status Call(KernelContext* ctx, const ArrayData& left, const ArrayData& right,
+ ArrayData* out) {
if (left.GetNullCount() == 0 && right.GetNullCount() == 0) {
- out->null_count = 0;
- // Kleene kernels have validity bitmap pre-allocated. Therefore, set it to 1
- BitUtil::SetBitmap(out->buffers[0]->mutable_data(), out->offset, out->length);
- return OrOp::Call(ctx, left, right, out);
+ out->null_count = 0;
+ // Kleene kernels have validity bitmap pre-allocated. Therefore, set it to 1
+ BitUtil::SetBitmap(out->buffers[0]->mutable_data(), out->offset, out->length);
+ return OrOp::Call(ctx, left, right, out);
}
-
+
static auto compute_word = [](uint64_t left_true, uint64_t left_false,
uint64_t right_true, uint64_t right_false,
uint64_t* out_valid, uint64_t* out_data) {
@@ -320,149 +320,149 @@ struct KleeneOrOp : Commutative<KleeneOrOp> {
*out_valid = left_true | right_true | (left_false & right_false);
};
- ComputeKleene(compute_word, ctx, left, right, out);
- return Status::OK();
+ ComputeKleene(compute_word, ctx, left, right, out);
+ return Status::OK();
}
};
-struct XorOp : Commutative<XorOp> {
- using Commutative<XorOp>::Call;
-
- static Status Call(KernelContext* ctx, const Scalar& left, const Scalar& right,
- Scalar* out) {
- if (left.is_valid && right.is_valid) {
- checked_cast<BooleanScalar*>(out)->value =
- checked_cast<const BooleanScalar&>(left).value ^
- checked_cast<const BooleanScalar&>(right).value;
- }
- return Status::OK();
- }
-
- static Status Call(KernelContext* ctx, const ArrayData& left, const Scalar& right,
- ArrayData* out) {
- if (right.is_valid) {
- checked_cast<const BooleanScalar&>(right).value
- ? GetBitmap(*out, 1).CopyFromInverted(GetBitmap(left, 1))
- : GetBitmap(*out, 1).CopyFrom(GetBitmap(left, 1));
- }
- return Status::OK();
- }
-
- static Status Call(KernelContext* ctx, const ArrayData& left, const ArrayData& right,
- ArrayData* out) {
+struct XorOp : Commutative<XorOp> {
+ using Commutative<XorOp>::Call;
+
+ static Status Call(KernelContext* ctx, const Scalar& left, const Scalar& right,
+ Scalar* out) {
+ if (left.is_valid && right.is_valid) {
+ checked_cast<BooleanScalar*>(out)->value =
+ checked_cast<const BooleanScalar&>(left).value ^
+ checked_cast<const BooleanScalar&>(right).value;
+ }
+ return Status::OK();
+ }
+
+ static Status Call(KernelContext* ctx, const ArrayData& left, const Scalar& right,
+ ArrayData* out) {
+ if (right.is_valid) {
+ checked_cast<const BooleanScalar&>(right).value
+ ? GetBitmap(*out, 1).CopyFromInverted(GetBitmap(left, 1))
+ : GetBitmap(*out, 1).CopyFrom(GetBitmap(left, 1));
+ }
+ return Status::OK();
+ }
+
+ static Status Call(KernelContext* ctx, const ArrayData& left, const ArrayData& right,
+ ArrayData* out) {
::arrow::internal::BitmapXor(left.buffers[1]->data(), left.offset,
right.buffers[1]->data(), right.offset, right.length,
out->offset, out->buffers[1]->mutable_data());
- return Status::OK();
+ return Status::OK();
+ }
+};
+
+struct AndNotOp {
+ static Status Call(KernelContext* ctx, const Scalar& left, const Scalar& right,
+ Scalar* out) {
+ return AndOp::Call(ctx, left, InvertScalar(right), out);
+ }
+
+ static Status Call(KernelContext* ctx, const Scalar& left, const ArrayData& right,
+ ArrayData* out) {
+ if (left.is_valid) {
+ checked_cast<const BooleanScalar&>(left).value
+ ? GetBitmap(*out, 1).CopyFromInverted(GetBitmap(right, 1))
+ : GetBitmap(*out, 1).SetBitsTo(false);
+ }
+ return Status::OK();
+ }
+
+ static Status Call(KernelContext* ctx, const ArrayData& left, const Scalar& right,
+ ArrayData* out) {
+ return AndOp::Call(ctx, left, InvertScalar(right), out);
+ }
+
+ static Status Call(KernelContext* ctx, const ArrayData& left, const ArrayData& right,
+ ArrayData* out) {
+ ::arrow::internal::BitmapAndNot(left.buffers[1]->data(), left.offset,
+ right.buffers[1]->data(), right.offset, right.length,
+ out->offset, out->buffers[1]->mutable_data());
+ return Status::OK();
+ }
+};
+
+struct KleeneAndNotOp {
+ static Status Call(KernelContext* ctx, const Scalar& left, const Scalar& right,
+ Scalar* out) {
+ return KleeneAndOp::Call(ctx, left, InvertScalar(right), out);
+ }
+
+ static Status Call(KernelContext* ctx, const Scalar& left, const ArrayData& right,
+ ArrayData* out) {
+ bool left_true = left.is_valid && checked_cast<const BooleanScalar&>(left).value;
+ bool left_false = left.is_valid && !checked_cast<const BooleanScalar&>(left).value;
+
+ if (left_false) {
+ out->null_count = 0;
+ out->buffers[0] = nullptr;
+ GetBitmap(*out, 1).SetBitsTo(false); // all false case
+ return Status::OK();
+ }
+
+ if (left_true) {
+ if (right.GetNullCount() == 0) {
+ out->null_count = 0;
+ out->buffers[0] = nullptr;
+ } else {
+ GetBitmap(*out, 0).CopyFrom(GetBitmap(right, 0));
+ }
+ GetBitmap(*out, 1).CopyFromInverted(GetBitmap(right, 1));
+ return Status::OK();
+ }
+
+ // scalar was null: out[i] is valid iff right[i] was true
+ if (right.GetNullCount() == 0) {
+ ::arrow::internal::CopyBitmap(right.buffers[1]->data(), right.offset, right.length,
+ out->buffers[0]->mutable_data(), out->offset);
+ } else {
+ ::arrow::internal::BitmapAnd(right.buffers[0]->data(), right.offset,
+ right.buffers[1]->data(), right.offset, right.length,
+ out->offset, out->buffers[0]->mutable_data());
+ }
+ ::arrow::internal::InvertBitmap(right.buffers[1]->data(), right.offset, right.length,
+ out->buffers[1]->mutable_data(), out->offset);
+ return Status::OK();
+ }
+
+ static Status Call(KernelContext* ctx, const ArrayData& left, const Scalar& right,
+ ArrayData* out) {
+ return KleeneAndOp::Call(ctx, left, InvertScalar(right), out);
+ }
+
+ static Status Call(KernelContext* ctx, const ArrayData& left, const ArrayData& right,
+ ArrayData* out) {
+ if (left.GetNullCount() == 0 && right.GetNullCount() == 0) {
+ out->null_count = 0;
+ // Kleene kernels have validity bitmap pre-allocated. Therefore, set it to 1
+ BitUtil::SetBitmap(out->buffers[0]->mutable_data(), out->offset, out->length);
+ return AndNotOp::Call(ctx, left, right, out);
+ }
+
+ static auto compute_word = [](uint64_t left_true, uint64_t left_false,
+ uint64_t right_true, uint64_t right_false,
+ uint64_t* out_valid, uint64_t* out_data) {
+ *out_data = left_true & right_false;
+ *out_valid = left_false | right_true | (left_true & right_false);
+ };
+
+ ComputeKleene(compute_word, ctx, left, right, out);
+ return Status::OK();
}
};
-struct AndNotOp {
- static Status Call(KernelContext* ctx, const Scalar& left, const Scalar& right,
- Scalar* out) {
- return AndOp::Call(ctx, left, InvertScalar(right), out);
- }
-
- static Status Call(KernelContext* ctx, const Scalar& left, const ArrayData& right,
- ArrayData* out) {
- if (left.is_valid) {
- checked_cast<const BooleanScalar&>(left).value
- ? GetBitmap(*out, 1).CopyFromInverted(GetBitmap(right, 1))
- : GetBitmap(*out, 1).SetBitsTo(false);
- }
- return Status::OK();
- }
-
- static Status Call(KernelContext* ctx, const ArrayData& left, const Scalar& right,
- ArrayData* out) {
- return AndOp::Call(ctx, left, InvertScalar(right), out);
- }
-
- static Status Call(KernelContext* ctx, const ArrayData& left, const ArrayData& right,
- ArrayData* out) {
- ::arrow::internal::BitmapAndNot(left.buffers[1]->data(), left.offset,
- right.buffers[1]->data(), right.offset, right.length,
- out->offset, out->buffers[1]->mutable_data());
- return Status::OK();
- }
-};
-
-struct KleeneAndNotOp {
- static Status Call(KernelContext* ctx, const Scalar& left, const Scalar& right,
- Scalar* out) {
- return KleeneAndOp::Call(ctx, left, InvertScalar(right), out);
- }
-
- static Status Call(KernelContext* ctx, const Scalar& left, const ArrayData& right,
- ArrayData* out) {
- bool left_true = left.is_valid && checked_cast<const BooleanScalar&>(left).value;
- bool left_false = left.is_valid && !checked_cast<const BooleanScalar&>(left).value;
-
- if (left_false) {
- out->null_count = 0;
- out->buffers[0] = nullptr;
- GetBitmap(*out, 1).SetBitsTo(false); // all false case
- return Status::OK();
- }
-
- if (left_true) {
- if (right.GetNullCount() == 0) {
- out->null_count = 0;
- out->buffers[0] = nullptr;
- } else {
- GetBitmap(*out, 0).CopyFrom(GetBitmap(right, 0));
- }
- GetBitmap(*out, 1).CopyFromInverted(GetBitmap(right, 1));
- return Status::OK();
- }
-
- // scalar was null: out[i] is valid iff right[i] was true
- if (right.GetNullCount() == 0) {
- ::arrow::internal::CopyBitmap(right.buffers[1]->data(), right.offset, right.length,
- out->buffers[0]->mutable_data(), out->offset);
- } else {
- ::arrow::internal::BitmapAnd(right.buffers[0]->data(), right.offset,
- right.buffers[1]->data(), right.offset, right.length,
- out->offset, out->buffers[0]->mutable_data());
- }
- ::arrow::internal::InvertBitmap(right.buffers[1]->data(), right.offset, right.length,
- out->buffers[1]->mutable_data(), out->offset);
- return Status::OK();
- }
-
- static Status Call(KernelContext* ctx, const ArrayData& left, const Scalar& right,
- ArrayData* out) {
- return KleeneAndOp::Call(ctx, left, InvertScalar(right), out);
- }
-
- static Status Call(KernelContext* ctx, const ArrayData& left, const ArrayData& right,
- ArrayData* out) {
- if (left.GetNullCount() == 0 && right.GetNullCount() == 0) {
- out->null_count = 0;
- // Kleene kernels have validity bitmap pre-allocated. Therefore, set it to 1
- BitUtil::SetBitmap(out->buffers[0]->mutable_data(), out->offset, out->length);
- return AndNotOp::Call(ctx, left, right, out);
- }
-
- static auto compute_word = [](uint64_t left_true, uint64_t left_false,
- uint64_t right_true, uint64_t right_false,
- uint64_t* out_valid, uint64_t* out_data) {
- *out_data = left_true & right_false;
- *out_valid = left_false | right_true | (left_true & right_false);
- };
-
- ComputeKleene(compute_word, ctx, left, right, out);
- return Status::OK();
- }
-};
-
-void MakeFunction(const std::string& name, int arity, ArrayKernelExec exec,
- const FunctionDoc* doc, FunctionRegistry* registry,
+void MakeFunction(const std::string& name, int arity, ArrayKernelExec exec,
+ const FunctionDoc* doc, FunctionRegistry* registry,
NullHandling::type null_handling = NullHandling::INTERSECTION) {
- auto func = std::make_shared<ScalarFunction>(name, Arity(arity), doc);
+ auto func = std::make_shared<ScalarFunction>(name, Arity(arity), doc);
// Scalar arguments not yet supported
- std::vector<InputType> in_types(arity, InputType(boolean()));
+ std::vector<InputType> in_types(arity, InputType(boolean()));
ScalarKernel kernel(std::move(in_types), boolean(), exec);
kernel.null_handling = null_handling;
@@ -470,92 +470,92 @@ void MakeFunction(const std::string& name, int arity, ArrayKernelExec exec,
DCHECK_OK(registry->AddFunction(std::move(func)));
}
-const FunctionDoc invert_doc{"Invert boolean values", "", {"values"}};
-
-const FunctionDoc and_doc{
- "Logical 'and' boolean values",
- ("When a null is encountered in either input, a null is output.\n"
- "For a different null behavior, see function \"and_kleene\"."),
- {"x", "y"}};
-
-const FunctionDoc and_not_doc{
- "Logical 'and not' boolean values",
- ("When a null is encountered in either input, a null is output.\n"
- "For a different null behavior, see function \"and_not_kleene\"."),
- {"x", "y"}};
-
-const FunctionDoc or_doc{
- "Logical 'or' boolean values",
- ("When a null is encountered in either input, a null is output.\n"
- "For a different null behavior, see function \"or_kleene\"."),
- {"x", "y"}};
-
-const FunctionDoc xor_doc{
- "Logical 'xor' boolean values",
- ("When a null is encountered in either input, a null is output."),
- {"x", "y"}};
-
-const FunctionDoc and_kleene_doc{
- "Logical 'and' boolean values (Kleene logic)",
- ("This function behaves as follows with nulls:\n\n"
- "- true and null = null\n"
- "- null and true = null\n"
- "- false and null = false\n"
- "- null and false = false\n"
- "- null and null = null\n"
- "\n"
- "In other words, in this context a null value really means \"unknown\",\n"
- "and an unknown value 'and' false is always false.\n"
- "For a different null behavior, see function \"and\"."),
- {"x", "y"}};
-
-const FunctionDoc and_not_kleene_doc{
- "Logical 'and not' boolean values (Kleene logic)",
- ("This function behaves as follows with nulls:\n\n"
- "- true and null = null\n"
- "- null and false = null\n"
- "- false and null = false\n"
- "- null and true = false\n"
- "- null and null = null\n"
- "\n"
- "In other words, in this context a null value really means \"unknown\",\n"
- "and an unknown value 'and not' true is always false, as is false\n"
- "'and not' an unknown value.\n"
- "For a different null behavior, see function \"and_not\"."),
- {"x", "y"}};
-
-const FunctionDoc or_kleene_doc{
- "Logical 'or' boolean values (Kleene logic)",
- ("This function behaves as follows with nulls:\n\n"
- "- true or null = true\n"
- "- null and true = true\n"
- "- false and null = null\n"
- "- null and false = null\n"
- "- null and null = null\n"
- "\n"
- "In other words, in this context a null value really means \"unknown\",\n"
- "and an unknown value 'or' true is always true.\n"
- "For a different null behavior, see function \"and\"."),
- {"x", "y"}};
-
+const FunctionDoc invert_doc{"Invert boolean values", "", {"values"}};
+
+const FunctionDoc and_doc{
+ "Logical 'and' boolean values",
+ ("When a null is encountered in either input, a null is output.\n"
+ "For a different null behavior, see function \"and_kleene\"."),
+ {"x", "y"}};
+
+const FunctionDoc and_not_doc{
+ "Logical 'and not' boolean values",
+ ("When a null is encountered in either input, a null is output.\n"
+ "For a different null behavior, see function \"and_not_kleene\"."),
+ {"x", "y"}};
+
+const FunctionDoc or_doc{
+ "Logical 'or' boolean values",
+ ("When a null is encountered in either input, a null is output.\n"
+ "For a different null behavior, see function \"or_kleene\"."),
+ {"x", "y"}};
+
+const FunctionDoc xor_doc{
+ "Logical 'xor' boolean values",
+ ("When a null is encountered in either input, a null is output."),
+ {"x", "y"}};
+
+const FunctionDoc and_kleene_doc{
+ "Logical 'and' boolean values (Kleene logic)",
+ ("This function behaves as follows with nulls:\n\n"
+ "- true and null = null\n"
+ "- null and true = null\n"
+ "- false and null = false\n"
+ "- null and false = false\n"
+ "- null and null = null\n"
+ "\n"
+ "In other words, in this context a null value really means \"unknown\",\n"
+ "and an unknown value 'and' false is always false.\n"
+ "For a different null behavior, see function \"and\"."),
+ {"x", "y"}};
+
+const FunctionDoc and_not_kleene_doc{
+ "Logical 'and not' boolean values (Kleene logic)",
+ ("This function behaves as follows with nulls:\n\n"
+ "- true and null = null\n"
+ "- null and false = null\n"
+ "- false and null = false\n"
+ "- null and true = false\n"
+ "- null and null = null\n"
+ "\n"
+ "In other words, in this context a null value really means \"unknown\",\n"
+ "and an unknown value 'and not' true is always false, as is false\n"
+ "'and not' an unknown value.\n"
+ "For a different null behavior, see function \"and_not\"."),
+ {"x", "y"}};
+
+const FunctionDoc or_kleene_doc{
+ "Logical 'or' boolean values (Kleene logic)",
+ ("This function behaves as follows with nulls:\n\n"
+ "- true or null = true\n"
+ "- null and true = true\n"
+ "- false and null = null\n"
+ "- null and false = null\n"
+ "- null and null = null\n"
+ "\n"
+ "In other words, in this context a null value really means \"unknown\",\n"
+ "and an unknown value 'or' true is always true.\n"
+ "For a different null behavior, see function \"and\"."),
+ {"x", "y"}};
+
} // namespace
namespace internal {
void RegisterScalarBoolean(FunctionRegistry* registry) {
// These functions can write into sliced output bitmaps
- MakeFunction("invert", 1, applicator::SimpleUnary<InvertOp>, &invert_doc, registry);
- MakeFunction("and", 2, applicator::SimpleBinary<AndOp>, &and_doc, registry);
- MakeFunction("and_not", 2, applicator::SimpleBinary<AndNotOp>, &and_not_doc, registry);
- MakeFunction("or", 2, applicator::SimpleBinary<OrOp>, &or_doc, registry);
- MakeFunction("xor", 2, applicator::SimpleBinary<XorOp>, &xor_doc, registry);
-
- MakeFunction("and_kleene", 2, applicator::SimpleBinary<KleeneAndOp>, &and_kleene_doc,
- registry, NullHandling::COMPUTED_PREALLOCATE);
- MakeFunction("and_not_kleene", 2, applicator::SimpleBinary<KleeneAndNotOp>,
- &and_not_kleene_doc, registry, NullHandling::COMPUTED_PREALLOCATE);
- MakeFunction("or_kleene", 2, applicator::SimpleBinary<KleeneOrOp>, &or_kleene_doc,
- registry, NullHandling::COMPUTED_PREALLOCATE);
+ MakeFunction("invert", 1, applicator::SimpleUnary<InvertOp>, &invert_doc, registry);
+ MakeFunction("and", 2, applicator::SimpleBinary<AndOp>, &and_doc, registry);
+ MakeFunction("and_not", 2, applicator::SimpleBinary<AndNotOp>, &and_not_doc, registry);
+ MakeFunction("or", 2, applicator::SimpleBinary<OrOp>, &or_doc, registry);
+ MakeFunction("xor", 2, applicator::SimpleBinary<XorOp>, &xor_doc, registry);
+
+ MakeFunction("and_kleene", 2, applicator::SimpleBinary<KleeneAndOp>, &and_kleene_doc,
+ registry, NullHandling::COMPUTED_PREALLOCATE);
+ MakeFunction("and_not_kleene", 2, applicator::SimpleBinary<KleeneAndNotOp>,
+ &and_not_kleene_doc, registry, NullHandling::COMPUTED_PREALLOCATE);
+ MakeFunction("or_kleene", 2, applicator::SimpleBinary<KleeneOrOp>, &or_kleene_doc,
+ registry, NullHandling::COMPUTED_PREALLOCATE);
}
} // namespace internal
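
The compute_word lambda in KleeneAndNotOp packs the Kleene truth table into three bitwise operations: a slot in out_valid is set exactly when the result is decidable despite any nulls. A minimal standalone sketch of one word of that computation, with an illustrative bit layout (plain C++, no Arrow dependency):

    #include <cstdint>
    #include <iostream>

    int main() {
      // Evaluate and_not_kleene(x, y) for x = {true, false, null}, y = null,
      // one element per bit. "null" means neither *_true nor *_false is set.
      uint64_t left_true = 0b001;    // bit 0: x is true
      uint64_t left_false = 0b010;   // bit 1: x is false
      uint64_t right_true = 0;       // y is null in every slot
      uint64_t right_false = 0;

      // Same formulas as the compute_word lambda in the kernel above.
      uint64_t out_data = left_true & right_false;
      uint64_t out_valid = left_false | right_true | (left_true & right_false);

      // Only bit 1 is valid: false 'and not' unknown is definitely false;
      // the other two slots stay null, matching the and_not_kleene_doc rows.
      std::cout << "data=" << out_data << " valid=" << out_valid << std::endl;
      return 0;
    }
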
diff --git a/contrib/libs/apache/arrow/cpp/src/arrow/compute/kernels/scalar_cast_boolean.cc b/contrib/libs/apache/arrow/cpp/src/arrow/compute/kernels/scalar_cast_boolean.cc
index daee9cff79a..dad94c1ace7 100644
--- a/contrib/libs/apache/arrow/cpp/src/arrow/compute/kernels/scalar_cast_boolean.cc
+++ b/contrib/libs/apache/arrow/cpp/src/arrow/compute/kernels/scalar_cast_boolean.cc
@@ -17,7 +17,7 @@
// Cast types to boolean
-#include "arrow/array/builder_primitive.h"
+#include "arrow/array/builder_primitive.h"
#include "arrow/compute/kernels/common.h"
#include "arrow/compute/kernels/scalar_cast_internal.h"
#include "arrow/util/value_parsing.h"
@@ -31,17 +31,17 @@ namespace internal {
struct IsNonZero {
template <typename OutValue, typename Arg0Value>
- static OutValue Call(KernelContext*, Arg0Value val, Status*) {
+ static OutValue Call(KernelContext*, Arg0Value val, Status*) {
return val != 0;
}
};
struct ParseBooleanString {
template <typename OutValue, typename Arg0Value>
- static OutValue Call(KernelContext*, Arg0Value val, Status* st) {
+ static OutValue Call(KernelContext*, Arg0Value val, Status* st) {
bool result = false;
if (ARROW_PREDICT_FALSE(!ParseValue<BooleanType>(val.data(), val.size(), &result))) {
- *st = Status::Invalid("Failed to parse value: ", val);
+ *st = Status::Invalid("Failed to parse value: ", val);
}
return result;
}
@@ -50,7 +50,7 @@ struct ParseBooleanString {
std::vector<std::shared_ptr<CastFunction>> GetBooleanCasts() {
auto func = std::make_shared<CastFunction>("cast_boolean", Type::BOOL);
AddCommonCasts(Type::BOOL, boolean(), func.get());
- AddZeroCopyCast(Type::BOOL, boolean(), boolean(), func.get());
+ AddZeroCopyCast(Type::BOOL, boolean(), boolean(), func.get());
for (const auto& ty : NumericTypes()) {
ArrayKernelExec exec =
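
The IsNonZero and ParseBooleanString kernels above are reached through the public cast API. A hedged usage sketch, assuming only standard Arrow public calls (builders and arrow::compute::Cast); the expected output is noted in comments:

    #include <iostream>
    #include <memory>
    #include <arrow/api.h>
    #include <arrow/compute/api.h>

    arrow::Status CastToBooleanExample() {
      // IsNonZero semantics: 0 -> false, anything else -> true, nulls propagate.
      arrow::Int32Builder builder;
      ARROW_RETURN_NOT_OK(builder.AppendValues({0, 1, 5}));
      ARROW_RETURN_NOT_OK(builder.AppendNull());
      std::shared_ptr<arrow::Array> ints;
      ARROW_RETURN_NOT_OK(builder.Finish(&ints));

      ARROW_ASSIGN_OR_RAISE(std::shared_ptr<arrow::Array> bools,
                            arrow::compute::Cast(*ints, arrow::boolean()));
      std::cout << bools->ToString() << std::endl;  // [false, true, true, null]
      return arrow::Status::OK();
    }

    int main() {
      auto st = CastToBooleanExample();
      if (!st.ok()) {
        std::cerr << st.ToString() << std::endl;
        return 1;
      }
      return 0;
    }
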
diff --git a/contrib/libs/apache/arrow/cpp/src/arrow/compute/kernels/scalar_cast_dictionary.cc b/contrib/libs/apache/arrow/cpp/src/arrow/compute/kernels/scalar_cast_dictionary.cc
index b8be4d78696..b1e1164fd34 100644
--- a/contrib/libs/apache/arrow/cpp/src/arrow/compute/kernels/scalar_cast_dictionary.cc
+++ b/contrib/libs/apache/arrow/cpp/src/arrow/compute/kernels/scalar_cast_dictionary.cc
@@ -1,126 +1,126 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-// Implementation of casting to dictionary type
-
-#include <arrow/util/bitmap_ops.h>
-#include <arrow/util/checked_cast.h>
-
-#include "arrow/array/builder_primitive.h"
-#include "arrow/compute/cast_internal.h"
-#include "arrow/compute/kernels/scalar_cast_internal.h"
-#include "arrow/compute/kernels/util_internal.h"
-#include "arrow/util/int_util.h"
-
-namespace arrow {
-using internal::CopyBitmap;
-
-namespace compute {
-namespace internal {
-
-Status CastDictionary(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
- const CastOptions& options = CastState::Get(ctx);
- auto out_type = std::static_pointer_cast<DictionaryType>(out->type());
-
- // if out type is same as in type, return input
- if (out_type->Equals(batch[0].type())) {
- *out = batch[0];
- return Status::OK();
- }
-
- if (batch[0].is_scalar()) { // if input is scalar
- auto in_scalar = checked_cast<const DictionaryScalar&>(*batch[0].scalar());
-
- // if invalid scalar, return null scalar
- if (!in_scalar.is_valid) {
- *out = MakeNullScalar(out_type);
- return Status::OK();
- }
-
- Datum casted_index, casted_dict;
- if (in_scalar.value.index->type->Equals(out_type->index_type())) {
- casted_index = in_scalar.value.index;
- } else {
- ARROW_ASSIGN_OR_RAISE(casted_index,
- Cast(in_scalar.value.index, out_type->index_type(), options,
- ctx->exec_context()));
- }
-
- if (in_scalar.value.dictionary->type()->Equals(out_type->value_type())) {
- casted_dict = in_scalar.value.dictionary;
- } else {
- ARROW_ASSIGN_OR_RAISE(
- casted_dict, Cast(in_scalar.value.dictionary, out_type->value_type(), options,
- ctx->exec_context()));
- }
-
- *out = std::static_pointer_cast<Scalar>(
- DictionaryScalar::Make(casted_index.scalar(), casted_dict.make_array()));
-
- return Status::OK();
- }
-
- // if input is array
- const std::shared_ptr<ArrayData>& in_array = batch[0].array();
- const auto& in_type = checked_cast<const DictionaryType&>(*in_array->type);
-
- ArrayData* out_array = out->mutable_array();
-
- if (in_type.index_type()->Equals(out_type->index_type())) {
- out_array->buffers[0] = in_array->buffers[0];
- out_array->buffers[1] = in_array->buffers[1];
- out_array->null_count = in_array->GetNullCount();
- out_array->offset = in_array->offset;
- } else {
- // for indices, create a dummy ArrayData with index_type()
- const std::shared_ptr<ArrayData>& indices_arr =
- ArrayData::Make(in_type.index_type(), in_array->length, in_array->buffers,
- in_array->GetNullCount(), in_array->offset);
- ARROW_ASSIGN_OR_RAISE(auto casted_indices, Cast(indices_arr, out_type->index_type(),
- options, ctx->exec_context()));
- out_array->buffers[0] = std::move(casted_indices.array()->buffers[0]);
- out_array->buffers[1] = std::move(casted_indices.array()->buffers[1]);
- }
-
- // data (dict)
- if (in_type.value_type()->Equals(out_type->value_type())) {
- out_array->dictionary = in_array->dictionary;
- } else {
- const std::shared_ptr<Array>& dict_arr = MakeArray(in_array->dictionary);
- ARROW_ASSIGN_OR_RAISE(auto casted_data, Cast(dict_arr, out_type->value_type(),
- options, ctx->exec_context()));
- out_array->dictionary = casted_data.array();
- }
- return Status::OK();
-}
-
-std::vector<std::shared_ptr<CastFunction>> GetDictionaryCasts() {
- auto func = std::make_shared<CastFunction>("cast_dictionary", Type::DICTIONARY);
-
- AddCommonCasts(Type::DICTIONARY, kOutputTargetType, func.get());
- ScalarKernel kernel({InputType(Type::DICTIONARY)}, kOutputTargetType, CastDictionary);
- kernel.null_handling = NullHandling::COMPUTED_NO_PREALLOCATE;
- kernel.mem_allocation = MemAllocation::NO_PREALLOCATE;
-
- DCHECK_OK(func->AddKernel(Type::DICTIONARY, std::move(kernel)));
-
- return {func};
-}
-
-} // namespace internal
-} // namespace compute
-} // namespace arrow
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+// Implementation of casting to dictionary type
+
+#include <arrow/util/bitmap_ops.h>
+#include <arrow/util/checked_cast.h>
+
+#include "arrow/array/builder_primitive.h"
+#include "arrow/compute/cast_internal.h"
+#include "arrow/compute/kernels/scalar_cast_internal.h"
+#include "arrow/compute/kernels/util_internal.h"
+#include "arrow/util/int_util.h"
+
+namespace arrow {
+using internal::CopyBitmap;
+
+namespace compute {
+namespace internal {
+
+Status CastDictionary(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
+ const CastOptions& options = CastState::Get(ctx);
+ auto out_type = std::static_pointer_cast<DictionaryType>(out->type());
+
+ // if out type is same as in type, return input
+ if (out_type->Equals(batch[0].type())) {
+ *out = batch[0];
+ return Status::OK();
+ }
+
+ if (batch[0].is_scalar()) { // if input is scalar
+ auto in_scalar = checked_cast<const DictionaryScalar&>(*batch[0].scalar());
+
+ // if invalid scalar, return null scalar
+ if (!in_scalar.is_valid) {
+ *out = MakeNullScalar(out_type);
+ return Status::OK();
+ }
+
+ Datum casted_index, casted_dict;
+ if (in_scalar.value.index->type->Equals(out_type->index_type())) {
+ casted_index = in_scalar.value.index;
+ } else {
+ ARROW_ASSIGN_OR_RAISE(casted_index,
+ Cast(in_scalar.value.index, out_type->index_type(), options,
+ ctx->exec_context()));
+ }
+
+ if (in_scalar.value.dictionary->type()->Equals(out_type->value_type())) {
+ casted_dict = in_scalar.value.dictionary;
+ } else {
+ ARROW_ASSIGN_OR_RAISE(
+ casted_dict, Cast(in_scalar.value.dictionary, out_type->value_type(), options,
+ ctx->exec_context()));
+ }
+
+ *out = std::static_pointer_cast<Scalar>(
+ DictionaryScalar::Make(casted_index.scalar(), casted_dict.make_array()));
+
+ return Status::OK();
+ }
+
+ // if input is array
+ const std::shared_ptr<ArrayData>& in_array = batch[0].array();
+ const auto& in_type = checked_cast<const DictionaryType&>(*in_array->type);
+
+ ArrayData* out_array = out->mutable_array();
+
+ if (in_type.index_type()->Equals(out_type->index_type())) {
+ out_array->buffers[0] = in_array->buffers[0];
+ out_array->buffers[1] = in_array->buffers[1];
+ out_array->null_count = in_array->GetNullCount();
+ out_array->offset = in_array->offset;
+ } else {
+ // for indices, create a dummy ArrayData with index_type()
+ const std::shared_ptr<ArrayData>& indices_arr =
+ ArrayData::Make(in_type.index_type(), in_array->length, in_array->buffers,
+ in_array->GetNullCount(), in_array->offset);
+ ARROW_ASSIGN_OR_RAISE(auto casted_indices, Cast(indices_arr, out_type->index_type(),
+ options, ctx->exec_context()));
+ out_array->buffers[0] = std::move(casted_indices.array()->buffers[0]);
+ out_array->buffers[1] = std::move(casted_indices.array()->buffers[1]);
+ }
+
+ // data (dict)
+ if (in_type.value_type()->Equals(out_type->value_type())) {
+ out_array->dictionary = in_array->dictionary;
+ } else {
+ const std::shared_ptr<Array>& dict_arr = MakeArray(in_array->dictionary);
+ ARROW_ASSIGN_OR_RAISE(auto casted_data, Cast(dict_arr, out_type->value_type(),
+ options, ctx->exec_context()));
+ out_array->dictionary = casted_data.array();
+ }
+ return Status::OK();
+}
+
+std::vector<std::shared_ptr<CastFunction>> GetDictionaryCasts() {
+ auto func = std::make_shared<CastFunction>("cast_dictionary", Type::DICTIONARY);
+
+ AddCommonCasts(Type::DICTIONARY, kOutputTargetType, func.get());
+ ScalarKernel kernel({InputType(Type::DICTIONARY)}, kOutputTargetType, CastDictionary);
+ kernel.null_handling = NullHandling::COMPUTED_NO_PREALLOCATE;
+ kernel.mem_allocation = MemAllocation::NO_PREALLOCATE;
+
+ DCHECK_OK(func->AddKernel(Type::DICTIONARY, std::move(kernel)));
+
+ return {func};
+}
+
+} // namespace internal
+} // namespace compute
+} // namespace arrow
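
CastDictionary recasts the index and dictionary halves of a DictionaryArray independently, reusing buffers when a side already matches. A sketch of the array path that widens only the index type, using public helpers (DictionaryArray::FromArrays and arrow::compute::Cast); values are illustrative:

    #include <iostream>
    #include <memory>
    #include <arrow/api.h>
    #include <arrow/compute/api.h>

    arrow::Status WidenDictionaryIndices() {
      // dict<int8 -> utf8> array encoding ["a", "b", "a"].
      arrow::Int8Builder index_builder;
      ARROW_RETURN_NOT_OK(index_builder.AppendValues({0, 1, 0}));
      std::shared_ptr<arrow::Array> indices;
      ARROW_RETURN_NOT_OK(index_builder.Finish(&indices));

      arrow::StringBuilder dict_builder;
      ARROW_RETURN_NOT_OK(dict_builder.AppendValues({"a", "b"}));
      std::shared_ptr<arrow::Array> dict;
      ARROW_RETURN_NOT_OK(dict_builder.Finish(&dict));

      ARROW_ASSIGN_OR_RAISE(
          std::shared_ptr<arrow::Array> dict_array,
          arrow::DictionaryArray::FromArrays(
              arrow::dictionary(arrow::int8(), arrow::utf8()), indices, dict));

      // Exercises the index-cast branch of CastDictionary: int8 -> int32 indices,
      // while the utf8 dictionary is shared untouched.
      ARROW_ASSIGN_OR_RAISE(
          std::shared_ptr<arrow::Array> widened,
          arrow::compute::Cast(*dict_array,
                               arrow::dictionary(arrow::int32(), arrow::utf8())));
      std::cout << widened->ToString() << std::endl;
      return arrow::Status::OK();
    }

    int main() { return WidenDictionaryIndices().ok() ? 0 : 1; }
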
diff --git a/contrib/libs/apache/arrow/cpp/src/arrow/compute/kernels/scalar_cast_internal.cc b/contrib/libs/apache/arrow/cpp/src/arrow/compute/kernels/scalar_cast_internal.cc
index 660250359c4..198c82bd97e 100644
--- a/contrib/libs/apache/arrow/cpp/src/arrow/compute/kernels/scalar_cast_internal.cc
+++ b/contrib/libs/apache/arrow/cpp/src/arrow/compute/kernels/scalar_cast_internal.cc
@@ -148,40 +148,40 @@ void CastNumberToNumberUnsafe(Type::type in_type, Type::type out_type, const Dat
// ----------------------------------------------------------------------
-Status UnpackDictionary(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
- DCHECK(out->is_array());
-
+Status UnpackDictionary(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
+ DCHECK(out->is_array());
+
DictionaryArray dict_arr(batch[0].array());
const CastOptions& options = checked_cast<const CastState&>(*ctx->state()).options;
const auto& dict_type = *dict_arr.dictionary()->type();
- if (!dict_type.Equals(options.to_type) && !CanCast(dict_type, *options.to_type)) {
- return Status::Invalid("Cast type ", options.to_type->ToString(),
- " incompatible with dictionary type ", dict_type.ToString());
+ if (!dict_type.Equals(options.to_type) && !CanCast(dict_type, *options.to_type)) {
+ return Status::Invalid("Cast type ", options.to_type->ToString(),
+ " incompatible with dictionary type ", dict_type.ToString());
}
- ARROW_ASSIGN_OR_RAISE(*out,
- Take(Datum(dict_arr.dictionary()), Datum(dict_arr.indices()),
- TakeOptions::Defaults(), ctx->exec_context()));
-
- if (!dict_type.Equals(options.to_type)) {
- ARROW_ASSIGN_OR_RAISE(*out, Cast(*out, options));
+ ARROW_ASSIGN_OR_RAISE(*out,
+ Take(Datum(dict_arr.dictionary()), Datum(dict_arr.indices()),
+ TakeOptions::Defaults(), ctx->exec_context()));
+
+ if (!dict_type.Equals(options.to_type)) {
+ ARROW_ASSIGN_OR_RAISE(*out, Cast(*out, options));
}
- return Status::OK();
+ return Status::OK();
}
-Status OutputAllNull(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
- if (out->is_scalar()) {
- out->scalar()->is_valid = false;
- } else {
- ArrayData* output = out->mutable_array();
- output->buffers = {nullptr};
- output->null_count = batch.length;
- }
- return Status::OK();
+Status OutputAllNull(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
+ if (out->is_scalar()) {
+ out->scalar()->is_valid = false;
+ } else {
+ ArrayData* output = out->mutable_array();
+ output->buffers = {nullptr};
+ output->null_count = batch.length;
+ }
+ return Status::OK();
}
-Status CastFromExtension(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
+Status CastFromExtension(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
const CastOptions& options = checked_cast<const CastState*>(ctx->state())->options;
const DataType& in_type = *batch[0].type();
@@ -190,20 +190,20 @@ Status CastFromExtension(KernelContext* ctx, const ExecBatch& batch, Datum* out)
ExtensionArray extension(batch[0].array());
Datum casted_storage;
- RETURN_NOT_OK(Cast(*extension.storage(), out->type(), options, ctx->exec_context())
- .Value(&casted_storage));
+ RETURN_NOT_OK(Cast(*extension.storage(), out->type(), options, ctx->exec_context())
+ .Value(&casted_storage));
out->value = casted_storage.array();
- return Status::OK();
+ return Status::OK();
}
-Status CastFromNull(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
- if (!batch[0].is_scalar()) {
- ArrayData* output = out->mutable_array();
- std::shared_ptr<Array> nulls;
- RETURN_NOT_OK(MakeArrayOfNull(output->type, batch.length).Value(&nulls));
- out->value = nulls->data();
- }
- return Status::OK();
+Status CastFromNull(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
+ if (!batch[0].is_scalar()) {
+ ArrayData* output = out->mutable_array();
+ std::shared_ptr<Array> nulls;
+ RETURN_NOT_OK(MakeArrayOfNull(output->type, batch.length).Value(&nulls));
+ out->value = nulls->data();
+ }
+ return Status::OK();
}
Result<ValueDescr> ResolveOutputFromOptions(KernelContext* ctx,
@@ -223,25 +223,25 @@ Result<ValueDescr> ResolveOutputFromOptions(KernelContext* ctx,
OutputType kOutputTargetType(ResolveOutputFromOptions);
-Status ZeroCopyCastExec(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
- DCHECK_EQ(batch[0].kind(), Datum::ARRAY);
- // Make a copy of the buffers into a destination array without carrying
- // the type
- const ArrayData& input = *batch[0].array();
- ArrayData* output = out->mutable_array();
- output->length = input.length;
- output->SetNullCount(input.null_count);
- output->buffers = input.buffers;
- output->offset = input.offset;
- output->child_data = input.child_data;
- return Status::OK();
+Status ZeroCopyCastExec(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
+ DCHECK_EQ(batch[0].kind(), Datum::ARRAY);
+ // Make a copy of the buffers into a destination array without carrying
+ // the type
+ const ArrayData& input = *batch[0].array();
+ ArrayData* output = out->mutable_array();
+ output->length = input.length;
+ output->SetNullCount(input.null_count);
+ output->buffers = input.buffers;
+ output->offset = input.offset;
+ output->child_data = input.child_data;
+ return Status::OK();
}
void AddZeroCopyCast(Type::type in_type_id, InputType in_type, OutputType out_type,
CastFunction* func) {
auto sig = KernelSignature::Make({in_type}, out_type);
ScalarKernel kernel;
- kernel.exec = TrivialScalarUnaryAsArraysExec(ZeroCopyCastExec);
+ kernel.exec = TrivialScalarUnaryAsArraysExec(ZeroCopyCastExec);
kernel.signature = sig;
kernel.null_handling = NullHandling::COMPUTED_NO_PREALLOCATE;
kernel.mem_allocation = MemAllocation::NO_PREALLOCATE;
@@ -255,12 +255,12 @@ static bool CanCastFromDictionary(Type::type type_id) {
void AddCommonCasts(Type::type out_type_id, OutputType out_ty, CastFunction* func) {
// From null to this type
- ScalarKernel kernel;
- kernel.exec = CastFromNull;
- kernel.signature = KernelSignature::Make({null()}, out_ty);
- kernel.null_handling = NullHandling::COMPUTED_NO_PREALLOCATE;
- kernel.mem_allocation = MemAllocation::NO_PREALLOCATE;
- DCHECK_OK(func->AddKernel(Type::NA, std::move(kernel)));
+ ScalarKernel kernel;
+ kernel.exec = CastFromNull;
+ kernel.signature = KernelSignature::Make({null()}, out_ty);
+ kernel.null_handling = NullHandling::COMPUTED_NO_PREALLOCATE;
+ kernel.mem_allocation = MemAllocation::NO_PREALLOCATE;
+ DCHECK_OK(func->AddKernel(Type::NA, std::move(kernel)));
// From dictionary to this type
if (CanCastFromDictionary(out_type_id)) {
@@ -268,10 +268,10 @@ void AddCommonCasts(Type::type out_type_id, OutputType out_ty, CastFunction* fun
//
// XXX: Uses Take and does its own memory allocation for the moment. We can
// fix this later.
- DCHECK_OK(func->AddKernel(Type::DICTIONARY, {InputType(Type::DICTIONARY)}, out_ty,
- TrivialScalarUnaryAsArraysExec(UnpackDictionary),
- NullHandling::COMPUTED_NO_PREALLOCATE,
- MemAllocation::NO_PREALLOCATE));
+ DCHECK_OK(func->AddKernel(Type::DICTIONARY, {InputType(Type::DICTIONARY)}, out_ty,
+ TrivialScalarUnaryAsArraysExec(UnpackDictionary),
+ NullHandling::COMPUTED_NO_PREALLOCATE,
+ MemAllocation::NO_PREALLOCATE));
}
// From extension type to this type
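
UnpackDictionary above decodes via Take(dictionary, indices), with a follow-up cast when the target differs from the dictionary value type. A sketch that exercises it end to end through the public API (DictionaryEncode and Cast; values illustrative):

    #include <iostream>
    #include <memory>
    #include <arrow/api.h>
    #include <arrow/compute/api.h>

    arrow::Status DecodeDictionary() {
      // Encode ["x", "y", "x"] as a dictionary array with the public helper,
      // then decode it back to dense utf8, which routes through UnpackDictionary.
      arrow::StringBuilder builder;
      ARROW_RETURN_NOT_OK(builder.AppendValues({"x", "y", "x"}));
      std::shared_ptr<arrow::Array> dense;
      ARROW_RETURN_NOT_OK(builder.Finish(&dense));

      ARROW_ASSIGN_OR_RAISE(arrow::Datum encoded,
                            arrow::compute::DictionaryEncode(dense));
      ARROW_ASSIGN_OR_RAISE(arrow::Datum decoded,
                            arrow::compute::Cast(encoded, arrow::utf8()));
      std::cout << decoded.make_array()->ToString() << std::endl;  // ["x","y","x"]
      return arrow::Status::OK();
    }

    int main() { return DecodeDictionary().ok() ? 0 : 1; }
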
diff --git a/contrib/libs/apache/arrow/cpp/src/arrow/compute/kernels/scalar_cast_internal.h b/contrib/libs/apache/arrow/cpp/src/arrow/compute/kernels/scalar_cast_internal.h
index bffa64988a6..2419d898a68 100644
--- a/contrib/libs/apache/arrow/cpp/src/arrow/compute/kernels/scalar_cast_internal.h
+++ b/contrib/libs/apache/arrow/cpp/src/arrow/compute/kernels/scalar_cast_internal.h
@@ -21,7 +21,7 @@
#include "arrow/compute/cast.h" // IWYU pragma: export
#include "arrow/compute/cast_internal.h" // IWYU pragma: export
#include "arrow/compute/kernels/common.h"
-#include "arrow/compute/kernels/util_internal.h"
+#include "arrow/compute/kernels/util_internal.h"
namespace arrow {
@@ -37,10 +37,10 @@ struct CastFunctor {};
template <typename O, typename I>
struct CastFunctor<
O, I, enable_if_t<std::is_same<O, I>::value && is_parameter_free_type<I>::value>> {
- static Status Exec(KernelContext*, const ExecBatch&, Datum*) { return Status::OK(); }
+ static Status Exec(KernelContext*, const ExecBatch&, Datum*) { return Status::OK(); }
};
-Status CastFromExtension(KernelContext* ctx, const ExecBatch& batch, Datum* out);
+Status CastFromExtension(KernelContext* ctx, const ExecBatch& batch, Datum* out);
// Utility for numeric casts
void CastNumberToNumberUnsafe(Type::type in_type, Type::type out_type, const Datum& input,
@@ -49,23 +49,23 @@ void CastNumberToNumberUnsafe(Type::type in_type, Type::type out_type, const Dat
// ----------------------------------------------------------------------
// Dictionary to other things
-Status UnpackDictionary(KernelContext* ctx, const ExecBatch& batch, Datum* out);
+Status UnpackDictionary(KernelContext* ctx, const ExecBatch& batch, Datum* out);
-Status OutputAllNull(KernelContext* ctx, const ExecBatch& batch, Datum* out);
+Status OutputAllNull(KernelContext* ctx, const ExecBatch& batch, Datum* out);
-Status CastFromNull(KernelContext* ctx, const ExecBatch& batch, Datum* out);
+Status CastFromNull(KernelContext* ctx, const ExecBatch& batch, Datum* out);
-// Adds a cast function where CastFunctor is specialized and the input and output
-// types are parameter free (have a type_singleton). Scalar inputs are handled by
-// wrapping with TrivialScalarUnaryAsArraysExec.
+// Adds a cast function where CastFunctor is specialized and the input and output
+// types are parameter free (have a type_singleton). Scalar inputs are handled by
+// wrapping with TrivialScalarUnaryAsArraysExec.
template <typename InType, typename OutType>
void AddSimpleCast(InputType in_ty, OutputType out_ty, CastFunction* func) {
- DCHECK_OK(func->AddKernel(
- InType::type_id, {in_ty}, out_ty,
- TrivialScalarUnaryAsArraysExec(CastFunctor<OutType, InType>::Exec)));
+ DCHECK_OK(func->AddKernel(
+ InType::type_id, {in_ty}, out_ty,
+ TrivialScalarUnaryAsArraysExec(CastFunctor<OutType, InType>::Exec)));
}
-Status ZeroCopyCastExec(KernelContext* ctx, const ExecBatch& batch, Datum* out);
+Status ZeroCopyCastExec(KernelContext* ctx, const ExecBatch& batch, Datum* out);
void AddZeroCopyCast(Type::type in_type_id, InputType in_type, OutputType out_type,
CastFunction* func);
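
ZeroCopyCastExec, declared above, only re-labels the type and shares the input buffers. A sketch that makes the sharing observable, under the assumption that date32 to int32 is one of the casts registered through AddZeroCopyCast in this version:

    #include <iostream>
    #include <memory>
    #include <arrow/api.h>
    #include <arrow/compute/api.h>

    arrow::Status ZeroCopyExample() {
      arrow::Date32Builder builder;
      ARROW_RETURN_NOT_OK(builder.AppendValues({0, 18628}));  // days since epoch
      std::shared_ptr<arrow::Array> dates;
      ARROW_RETURN_NOT_OK(builder.Finish(&dates));

      ARROW_ASSIGN_OR_RAISE(std::shared_ptr<arrow::Array> ints,
                            arrow::compute::Cast(*dates, arrow::int32()));
      // A zero-copy cast shares the value buffer instead of copying it.
      std::cout << std::boolalpha
                << (ints->data()->buffers[1] == dates->data()->buffers[1])
                << std::endl;  // expected: true
      return arrow::Status::OK();
    }

    int main() { return ZeroCopyExample().ok() ? 0 : 1; }
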
diff --git a/contrib/libs/apache/arrow/cpp/src/arrow/compute/kernels/scalar_cast_nested.cc b/contrib/libs/apache/arrow/cpp/src/arrow/compute/kernels/scalar_cast_nested.cc
index c5fccf30311..ec92dbb5d60 100644
--- a/contrib/libs/apache/arrow/cpp/src/arrow/compute/kernels/scalar_cast_nested.cc
+++ b/contrib/libs/apache/arrow/cpp/src/arrow/compute/kernels/scalar_cast_nested.cc
@@ -20,74 +20,74 @@
#include <utility>
#include <vector>
-#include "arrow/array/builder_nested.h"
-#include "arrow/compute/api_scalar.h"
+#include "arrow/array/builder_nested.h"
+#include "arrow/compute/api_scalar.h"
#include "arrow/compute/cast.h"
#include "arrow/compute/kernels/common.h"
#include "arrow/compute/kernels/scalar_cast_internal.h"
-#include "arrow/util/bitmap_ops.h"
+#include "arrow/util/bitmap_ops.h"
namespace arrow {
-
-using internal::CopyBitmap;
-
+
+using internal::CopyBitmap;
+
namespace compute {
namespace internal {
template <typename Type>
-Status CastListExec(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
- using offset_type = typename Type::offset_type;
- using ScalarType = typename TypeTraits<Type>::ScalarType;
-
- const CastOptions& options = CastState::Get(ctx);
-
- auto child_type = checked_cast<const Type&>(*out->type()).value_type();
-
- if (out->kind() == Datum::SCALAR) {
- const auto& in_scalar = checked_cast<const ScalarType&>(*batch[0].scalar());
- auto out_scalar = checked_cast<ScalarType*>(out->scalar().get());
-
- DCHECK(!out_scalar->is_valid);
- if (in_scalar.is_valid) {
- ARROW_ASSIGN_OR_RAISE(out_scalar->value, Cast(*in_scalar.value, child_type, options,
- ctx->exec_context()));
-
- out_scalar->is_valid = true;
- }
- return Status::OK();
+Status CastListExec(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
+ using offset_type = typename Type::offset_type;
+ using ScalarType = typename TypeTraits<Type>::ScalarType;
+
+ const CastOptions& options = CastState::Get(ctx);
+
+ auto child_type = checked_cast<const Type&>(*out->type()).value_type();
+
+ if (out->kind() == Datum::SCALAR) {
+ const auto& in_scalar = checked_cast<const ScalarType&>(*batch[0].scalar());
+ auto out_scalar = checked_cast<ScalarType*>(out->scalar().get());
+
+ DCHECK(!out_scalar->is_valid);
+ if (in_scalar.is_valid) {
+ ARROW_ASSIGN_OR_RAISE(out_scalar->value, Cast(*in_scalar.value, child_type, options,
+ ctx->exec_context()));
+
+ out_scalar->is_valid = true;
+ }
+ return Status::OK();
+ }
+
+ const ArrayData& in_array = *batch[0].array();
+ ArrayData* out_array = out->mutable_array();
+
+ // Copy from parent
+ out_array->buffers = in_array.buffers;
+ Datum values = in_array.child_data[0];
+
+ if (in_array.offset != 0) {
+ if (in_array.buffers[0]) {
+ ARROW_ASSIGN_OR_RAISE(out_array->buffers[0],
+ CopyBitmap(ctx->memory_pool(), in_array.buffers[0]->data(),
+ in_array.offset, in_array.length));
+ }
+ ARROW_ASSIGN_OR_RAISE(out_array->buffers[1],
+ ctx->Allocate(sizeof(offset_type) * (in_array.length + 1)));
+
+ auto offsets = in_array.GetValues<offset_type>(1);
+ auto shifted_offsets = out_array->GetMutableValues<offset_type>(1);
+
+ for (int64_t i = 0; i < in_array.length + 1; ++i) {
+ shifted_offsets[i] = offsets[i] - offsets[0];
+ }
+ values = in_array.child_data[0]->Slice(offsets[0], offsets[in_array.length]);
}
- const ArrayData& in_array = *batch[0].array();
- ArrayData* out_array = out->mutable_array();
-
- // Copy from parent
- out_array->buffers = in_array.buffers;
- Datum values = in_array.child_data[0];
-
- if (in_array.offset != 0) {
- if (in_array.buffers[0]) {
- ARROW_ASSIGN_OR_RAISE(out_array->buffers[0],
- CopyBitmap(ctx->memory_pool(), in_array.buffers[0]->data(),
- in_array.offset, in_array.length));
- }
- ARROW_ASSIGN_OR_RAISE(out_array->buffers[1],
- ctx->Allocate(sizeof(offset_type) * (in_array.length + 1)));
-
- auto offsets = in_array.GetValues<offset_type>(1);
- auto shifted_offsets = out_array->GetMutableValues<offset_type>(1);
-
- for (int64_t i = 0; i < in_array.length + 1; ++i) {
- shifted_offsets[i] = offsets[i] - offsets[0];
- }
- values = in_array.child_data[0]->Slice(offsets[0], offsets[in_array.length]);
- }
-
- ARROW_ASSIGN_OR_RAISE(Datum cast_values,
- Cast(values, child_type, options, ctx->exec_context()));
-
- DCHECK_EQ(Datum::ARRAY, cast_values.kind());
- out_array->child_data.push_back(cast_values.array());
- return Status::OK();
+ ARROW_ASSIGN_OR_RAISE(Datum cast_values,
+ Cast(values, child_type, options, ctx->exec_context()));
+
+ DCHECK_EQ(Datum::ARRAY, cast_values.kind());
+ out_array->child_data.push_back(cast_values.array());
+ return Status::OK();
}
template <typename Type>
@@ -120,12 +120,12 @@ std::vector<std::shared_ptr<CastFunction>> GetNestedCasts() {
auto cast_struct = std::make_shared<CastFunction>("cast_struct", Type::STRUCT);
AddCommonCasts(Type::STRUCT, kOutputTargetType, cast_struct.get());
- // So is dictionary
- auto cast_dictionary =
- std::make_shared<CastFunction>("cast_dictionary", Type::DICTIONARY);
- AddCommonCasts(Type::DICTIONARY, kOutputTargetType, cast_dictionary.get());
-
- return {cast_list, cast_large_list, cast_fsl, cast_struct, cast_dictionary};
+ // So is dictionary
+ auto cast_dictionary =
+ std::make_shared<CastFunction>("cast_dictionary", Type::DICTIONARY);
+ AddCommonCasts(Type::DICTIONARY, kOutputTargetType, cast_dictionary.get());
+
+ return {cast_list, cast_large_list, cast_fsl, cast_struct, cast_dictionary};
}
} // namespace internal
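
The offset-shifting loop in CastListExec rebases a sliced list's offsets so the first kept list starts at zero, then slices the child values accordingly. The arithmetic in isolation, with illustrative offsets (plain C++):

    #include <cstdint>
    #include <iostream>
    #include <vector>

    int main() {
      // Offsets of a 3-element list array: lists [0,2), [2,4), [4,7).
      std::vector<int32_t> offsets = {0, 2, 4, 7};
      int64_t slice_offset = 1, slice_length = 2;  // keep the last two lists

      // shifted_offsets[i] = offsets[i] - offsets[0] over the sliced window,
      // as in the kernel's loop above.
      const int32_t* window = offsets.data() + slice_offset;
      std::vector<int32_t> shifted(slice_length + 1);
      for (int64_t i = 0; i < slice_length + 1; ++i) {
        shifted[i] = window[i] - window[0];
      }
      // shifted = {0, 2, 5}; the child values are then sliced starting at
      // old offset 2, so the rebased offsets index them correctly.
      for (int32_t v : shifted) std::cout << v << " ";
      std::cout << std::endl;
      return 0;
    }
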
diff --git a/contrib/libs/apache/arrow/cpp/src/arrow/compute/kernels/scalar_cast_numeric.cc b/contrib/libs/apache/arrow/cpp/src/arrow/compute/kernels/scalar_cast_numeric.cc
index 4ada0b08afe..cc7b533f262 100644
--- a/contrib/libs/apache/arrow/cpp/src/arrow/compute/kernels/scalar_cast_numeric.cc
+++ b/contrib/libs/apache/arrow/cpp/src/arrow/compute/kernels/scalar_cast_numeric.cc
@@ -17,10 +17,10 @@
// Implementation of casting to integer, floating point, or decimal types
-#include "arrow/array/builder_primitive.h"
+#include "arrow/array/builder_primitive.h"
#include "arrow/compute/kernels/common.h"
#include "arrow/compute/kernels/scalar_cast_internal.h"
-#include "arrow/compute/kernels/util_internal.h"
+#include "arrow/compute/kernels/util_internal.h"
#include "arrow/util/bit_block_counter.h"
#include "arrow/util/int_util.h"
#include "arrow/util/value_parsing.h"
@@ -36,18 +36,18 @@ using internal::ParseValue;
namespace compute {
namespace internal {
-Status CastIntegerToInteger(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
+Status CastIntegerToInteger(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
const auto& options = checked_cast<const CastState*>(ctx->state())->options;
if (!options.allow_int_overflow) {
- RETURN_NOT_OK(IntegersCanFit(batch[0], *out->type()));
+ RETURN_NOT_OK(IntegersCanFit(batch[0], *out->type()));
}
CastNumberToNumberUnsafe(batch[0].type()->id(), out->type()->id(), batch[0], out);
- return Status::OK();
+ return Status::OK();
}
-Status CastFloatingToFloating(KernelContext*, const ExecBatch& batch, Datum* out) {
+Status CastFloatingToFloating(KernelContext*, const ExecBatch& batch, Datum* out) {
CastNumberToNumberUnsafe(batch[0].type()->id(), out->type()->id(), batch[0], out);
- return Status::OK();
+ return Status::OK();
}
// ----------------------------------------------------------------------
@@ -65,7 +65,7 @@ Status CheckFloatTruncation(const Datum& input, const Datum& output) {
return is_valid && static_cast<InT>(out_val) != in_val;
};
auto GetErrorMessage = [&](InT val) {
- return Status::Invalid("Float value ", val, " was truncated converting to ",
+ return Status::Invalid("Float value ", val, " was truncated converting to ",
*output.type());
};
@@ -170,13 +170,13 @@ Status CheckFloatToIntTruncation(const Datum& input, const Datum& output) {
return Status::OK();
}
-Status CastFloatingToInteger(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
+Status CastFloatingToInteger(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
const auto& options = checked_cast<const CastState*>(ctx->state())->options;
CastNumberToNumberUnsafe(batch[0].type()->id(), out->type()->id(), batch[0], out);
if (!options.allow_float_truncate) {
- RETURN_NOT_OK(CheckFloatToIntTruncation(batch[0], *out));
+ RETURN_NOT_OK(CheckFloatToIntTruncation(batch[0], *out));
}
- return Status::OK();
+ return Status::OK();
}
// ----------------------------------------------------------------------
@@ -249,14 +249,14 @@ Status CheckForIntegerToFloatingTruncation(const Datum& input, Type::type out_ty
return Status::OK();
}
-Status CastIntegerToFloating(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
+Status CastIntegerToFloating(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
const auto& options = checked_cast<const CastState*>(ctx->state())->options;
Type::type out_type = out->type()->id();
if (!options.allow_float_truncate) {
- RETURN_NOT_OK(CheckForIntegerToFloatingTruncation(batch[0], out_type));
+ RETURN_NOT_OK(CheckForIntegerToFloatingTruncation(batch[0], out_type));
}
CastNumberToNumberUnsafe(batch[0].type()->id(), out_type, batch[0], out);
- return Status::OK();
+ return Status::OK();
}
// ----------------------------------------------------------------------
@@ -264,7 +264,7 @@ Status CastIntegerToFloating(KernelContext* ctx, const ExecBatch& batch, Datum*
struct BooleanToNumber {
template <typename OutValue, typename Arg0Value>
- static OutValue Call(KernelContext*, Arg0Value val, Status*) {
+ static OutValue Call(KernelContext*, Arg0Value val, Status*) {
constexpr auto kOne = static_cast<OutValue>(1);
constexpr auto kZero = static_cast<OutValue>(0);
return val ? kOne : kZero;
@@ -273,9 +273,9 @@ struct BooleanToNumber {
template <typename O>
struct CastFunctor<O, BooleanType, enable_if_number<O>> {
- static Status Exec(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
- return applicator::ScalarUnary<O, BooleanType, BooleanToNumber>::Exec(ctx, batch,
- out);
+ static Status Exec(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
+ return applicator::ScalarUnary<O, BooleanType, BooleanToNumber>::Exec(ctx, batch,
+ out);
}
};
@@ -285,11 +285,11 @@ struct CastFunctor<O, BooleanType, enable_if_number<O>> {
template <typename OutType>
struct ParseString {
template <typename OutValue, typename Arg0Value>
- OutValue Call(KernelContext* ctx, Arg0Value val, Status* st) const {
+ OutValue Call(KernelContext* ctx, Arg0Value val, Status* st) const {
OutValue result = OutValue(0);
if (ARROW_PREDICT_FALSE(!ParseValue<OutType>(val.data(), val.size(), &result))) {
- *st = Status::Invalid("Failed to parse string: '", val, "' as a scalar of type ",
- TypeTraits<OutType>::type_singleton()->ToString());
+ *st = Status::Invalid("Failed to parse string: '", val, "' as a scalar of type ",
+ TypeTraits<OutType>::type_singleton()->ToString());
}
return result;
}
@@ -297,8 +297,8 @@ struct ParseString {
template <typename O, typename I>
struct CastFunctor<O, I, enable_if_base_binary<I>> {
- static Status Exec(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
- return applicator::ScalarUnaryNotNull<O, I, ParseString<O>>::Exec(ctx, batch, out);
+ static Status Exec(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
+ return applicator::ScalarUnaryNotNull<O, I, ParseString<O>>::Exec(ctx, batch, out);
}
};
@@ -306,13 +306,13 @@ struct CastFunctor<O, I, enable_if_base_binary<I>> {
// Decimal to integer
struct DecimalToIntegerMixin {
- template <typename OutValue, typename Arg0Value>
- OutValue ToInteger(KernelContext* ctx, const Arg0Value& val, Status* st) const {
+ template <typename OutValue, typename Arg0Value>
+ OutValue ToInteger(KernelContext* ctx, const Arg0Value& val, Status* st) const {
constexpr auto min_value = std::numeric_limits<OutValue>::min();
constexpr auto max_value = std::numeric_limits<OutValue>::max();
if (!allow_int_overflow_ && ARROW_PREDICT_FALSE(val < min_value || val > max_value)) {
- *st = Status::Invalid("Integer value out of bounds");
+ *st = Status::Invalid("Integer value out of bounds");
return OutValue{}; // Zero
} else {
return static_cast<OutValue>(val.low_bits());
@@ -330,8 +330,8 @@ struct UnsafeUpscaleDecimalToInteger : public DecimalToIntegerMixin {
using DecimalToIntegerMixin::DecimalToIntegerMixin;
template <typename OutValue, typename Arg0Value>
- OutValue Call(KernelContext* ctx, Arg0Value val, Status* st) const {
- return ToInteger<OutValue>(ctx, val.IncreaseScaleBy(-in_scale_), st);
+ OutValue Call(KernelContext* ctx, Arg0Value val, Status* st) const {
+ return ToInteger<OutValue>(ctx, val.IncreaseScaleBy(-in_scale_), st);
}
};
@@ -339,8 +339,8 @@ struct UnsafeDownscaleDecimalToInteger : public DecimalToIntegerMixin {
using DecimalToIntegerMixin::DecimalToIntegerMixin;
template <typename OutValue, typename Arg0Value>
- OutValue Call(KernelContext* ctx, Arg0Value val, Status* st) const {
- return ToInteger<OutValue>(ctx, val.ReduceScaleBy(in_scale_, false), st);
+ OutValue Call(KernelContext* ctx, Arg0Value val, Status* st) const {
+ return ToInteger<OutValue>(ctx, val.ReduceScaleBy(in_scale_, false), st);
}
};
@@ -348,44 +348,44 @@ struct SafeRescaleDecimalToInteger : public DecimalToIntegerMixin {
using DecimalToIntegerMixin::DecimalToIntegerMixin;
template <typename OutValue, typename Arg0Value>
- OutValue Call(KernelContext* ctx, Arg0Value val, Status* st) const {
+ OutValue Call(KernelContext* ctx, Arg0Value val, Status* st) const {
auto result = val.Rescale(in_scale_, 0);
if (ARROW_PREDICT_FALSE(!result.ok())) {
- *st = result.status();
+ *st = result.status();
return OutValue{}; // Zero
} else {
- return ToInteger<OutValue>(ctx, *result, st);
+ return ToInteger<OutValue>(ctx, *result, st);
}
}
};
-template <typename O, typename I>
-struct CastFunctor<O, I,
- enable_if_t<is_integer_type<O>::value && is_decimal_type<I>::value>> {
+template <typename O, typename I>
+struct CastFunctor<O, I,
+ enable_if_t<is_integer_type<O>::value && is_decimal_type<I>::value>> {
using out_type = typename O::c_type;
- static Status Exec(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
+ static Status Exec(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
const auto& options = checked_cast<const CastState*>(ctx->state())->options;
- const auto& in_type_inst = checked_cast<const I&>(*batch[0].type());
+ const auto& in_type_inst = checked_cast<const I&>(*batch[0].type());
const auto in_scale = in_type_inst.scale();
if (options.allow_decimal_truncate) {
if (in_scale < 0) {
// Unsafe upscale
- applicator::ScalarUnaryNotNullStateful<O, I, UnsafeUpscaleDecimalToInteger>
+ applicator::ScalarUnaryNotNullStateful<O, I, UnsafeUpscaleDecimalToInteger>
kernel(UnsafeUpscaleDecimalToInteger{in_scale, options.allow_int_overflow});
return kernel.Exec(ctx, batch, out);
} else {
// Unsafe downscale
- applicator::ScalarUnaryNotNullStateful<O, I, UnsafeDownscaleDecimalToInteger>
+ applicator::ScalarUnaryNotNullStateful<O, I, UnsafeDownscaleDecimalToInteger>
kernel(UnsafeDownscaleDecimalToInteger{in_scale, options.allow_int_overflow});
return kernel.Exec(ctx, batch, out);
}
} else {
// Safe rescale
- applicator::ScalarUnaryNotNullStateful<O, I, SafeRescaleDecimalToInteger> kernel(
- SafeRescaleDecimalToInteger{in_scale, options.allow_int_overflow});
+ applicator::ScalarUnaryNotNullStateful<O, I, SafeRescaleDecimalToInteger> kernel(
+ SafeRescaleDecimalToInteger{in_scale, options.allow_int_overflow});
return kernel.Exec(ctx, batch, out);
}
}
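
The three decimal-to-integer wrappers above differ only in how they reach scale zero: the safe path surfaces an error when fractional digits would be lost, while the unsafe paths rescale without checking. A standalone sketch with the public arrow::Decimal128 utilities (values illustrative):

    #include <iostream>
    #include <arrow/util/decimal.h>

    int main() {
      arrow::Decimal128 value;
      int32_t precision = 0, scale = 0;
      // "12.34" parses as unscaled 1234 with precision 4, scale 2.
      if (!arrow::Decimal128::FromString("12.34", &value, &precision, &scale).ok())
        return 1;

      // Safe path: Rescale(2, 0) reports an error, the fraction would be lost.
      auto rescaled = value.Rescale(scale, 0);
      std::cout << (rescaled.ok() ? "ok" : rescaled.status().ToString())
                << std::endl;

      // Unsafe downscale path: truncate, as the kernel's round = false does.
      arrow::Decimal128 truncated(value.ReduceScaleBy(scale, /*round=*/false));
      std::cout << truncated.ToString(0) << std::endl;  // 12
      return 0;
    }
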
@@ -394,104 +394,104 @@ struct CastFunctor<O, I,
// ----------------------------------------------------------------------
// Decimal to decimal
-// Helper that converts the input and output decimals
-// For instance, Decimal128 -> Decimal256 requires converting, then scaling
-// Decimal256 -> Decimal128 requires scaling, then truncating
-template <typename OutDecimal, typename InDecimal>
-struct DecimalConversions {};
-
-template <typename InDecimal>
-struct DecimalConversions<Decimal256, InDecimal> {
- // Convert then scale
- static Decimal256 ConvertInput(InDecimal&& val) { return Decimal256(val); }
- static Decimal256 ConvertOutput(Decimal256&& val) { return val; }
-};
-
-template <>
-struct DecimalConversions<Decimal128, Decimal256> {
- // Scale then truncate
- static Decimal256 ConvertInput(Decimal256&& val) { return val; }
- static Decimal128 ConvertOutput(Decimal256&& val) {
- return Decimal128(val.little_endian_array()[1], val.little_endian_array()[0]);
- }
-};
-
-template <>
-struct DecimalConversions<Decimal128, Decimal128> {
- static Decimal128 ConvertInput(Decimal128&& val) { return val; }
- static Decimal128 ConvertOutput(Decimal128&& val) { return val; }
+// Helper that converts the input and output decimals
+// For instance, Decimal128 -> Decimal256 requires converting, then scaling
+// Decimal256 -> Decimal128 requires scaling, then truncating
+template <typename OutDecimal, typename InDecimal>
+struct DecimalConversions {};
+
+template <typename InDecimal>
+struct DecimalConversions<Decimal256, InDecimal> {
+ // Convert then scale
+ static Decimal256 ConvertInput(InDecimal&& val) { return Decimal256(val); }
+ static Decimal256 ConvertOutput(Decimal256&& val) { return val; }
+};
+
+template <>
+struct DecimalConversions<Decimal128, Decimal256> {
+ // Scale then truncate
+ static Decimal256 ConvertInput(Decimal256&& val) { return val; }
+ static Decimal128 ConvertOutput(Decimal256&& val) {
+ return Decimal128(val.little_endian_array()[1], val.little_endian_array()[0]);
+ }
+};
+
+template <>
+struct DecimalConversions<Decimal128, Decimal128> {
+ static Decimal128 ConvertInput(Decimal128&& val) { return val; }
+ static Decimal128 ConvertOutput(Decimal128&& val) { return val; }
+};
+
+struct UnsafeUpscaleDecimal {
+ template <typename OutValue, typename Arg0Value>
+ OutValue Call(KernelContext*, Arg0Value val, Status*) const {
+ using Conv = DecimalConversions<OutValue, Arg0Value>;
+ return Conv::ConvertOutput(Conv::ConvertInput(std::move(val)).IncreaseScaleBy(by_));
+ }
+ int32_t by_;
};
-struct UnsafeUpscaleDecimal {
- template <typename OutValue, typename Arg0Value>
- OutValue Call(KernelContext*, Arg0Value val, Status*) const {
- using Conv = DecimalConversions<OutValue, Arg0Value>;
- return Conv::ConvertOutput(Conv::ConvertInput(std::move(val)).IncreaseScaleBy(by_));
- }
- int32_t by_;
-};
-
struct UnsafeDownscaleDecimal {
- template <typename OutValue, typename Arg0Value>
- OutValue Call(KernelContext*, Arg0Value val, Status*) const {
- using Conv = DecimalConversions<OutValue, Arg0Value>;
- return Conv::ConvertOutput(
- Conv::ConvertInput(std::move(val)).ReduceScaleBy(by_, false));
+ template <typename OutValue, typename Arg0Value>
+ OutValue Call(KernelContext*, Arg0Value val, Status*) const {
+ using Conv = DecimalConversions<OutValue, Arg0Value>;
+ return Conv::ConvertOutput(
+ Conv::ConvertInput(std::move(val)).ReduceScaleBy(by_, false));
}
- int32_t by_;
+ int32_t by_;
};
struct SafeRescaleDecimal {
- template <typename OutValue, typename Arg0Value>
- OutValue Call(KernelContext*, Arg0Value val, Status* st) const {
- using Conv = DecimalConversions<OutValue, Arg0Value>;
- auto maybe_rescaled =
- Conv::ConvertInput(std::move(val)).Rescale(in_scale_, out_scale_);
- if (ARROW_PREDICT_FALSE(!maybe_rescaled.ok())) {
- *st = maybe_rescaled.status();
- return {}; // Zero
+ template <typename OutValue, typename Arg0Value>
+ OutValue Call(KernelContext*, Arg0Value val, Status* st) const {
+ using Conv = DecimalConversions<OutValue, Arg0Value>;
+ auto maybe_rescaled =
+ Conv::ConvertInput(std::move(val)).Rescale(in_scale_, out_scale_);
+ if (ARROW_PREDICT_FALSE(!maybe_rescaled.ok())) {
+ *st = maybe_rescaled.status();
+ return {}; // Zero
+ }
+
+ if (ARROW_PREDICT_TRUE(maybe_rescaled->FitsInPrecision(out_precision_))) {
+ return Conv::ConvertOutput(maybe_rescaled.MoveValueUnsafe());
}
-
- if (ARROW_PREDICT_TRUE(maybe_rescaled->FitsInPrecision(out_precision_))) {
- return Conv::ConvertOutput(maybe_rescaled.MoveValueUnsafe());
- }
-
- *st = Status::Invalid("Decimal value does not fit in precision ", out_precision_);
- return {}; // Zero
+
+ *st = Status::Invalid("Decimal value does not fit in precision ", out_precision_);
+ return {}; // Zero
}
int32_t out_scale_, out_precision_, in_scale_;
};
-template <typename O, typename I>
-struct CastFunctor<O, I,
- enable_if_t<is_decimal_type<O>::value && is_decimal_type<I>::value>> {
- static Status Exec(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
+template <typename O, typename I>
+struct CastFunctor<O, I,
+ enable_if_t<is_decimal_type<O>::value && is_decimal_type<I>::value>> {
+ static Status Exec(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
const auto& options = checked_cast<const CastState*>(ctx->state())->options;
- const auto& in_type = checked_cast<const I&>(*batch[0].type());
- const auto& out_type = checked_cast<const O&>(*out->type());
- const auto in_scale = in_type.scale();
- const auto out_scale = out_type.scale();
+ const auto& in_type = checked_cast<const I&>(*batch[0].type());
+ const auto& out_type = checked_cast<const O&>(*out->type());
+ const auto in_scale = in_type.scale();
+ const auto out_scale = out_type.scale();
if (options.allow_decimal_truncate) {
if (in_scale < out_scale) {
// Unsafe upscale
- applicator::ScalarUnaryNotNullStateful<O, I, UnsafeUpscaleDecimal> kernel(
- UnsafeUpscaleDecimal{out_scale - in_scale});
+ applicator::ScalarUnaryNotNullStateful<O, I, UnsafeUpscaleDecimal> kernel(
+ UnsafeUpscaleDecimal{out_scale - in_scale});
return kernel.Exec(ctx, batch, out);
} else {
// Unsafe downscale
- applicator::ScalarUnaryNotNullStateful<O, I, UnsafeDownscaleDecimal> kernel(
- UnsafeDownscaleDecimal{in_scale - out_scale});
+ applicator::ScalarUnaryNotNullStateful<O, I, UnsafeDownscaleDecimal> kernel(
+ UnsafeDownscaleDecimal{in_scale - out_scale});
return kernel.Exec(ctx, batch, out);
}
}
-
- // Safe rescale
- applicator::ScalarUnaryNotNullStateful<O, I, SafeRescaleDecimal> kernel(
- SafeRescaleDecimal{out_scale, out_type.precision(), in_scale});
- return kernel.Exec(ctx, batch, out);
+
+ // Safe rescale
+ applicator::ScalarUnaryNotNullStateful<O, I, SafeRescaleDecimal> kernel(
+ SafeRescaleDecimal{out_scale, out_type.precision(), in_scale});
+ return kernel.Exec(ctx, batch, out);
}
};
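
DecimalConversions fixes the operation order: widening converts to Decimal256 before rescaling, while narrowing rescales in 256-bit space and only then truncates to the low 128 bits, so intermediate digits survive. A sketch of the widening direction with the public decimal utilities (values illustrative):

    #include <iostream>
    #include <arrow/util/decimal.h>

    int main() {
      arrow::Decimal128 narrow;
      int32_t precision = 0, scale = 0;
      if (!arrow::Decimal128::FromString("1.5", &narrow, &precision, &scale).ok())
        return 1;

      // Widen first (Decimal128 -> Decimal256), then scale up, mirroring
      // DecimalConversions<Decimal256, Decimal128>: convert, then IncreaseScaleBy.
      arrow::Decimal256 wide(narrow);
      arrow::Decimal256 rescaled(wide.IncreaseScaleBy(2));  // scale 1 -> scale 3
      std::cout << rescaled.ToString(3) << std::endl;  // 1.500
      return 0;
    }
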
@@ -500,33 +500,33 @@ struct CastFunctor<O, I,
struct RealToDecimal {
template <typename OutValue, typename RealType>
- OutValue Call(KernelContext*, RealType val, Status* st) const {
- auto maybe_decimal = OutValue::FromReal(val, out_precision_, out_scale_);
-
- if (ARROW_PREDICT_TRUE(maybe_decimal.ok())) {
- return maybe_decimal.MoveValueUnsafe();
+ OutValue Call(KernelContext*, RealType val, Status* st) const {
+ auto maybe_decimal = OutValue::FromReal(val, out_precision_, out_scale_);
+
+ if (ARROW_PREDICT_TRUE(maybe_decimal.ok())) {
+ return maybe_decimal.MoveValueUnsafe();
}
-
- if (!allow_truncate_) {
- *st = maybe_decimal.status();
- }
- return {}; // Zero
+
+ if (!allow_truncate_) {
+ *st = maybe_decimal.status();
+ }
+ return {}; // Zero
}
int32_t out_scale_, out_precision_;
bool allow_truncate_;
};
-template <typename O, typename I>
-struct CastFunctor<O, I,
- enable_if_t<is_decimal_type<O>::value && is_floating_type<I>::value>> {
- static Status Exec(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
+template <typename O, typename I>
+struct CastFunctor<O, I,
+ enable_if_t<is_decimal_type<O>::value && is_floating_type<I>::value>> {
+ static Status Exec(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
const auto& options = checked_cast<const CastState*>(ctx->state())->options;
- const auto& out_type = checked_cast<const O&>(*out->type());
- const auto out_scale = out_type.scale();
- const auto out_precision = out_type.precision();
+ const auto& out_type = checked_cast<const O&>(*out->type());
+ const auto out_scale = out_type.scale();
+ const auto out_precision = out_type.precision();
- applicator::ScalarUnaryNotNullStateful<O, I, RealToDecimal> kernel(
+ applicator::ScalarUnaryNotNullStateful<O, I, RealToDecimal> kernel(
RealToDecimal{out_scale, out_precision, options.allow_decimal_truncate});
return kernel.Exec(ctx, batch, out);
}
@@ -537,21 +537,21 @@ struct CastFunctor<O, I,
struct DecimalToReal {
template <typename RealType, typename Arg0Value>
- RealType Call(KernelContext*, const Arg0Value& val, Status*) const {
- return val.template ToReal<RealType>(in_scale_);
+ RealType Call(KernelContext*, const Arg0Value& val, Status*) const {
+ return val.template ToReal<RealType>(in_scale_);
}
int32_t in_scale_;
};
-template <typename O, typename I>
-struct CastFunctor<O, I,
- enable_if_t<is_floating_type<O>::value && is_decimal_type<I>::value>> {
- static Status Exec(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
- const auto& in_type = checked_cast<const I&>(*batch[0].type());
- const auto in_scale = in_type.scale();
+template <typename O, typename I>
+struct CastFunctor<O, I,
+ enable_if_t<is_floating_type<O>::value && is_decimal_type<I>::value>> {
+ static Status Exec(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
+ const auto& in_type = checked_cast<const I&>(*batch[0].type());
+ const auto in_scale = in_type.scale();
- applicator::ScalarUnaryNotNullStateful<O, I, DecimalToReal> kernel(
+ applicator::ScalarUnaryNotNullStateful<O, I, DecimalToReal> kernel(
DecimalToReal{in_scale});
return kernel.Exec(ctx, batch, out);
}
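
RealToDecimal and DecimalToReal are thin per-element adapters over the decimal utilities: FromReal validates precision and scale, ToReal divides by 10^scale. A standalone round trip (public arrow::Decimal128 API; values illustrative):

    #include <iostream>
    #include <arrow/util/decimal.h>

    int main() {
      // RealToDecimal path: FromReal checks the target precision/scale,
      // exactly what the kernel forwards per element.
      auto maybe_dec =
          arrow::Decimal128::FromReal(2.5, /*precision=*/4, /*scale=*/2);
      if (!maybe_dec.ok()) return 1;
      std::cout << maybe_dec->ToString(2) << std::endl;  // 2.50

      // DecimalToReal path: ToReal divides by 10^scale.
      std::cout << maybe_dec->ToReal<double>(2) << std::endl;  // 2.5
      return 0;
    }
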
@@ -595,10 +595,10 @@ std::shared_ptr<CastFunction> GetCastToInteger(std::string name) {
AddCommonNumberCasts<OutType>(out_ty, func.get());
// From decimal to integer
- DCHECK_OK(func->AddKernel(Type::DECIMAL, {InputType(Type::DECIMAL)}, out_ty,
+ DCHECK_OK(func->AddKernel(Type::DECIMAL, {InputType(Type::DECIMAL)}, out_ty,
CastFunctor<OutType, Decimal128Type>::Exec));
- DCHECK_OK(func->AddKernel(Type::DECIMAL256, {InputType(Type::DECIMAL256)}, out_ty,
- CastFunctor<OutType, Decimal256Type>::Exec));
+ DCHECK_OK(func->AddKernel(Type::DECIMAL256, {InputType(Type::DECIMAL256)}, out_ty,
+ CastFunctor<OutType, Decimal256Type>::Exec));
return func;
}
@@ -621,18 +621,18 @@ std::shared_ptr<CastFunction> GetCastToFloating(std::string name) {
AddCommonNumberCasts<OutType>(out_ty, func.get());
// From decimal to floating point
- DCHECK_OK(func->AddKernel(Type::DECIMAL, {InputType(Type::DECIMAL)}, out_ty,
+ DCHECK_OK(func->AddKernel(Type::DECIMAL, {InputType(Type::DECIMAL)}, out_ty,
CastFunctor<OutType, Decimal128Type>::Exec));
- DCHECK_OK(func->AddKernel(Type::DECIMAL256, {InputType(Type::DECIMAL256)}, out_ty,
- CastFunctor<OutType, Decimal256Type>::Exec));
+ DCHECK_OK(func->AddKernel(Type::DECIMAL256, {InputType(Type::DECIMAL256)}, out_ty,
+ CastFunctor<OutType, Decimal256Type>::Exec));
return func;
}
-std::shared_ptr<CastFunction> GetCastToDecimal128() {
+std::shared_ptr<CastFunction> GetCastToDecimal128() {
OutputType sig_out_ty(ResolveOutputFromOptions);
- auto func = std::make_shared<CastFunction>("cast_decimal", Type::DECIMAL128);
- AddCommonCasts(Type::DECIMAL128, sig_out_ty, func.get());
+ auto func = std::make_shared<CastFunction>("cast_decimal", Type::DECIMAL128);
+ AddCommonCasts(Type::DECIMAL128, sig_out_ty, func.get());
// Cast from floating point
DCHECK_OK(func->AddKernel(Type::FLOAT, {float32()}, sig_out_ty,
@@ -643,36 +643,36 @@ std::shared_ptr<CastFunction> GetCastToDecimal128() {
// Cast from other decimal
auto exec = CastFunctor<Decimal128Type, Decimal128Type>::Exec;
// We resolve the output type of this kernel from the CastOptions
- DCHECK_OK(
- func->AddKernel(Type::DECIMAL128, {InputType(Type::DECIMAL128)}, sig_out_ty, exec));
- exec = CastFunctor<Decimal128Type, Decimal256Type>::Exec;
- DCHECK_OK(
- func->AddKernel(Type::DECIMAL256, {InputType(Type::DECIMAL256)}, sig_out_ty, exec));
+ DCHECK_OK(
+ func->AddKernel(Type::DECIMAL128, {InputType(Type::DECIMAL128)}, sig_out_ty, exec));
+ exec = CastFunctor<Decimal128Type, Decimal256Type>::Exec;
+ DCHECK_OK(
+ func->AddKernel(Type::DECIMAL256, {InputType(Type::DECIMAL256)}, sig_out_ty, exec));
+ return func;
+}
+
+std::shared_ptr<CastFunction> GetCastToDecimal256() {
+ OutputType sig_out_ty(ResolveOutputFromOptions);
+
+ auto func = std::make_shared<CastFunction>("cast_decimal256", Type::DECIMAL256);
+ AddCommonCasts(Type::DECIMAL256, sig_out_ty, func.get());
+
+ // Cast from floating point
+ DCHECK_OK(func->AddKernel(Type::FLOAT, {float32()}, sig_out_ty,
+ CastFunctor<Decimal256Type, FloatType>::Exec));
+ DCHECK_OK(func->AddKernel(Type::DOUBLE, {float64()}, sig_out_ty,
+ CastFunctor<Decimal256Type, DoubleType>::Exec));
+
+ // Cast from other decimal
+ auto exec = CastFunctor<Decimal256Type, Decimal128Type>::Exec;
+ DCHECK_OK(
+ func->AddKernel(Type::DECIMAL128, {InputType(Type::DECIMAL128)}, sig_out_ty, exec));
+ exec = CastFunctor<Decimal256Type, Decimal256Type>::Exec;
+ DCHECK_OK(
+ func->AddKernel(Type::DECIMAL256, {InputType(Type::DECIMAL256)}, sig_out_ty, exec));
return func;
}
-std::shared_ptr<CastFunction> GetCastToDecimal256() {
- OutputType sig_out_ty(ResolveOutputFromOptions);
-
- auto func = std::make_shared<CastFunction>("cast_decimal256", Type::DECIMAL256);
- AddCommonCasts(Type::DECIMAL256, sig_out_ty, func.get());
-
- // Cast from floating point
- DCHECK_OK(func->AddKernel(Type::FLOAT, {float32()}, sig_out_ty,
- CastFunctor<Decimal256Type, FloatType>::Exec));
- DCHECK_OK(func->AddKernel(Type::DOUBLE, {float64()}, sig_out_ty,
- CastFunctor<Decimal256Type, DoubleType>::Exec));
-
- // Cast from other decimal
- auto exec = CastFunctor<Decimal256Type, Decimal128Type>::Exec;
- DCHECK_OK(
- func->AddKernel(Type::DECIMAL128, {InputType(Type::DECIMAL128)}, sig_out_ty, exec));
- exec = CastFunctor<Decimal256Type, Decimal256Type>::Exec;
- DCHECK_OK(
- func->AddKernel(Type::DECIMAL256, {InputType(Type::DECIMAL256)}, sig_out_ty, exec));
- return func;
-}
-
} // namespace
std::vector<std::shared_ptr<CastFunction>> GetNumericCasts() {
@@ -681,8 +681,8 @@ std::vector<std::shared_ptr<CastFunction>> GetNumericCasts() {
// Make a cast to null that does not do much. Not sure why we need to be able
// to cast from dict<null> -> null but there are unit tests for it
auto cast_null = std::make_shared<CastFunction>("cast_null", Type::NA);
- DCHECK_OK(cast_null->AddKernel(Type::DICTIONARY, {InputType(Type::DICTIONARY)}, null(),
- OutputAllNull));
+ DCHECK_OK(cast_null->AddKernel(Type::DICTIONARY, {InputType(Type::DICTIONARY)}, null(),
+ OutputAllNull));
functions.push_back(cast_null);
functions.push_back(GetCastToInteger<Int8Type>("cast_int8"));
@@ -716,8 +716,8 @@ std::vector<std::shared_ptr<CastFunction>> GetNumericCasts() {
functions.push_back(GetCastToFloating<FloatType>("cast_float"));
functions.push_back(GetCastToFloating<DoubleType>("cast_double"));
- functions.push_back(GetCastToDecimal128());
- functions.push_back(GetCastToDecimal256());
+ functions.push_back(GetCastToDecimal128());
+ functions.push_back(GetCastToDecimal256());
return functions;
}
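
A minimal sketch (not part of this diff; assumes Arrow's public C++ compute API) of how the decimal kernels registered above are reached through compute::Cast — DecimalToReal::Call divides the unscaled value by 10^scale:

#include <iostream>
#include "arrow/api.h"
#include "arrow/compute/api.h"

arrow::Status DecimalToDoubleExample() {
  // Build a decimal128(5, 2) array holding 123.45 (unscaled value 12345).
  arrow::Decimal128Builder builder(arrow::decimal128(5, 2));
  ARROW_RETURN_NOT_OK(builder.Append(arrow::Decimal128(12345)));
  ARROW_ASSIGN_OR_RAISE(std::shared_ptr<arrow::Array> decimals, builder.Finish());

  // Dispatches to CastFunctor<DoubleType, Decimal128Type>, registered by
  // GetCastToFloating above.
  ARROW_ASSIGN_OR_RAISE(arrow::Datum result,
                        arrow::compute::Cast(decimals, arrow::float64()));
  std::cout << result.make_array()->ToString() << std::endl;  // [123.45]
  return arrow::Status::OK();
}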
diff --git a/contrib/libs/apache/arrow/cpp/src/arrow/compute/kernels/scalar_cast_string.cc b/contrib/libs/apache/arrow/cpp/src/arrow/compute/kernels/scalar_cast_string.cc
index e24d7fabf37..3ce537b7223 100644
--- a/contrib/libs/apache/arrow/cpp/src/arrow/compute/kernels/scalar_cast_string.cc
+++ b/contrib/libs/apache/arrow/cpp/src/arrow/compute/kernels/scalar_cast_string.cc
@@ -15,15 +15,15 @@
// specific language governing permissions and limitations
// under the License.
-#include <limits>
+#include <limits>
#include "arrow/array/array_base.h"
-#include "arrow/array/builder_binary.h"
+#include "arrow/array/builder_binary.h"
#include "arrow/compute/kernels/common.h"
#include "arrow/compute/kernels/scalar_cast_internal.h"
#include "arrow/result.h"
#include "arrow/util/formatting.h"
-#include "arrow/util/int_util.h"
+#include "arrow/util/int_util.h"
#include "arrow/util/optional.h"
#include "arrow/util/utf8.h"
#include "arrow/visitor_inline.h"
@@ -37,22 +37,22 @@ using util::ValidateUTF8;
namespace compute {
namespace internal {
-namespace {
-
+namespace {
+
// ----------------------------------------------------------------------
// Number / Boolean to String
-template <typename O, typename I>
-struct NumericToStringCastFunctor {
+template <typename O, typename I>
+struct NumericToStringCastFunctor {
using value_type = typename TypeTraits<I>::CType;
using BuilderType = typename TypeTraits<O>::BuilderType;
using FormatterType = StringFormatter<I>;
- static Status Exec(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
- DCHECK(out->is_array());
+ static Status Exec(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
+ DCHECK(out->is_array());
const ArrayData& input = *batch[0].array();
ArrayData* output = out->mutable_array();
- return Convert(ctx, input, output);
+ return Convert(ctx, input, output);
}
static Status Convert(KernelContext* ctx, const ArrayData& input, ArrayData* output) {
@@ -73,7 +73,7 @@ struct NumericToStringCastFunctor {
};
// ----------------------------------------------------------------------
-// Binary-like to binary-like
+// Binary-like to binary-like
//
#if defined(_MSC_VER)
@@ -94,152 +94,152 @@ struct Utf8Validator {
};
template <typename I, typename O>
-Status CastBinaryToBinaryOffsets(KernelContext* ctx, const ArrayData& input,
- ArrayData* output) {
- static_assert(std::is_same<I, O>::value, "Cast same-width offsets (no-op)");
- return Status::OK();
-}
+Status CastBinaryToBinaryOffsets(KernelContext* ctx, const ArrayData& input,
+ ArrayData* output) {
+ static_assert(std::is_same<I, O>::value, "Cast same-width offsets (no-op)");
+ return Status::OK();
+}
-// Upcast offsets
+// Upcast offsets
template <>
-Status CastBinaryToBinaryOffsets<int32_t, int64_t>(KernelContext* ctx,
- const ArrayData& input,
- ArrayData* output) {
- using input_offset_type = int32_t;
- using output_offset_type = int64_t;
- ARROW_ASSIGN_OR_RAISE(
- output->buffers[1],
- ctx->Allocate((output->length + output->offset + 1) * sizeof(output_offset_type)));
- memset(output->buffers[1]->mutable_data(), 0,
- output->offset * sizeof(output_offset_type));
- ::arrow::internal::CastInts(input.GetValues<input_offset_type>(1),
- output->GetMutableValues<output_offset_type>(1),
- output->length + 1);
- return Status::OK();
-}
-
-// Downcast offsets
+Status CastBinaryToBinaryOffsets<int32_t, int64_t>(KernelContext* ctx,
+ const ArrayData& input,
+ ArrayData* output) {
+ using input_offset_type = int32_t;
+ using output_offset_type = int64_t;
+ ARROW_ASSIGN_OR_RAISE(
+ output->buffers[1],
+ ctx->Allocate((output->length + output->offset + 1) * sizeof(output_offset_type)));
+ memset(output->buffers[1]->mutable_data(), 0,
+ output->offset * sizeof(output_offset_type));
+ ::arrow::internal::CastInts(input.GetValues<input_offset_type>(1),
+ output->GetMutableValues<output_offset_type>(1),
+ output->length + 1);
+ return Status::OK();
+}
+
+// Downcast offsets
template <>
-Status CastBinaryToBinaryOffsets<int64_t, int32_t>(KernelContext* ctx,
- const ArrayData& input,
- ArrayData* output) {
- using input_offset_type = int64_t;
- using output_offset_type = int32_t;
-
- constexpr input_offset_type kMaxOffset = std::numeric_limits<output_offset_type>::max();
-
- auto input_offsets = input.GetValues<input_offset_type>(1);
-
- // Binary offsets are ascending, so it's enough to check the last one for overflow.
- if (input_offsets[input.length] > kMaxOffset) {
- return Status::Invalid("Failed casting from ", input.type->ToString(), " to ",
- output->type->ToString(), ": input array too large");
- } else {
- ARROW_ASSIGN_OR_RAISE(output->buffers[1],
- ctx->Allocate((output->length + output->offset + 1) *
- sizeof(output_offset_type)));
- memset(output->buffers[1]->mutable_data(), 0,
- output->offset * sizeof(output_offset_type));
- ::arrow::internal::CastInts(input.GetValues<input_offset_type>(1),
- output->GetMutableValues<output_offset_type>(1),
- output->length + 1);
- return Status::OK();
- }
-}
-
-template <typename O, typename I>
-Status BinaryToBinaryCastExec(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
- DCHECK(out->is_array());
- const CastOptions& options = checked_cast<const CastState&>(*ctx->state()).options;
- const ArrayData& input = *batch[0].array();
-
- if (!I::is_utf8 && O::is_utf8 && !options.allow_invalid_utf8) {
- InitializeUTF8();
-
- ArrayDataVisitor<I> visitor;
- Utf8Validator validator;
- RETURN_NOT_OK(visitor.Visit(input, &validator));
- }
-
-  // Start with a zero-copy cast, then rewrite the offsets to the expected width
- RETURN_NOT_OK(ZeroCopyCastExec(ctx, batch, out));
- return CastBinaryToBinaryOffsets<typename I::offset_type, typename O::offset_type>(
- ctx, input, out->mutable_array());
-}
-
+Status CastBinaryToBinaryOffsets<int64_t, int32_t>(KernelContext* ctx,
+ const ArrayData& input,
+ ArrayData* output) {
+ using input_offset_type = int64_t;
+ using output_offset_type = int32_t;
+
+ constexpr input_offset_type kMaxOffset = std::numeric_limits<output_offset_type>::max();
+
+ auto input_offsets = input.GetValues<input_offset_type>(1);
+
+ // Binary offsets are ascending, so it's enough to check the last one for overflow.
+ if (input_offsets[input.length] > kMaxOffset) {
+ return Status::Invalid("Failed casting from ", input.type->ToString(), " to ",
+ output->type->ToString(), ": input array too large");
+ } else {
+ ARROW_ASSIGN_OR_RAISE(output->buffers[1],
+ ctx->Allocate((output->length + output->offset + 1) *
+ sizeof(output_offset_type)));
+ memset(output->buffers[1]->mutable_data(), 0,
+ output->offset * sizeof(output_offset_type));
+ ::arrow::internal::CastInts(input.GetValues<input_offset_type>(1),
+ output->GetMutableValues<output_offset_type>(1),
+ output->length + 1);
+ return Status::OK();
+ }
+}
+
+template <typename O, typename I>
+Status BinaryToBinaryCastExec(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
+ DCHECK(out->is_array());
+ const CastOptions& options = checked_cast<const CastState&>(*ctx->state()).options;
+ const ArrayData& input = *batch[0].array();
+
+ if (!I::is_utf8 && O::is_utf8 && !options.allow_invalid_utf8) {
+ InitializeUTF8();
+
+ ArrayDataVisitor<I> visitor;
+ Utf8Validator validator;
+ RETURN_NOT_OK(visitor.Visit(input, &validator));
+ }
+
+  // Start with a zero-copy cast, then rewrite the offsets to the expected width
+ RETURN_NOT_OK(ZeroCopyCastExec(ctx, batch, out));
+ return CastBinaryToBinaryOffsets<typename I::offset_type, typename O::offset_type>(
+ ctx, input, out->mutable_array());
+}
+
#if defined(_MSC_VER)
#pragma warning(pop)
#endif
-// ----------------------------------------------------------------------
-// Cast functions registration
+// ----------------------------------------------------------------------
+// Cast functions registration
template <typename OutType>
-void AddNumberToStringCasts(CastFunction* func) {
- auto out_ty = TypeTraits<OutType>::type_singleton();
-
+void AddNumberToStringCasts(CastFunction* func) {
+ auto out_ty = TypeTraits<OutType>::type_singleton();
+
DCHECK_OK(func->AddKernel(Type::BOOL, {boolean()}, out_ty,
- TrivialScalarUnaryAsArraysExec(
- NumericToStringCastFunctor<OutType, BooleanType>::Exec),
+ TrivialScalarUnaryAsArraysExec(
+ NumericToStringCastFunctor<OutType, BooleanType>::Exec),
NullHandling::COMPUTED_NO_PREALLOCATE));
for (const std::shared_ptr<DataType>& in_ty : NumericTypes()) {
- DCHECK_OK(
- func->AddKernel(in_ty->id(), {in_ty}, out_ty,
- TrivialScalarUnaryAsArraysExec(
- GenerateNumeric<NumericToStringCastFunctor, OutType>(*in_ty)),
- NullHandling::COMPUTED_NO_PREALLOCATE));
+ DCHECK_OK(
+ func->AddKernel(in_ty->id(), {in_ty}, out_ty,
+ TrivialScalarUnaryAsArraysExec(
+ GenerateNumeric<NumericToStringCastFunctor, OutType>(*in_ty)),
+ NullHandling::COMPUTED_NO_PREALLOCATE));
}
}
-template <typename OutType, typename InType>
-void AddBinaryToBinaryCast(CastFunction* func) {
- auto in_ty = TypeTraits<InType>::type_singleton();
- auto out_ty = TypeTraits<OutType>::type_singleton();
-
- DCHECK_OK(func->AddKernel(
- InType::type_id, {in_ty}, out_ty,
- TrivialScalarUnaryAsArraysExec(BinaryToBinaryCastExec<OutType, InType>),
- NullHandling::COMPUTED_NO_PREALLOCATE));
-}
-
-template <typename OutType>
-void AddBinaryToBinaryCast(CastFunction* func) {
- AddBinaryToBinaryCast<OutType, StringType>(func);
- AddBinaryToBinaryCast<OutType, BinaryType>(func);
- AddBinaryToBinaryCast<OutType, LargeStringType>(func);
- AddBinaryToBinaryCast<OutType, LargeBinaryType>(func);
-}
-
-} // namespace
-
+template <typename OutType, typename InType>
+void AddBinaryToBinaryCast(CastFunction* func) {
+ auto in_ty = TypeTraits<InType>::type_singleton();
+ auto out_ty = TypeTraits<OutType>::type_singleton();
+
+ DCHECK_OK(func->AddKernel(
+ InType::type_id, {in_ty}, out_ty,
+ TrivialScalarUnaryAsArraysExec(BinaryToBinaryCastExec<OutType, InType>),
+ NullHandling::COMPUTED_NO_PREALLOCATE));
+}
+
+template <typename OutType>
+void AddBinaryToBinaryCast(CastFunction* func) {
+ AddBinaryToBinaryCast<OutType, StringType>(func);
+ AddBinaryToBinaryCast<OutType, BinaryType>(func);
+ AddBinaryToBinaryCast<OutType, LargeStringType>(func);
+ AddBinaryToBinaryCast<OutType, LargeBinaryType>(func);
+}
+
+} // namespace
+
std::vector<std::shared_ptr<CastFunction>> GetBinaryLikeCasts() {
auto cast_binary = std::make_shared<CastFunction>("cast_binary", Type::BINARY);
AddCommonCasts(Type::BINARY, binary(), cast_binary.get());
- AddBinaryToBinaryCast<BinaryType>(cast_binary.get());
+ AddBinaryToBinaryCast<BinaryType>(cast_binary.get());
auto cast_large_binary =
std::make_shared<CastFunction>("cast_large_binary", Type::LARGE_BINARY);
AddCommonCasts(Type::LARGE_BINARY, large_binary(), cast_large_binary.get());
- AddBinaryToBinaryCast<LargeBinaryType>(cast_large_binary.get());
+ AddBinaryToBinaryCast<LargeBinaryType>(cast_large_binary.get());
auto cast_string = std::make_shared<CastFunction>("cast_string", Type::STRING);
AddCommonCasts(Type::STRING, utf8(), cast_string.get());
- AddNumberToStringCasts<StringType>(cast_string.get());
- AddBinaryToBinaryCast<StringType>(cast_string.get());
+ AddNumberToStringCasts<StringType>(cast_string.get());
+ AddBinaryToBinaryCast<StringType>(cast_string.get());
auto cast_large_string =
std::make_shared<CastFunction>("cast_large_string", Type::LARGE_STRING);
AddCommonCasts(Type::LARGE_STRING, large_utf8(), cast_large_string.get());
- AddNumberToStringCasts<LargeStringType>(cast_large_string.get());
- AddBinaryToBinaryCast<LargeStringType>(cast_large_string.get());
-
- auto cast_fsb =
- std::make_shared<CastFunction>("cast_fixed_size_binary", Type::FIXED_SIZE_BINARY);
- AddCommonCasts(Type::FIXED_SIZE_BINARY, OutputType(ResolveOutputFromOptions),
- cast_fsb.get());
-
- return {cast_binary, cast_large_binary, cast_string, cast_large_string, cast_fsb};
+ AddNumberToStringCasts<LargeStringType>(cast_large_string.get());
+ AddBinaryToBinaryCast<LargeStringType>(cast_large_string.get());
+
+ auto cast_fsb =
+ std::make_shared<CastFunction>("cast_fixed_size_binary", Type::FIXED_SIZE_BINARY);
+ AddCommonCasts(Type::FIXED_SIZE_BINARY, OutputType(ResolveOutputFromOptions),
+ cast_fsb.get());
+
+ return {cast_binary, cast_large_binary, cast_string, cast_large_string, cast_fsb};
}
} // namespace internal
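
A minimal sketch (assumed usage of the public Cast API, not taken from this diff) of the binary-to-binary path above: the value buffer is zero-copied and only the offsets are rewritten, with the int64 -> int32 downcast returning Status::Invalid when the final offset would overflow:

#include "arrow/api.h"
#include "arrow/compute/api.h"

arrow::Status LargeStringToStringExample() {
  arrow::LargeStringBuilder builder;
  ARROW_RETURN_NOT_OK(builder.Append("hello"));
  ARROW_RETURN_NOT_OK(builder.Append("arrow"));
  ARROW_ASSIGN_OR_RAISE(std::shared_ptr<arrow::Array> large, builder.Finish());

  // BinaryToBinaryCastExec zero-copies the data buffer, then
  // CastBinaryToBinaryOffsets<int64_t, int32_t> narrows the offsets; an input
  // larger than INT32_MAX bytes would instead fail with Status::Invalid.
  ARROW_ASSIGN_OR_RAISE(arrow::Datum result,
                        arrow::compute::Cast(large, arrow::utf8()));
  return arrow::Status::OK();
}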
diff --git a/contrib/libs/apache/arrow/cpp/src/arrow/compute/kernels/scalar_cast_temporal.cc b/contrib/libs/apache/arrow/cpp/src/arrow/compute/kernels/scalar_cast_temporal.cc
index b5271e02413..1a58fce7c74 100644
--- a/contrib/libs/apache/arrow/cpp/src/arrow/compute/kernels/scalar_cast_temporal.cc
+++ b/contrib/libs/apache/arrow/cpp/src/arrow/compute/kernels/scalar_cast_temporal.cc
@@ -19,7 +19,7 @@
#include <limits>
-#include "arrow/array/builder_time.h"
+#include "arrow/array/builder_time.h"
#include "arrow/compute/kernels/common.h"
#include "arrow/compute/kernels/scalar_cast_internal.h"
#include "arrow/util/bitmap_reader.h"
@@ -39,10 +39,10 @@ constexpr int64_t kMillisecondsInDay = 86400000;
// From one timestamp to another
template <typename in_type, typename out_type>
-Status ShiftTime(KernelContext* ctx, const util::DivideOrMultiply factor_op,
- const int64_t factor, const ArrayData& input, ArrayData* output) {
+Status ShiftTime(KernelContext* ctx, const util::DivideOrMultiply factor_op,
+ const int64_t factor, const ArrayData& input, ArrayData* output) {
const CastOptions& options = checked_cast<const CastState&>(*ctx->state()).options;
- auto in_data = input.GetValues<in_type>(1);
+ auto in_data = input.GetValues<in_type>(1);
auto out_data = output->GetMutableValues<out_type>(1);
if (factor == 1) {
@@ -55,10 +55,10 @@ Status ShiftTime(KernelContext* ctx, const util::DivideOrMultiply factor_op,
out_data[i] = static_cast<out_type>(in_data[i] * factor);
}
} else {
-#define RAISE_OVERFLOW_CAST(VAL) \
- return Status::Invalid("Casting from ", input.type->ToString(), " to ", \
- output->type->ToString(), " would result in ", \
- "out of bounds timestamp: ", VAL);
+#define RAISE_OVERFLOW_CAST(VAL) \
+ return Status::Invalid("Casting from ", input.type->ToString(), " to ", \
+ output->type->ToString(), " would result in ", \
+ "out of bounds timestamp: ", VAL);
int64_t max_val = std::numeric_limits<int64_t>::max() / factor;
int64_t min_val = std::numeric_limits<int64_t>::min() / factor;
@@ -88,9 +88,9 @@ Status ShiftTime(KernelContext* ctx, const util::DivideOrMultiply factor_op,
out_data[i] = static_cast<out_type>(in_data[i] / factor);
}
} else {
-#define RAISE_INVALID_CAST(VAL) \
- return Status::Invalid("Casting from ", input.type->ToString(), " to ", \
- output->type->ToString(), " would lose data: ", VAL);
+#define RAISE_INVALID_CAST(VAL) \
+ return Status::Invalid("Casting from ", input.type->ToString(), " to ", \
+ output->type->ToString(), " would lose data: ", VAL);
if (input.null_count != 0) {
BitmapReader bit_reader(input.buffers[0]->data(), input.offset, input.length);
@@ -113,8 +113,8 @@ Status ShiftTime(KernelContext* ctx, const util::DivideOrMultiply factor_op,
#undef RAISE_INVALID_CAST
}
}
-
- return Status::OK();
+
+ return Status::OK();
}
// <TimestampType, TimestampType> and <DurationType, DurationType>
@@ -123,7 +123,7 @@ struct CastFunctor<
O, I,
enable_if_t<(is_timestamp_type<O>::value && is_timestamp_type<I>::value) ||
(is_duration_type<O>::value && is_duration_type<I>::value)>> {
- static Status Exec(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
+ static Status Exec(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
DCHECK_EQ(batch[0].kind(), Datum::ARRAY);
const ArrayData& input = *batch[0].array();
@@ -137,14 +137,14 @@ struct CastFunctor<
// lengths to make this zero copy in the future but we leave it for now
auto conversion = util::GetTimestampConversion(in_type.unit(), out_type.unit());
- return ShiftTime<int64_t, int64_t>(ctx, conversion.first, conversion.second, input,
- output);
+ return ShiftTime<int64_t, int64_t>(ctx, conversion.first, conversion.second, input,
+ output);
}
};
template <>
struct CastFunctor<Date32Type, TimestampType> {
- static Status Exec(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
+ static Status Exec(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
DCHECK_EQ(batch[0].kind(), Datum::ARRAY);
const ArrayData& input = *batch[0].array();
@@ -160,13 +160,13 @@ struct CastFunctor<Date32Type, TimestampType> {
};
const int64_t factor = kTimestampToDateFactors[static_cast<int>(in_type.unit())];
- return ShiftTime<int64_t, int32_t>(ctx, util::DIVIDE, factor, input, output);
+ return ShiftTime<int64_t, int32_t>(ctx, util::DIVIDE, factor, input, output);
}
};
template <>
struct CastFunctor<Date64Type, TimestampType> {
- static Status Exec(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
+ static Status Exec(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
DCHECK_EQ(batch[0].kind(), Datum::ARRAY);
const CastOptions& options = checked_cast<const CastState&>(*ctx->state()).options;
@@ -175,8 +175,8 @@ struct CastFunctor<Date64Type, TimestampType> {
const auto& in_type = checked_cast<const TimestampType&>(*input.type);
auto conversion = util::GetTimestampConversion(in_type.unit(), TimeUnit::MILLI);
- RETURN_NOT_OK((ShiftTime<int64_t, int64_t>(ctx, conversion.first, conversion.second,
- input, output)));
+ RETURN_NOT_OK((ShiftTime<int64_t, int64_t>(ctx, conversion.first, conversion.second,
+ input, output)));
// Ensure that intraday milliseconds have been zeroed out
auto out_data = output->GetMutableValues<int64_t>(1);
@@ -188,7 +188,7 @@ struct CastFunctor<Date64Type, TimestampType> {
const int64_t remainder = out_data[i] % kMillisecondsInDay;
if (ARROW_PREDICT_FALSE(!options.allow_time_truncate && bit_reader.IsSet() &&
remainder > 0)) {
- return Status::Invalid("Timestamp value had non-zero intraday milliseconds");
+ return Status::Invalid("Timestamp value had non-zero intraday milliseconds");
}
out_data[i] -= remainder;
bit_reader.Next();
@@ -197,13 +197,13 @@ struct CastFunctor<Date64Type, TimestampType> {
for (int64_t i = 0; i < input.length; ++i) {
const int64_t remainder = out_data[i] % kMillisecondsInDay;
if (ARROW_PREDICT_FALSE(!options.allow_time_truncate && remainder > 0)) {
- return Status::Invalid("Timestamp value had non-zero intraday milliseconds");
+ return Status::Invalid("Timestamp value had non-zero intraday milliseconds");
}
out_data[i] -= remainder;
}
}
-
- return Status::OK();
+
+ return Status::OK();
}
};
@@ -215,7 +215,7 @@ struct CastFunctor<O, I, enable_if_t<is_time_type<I>::value && is_time_type<O>::
using in_t = typename I::c_type;
using out_t = typename O::c_type;
- static Status Exec(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
+ static Status Exec(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
DCHECK_EQ(batch[0].kind(), Datum::ARRAY);
const ArrayData& input = *batch[0].array();
@@ -226,8 +226,8 @@ struct CastFunctor<O, I, enable_if_t<is_time_type<I>::value && is_time_type<O>::
const auto& out_type = checked_cast<const O&>(*output->type);
DCHECK_NE(in_type.unit(), out_type.unit()) << "Do not cast equal types";
auto conversion = util::GetTimestampConversion(in_type.unit(), out_type.unit());
- return ShiftTime<in_t, out_t>(ctx, conversion.first, conversion.second, input,
- output);
+ return ShiftTime<in_t, out_t>(ctx, conversion.first, conversion.second, input,
+ output);
}
};
@@ -236,68 +236,68 @@ struct CastFunctor<O, I, enable_if_t<is_time_type<I>::value && is_time_type<O>::
template <>
struct CastFunctor<Date64Type, Date32Type> {
- static Status Exec(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
+ static Status Exec(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
DCHECK_EQ(batch[0].kind(), Datum::ARRAY);
- return ShiftTime<int32_t, int64_t>(ctx, util::MULTIPLY, kMillisecondsInDay,
- *batch[0].array(), out->mutable_array());
+ return ShiftTime<int32_t, int64_t>(ctx, util::MULTIPLY, kMillisecondsInDay,
+ *batch[0].array(), out->mutable_array());
}
};
template <>
struct CastFunctor<Date32Type, Date64Type> {
- static Status Exec(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
+ static Status Exec(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
DCHECK_EQ(batch[0].kind(), Datum::ARRAY);
- return ShiftTime<int64_t, int32_t>(ctx, util::DIVIDE, kMillisecondsInDay,
- *batch[0].array(), out->mutable_array());
+ return ShiftTime<int64_t, int32_t>(ctx, util::DIVIDE, kMillisecondsInDay,
+ *batch[0].array(), out->mutable_array());
+ }
+};
+
+// ----------------------------------------------------------------------
+// date32, date64 to timestamp
+
+template <>
+struct CastFunctor<TimestampType, Date32Type> {
+ static Status Exec(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
+ DCHECK_EQ(batch[0].kind(), Datum::ARRAY);
+
+ const auto& out_type = checked_cast<const TimestampType&>(*out->type());
+ // get conversion SECOND -> unit
+ auto conversion = util::GetTimestampConversion(TimeUnit::SECOND, out_type.unit());
+ DCHECK_EQ(conversion.first, util::MULTIPLY);
+
+ // multiply to achieve days -> unit
+ conversion.second *= kMillisecondsInDay / 1000;
+ return ShiftTime<int32_t, int64_t>(ctx, util::MULTIPLY, conversion.second,
+ *batch[0].array(), out->mutable_array());
+ }
+};
+
+template <>
+struct CastFunctor<TimestampType, Date64Type> {
+ static Status Exec(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
+ DCHECK_EQ(batch[0].kind(), Datum::ARRAY);
+
+ const auto& out_type = checked_cast<const TimestampType&>(*out->type());
+
+ // date64 is ms since epoch
+ auto conversion = util::GetTimestampConversion(TimeUnit::MILLI, out_type.unit());
+ return ShiftTime<int64_t, int64_t>(ctx, conversion.first, conversion.second,
+ *batch[0].array(), out->mutable_array());
}
};
// ----------------------------------------------------------------------
-// date32, date64 to timestamp
-
-template <>
-struct CastFunctor<TimestampType, Date32Type> {
- static Status Exec(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
- DCHECK_EQ(batch[0].kind(), Datum::ARRAY);
-
- const auto& out_type = checked_cast<const TimestampType&>(*out->type());
- // get conversion SECOND -> unit
- auto conversion = util::GetTimestampConversion(TimeUnit::SECOND, out_type.unit());
- DCHECK_EQ(conversion.first, util::MULTIPLY);
-
- // multiply to achieve days -> unit
- conversion.second *= kMillisecondsInDay / 1000;
- return ShiftTime<int32_t, int64_t>(ctx, util::MULTIPLY, conversion.second,
- *batch[0].array(), out->mutable_array());
- }
-};
-
-template <>
-struct CastFunctor<TimestampType, Date64Type> {
- static Status Exec(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
- DCHECK_EQ(batch[0].kind(), Datum::ARRAY);
-
- const auto& out_type = checked_cast<const TimestampType&>(*out->type());
-
- // date64 is ms since epoch
- auto conversion = util::GetTimestampConversion(TimeUnit::MILLI, out_type.unit());
- return ShiftTime<int64_t, int64_t>(ctx, conversion.first, conversion.second,
- *batch[0].array(), out->mutable_array());
- }
-};
-
-// ----------------------------------------------------------------------
// String to Timestamp
struct ParseTimestamp {
template <typename OutValue, typename Arg0Value>
- OutValue Call(KernelContext*, Arg0Value val, Status* st) const {
+ OutValue Call(KernelContext*, Arg0Value val, Status* st) const {
OutValue result = 0;
if (ARROW_PREDICT_FALSE(!ParseValue(type, val.data(), val.size(), &result))) {
- *st = Status::Invalid("Failed to parse string: '", val, "' as a scalar of type ",
- type.ToString());
+ *st = Status::Invalid("Failed to parse string: '", val, "' as a scalar of type ",
+ type.ToString());
}
return result;
}
@@ -307,7 +307,7 @@ struct ParseTimestamp {
template <typename I>
struct CastFunctor<TimestampType, I, enable_if_t<is_base_binary_type<I>::value>> {
- static Status Exec(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
+ static Status Exec(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
const auto& out_type = checked_cast<const TimestampType&>(*out->type());
applicator::ScalarUnaryNotNullStateful<TimestampType, I, ParseTimestamp> kernel(
ParseTimestamp{out_type});
@@ -318,7 +318,7 @@ struct CastFunctor<TimestampType, I, enable_if_t<is_base_binary_type<I>::value>>
template <typename Type>
void AddCrossUnitCast(CastFunction* func) {
ScalarKernel kernel;
- kernel.exec = TrivialScalarUnaryAsArraysExec(CastFunctor<Type, Type>::Exec);
+ kernel.exec = TrivialScalarUnaryAsArraysExec(CastFunctor<Type, Type>::Exec);
kernel.signature = KernelSignature::Make({InputType(Type::type_id)}, kOutputTargetType);
DCHECK_OK(func->AddKernel(Type::type_id, std::move(kernel)));
}
@@ -417,11 +417,11 @@ std::shared_ptr<CastFunction> GetTimestampCast() {
AddZeroCopyCast(Type::INT64, /*in_type=*/int64(), kOutputTargetType, func.get());
// From date types
- // TODO: ARROW-8876, these casts are not directly tested
- AddSimpleCast<Date32Type, TimestampType>(InputType(Type::DATE32), kOutputTargetType,
- func.get());
- AddSimpleCast<Date64Type, TimestampType>(InputType(Type::DATE64), kOutputTargetType,
- func.get());
+ // TODO: ARROW-8876, these casts are not directly tested
+ AddSimpleCast<Date32Type, TimestampType>(InputType(Type::DATE32), kOutputTargetType,
+ func.get());
+ AddSimpleCast<Date64Type, TimestampType>(InputType(Type::DATE64), kOutputTargetType,
+ func.get());
// string -> timestamp
AddSimpleCast<StringType, TimestampType>(utf8(), kOutputTargetType, func.get());
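
A minimal sketch (assumed usage, not from this diff) of the date-to-timestamp kernels registered above; CastFunctor<TimestampType, Date32Type> multiplies the day count by 86400 times the target-unit factor via ShiftTime:

#include "arrow/api.h"
#include "arrow/compute/api.h"

arrow::Status Date32ToTimestampExample() {
  arrow::Date32Builder builder;
  ARROW_RETURN_NOT_OK(builder.Append(1));  // 1970-01-02 as days since epoch
  ARROW_ASSIGN_OR_RAISE(std::shared_ptr<arrow::Array> days, builder.Finish());

  // days -> seconds: ShiftTime<int32_t, int64_t> with util::MULTIPLY and
  // factor 86400 (kMillisecondsInDay / 1000).
  ARROW_ASSIGN_OR_RAISE(
      arrow::Datum result,
      arrow::compute::Cast(days, arrow::timestamp(arrow::TimeUnit::SECOND)));
  // result holds [86400] as timestamp[s]
  return arrow::Status::OK();
}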
diff --git a/contrib/libs/apache/arrow/cpp/src/arrow/compute/kernels/scalar_compare.cc b/contrib/libs/apache/arrow/cpp/src/arrow/compute/kernels/scalar_compare.cc
index 777a7c9d5ee..4342d776c38 100644
--- a/contrib/libs/apache/arrow/cpp/src/arrow/compute/kernels/scalar_compare.cc
+++ b/contrib/libs/apache/arrow/cpp/src/arrow/compute/kernels/scalar_compare.cc
@@ -15,12 +15,12 @@
// specific language governing permissions and limitations
// under the License.
-#include <cmath>
-#include <limits>
-
-#include "arrow/compute/api_scalar.h"
+#include <cmath>
+#include <limits>
+
+#include "arrow/compute/api_scalar.h"
#include "arrow/compute/kernels/common.h"
-#include "arrow/util/bitmap_ops.h"
+#include "arrow/util/bitmap_ops.h"
namespace arrow {
@@ -34,110 +34,110 @@ namespace internal {
namespace {
struct Equal {
- template <typename T, typename Arg0, typename Arg1>
- static constexpr T Call(KernelContext*, const Arg0& left, const Arg1& right, Status*) {
- static_assert(std::is_same<T, bool>::value && std::is_same<Arg0, Arg1>::value, "");
+ template <typename T, typename Arg0, typename Arg1>
+ static constexpr T Call(KernelContext*, const Arg0& left, const Arg1& right, Status*) {
+ static_assert(std::is_same<T, bool>::value && std::is_same<Arg0, Arg1>::value, "");
return left == right;
}
};
struct NotEqual {
- template <typename T, typename Arg0, typename Arg1>
- static constexpr T Call(KernelContext*, const Arg0& left, const Arg1& right, Status*) {
- static_assert(std::is_same<T, bool>::value && std::is_same<Arg0, Arg1>::value, "");
+ template <typename T, typename Arg0, typename Arg1>
+ static constexpr T Call(KernelContext*, const Arg0& left, const Arg1& right, Status*) {
+ static_assert(std::is_same<T, bool>::value && std::is_same<Arg0, Arg1>::value, "");
return left != right;
}
};
struct Greater {
- template <typename T, typename Arg0, typename Arg1>
- static constexpr T Call(KernelContext*, const Arg0& left, const Arg1& right, Status*) {
- static_assert(std::is_same<T, bool>::value && std::is_same<Arg0, Arg1>::value, "");
+ template <typename T, typename Arg0, typename Arg1>
+ static constexpr T Call(KernelContext*, const Arg0& left, const Arg1& right, Status*) {
+ static_assert(std::is_same<T, bool>::value && std::is_same<Arg0, Arg1>::value, "");
return left > right;
}
};
struct GreaterEqual {
- template <typename T, typename Arg0, typename Arg1>
- static constexpr T Call(KernelContext*, const Arg0& left, const Arg1& right, Status*) {
- static_assert(std::is_same<T, bool>::value && std::is_same<Arg0, Arg1>::value, "");
+ template <typename T, typename Arg0, typename Arg1>
+ static constexpr T Call(KernelContext*, const Arg0& left, const Arg1& right, Status*) {
+ static_assert(std::is_same<T, bool>::value && std::is_same<Arg0, Arg1>::value, "");
return left >= right;
}
};
-template <typename T>
-using is_unsigned_integer = std::integral_constant<bool, std::is_integral<T>::value &&
- std::is_unsigned<T>::value>;
-
-template <typename T>
-using is_signed_integer =
- std::integral_constant<bool, std::is_integral<T>::value && std::is_signed<T>::value>;
-
-template <typename T>
-using enable_if_integer =
- enable_if_t<is_signed_integer<T>::value || is_unsigned_integer<T>::value, T>;
-
-template <typename T>
-using enable_if_floating_point = enable_if_t<std::is_floating_point<T>::value, T>;
-
-struct Minimum {
- template <typename T, typename Arg0, typename Arg1>
- static enable_if_floating_point<T> Call(Arg0 left, Arg1 right) {
- static_assert(std::is_same<T, Arg0>::value && std::is_same<Arg0, Arg1>::value, "");
- return std::fmin(left, right);
- }
-
- template <typename T, typename Arg0, typename Arg1>
- static enable_if_integer<T> Call(Arg0 left, Arg1 right) {
- static_assert(std::is_same<T, Arg0>::value && std::is_same<Arg0, Arg1>::value, "");
- return std::min(left, right);
- }
-
- template <typename T>
- static constexpr enable_if_t<std::is_same<float, T>::value, T> antiextreme() {
- return std::nanf("");
- }
-
- template <typename T>
- static constexpr enable_if_t<std::is_same<double, T>::value, T> antiextreme() {
- return std::nan("");
- }
-
- template <typename T>
- static constexpr enable_if_integer<T> antiextreme() {
- return std::numeric_limits<T>::max();
- }
-};
-
-struct Maximum {
- template <typename T, typename Arg0, typename Arg1>
- static enable_if_floating_point<T> Call(Arg0 left, Arg1 right) {
- static_assert(std::is_same<T, Arg0>::value && std::is_same<Arg0, Arg1>::value, "");
- return std::fmax(left, right);
- }
-
- template <typename T, typename Arg0, typename Arg1>
- static enable_if_integer<T> Call(Arg0 left, Arg1 right) {
- static_assert(std::is_same<T, Arg0>::value && std::is_same<Arg0, Arg1>::value, "");
- return std::max(left, right);
- }
-
- template <typename T>
- static constexpr enable_if_t<std::is_same<float, T>::value, T> antiextreme() {
- return std::nanf("");
- }
-
- template <typename T>
- static constexpr enable_if_t<std::is_same<double, T>::value, T> antiextreme() {
- return std::nan("");
- }
-
- template <typename T>
- static constexpr enable_if_integer<T> antiextreme() {
- return std::numeric_limits<T>::min();
- }
-};
-
+template <typename T>
+using is_unsigned_integer = std::integral_constant<bool, std::is_integral<T>::value &&
+ std::is_unsigned<T>::value>;
+
+template <typename T>
+using is_signed_integer =
+ std::integral_constant<bool, std::is_integral<T>::value && std::is_signed<T>::value>;
+
+template <typename T>
+using enable_if_integer =
+ enable_if_t<is_signed_integer<T>::value || is_unsigned_integer<T>::value, T>;
+
+template <typename T>
+using enable_if_floating_point = enable_if_t<std::is_floating_point<T>::value, T>;
+
+struct Minimum {
+ template <typename T, typename Arg0, typename Arg1>
+ static enable_if_floating_point<T> Call(Arg0 left, Arg1 right) {
+ static_assert(std::is_same<T, Arg0>::value && std::is_same<Arg0, Arg1>::value, "");
+ return std::fmin(left, right);
+ }
+
+ template <typename T, typename Arg0, typename Arg1>
+ static enable_if_integer<T> Call(Arg0 left, Arg1 right) {
+ static_assert(std::is_same<T, Arg0>::value && std::is_same<Arg0, Arg1>::value, "");
+ return std::min(left, right);
+ }
+
+ template <typename T>
+ static constexpr enable_if_t<std::is_same<float, T>::value, T> antiextreme() {
+ return std::nanf("");
+ }
+
+ template <typename T>
+ static constexpr enable_if_t<std::is_same<double, T>::value, T> antiextreme() {
+ return std::nan("");
+ }
+
+ template <typename T>
+ static constexpr enable_if_integer<T> antiextreme() {
+ return std::numeric_limits<T>::max();
+ }
+};
+
+struct Maximum {
+ template <typename T, typename Arg0, typename Arg1>
+ static enable_if_floating_point<T> Call(Arg0 left, Arg1 right) {
+ static_assert(std::is_same<T, Arg0>::value && std::is_same<Arg0, Arg1>::value, "");
+ return std::fmax(left, right);
+ }
+
+ template <typename T, typename Arg0, typename Arg1>
+ static enable_if_integer<T> Call(Arg0 left, Arg1 right) {
+ static_assert(std::is_same<T, Arg0>::value && std::is_same<Arg0, Arg1>::value, "");
+ return std::max(left, right);
+ }
+
+ template <typename T>
+ static constexpr enable_if_t<std::is_same<float, T>::value, T> antiextreme() {
+ return std::nanf("");
+ }
+
+ template <typename T>
+ static constexpr enable_if_t<std::is_same<double, T>::value, T> antiextreme() {
+ return std::nan("");
+ }
+
+ template <typename T>
+ static constexpr enable_if_integer<T> antiextreme() {
+ return std::numeric_limits<T>::min();
+ }
+};
+
// Implement Less, LessEqual by flipping arguments to Greater, GreaterEqual
template <typename Op>
@@ -154,57 +154,57 @@ void AddGenericCompare(const std::shared_ptr<DataType>& ty, ScalarFunction* func
applicator::ScalarBinaryEqualTypes<BooleanType, InType, Op>::Exec));
}
-struct CompareFunction : ScalarFunction {
- using ScalarFunction::ScalarFunction;
-
- Result<const Kernel*> DispatchBest(std::vector<ValueDescr>* values) const override {
- RETURN_NOT_OK(CheckArity(*values));
-
- using arrow::compute::detail::DispatchExactImpl;
- if (auto kernel = DispatchExactImpl(this, *values)) return kernel;
-
- EnsureDictionaryDecoded(values);
- ReplaceNullWithOtherType(values);
-
- if (auto type = CommonNumeric(*values)) {
- ReplaceTypes(type, values);
- } else if (auto type = CommonTimestamp(*values)) {
- ReplaceTypes(type, values);
- } else if (auto type = CommonBinary(*values)) {
- ReplaceTypes(type, values);
- }
-
- if (auto kernel = DispatchExactImpl(this, *values)) return kernel;
- return arrow::compute::detail::NoMatchingKernel(this, *values);
- }
-};
-
-struct VarArgsCompareFunction : ScalarFunction {
- using ScalarFunction::ScalarFunction;
-
- Result<const Kernel*> DispatchBest(std::vector<ValueDescr>* values) const override {
- RETURN_NOT_OK(CheckArity(*values));
-
- using arrow::compute::detail::DispatchExactImpl;
- if (auto kernel = DispatchExactImpl(this, *values)) return kernel;
-
- EnsureDictionaryDecoded(values);
-
- if (auto type = CommonNumeric(*values)) {
- ReplaceTypes(type, values);
- } else if (auto type = CommonTimestamp(*values)) {
- ReplaceTypes(type, values);
- }
-
- if (auto kernel = DispatchExactImpl(this, *values)) return kernel;
- return arrow::compute::detail::NoMatchingKernel(this, *values);
- }
-};
-
+struct CompareFunction : ScalarFunction {
+ using ScalarFunction::ScalarFunction;
+
+ Result<const Kernel*> DispatchBest(std::vector<ValueDescr>* values) const override {
+ RETURN_NOT_OK(CheckArity(*values));
+
+ using arrow::compute::detail::DispatchExactImpl;
+ if (auto kernel = DispatchExactImpl(this, *values)) return kernel;
+
+ EnsureDictionaryDecoded(values);
+ ReplaceNullWithOtherType(values);
+
+ if (auto type = CommonNumeric(*values)) {
+ ReplaceTypes(type, values);
+ } else if (auto type = CommonTimestamp(*values)) {
+ ReplaceTypes(type, values);
+ } else if (auto type = CommonBinary(*values)) {
+ ReplaceTypes(type, values);
+ }
+
+ if (auto kernel = DispatchExactImpl(this, *values)) return kernel;
+ return arrow::compute::detail::NoMatchingKernel(this, *values);
+ }
+};
+
+struct VarArgsCompareFunction : ScalarFunction {
+ using ScalarFunction::ScalarFunction;
+
+ Result<const Kernel*> DispatchBest(std::vector<ValueDescr>* values) const override {
+ RETURN_NOT_OK(CheckArity(*values));
+
+ using arrow::compute::detail::DispatchExactImpl;
+ if (auto kernel = DispatchExactImpl(this, *values)) return kernel;
+
+ EnsureDictionaryDecoded(values);
+
+ if (auto type = CommonNumeric(*values)) {
+ ReplaceTypes(type, values);
+ } else if (auto type = CommonTimestamp(*values)) {
+ ReplaceTypes(type, values);
+ }
+
+ if (auto kernel = DispatchExactImpl(this, *values)) return kernel;
+ return arrow::compute::detail::NoMatchingKernel(this, *values);
+ }
+};
+
template <typename Op>
-std::shared_ptr<ScalarFunction> MakeCompareFunction(std::string name,
- const FunctionDoc* doc) {
- auto func = std::make_shared<CompareFunction>(name, Arity::Binary(), doc);
+std::shared_ptr<ScalarFunction> MakeCompareFunction(std::string name,
+ const FunctionDoc* doc) {
+ auto func = std::make_shared<CompareFunction>(name, Arity::Binary(), doc);
DCHECK_OK(func->AddKernel(
{boolean(), boolean()}, boolean(),
@@ -263,9 +263,9 @@ std::shared_ptr<ScalarFunction> MakeCompareFunction(std::string name,
}
std::shared_ptr<ScalarFunction> MakeFlippedFunction(std::string name,
- const ScalarFunction& func,
- const FunctionDoc* doc) {
- auto flipped_func = std::make_shared<CompareFunction>(name, Arity::Binary(), doc);
+ const ScalarFunction& func,
+ const FunctionDoc* doc) {
+ auto flipped_func = std::make_shared<CompareFunction>(name, Arity::Binary(), doc);
for (const ScalarKernel* kernel : func.kernels()) {
ScalarKernel flipped_kernel = *kernel;
flipped_kernel.exec = MakeFlippedBinaryExec(kernel->exec);
@@ -274,249 +274,249 @@ std::shared_ptr<ScalarFunction> MakeFlippedFunction(std::string name,
return flipped_func;
}
-using MinMaxState = OptionsWrapper<ElementWiseAggregateOptions>;
-
-// Implement a variadic scalar min/max kernel.
-template <typename OutType, typename Op>
-struct ScalarMinMax {
- using OutValue = typename GetOutputType<OutType>::T;
-
- static void ExecScalar(const ExecBatch& batch,
- const ElementWiseAggregateOptions& options, Scalar* out) {
- // All arguments are scalar
- OutValue value{};
- bool valid = false;
- for (const auto& arg : batch.values) {
-      // Ignore non-scalar arguments so this helper also works in the mixed-scalar-and-array case
- if (!arg.is_scalar()) continue;
- const auto& scalar = *arg.scalar();
- if (!scalar.is_valid) {
- if (options.skip_nulls) continue;
- out->is_valid = false;
- return;
- }
- if (!valid) {
- value = UnboxScalar<OutType>::Unbox(scalar);
- valid = true;
- } else {
- value = Op::template Call<OutValue, OutValue, OutValue>(
- value, UnboxScalar<OutType>::Unbox(scalar));
- }
- }
- out->is_valid = valid;
- if (valid) {
- BoxScalar<OutType>::Box(value, out);
- }
- }
-
- static Status Exec(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
- const ElementWiseAggregateOptions& options = MinMaxState::Get(ctx);
- const auto descrs = batch.GetDescriptors();
- const size_t scalar_count =
- static_cast<size_t>(std::count_if(batch.values.begin(), batch.values.end(),
- [](const Datum& d) { return d.is_scalar(); }));
- if (scalar_count == batch.values.size()) {
- ExecScalar(batch, options, out->scalar().get());
- return Status::OK();
- }
-
- ArrayData* output = out->mutable_array();
-
- // At least one array, two or more arguments
- ArrayDataVector arrays;
- for (const auto& arg : batch.values) {
- if (!arg.is_array()) continue;
- arrays.push_back(arg.array());
- }
-
- bool initialize_output = true;
- if (scalar_count > 0) {
- ARROW_ASSIGN_OR_RAISE(std::shared_ptr<Scalar> temp_scalar,
- MakeScalar(out->type(), 0));
- ExecScalar(batch, options, temp_scalar.get());
- if (temp_scalar->is_valid) {
- const auto value = UnboxScalar<OutType>::Unbox(*temp_scalar);
- initialize_output = false;
- OutValue* out = output->GetMutableValues<OutValue>(1);
- std::fill(out, out + batch.length, value);
- } else if (!options.skip_nulls) {
- // Abort early
- ARROW_ASSIGN_OR_RAISE(auto array, MakeArrayFromScalar(*temp_scalar, batch.length,
- ctx->memory_pool()));
- *output = *array->data();
- return Status::OK();
- }
- }
-
- if (initialize_output) {
- OutValue* out = output->GetMutableValues<OutValue>(1);
- std::fill(out, out + batch.length, Op::template antiextreme<OutValue>());
- }
-
- // Precompute the validity buffer
- if (options.skip_nulls && initialize_output) {
- // OR together the validity buffers of all arrays
- if (std::all_of(arrays.begin(), arrays.end(),
- [](const std::shared_ptr<ArrayData>& arr) {
- return arr->MayHaveNulls();
- })) {
- for (const auto& arr : arrays) {
- if (!arr->MayHaveNulls()) continue;
- if (!output->buffers[0]) {
- ARROW_ASSIGN_OR_RAISE(output->buffers[0], ctx->AllocateBitmap(batch.length));
-          ::arrow::internal::CopyBitmap(arr->buffers[0]->data(), arr->offset,
-                                        batch.length,
-                                        output->buffers[0]->mutable_data(),
-                                        /*dest_offset=*/0);
- } else {
- ::arrow::internal::BitmapOr(
- output->buffers[0]->data(), /*left_offset=*/0, arr->buffers[0]->data(),
- arr->offset, batch.length,
- /*out_offset=*/0, output->buffers[0]->mutable_data());
- }
- }
- }
- } else if (!options.skip_nulls) {
- // AND together the validity buffers of all arrays
- for (const auto& arr : arrays) {
- if (!arr->MayHaveNulls()) continue;
- if (!output->buffers[0]) {
- ARROW_ASSIGN_OR_RAISE(output->buffers[0], ctx->AllocateBitmap(batch.length));
- ::arrow::internal::CopyBitmap(arr->buffers[0]->data(), arr->offset,
- batch.length, output->buffers[0]->mutable_data(),
- /*dest_offset=*/0);
- } else {
- ::arrow::internal::BitmapAnd(output->buffers[0]->data(), /*left_offset=*/0,
- arr->buffers[0]->data(), arr->offset, batch.length,
- /*out_offset=*/0,
- output->buffers[0]->mutable_data());
- }
- }
- }
-
- for (const auto& array : arrays) {
- OutputArrayWriter<OutType> writer(out->mutable_array());
- ArrayIterator<OutType> out_it(*output);
- int64_t index = 0;
- VisitArrayValuesInline<OutType>(
- *array,
- [&](OutValue value) {
- auto u = out_it();
- if (!output->buffers[0] ||
- BitUtil::GetBit(output->buffers[0]->data(), index)) {
- writer.Write(Op::template Call<OutValue, OutValue, OutValue>(u, value));
- } else {
- writer.Write(value);
- }
- index++;
- },
- [&]() {
- // RHS is null, preserve the LHS
- writer.values++;
- index++;
- out_it();
- });
- }
- output->null_count = output->buffers[0] ? -1 : 0;
- return Status::OK();
- }
-};
-
-template <typename Op>
-std::shared_ptr<ScalarFunction> MakeScalarMinMax(std::string name,
- const FunctionDoc* doc) {
- static auto default_element_wise_aggregate_options =
- ElementWiseAggregateOptions::Defaults();
-
- auto func = std::make_shared<VarArgsCompareFunction>(
- name, Arity::VarArgs(), doc, &default_element_wise_aggregate_options);
- for (const auto& ty : NumericTypes()) {
- auto exec = GeneratePhysicalNumeric<ScalarMinMax, Op>(ty);
- ScalarKernel kernel{KernelSignature::Make({ty}, ty, /*is_varargs=*/true), exec,
- MinMaxState::Init};
- kernel.null_handling = NullHandling::type::COMPUTED_NO_PREALLOCATE;
- kernel.mem_allocation = MemAllocation::type::PREALLOCATE;
- DCHECK_OK(func->AddKernel(std::move(kernel)));
- }
- for (const auto& ty : TemporalTypes()) {
- auto exec = GeneratePhysicalNumeric<ScalarMinMax, Op>(ty);
- ScalarKernel kernel{KernelSignature::Make({ty}, ty, /*is_varargs=*/true), exec,
- MinMaxState::Init};
- kernel.null_handling = NullHandling::type::COMPUTED_NO_PREALLOCATE;
- kernel.mem_allocation = MemAllocation::type::PREALLOCATE;
- DCHECK_OK(func->AddKernel(std::move(kernel)));
- }
- return func;
-}
-
-const FunctionDoc equal_doc{"Compare values for equality (x == y)",
- ("A null on either side emits a null comparison result."),
- {"x", "y"}};
-
-const FunctionDoc not_equal_doc{"Compare values for inequality (x != y)",
- ("A null on either side emits a null comparison result."),
- {"x", "y"}};
-
-const FunctionDoc greater_doc{"Compare values for ordered inequality (x > y)",
- ("A null on either side emits a null comparison result."),
- {"x", "y"}};
-
-const FunctionDoc greater_equal_doc{
- "Compare values for ordered inequality (x >= y)",
- ("A null on either side emits a null comparison result."),
- {"x", "y"}};
-
-const FunctionDoc less_doc{"Compare values for ordered inequality (x < y)",
- ("A null on either side emits a null comparison result."),
- {"x", "y"}};
-
-const FunctionDoc less_equal_doc{
- "Compare values for ordered inequality (x <= y)",
- ("A null on either side emits a null comparison result."),
- {"x", "y"}};
-
-const FunctionDoc min_element_wise_doc{
- "Find the element-wise minimum value",
- ("Nulls will be ignored (default) or propagated. "
- "NaN will be taken over null, but not over any valid float."),
- {"*args"},
- "ElementWiseAggregateOptions"};
-
-const FunctionDoc max_element_wise_doc{
- "Find the element-wise maximum value",
- ("Nulls will be ignored (default) or propagated. "
- "NaN will be taken over null, but not over any valid float."),
- {"*args"},
- "ElementWiseAggregateOptions"};
+using MinMaxState = OptionsWrapper<ElementWiseAggregateOptions>;
+
+// Implement a variadic scalar min/max kernel.
+template <typename OutType, typename Op>
+struct ScalarMinMax {
+ using OutValue = typename GetOutputType<OutType>::T;
+
+ static void ExecScalar(const ExecBatch& batch,
+ const ElementWiseAggregateOptions& options, Scalar* out) {
+ // All arguments are scalar
+ OutValue value{};
+ bool valid = false;
+ for (const auto& arg : batch.values) {
+      // Ignore non-scalar arguments so this helper also works in the mixed-scalar-and-array case
+ if (!arg.is_scalar()) continue;
+ const auto& scalar = *arg.scalar();
+ if (!scalar.is_valid) {
+ if (options.skip_nulls) continue;
+ out->is_valid = false;
+ return;
+ }
+ if (!valid) {
+ value = UnboxScalar<OutType>::Unbox(scalar);
+ valid = true;
+ } else {
+ value = Op::template Call<OutValue, OutValue, OutValue>(
+ value, UnboxScalar<OutType>::Unbox(scalar));
+ }
+ }
+ out->is_valid = valid;
+ if (valid) {
+ BoxScalar<OutType>::Box(value, out);
+ }
+ }
+
+ static Status Exec(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
+ const ElementWiseAggregateOptions& options = MinMaxState::Get(ctx);
+ const auto descrs = batch.GetDescriptors();
+ const size_t scalar_count =
+ static_cast<size_t>(std::count_if(batch.values.begin(), batch.values.end(),
+ [](const Datum& d) { return d.is_scalar(); }));
+ if (scalar_count == batch.values.size()) {
+ ExecScalar(batch, options, out->scalar().get());
+ return Status::OK();
+ }
+
+ ArrayData* output = out->mutable_array();
+
+ // At least one array, two or more arguments
+ ArrayDataVector arrays;
+ for (const auto& arg : batch.values) {
+ if (!arg.is_array()) continue;
+ arrays.push_back(arg.array());
+ }
+
+ bool initialize_output = true;
+ if (scalar_count > 0) {
+ ARROW_ASSIGN_OR_RAISE(std::shared_ptr<Scalar> temp_scalar,
+ MakeScalar(out->type(), 0));
+ ExecScalar(batch, options, temp_scalar.get());
+ if (temp_scalar->is_valid) {
+ const auto value = UnboxScalar<OutType>::Unbox(*temp_scalar);
+ initialize_output = false;
+ OutValue* out = output->GetMutableValues<OutValue>(1);
+ std::fill(out, out + batch.length, value);
+ } else if (!options.skip_nulls) {
+ // Abort early
+ ARROW_ASSIGN_OR_RAISE(auto array, MakeArrayFromScalar(*temp_scalar, batch.length,
+ ctx->memory_pool()));
+ *output = *array->data();
+ return Status::OK();
+ }
+ }
+
+ if (initialize_output) {
+ OutValue* out = output->GetMutableValues<OutValue>(1);
+ std::fill(out, out + batch.length, Op::template antiextreme<OutValue>());
+ }
+
+ // Precompute the validity buffer
+ if (options.skip_nulls && initialize_output) {
+ // OR together the validity buffers of all arrays
+ if (std::all_of(arrays.begin(), arrays.end(),
+ [](const std::shared_ptr<ArrayData>& arr) {
+ return arr->MayHaveNulls();
+ })) {
+ for (const auto& arr : arrays) {
+ if (!arr->MayHaveNulls()) continue;
+ if (!output->buffers[0]) {
+ ARROW_ASSIGN_OR_RAISE(output->buffers[0], ctx->AllocateBitmap(batch.length));
+          ::arrow::internal::CopyBitmap(arr->buffers[0]->data(), arr->offset,
+                                        batch.length,
+                                        output->buffers[0]->mutable_data(),
+                                        /*dest_offset=*/0);
+ } else {
+ ::arrow::internal::BitmapOr(
+ output->buffers[0]->data(), /*left_offset=*/0, arr->buffers[0]->data(),
+ arr->offset, batch.length,
+ /*out_offset=*/0, output->buffers[0]->mutable_data());
+ }
+ }
+ }
+ } else if (!options.skip_nulls) {
+ // AND together the validity buffers of all arrays
+ for (const auto& arr : arrays) {
+ if (!arr->MayHaveNulls()) continue;
+ if (!output->buffers[0]) {
+ ARROW_ASSIGN_OR_RAISE(output->buffers[0], ctx->AllocateBitmap(batch.length));
+ ::arrow::internal::CopyBitmap(arr->buffers[0]->data(), arr->offset,
+ batch.length, output->buffers[0]->mutable_data(),
+ /*dest_offset=*/0);
+ } else {
+ ::arrow::internal::BitmapAnd(output->buffers[0]->data(), /*left_offset=*/0,
+ arr->buffers[0]->data(), arr->offset, batch.length,
+ /*out_offset=*/0,
+ output->buffers[0]->mutable_data());
+ }
+ }
+ }
+
+ for (const auto& array : arrays) {
+ OutputArrayWriter<OutType> writer(out->mutable_array());
+ ArrayIterator<OutType> out_it(*output);
+ int64_t index = 0;
+ VisitArrayValuesInline<OutType>(
+ *array,
+ [&](OutValue value) {
+ auto u = out_it();
+ if (!output->buffers[0] ||
+ BitUtil::GetBit(output->buffers[0]->data(), index)) {
+ writer.Write(Op::template Call<OutValue, OutValue, OutValue>(u, value));
+ } else {
+ writer.Write(value);
+ }
+ index++;
+ },
+ [&]() {
+ // RHS is null, preserve the LHS
+ writer.values++;
+ index++;
+ out_it();
+ });
+ }
+ output->null_count = output->buffers[0] ? -1 : 0;
+ return Status::OK();
+ }
+};
+
+template <typename Op>
+std::shared_ptr<ScalarFunction> MakeScalarMinMax(std::string name,
+ const FunctionDoc* doc) {
+ static auto default_element_wise_aggregate_options =
+ ElementWiseAggregateOptions::Defaults();
+
+ auto func = std::make_shared<VarArgsCompareFunction>(
+ name, Arity::VarArgs(), doc, &default_element_wise_aggregate_options);
+ for (const auto& ty : NumericTypes()) {
+ auto exec = GeneratePhysicalNumeric<ScalarMinMax, Op>(ty);
+ ScalarKernel kernel{KernelSignature::Make({ty}, ty, /*is_varargs=*/true), exec,
+ MinMaxState::Init};
+ kernel.null_handling = NullHandling::type::COMPUTED_NO_PREALLOCATE;
+ kernel.mem_allocation = MemAllocation::type::PREALLOCATE;
+ DCHECK_OK(func->AddKernel(std::move(kernel)));
+ }
+ for (const auto& ty : TemporalTypes()) {
+ auto exec = GeneratePhysicalNumeric<ScalarMinMax, Op>(ty);
+ ScalarKernel kernel{KernelSignature::Make({ty}, ty, /*is_varargs=*/true), exec,
+ MinMaxState::Init};
+ kernel.null_handling = NullHandling::type::COMPUTED_NO_PREALLOCATE;
+ kernel.mem_allocation = MemAllocation::type::PREALLOCATE;
+ DCHECK_OK(func->AddKernel(std::move(kernel)));
+ }
+ return func;
+}
+
+const FunctionDoc equal_doc{"Compare values for equality (x == y)",
+ ("A null on either side emits a null comparison result."),
+ {"x", "y"}};
+
+const FunctionDoc not_equal_doc{"Compare values for inequality (x != y)",
+ ("A null on either side emits a null comparison result."),
+ {"x", "y"}};
+
+const FunctionDoc greater_doc{"Compare values for ordered inequality (x > y)",
+ ("A null on either side emits a null comparison result."),
+ {"x", "y"}};
+
+const FunctionDoc greater_equal_doc{
+ "Compare values for ordered inequality (x >= y)",
+ ("A null on either side emits a null comparison result."),
+ {"x", "y"}};
+
+const FunctionDoc less_doc{"Compare values for ordered inequality (x < y)",
+ ("A null on either side emits a null comparison result."),
+ {"x", "y"}};
+
+const FunctionDoc less_equal_doc{
+ "Compare values for ordered inequality (x <= y)",
+ ("A null on either side emits a null comparison result."),
+ {"x", "y"}};
+
+const FunctionDoc min_element_wise_doc{
+ "Find the element-wise minimum value",
+ ("Nulls will be ignored (default) or propagated. "
+ "NaN will be taken over null, but not over any valid float."),
+ {"*args"},
+ "ElementWiseAggregateOptions"};
+
+const FunctionDoc max_element_wise_doc{
+ "Find the element-wise maximum value",
+ ("Nulls will be ignored (default) or propagated. "
+ "NaN will be taken over null, but not over any valid float."),
+ {"*args"},
+ "ElementWiseAggregateOptions"};
} // namespace
void RegisterScalarComparison(FunctionRegistry* registry) {
- DCHECK_OK(registry->AddFunction(MakeCompareFunction<Equal>("equal", &equal_doc)));
- DCHECK_OK(
- registry->AddFunction(MakeCompareFunction<NotEqual>("not_equal", &not_equal_doc)));
+ DCHECK_OK(registry->AddFunction(MakeCompareFunction<Equal>("equal", &equal_doc)));
+ DCHECK_OK(
+ registry->AddFunction(MakeCompareFunction<NotEqual>("not_equal", &not_equal_doc)));
- auto greater = MakeCompareFunction<Greater>("greater", &greater_doc);
- auto greater_equal =
- MakeCompareFunction<GreaterEqual>("greater_equal", &greater_equal_doc);
+ auto greater = MakeCompareFunction<Greater>("greater", &greater_doc);
+ auto greater_equal =
+ MakeCompareFunction<GreaterEqual>("greater_equal", &greater_equal_doc);
- auto less = MakeFlippedFunction("less", *greater, &less_doc);
- auto less_equal = MakeFlippedFunction("less_equal", *greater_equal, &less_equal_doc);
+ auto less = MakeFlippedFunction("less", *greater, &less_doc);
+ auto less_equal = MakeFlippedFunction("less_equal", *greater_equal, &less_equal_doc);
DCHECK_OK(registry->AddFunction(std::move(less)));
DCHECK_OK(registry->AddFunction(std::move(less_equal)));
DCHECK_OK(registry->AddFunction(std::move(greater)));
DCHECK_OK(registry->AddFunction(std::move(greater_equal)));
-
- // ----------------------------------------------------------------------
- // Variadic element-wise functions
-
- auto min_element_wise =
- MakeScalarMinMax<Minimum>("min_element_wise", &min_element_wise_doc);
- DCHECK_OK(registry->AddFunction(std::move(min_element_wise)));
-
- auto max_element_wise =
- MakeScalarMinMax<Maximum>("max_element_wise", &max_element_wise_doc);
- DCHECK_OK(registry->AddFunction(std::move(max_element_wise)));
+
+ // ----------------------------------------------------------------------
+ // Variadic element-wise functions
+
+ auto min_element_wise =
+ MakeScalarMinMax<Minimum>("min_element_wise", &min_element_wise_doc);
+ DCHECK_OK(registry->AddFunction(std::move(min_element_wise)));
+
+ auto max_element_wise =
+ MakeScalarMinMax<Maximum>("max_element_wise", &max_element_wise_doc);
+ DCHECK_OK(registry->AddFunction(std::move(max_element_wise)));
}
} // namespace internal
diff --git a/contrib/libs/apache/arrow/cpp/src/arrow/compute/kernels/scalar_fill_null.cc b/contrib/libs/apache/arrow/cpp/src/arrow/compute/kernels/scalar_fill_null.cc
index d29c3984b7a..cf22b0de3dc 100644
--- a/contrib/libs/apache/arrow/cpp/src/arrow/compute/kernels/scalar_fill_null.cc
+++ b/contrib/libs/apache/arrow/cpp/src/arrow/compute/kernels/scalar_fill_null.cc
@@ -37,13 +37,13 @@ namespace {
template <typename Type, typename Enable = void>
struct FillNullFunctor {};
-// Numeric inputs
-
+// Numeric inputs
+
template <typename Type>
struct FillNullFunctor<Type, enable_if_t<is_number_type<Type>::value>> {
using T = typename TypeTraits<Type>::CType;
- static Status Exec(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
+ static Status Exec(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
const ArrayData& data = *batch[0].array();
const Scalar& fill_value = *batch[1].scalar();
ArrayData* output = out->mutable_array();
@@ -54,8 +54,8 @@ struct FillNullFunctor<Type, enable_if_t<is_number_type<Type>::value>> {
T value = UnboxScalar<Type>::Unbox(fill_value);
if (data.MayHaveNulls() != 0 && fill_value.is_valid) {
- ARROW_ASSIGN_OR_RAISE(std::shared_ptr<Buffer> out_buf,
- ctx->Allocate(data.length * sizeof(T)));
+ ARROW_ASSIGN_OR_RAISE(std::shared_ptr<Buffer> out_buf,
+ ctx->Allocate(data.length * sizeof(T)));
const uint8_t* is_valid = data.buffers[0]->data();
const T* in_values = data.GetValues<T>(1);
@@ -80,28 +80,28 @@ struct FillNullFunctor<Type, enable_if_t<is_number_type<Type>::value>> {
in_values += block.length;
}
output->buffers[1] = out_buf;
- output->null_count = 0;
+ output->null_count = 0;
} else {
*output = data;
}
-
- return Status::OK();
+
+ return Status::OK();
}
};
-// Boolean input
-
+// Boolean input
+
template <typename Type>
struct FillNullFunctor<Type, enable_if_t<is_boolean_type<Type>::value>> {
- static Status Exec(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
+ static Status Exec(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
const ArrayData& data = *batch[0].array();
const Scalar& fill_value = *batch[1].scalar();
ArrayData* output = out->mutable_array();
bool value = UnboxScalar<BooleanType>::Unbox(fill_value);
if (data.MayHaveNulls() != 0 && fill_value.is_valid) {
- ARROW_ASSIGN_OR_RAISE(std::shared_ptr<Buffer> out_buf,
- ctx->AllocateBitmap(data.length));
+ ARROW_ASSIGN_OR_RAISE(std::shared_ptr<Buffer> out_buf,
+ ctx->AllocateBitmap(data.length));
const uint8_t* is_valid = data.buffers[0]->data();
const uint8_t* data_bitmap = data.buffers[1]->data();
@@ -132,68 +132,68 @@ struct FillNullFunctor<Type, enable_if_t<is_boolean_type<Type>::value>> {
out_offset += block.length;
}
output->buffers[1] = out_buf;
- output->null_count = 0;
+ output->null_count = 0;
} else {
*output = data;
}
-
- return Status::OK();
+
+ return Status::OK();
}
};
-// Null input
-
+// Null input
+
template <typename Type>
struct FillNullFunctor<Type, enable_if_t<is_null_type<Type>::value>> {
- static Status Exec(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
+ static Status Exec(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
// Nothing preallocated, so we assign into the output
*out->mutable_array() = *batch[0].array();
- return Status::OK();
+ return Status::OK();
+ }
+};
+
+// Binary-like input
+
+template <typename Type>
+struct FillNullFunctor<Type, enable_if_t<is_base_binary_type<Type>::value>> {
+ using BuilderType = typename TypeTraits<Type>::BuilderType;
+
+ static Status Exec(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
+ const ArrayData& input = *batch[0].array();
+ const auto& fill_value_scalar =
+ checked_cast<const BaseBinaryScalar&>(*batch[1].scalar());
+ ArrayData* output = out->mutable_array();
+
+ // Ensure the kernel is configured properly to have no validity bitmap /
+ // null count 0 unless we explicitly propagate it below.
+ DCHECK(output->buffers[0] == nullptr);
+
+ const int64_t null_count = input.GetNullCount();
+
+ if (null_count > 0 && fill_value_scalar.is_valid) {
+ util::string_view fill_value(*fill_value_scalar.value);
+ BuilderType builder(input.type, ctx->memory_pool());
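+      // (added note) The ReserveData call below reserves the input data size
+      // plus one copy of the fill value per null slot, a conservative upper
+      // bound on the output data size.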
+ RETURN_NOT_OK(builder.ReserveData(input.buffers[2]->size() +
+ fill_value.length() * null_count));
+ RETURN_NOT_OK(builder.Resize(input.length));
+
+ VisitArrayDataInline<Type>(
+ input, [&](util::string_view s) { builder.UnsafeAppend(s); },
+ [&]() { builder.UnsafeAppend(fill_value); });
+ std::shared_ptr<Array> string_array;
+ RETURN_NOT_OK(builder.Finish(&string_array));
+ *output = *string_array->data();
+ // The builder does not match the logical type, due to
+ // GenerateTypeAgnosticVarBinaryBase
+ output->type = input.type;
+ } else {
+ *output = input;
+ }
+
+ return Status::OK();
}
};
-// Binary-like input
-
-template <typename Type>
-struct FillNullFunctor<Type, enable_if_t<is_base_binary_type<Type>::value>> {
- using BuilderType = typename TypeTraits<Type>::BuilderType;
-
- static Status Exec(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
- const ArrayData& input = *batch[0].array();
- const auto& fill_value_scalar =
- checked_cast<const BaseBinaryScalar&>(*batch[1].scalar());
- ArrayData* output = out->mutable_array();
-
- // Ensure the kernel is configured properly to have no validity bitmap /
- // null count 0 unless we explicitly propagate it below.
- DCHECK(output->buffers[0] == nullptr);
-
- const int64_t null_count = input.GetNullCount();
-
- if (null_count > 0 && fill_value_scalar.is_valid) {
- util::string_view fill_value(*fill_value_scalar.value);
- BuilderType builder(input.type, ctx->memory_pool());
- RETURN_NOT_OK(builder.ReserveData(input.buffers[2]->size() +
- fill_value.length() * null_count));
- RETURN_NOT_OK(builder.Resize(input.length));
-
- VisitArrayDataInline<Type>(
- input, [&](util::string_view s) { builder.UnsafeAppend(s); },
- [&]() { builder.UnsafeAppend(fill_value); });
- std::shared_ptr<Array> string_array;
- RETURN_NOT_OK(builder.Finish(&string_array));
- *output = *string_array->data();
- // The builder does not match the logical type, due to
- // GenerateTypeAgnosticVarBinaryBase
- output->type = input.type;
- } else {
- *output = input;
- }
-
- return Status::OK();
- }
-};
-
void AddBasicFillNullKernels(ScalarKernel kernel, ScalarFunction* func) {
auto AddKernels = [&](const std::vector<std::shared_ptr<DataType>>& types) {
for (const std::shared_ptr<DataType>& ty : types) {
@@ -208,22 +208,22 @@ void AddBasicFillNullKernels(ScalarKernel kernel, ScalarFunction* func) {
AddKernels({boolean(), null()});
}
-void AddBinaryFillNullKernels(ScalarKernel kernel, ScalarFunction* func) {
- for (const std::shared_ptr<DataType>& ty : BaseBinaryTypes()) {
- kernel.signature =
- KernelSignature::Make({InputType::Array(ty), InputType::Scalar(ty)}, ty);
- kernel.exec = GenerateTypeAgnosticVarBinaryBase<FillNullFunctor>(*ty);
- DCHECK_OK(func->AddKernel(kernel));
- }
-}
-
-const FunctionDoc fill_null_doc{
- "Replace null elements",
- ("`fill_value` must be a scalar of the same type as `values`.\n"
- "Each non-null value in `values` is emitted as-is.\n"
- "Each null value in `values` is replaced with `fill_value`."),
- {"values", "fill_value"}};
-
+void AddBinaryFillNullKernels(ScalarKernel kernel, ScalarFunction* func) {
+ for (const std::shared_ptr<DataType>& ty : BaseBinaryTypes()) {
+ kernel.signature =
+ KernelSignature::Make({InputType::Array(ty), InputType::Scalar(ty)}, ty);
+ kernel.exec = GenerateTypeAgnosticVarBinaryBase<FillNullFunctor>(*ty);
+ DCHECK_OK(func->AddKernel(kernel));
+ }
+}
+
+const FunctionDoc fill_null_doc{
+ "Replace null elements",
+ ("`fill_value` must be a scalar of the same type as `values`.\n"
+ "Each non-null value in `values` is emitted as-is.\n"
+ "Each null value in `values` is replaced with `fill_value`."),
+ {"values", "fill_value"}};
+
} // namespace
void RegisterScalarFillNull(FunctionRegistry* registry) {
@@ -231,10 +231,10 @@ void RegisterScalarFillNull(FunctionRegistry* registry) {
ScalarKernel fill_null_base;
fill_null_base.null_handling = NullHandling::COMPUTED_NO_PREALLOCATE;
fill_null_base.mem_allocation = MemAllocation::NO_PREALLOCATE;
- auto fill_null =
- std::make_shared<ScalarFunction>("fill_null", Arity::Binary(), &fill_null_doc);
+ auto fill_null =
+ std::make_shared<ScalarFunction>("fill_null", Arity::Binary(), &fill_null_doc);
AddBasicFillNullKernels(fill_null_base, fill_null.get());
- AddBinaryFillNullKernels(fill_null_base, fill_null.get());
+ AddBinaryFillNullKernels(fill_null_base, fill_null.get());
DCHECK_OK(registry->AddFunction(fill_null));
}
}
diff --git a/contrib/libs/apache/arrow/cpp/src/arrow/compute/kernels/scalar_if_else.cc b/contrib/libs/apache/arrow/cpp/src/arrow/compute/kernels/scalar_if_else.cc
index 74fdc062930..ff308a673a3 100644
--- a/contrib/libs/apache/arrow/cpp/src/arrow/compute/kernels/scalar_if_else.cc
+++ b/contrib/libs/apache/arrow/cpp/src/arrow/compute/kernels/scalar_if_else.cc
@@ -1,1730 +1,1730 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-#include <arrow/compute/api.h>
-#include <arrow/compute/kernels/codegen_internal.h>
-#include <arrow/compute/util_internal.h>
-#include <arrow/util/bit_block_counter.h>
-#include <arrow/util/bitmap.h>
-#include <arrow/util/bitmap_ops.h>
-#include <arrow/util/bitmap_reader.h>
-
-namespace arrow {
-using internal::BitBlockCount;
-using internal::BitBlockCounter;
-using internal::Bitmap;
-using internal::BitmapWordReader;
-
-namespace compute {
-namespace internal {
-
-namespace {
-
-constexpr uint64_t kAllNull = 0;
-constexpr uint64_t kAllValid = ~kAllNull;
-
-util::optional<uint64_t> GetConstantValidityWord(const Datum& data) {
- if (data.is_scalar()) {
- return data.scalar()->is_valid ? kAllValid : kAllNull;
- }
-
- if (data.array()->null_count == data.array()->length) return kAllNull;
-
- if (!data.array()->MayHaveNulls()) return kAllValid;
-
- // no constant validity word available
- return {};
-}
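-// (added note) A valid scalar or a null-free array maps to kAllValid (~0), an
-// all-null input maps to kAllNull (0); arrays with mixed validity return an
-// empty optional and are visited word by word below.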
-
-inline Bitmap GetBitmap(const Datum& datum, int i) {
- if (datum.is_scalar()) return {};
- const ArrayData& a = *datum.array();
- return Bitmap{a.buffers[i], a.offset, a.length};
-}
-
-// If the condition is null then the output is null; otherwise validity is taken
-// from the selected argument,
-// i.e. cond.valid & (cond.data & left.valid | ~cond.data & right.valid)
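-// Worked example (added, illustrative): cond.valid = 1, cond.data = 1,
-// left.valid = 0, right.valid = 1 gives 1 & ((1 & 0) | (0 & 1)) = 0, i.e.
-// selecting a null left value yields a null output slot.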
-template <typename AllocateNullBitmap>
-Status PromoteNullsVisitor(KernelContext* ctx, const Datum& cond_d, const Datum& left_d,
- const Datum& right_d, ArrayData* output) {
- auto cond_const = GetConstantValidityWord(cond_d);
- auto left_const = GetConstantValidityWord(left_d);
- auto right_const = GetConstantValidityWord(right_d);
-
- enum { COND_CONST = 1, LEFT_CONST = 2, RIGHT_CONST = 4 };
- auto flag = COND_CONST * cond_const.has_value() | LEFT_CONST * left_const.has_value() |
- RIGHT_CONST * right_const.has_value();
-
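-  // (added note) e.g. flag == (COND_CONST | RIGHT_CONST) means only the left
-  // validity varies per element, so the switch below visits just cond.data
-  // and left_valid.
-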
- const ArrayData& cond = *cond_d.array();
- // cond.data will always be available
- Bitmap cond_data{cond.buffers[1], cond.offset, cond.length};
- Bitmap cond_valid{cond.buffers[0], cond.offset, cond.length};
- Bitmap left_valid = GetBitmap(left_d, 0);
- Bitmap right_valid = GetBitmap(right_d, 0);
-
- // cond.valid & (cond.data & left.valid | ~cond.data & right.valid)
-  // In the following cases, we don't need to allocate the out_valid bitmap
-
- // if cond & left & right all ones, then output is all valid.
-  // if the output validity buffer is already allocated (NullHandling::
-  // COMPUTED_PREALLOCATE) -> set all bits;
-  // else, set the output validity buffer to nullptr
- if (cond_const == kAllValid && left_const == kAllValid && right_const == kAllValid) {
- if (AllocateNullBitmap::value) { // NullHandling::COMPUTED_NO_PREALLOCATE
- output->buffers[0] = nullptr;
- } else { // NullHandling::COMPUTED_PREALLOCATE
- BitUtil::SetBitmap(output->buffers[0]->mutable_data(), output->offset,
- output->length);
- }
- return Status::OK();
- }
-
- if (left_const == kAllValid && right_const == kAllValid) {
- // if both left and right are valid, no need to calculate out_valid bitmap. Copy
- // cond validity buffer
- if (AllocateNullBitmap::value) { // NullHandling::COMPUTED_NO_PREALLOCATE
- // if there's an offset, copy bitmap (cannot slice a bitmap)
- if (cond.offset) {
- ARROW_ASSIGN_OR_RAISE(
- output->buffers[0],
- arrow::internal::CopyBitmap(ctx->memory_pool(), cond.buffers[0]->data(),
- cond.offset, cond.length));
- } else { // just copy assign cond validity buffer
- output->buffers[0] = cond.buffers[0];
- }
- } else { // NullHandling::COMPUTED_PREALLOCATE
- arrow::internal::CopyBitmap(cond.buffers[0]->data(), cond.offset, cond.length,
- output->buffers[0]->mutable_data(), output->offset);
- }
- return Status::OK();
- }
-
- // lambda function that will be used inside the visitor
- auto apply = [&](uint64_t c_valid, uint64_t c_data, uint64_t l_valid,
- uint64_t r_valid) {
- return c_valid & ((c_data & l_valid) | (~c_data & r_valid));
- };
-
- if (AllocateNullBitmap::value) {
-    // The following cases require a separate out_valid buffer; COMPUTED_NO_PREALLOCATE
-    // would not have allocated a buffer for it.
- ARROW_ASSIGN_OR_RAISE(output->buffers[0], ctx->AllocateBitmap(cond.length));
- }
-
- std::array<Bitmap, 1> out_bitmaps{
- Bitmap{output->buffers[0], output->offset, output->length}};
-
- switch (flag) {
- case COND_CONST | LEFT_CONST | RIGHT_CONST: {
- std::array<Bitmap, 1> bitmaps{cond_data};
- Bitmap::VisitWordsAndWrite(bitmaps, &out_bitmaps,
- [&](const std::array<uint64_t, 1>& words_in,
- std::array<uint64_t, 1>* word_out) {
- word_out->at(0) = apply(*cond_const, words_in[0],
- *left_const, *right_const);
- });
- break;
- }
- case LEFT_CONST | RIGHT_CONST: {
- std::array<Bitmap, 2> bitmaps{cond_valid, cond_data};
- Bitmap::VisitWordsAndWrite(bitmaps, &out_bitmaps,
- [&](const std::array<uint64_t, 2>& words_in,
- std::array<uint64_t, 1>* word_out) {
- word_out->at(0) = apply(words_in[0], words_in[1],
- *left_const, *right_const);
- });
- break;
- }
- case COND_CONST | RIGHT_CONST: {
- // bitmaps[C_VALID], bitmaps[R_VALID] might be null; override to make it safe for
- // Visit()
- std::array<Bitmap, 2> bitmaps{cond_data, left_valid};
- Bitmap::VisitWordsAndWrite(bitmaps, &out_bitmaps,
- [&](const std::array<uint64_t, 2>& words_in,
- std::array<uint64_t, 1>* word_out) {
- word_out->at(0) = apply(*cond_const, words_in[0],
- words_in[1], *right_const);
- });
- break;
- }
- case RIGHT_CONST: {
- // bitmaps[R_VALID] might be null; override to make it safe for Visit()
- std::array<Bitmap, 3> bitmaps{cond_valid, cond_data, left_valid};
- Bitmap::VisitWordsAndWrite(bitmaps, &out_bitmaps,
- [&](const std::array<uint64_t, 3>& words_in,
- std::array<uint64_t, 1>* word_out) {
- word_out->at(0) = apply(words_in[0], words_in[1],
- words_in[2], *right_const);
- });
- break;
- }
- case COND_CONST | LEFT_CONST: {
- // bitmaps[C_VALID], bitmaps[L_VALID] might be null; override to make it safe for
- // Visit()
- std::array<Bitmap, 2> bitmaps{cond_data, right_valid};
- Bitmap::VisitWordsAndWrite(bitmaps, &out_bitmaps,
- [&](const std::array<uint64_t, 2>& words_in,
- std::array<uint64_t, 1>* word_out) {
- word_out->at(0) = apply(*cond_const, words_in[0],
- *left_const, words_in[1]);
- });
- break;
- }
- case LEFT_CONST: {
- // bitmaps[L_VALID] might be null; override to make it safe for Visit()
- std::array<Bitmap, 3> bitmaps{cond_valid, cond_data, right_valid};
- Bitmap::VisitWordsAndWrite(bitmaps, &out_bitmaps,
- [&](const std::array<uint64_t, 3>& words_in,
- std::array<uint64_t, 1>* word_out) {
- word_out->at(0) = apply(words_in[0], words_in[1],
- *left_const, words_in[2]);
- });
- break;
- }
- case COND_CONST: {
- // bitmaps[C_VALID] might be null; override to make it safe for Visit()
- std::array<Bitmap, 3> bitmaps{cond_data, left_valid, right_valid};
- Bitmap::VisitWordsAndWrite(bitmaps, &out_bitmaps,
- [&](const std::array<uint64_t, 3>& words_in,
- std::array<uint64_t, 1>* word_out) {
- word_out->at(0) = apply(*cond_const, words_in[0],
- words_in[1], words_in[2]);
- });
- break;
- }
- case 0: {
- std::array<Bitmap, 4> bitmaps{cond_valid, cond_data, left_valid, right_valid};
- Bitmap::VisitWordsAndWrite(bitmaps, &out_bitmaps,
- [&](const std::array<uint64_t, 4>& words_in,
- std::array<uint64_t, 1>* word_out) {
- word_out->at(0) = apply(words_in[0], words_in[1],
- words_in[2], words_in[3]);
- });
- break;
- }
- }
- return Status::OK();
-}
-
-using Word = uint64_t;
-static constexpr int64_t word_len = sizeof(Word) * 8;
-
-/// Runs the main if_else loop. Here, it is expected that the right data has already
-/// been copied to the output.
-/// `invert` is meant to invert the cond.data. If it is set to `true`, then the
-/// buffer will be inverted before calling the handle_block or handle_each functions.
-/// This is useful when left is an array and right is a scalar. Then, rather than
-/// copying data from the right to the output, we can copy the left data to the output
-/// and invert the cond data to fill in the right values. Filling with a scalar is
-/// presumed to be more efficient than filling with an array.
-///
-/// `HandleBlock` has the signature:
-/// [](int64_t offset, int64_t length){...}
-/// It should copy `length` elements from the source array to the output array,
-/// starting at `offset` in both arrays.
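-/// For a fixed-width type T the handler is typically a contiguous copy, e.g.
-/// (added, illustrative):
-///   [&](int64_t offset, int64_t length) {
-///     std::memcpy(out_values + offset, left_data + offset, length * sizeof(T));
-///   }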
-template <typename HandleBlock, bool invert = false>
-void RunIfElseLoop(const ArrayData& cond, const HandleBlock& handle_block) {
- int64_t data_offset = 0;
- int64_t bit_offset = cond.offset;
- const auto* cond_data = cond.buffers[1]->data(); // this is a BoolArray
-
- BitmapWordReader<Word> cond_reader(cond_data, cond.offset, cond.length);
-
- constexpr Word pickAll = invert ? 0 : UINT64_MAX;
- constexpr Word pickNone = ~pickAll;
-
- int64_t cnt = cond_reader.words();
- while (cnt--) {
- Word word = cond_reader.NextWord();
-
- if (word == pickAll) {
- handle_block(data_offset, word_len);
- } else if (word != pickNone) {
- for (int64_t i = 0; i < word_len; ++i) {
- if (BitUtil::GetBit(cond_data, bit_offset + i) != invert) {
- handle_block(data_offset + i, 1);
- }
- }
- }
- data_offset += word_len;
- bit_offset += word_len;
- }
-
- constexpr uint8_t pickAllByte = invert ? 0 : UINT8_MAX;
-  // bit-wise inversion of a byte promotes to int, hence the XOR with 0xff
- constexpr uint8_t pickNoneByte = pickAllByte ^ 0xff;
-
- cnt = cond_reader.trailing_bytes();
- while (cnt--) {
- int valid_bits;
- uint8_t byte = cond_reader.NextTrailingByte(valid_bits);
-
- if (byte == pickAllByte && valid_bits == 8) {
- handle_block(data_offset, 8);
- } else if (byte != pickNoneByte) {
- for (int i = 0; i < valid_bits; ++i) {
- if (BitUtil::GetBit(cond_data, bit_offset + i) != invert) {
- handle_block(data_offset + i, 1);
- }
- }
- }
- data_offset += 8;
- bit_offset += 8;
- }
-}
-
-template <typename HandleBlock>
-void RunIfElseLoopInverted(const ArrayData& cond, const HandleBlock& handle_block) {
- RunIfElseLoop<HandleBlock, true>(cond, handle_block);
-}
-
-/// Runs if-else when cond is a scalar. Two special functions are required:
-/// 1. CopyArrayData, 2. BroadcastScalar
-template <typename CopyArrayData, typename BroadcastScalar>
-Status RunIfElseScalar(const BooleanScalar& cond, const Datum& left, const Datum& right,
- Datum* out, const CopyArrayData& copy_array_data,
- const BroadcastScalar& broadcast_scalar) {
- if (left.is_scalar() && right.is_scalar()) { // output will be a scalar
- if (cond.is_valid) {
- *out = cond.value ? left.scalar() : right.scalar();
- } else {
- *out = MakeNullScalar(left.type());
- }
- return Status::OK();
- }
-
-  // either left or right is an array. Output is always an array
- const std::shared_ptr<ArrayData>& out_array = out->array();
- if (!cond.is_valid) {
- // cond is null; output is all null --> clear validity buffer
- BitUtil::ClearBitmap(out_array->buffers[0]->mutable_data(), out_array->offset,
- out_array->length);
- return Status::OK();
- }
-
- // cond is a non-null scalar
- const auto& valid_data = cond.value ? left : right;
- if (valid_data.is_array()) {
- // valid_data is an array. Hence copy data to the output buffers
- const auto& valid_array = valid_data.array();
- if (valid_array->MayHaveNulls()) {
- arrow::internal::CopyBitmap(
- valid_array->buffers[0]->data(), valid_array->offset, valid_array->length,
- out_array->buffers[0]->mutable_data(), out_array->offset);
- } else { // validity buffer is nullptr --> set all bits
- BitUtil::SetBitmap(out_array->buffers[0]->mutable_data(), out_array->offset,
- out_array->length);
- }
- copy_array_data(*valid_array, out_array.get());
- return Status::OK();
-
- } else { // valid data is scalar
- // valid data is a scalar that needs to be broadcasted
- const auto& valid_scalar = *valid_data.scalar();
- if (valid_scalar.is_valid) { // if the scalar is non-null, broadcast
- BitUtil::SetBitmap(out_array->buffers[0]->mutable_data(), out_array->offset,
- out_array->length);
- broadcast_scalar(*valid_data.scalar(), out_array.get());
- } else { // scalar is null, clear the output validity buffer
- BitUtil::ClearBitmap(out_array->buffers[0]->mutable_data(), out_array->offset,
- out_array->length);
- }
- return Status::OK();
- }
-}
-
-template <typename Type, typename Enable = void>
-struct IfElseFunctor {};
-
-// Only number types need to be handled among the fixed-size primitive data types,
-// because internal::GenerateTypeAgnosticPrimitive forwards types to the corresponding
-// unsigned int type
-template <typename Type>
-struct IfElseFunctor<Type, enable_if_number<Type>> {
- using T = typename TypeTraits<Type>::CType;
- // A - Array, S - Scalar, X = Array/Scalar
-
- // SXX
- static Status Call(KernelContext* ctx, const BooleanScalar& cond, const Datum& left,
- const Datum& right, Datum* out) {
- return RunIfElseScalar(
- cond, left, right, out,
- /*CopyArrayData*/
- [&](const ArrayData& valid_array, ArrayData* out_array) {
- std::memcpy(out_array->GetMutableValues<T>(1), valid_array.GetValues<T>(1),
- valid_array.length * sizeof(T));
- },
- /*BroadcastScalar*/
- [&](const Scalar& scalar, ArrayData* out_array) {
- T scalar_data = internal::UnboxScalar<Type>::Unbox(scalar);
- std::fill(out_array->GetMutableValues<T>(1),
- out_array->GetMutableValues<T>(1) + out_array->length, scalar_data);
- });
- }
-
- // AAA
- static Status Call(KernelContext* ctx, const ArrayData& cond, const ArrayData& left,
- const ArrayData& right, ArrayData* out) {
- T* out_values = out->template GetMutableValues<T>(1);
-
- // copy right data to out_buff
- const T* right_data = right.GetValues<T>(1);
- std::memcpy(out_values, right_data, right.length * sizeof(T));
-
- // selectively copy values from left data
- const T* left_data = left.GetValues<T>(1);
-
- RunIfElseLoop(cond, [&](int64_t data_offset, int64_t num_elems) {
- std::memcpy(out_values + data_offset, left_data + data_offset,
- num_elems * sizeof(T));
- });
-
- return Status::OK();
- }
-
- // ASA
- static Status Call(KernelContext* ctx, const ArrayData& cond, const Scalar& left,
- const ArrayData& right, ArrayData* out) {
- T* out_values = out->template GetMutableValues<T>(1);
-
- // copy right data to out_buff
- const T* right_data = right.GetValues<T>(1);
- std::memcpy(out_values, right_data, right.length * sizeof(T));
-
- // selectively copy values from left data
- T left_data = internal::UnboxScalar<Type>::Unbox(left);
-
- RunIfElseLoop(cond, [&](int64_t data_offset, int64_t num_elems) {
- std::fill(out_values + data_offset, out_values + data_offset + num_elems,
- left_data);
- });
-
- return Status::OK();
- }
-
- // AAS
- static Status Call(KernelContext* ctx, const ArrayData& cond, const ArrayData& left,
- const Scalar& right, ArrayData* out) {
- T* out_values = out->template GetMutableValues<T>(1);
-
- // copy left data to out_buff
- const T* left_data = left.GetValues<T>(1);
- std::memcpy(out_values, left_data, left.length * sizeof(T));
-
- T right_data = internal::UnboxScalar<Type>::Unbox(right);
-
- RunIfElseLoopInverted(cond, [&](int64_t data_offset, int64_t num_elems) {
- std::fill(out_values + data_offset, out_values + data_offset + num_elems,
- right_data);
- });
-
- return Status::OK();
- }
-
- // ASS
- static Status Call(KernelContext* ctx, const ArrayData& cond, const Scalar& left,
- const Scalar& right, ArrayData* out) {
- T* out_values = out->template GetMutableValues<T>(1);
-
- // copy right data to out_buff
- T right_data = internal::UnboxScalar<Type>::Unbox(right);
- std::fill(out_values, out_values + cond.length, right_data);
-
- // selectively copy values from left data
- T left_data = internal::UnboxScalar<Type>::Unbox(left);
- RunIfElseLoop(cond, [&](int64_t data_offset, int64_t num_elems) {
- std::fill(out_values + data_offset, out_values + data_offset + num_elems,
- left_data);
- });
-
- return Status::OK();
- }
-};
-
-template <typename Type>
-struct IfElseFunctor<Type, enable_if_boolean<Type>> {
- // A - Array, S - Scalar, X = Array/Scalar
-
- // SXX
- static Status Call(KernelContext* ctx, const BooleanScalar& cond, const Datum& left,
- const Datum& right, Datum* out) {
- return RunIfElseScalar(
- cond, left, right, out,
- /*CopyArrayData*/
- [&](const ArrayData& valid_array, ArrayData* out_array) {
- arrow::internal::CopyBitmap(
- valid_array.buffers[1]->data(), valid_array.offset, valid_array.length,
- out_array->buffers[1]->mutable_data(), out_array->offset);
- },
- /*BroadcastScalar*/
- [&](const Scalar& scalar, ArrayData* out_array) {
- bool scalar_data = internal::UnboxScalar<Type>::Unbox(scalar);
- BitUtil::SetBitsTo(out_array->buffers[1]->mutable_data(), out_array->offset,
- out_array->length, scalar_data);
- });
- }
-
- // AAA
- static Status Call(KernelContext* ctx, const ArrayData& cond, const ArrayData& left,
- const ArrayData& right, ArrayData* out) {
- // out_buff = right & ~cond
- const auto& out_buf = out->buffers[1];
- arrow::internal::BitmapAndNot(right.buffers[1]->data(), right.offset,
- cond.buffers[1]->data(), cond.offset, cond.length,
- out->offset, out_buf->mutable_data());
-
- // out_buff = left & cond
- ARROW_ASSIGN_OR_RAISE(std::shared_ptr<Buffer> temp_buf,
- arrow::internal::BitmapAnd(
- ctx->memory_pool(), left.buffers[1]->data(), left.offset,
- cond.buffers[1]->data(), cond.offset, cond.length, 0));
-
- arrow::internal::BitmapOr(out_buf->data(), out->offset, temp_buf->data(), 0,
- cond.length, out->offset, out_buf->mutable_data());
-
- return Status::OK();
- }
-
- // ASA
- static Status Call(KernelContext* ctx, const ArrayData& cond, const Scalar& left,
- const ArrayData& right, ArrayData* out) {
- // out_buff = right & ~cond
- const auto& out_buf = out->buffers[1];
- arrow::internal::BitmapAndNot(right.buffers[1]->data(), right.offset,
- cond.buffers[1]->data(), cond.offset, cond.length,
- out->offset, out_buf->mutable_data());
-
- // out_buff = left & cond
- bool left_data = internal::UnboxScalar<BooleanType>::Unbox(left);
- if (left_data) {
- arrow::internal::BitmapOr(out_buf->data(), out->offset, cond.buffers[1]->data(),
- cond.offset, cond.length, out->offset,
- out_buf->mutable_data());
- }
-
- return Status::OK();
- }
-
- // AAS
- static Status Call(KernelContext* ctx, const ArrayData& cond, const ArrayData& left,
- const Scalar& right, ArrayData* out) {
- // out_buff = left & cond
- const auto& out_buf = out->buffers[1];
- arrow::internal::BitmapAnd(left.buffers[1]->data(), left.offset,
- cond.buffers[1]->data(), cond.offset, cond.length,
- out->offset, out_buf->mutable_data());
-
- bool right_data = internal::UnboxScalar<BooleanType>::Unbox(right);
-
- // out_buff = left & cond | right & ~cond
- if (right_data) {
- arrow::internal::BitmapOrNot(out_buf->data(), out->offset, cond.buffers[1]->data(),
- cond.offset, cond.length, out->offset,
- out_buf->mutable_data());
- }
-
- return Status::OK();
- }
-
- // ASS
- static Status Call(KernelContext* ctx, const ArrayData& cond, const Scalar& left,
- const Scalar& right, ArrayData* out) {
- bool left_data = internal::UnboxScalar<BooleanType>::Unbox(left);
- bool right_data = internal::UnboxScalar<BooleanType>::Unbox(right);
-
- const auto& out_buf = out->buffers[1];
-
- // out_buf = left & cond | right & ~cond
- if (left_data) {
- if (right_data) {
- // out_buf = ones
- BitUtil::SetBitmap(out_buf->mutable_data(), out->offset, cond.length);
- } else {
- // out_buf = cond
- arrow::internal::CopyBitmap(cond.buffers[1]->data(), cond.offset, cond.length,
- out_buf->mutable_data(), out->offset);
- }
- } else {
- if (right_data) {
- // out_buf = ~cond
- arrow::internal::InvertBitmap(cond.buffers[1]->data(), cond.offset, cond.length,
- out_buf->mutable_data(), out->offset);
- } else {
- // out_buf = zeros
- BitUtil::ClearBitmap(out_buf->mutable_data(), out->offset, cond.length);
- }
- }
-
- return Status::OK();
- }
-};
-
-template <typename Type>
-struct IfElseFunctor<Type, enable_if_base_binary<Type>> {
- using OffsetType = typename TypeTraits<Type>::OffsetType::c_type;
- using ArrayType = typename TypeTraits<Type>::ArrayType;
- using BuilderType = typename TypeTraits<Type>::BuilderType;
-
- // A - Array, S - Scalar, X = Array/Scalar
-
- // SXX
- static Status Call(KernelContext* ctx, const BooleanScalar& cond, const Datum& left,
- const Datum& right, Datum* out) {
- if (left.is_scalar() && right.is_scalar()) {
- if (cond.is_valid) {
- *out = cond.value ? left.scalar() : right.scalar();
- } else {
- *out = MakeNullScalar(left.type());
- }
- return Status::OK();
- }
- // either left or right is an array. Output is always an array
- int64_t out_arr_len = std::max(left.length(), right.length());
- if (!cond.is_valid) {
- // cond is null; just create a null array
-      ARROW_ASSIGN_OR_RAISE(*out,
-                            MakeArrayOfNull(left.type(), out_arr_len, ctx->memory_pool()));
- return Status::OK();
- }
-
- const auto& valid_data = cond.value ? left : right;
- if (valid_data.is_array()) {
- *out = valid_data;
- } else {
- // valid data is a scalar that needs to be broadcasted
- ARROW_ASSIGN_OR_RAISE(*out, MakeArrayFromScalar(*valid_data.scalar(), out_arr_len,
- ctx->memory_pool()));
- }
- return Status::OK();
- }
-
- // AAA
- static Status Call(KernelContext* ctx, const ArrayData& cond, const ArrayData& left,
- const ArrayData& right, ArrayData* out) {
- const auto* left_offsets = left.GetValues<OffsetType>(1);
- const uint8_t* left_data = left.buffers[2]->data();
- const auto* right_offsets = right.GetValues<OffsetType>(1);
- const uint8_t* right_data = right.buffers[2]->data();
-
- // allocate data buffer conservatively
- int64_t data_buff_alloc = left_offsets[left.length] - left_offsets[0] +
- right_offsets[right.length] - right_offsets[0];
-
- BuilderType builder(ctx->memory_pool());
- ARROW_RETURN_NOT_OK(builder.Reserve(cond.length + 1));
- ARROW_RETURN_NOT_OK(builder.ReserveData(data_buff_alloc));
-
- RunLoop(
- cond, *out,
- [&](int64_t i) {
- builder.UnsafeAppend(left_data + left_offsets[i],
- left_offsets[i + 1] - left_offsets[i]);
- },
- [&](int64_t i) {
- builder.UnsafeAppend(right_data + right_offsets[i],
- right_offsets[i + 1] - right_offsets[i]);
- },
- [&]() { builder.UnsafeAppendNull(); });
- ARROW_ASSIGN_OR_RAISE(auto out_arr, builder.Finish());
-
- out->SetNullCount(out_arr->data()->null_count);
- out->buffers[0] = std::move(out_arr->data()->buffers[0]);
- out->buffers[1] = std::move(out_arr->data()->buffers[1]);
- out->buffers[2] = std::move(out_arr->data()->buffers[2]);
- return Status::OK();
- }
-
- // ASA
- static Status Call(KernelContext* ctx, const ArrayData& cond, const Scalar& left,
- const ArrayData& right, ArrayData* out) {
- util::string_view left_data = internal::UnboxScalar<Type>::Unbox(left);
- auto left_size = static_cast<OffsetType>(left_data.size());
-
- const auto* right_offsets = right.GetValues<OffsetType>(1);
- const uint8_t* right_data = right.buffers[2]->data();
-
- // allocate data buffer conservatively
- int64_t data_buff_alloc =
- left_size * cond.length + right_offsets[right.length] - right_offsets[0];
-
- BuilderType builder(ctx->memory_pool());
- ARROW_RETURN_NOT_OK(builder.Reserve(cond.length + 1));
- ARROW_RETURN_NOT_OK(builder.ReserveData(data_buff_alloc));
-
- RunLoop(
- cond, *out, [&](int64_t i) { builder.UnsafeAppend(left_data.data(), left_size); },
- [&](int64_t i) {
- builder.UnsafeAppend(right_data + right_offsets[i],
- right_offsets[i + 1] - right_offsets[i]);
- },
- [&]() { builder.UnsafeAppendNull(); });
- ARROW_ASSIGN_OR_RAISE(auto out_arr, builder.Finish());
-
- out->SetNullCount(out_arr->data()->null_count);
- out->buffers[0] = std::move(out_arr->data()->buffers[0]);
- out->buffers[1] = std::move(out_arr->data()->buffers[1]);
- out->buffers[2] = std::move(out_arr->data()->buffers[2]);
- return Status::OK();
- }
-
- // AAS
- static Status Call(KernelContext* ctx, const ArrayData& cond, const ArrayData& left,
- const Scalar& right, ArrayData* out) {
- const auto* left_offsets = left.GetValues<OffsetType>(1);
- const uint8_t* left_data = left.buffers[2]->data();
-
- util::string_view right_data = internal::UnboxScalar<Type>::Unbox(right);
- auto right_size = static_cast<OffsetType>(right_data.size());
-
- // allocate data buffer conservatively
- int64_t data_buff_alloc =
- right_size * cond.length + left_offsets[left.length] - left_offsets[0];
-
- BuilderType builder(ctx->memory_pool());
- ARROW_RETURN_NOT_OK(builder.Reserve(cond.length + 1));
- ARROW_RETURN_NOT_OK(builder.ReserveData(data_buff_alloc));
-
- RunLoop(
- cond, *out,
- [&](int64_t i) {
- builder.UnsafeAppend(left_data + left_offsets[i],
- left_offsets[i + 1] - left_offsets[i]);
- },
- [&](int64_t i) { builder.UnsafeAppend(right_data.data(), right_size); },
- [&]() { builder.UnsafeAppendNull(); });
- ARROW_ASSIGN_OR_RAISE(auto out_arr, builder.Finish());
-
- out->SetNullCount(out_arr->data()->null_count);
- out->buffers[0] = std::move(out_arr->data()->buffers[0]);
- out->buffers[1] = std::move(out_arr->data()->buffers[1]);
- out->buffers[2] = std::move(out_arr->data()->buffers[2]);
- return Status::OK();
- }
-
- // ASS
- static Status Call(KernelContext* ctx, const ArrayData& cond, const Scalar& left,
- const Scalar& right, ArrayData* out) {
- util::string_view left_data = internal::UnboxScalar<Type>::Unbox(left);
- auto left_size = static_cast<OffsetType>(left_data.size());
-
- util::string_view right_data = internal::UnboxScalar<Type>::Unbox(right);
- auto right_size = static_cast<OffsetType>(right_data.size());
-
- // allocate data buffer conservatively
- int64_t data_buff_alloc = std::max(right_size, left_size) * cond.length;
- BuilderType builder(ctx->memory_pool());
- ARROW_RETURN_NOT_OK(builder.Reserve(cond.length + 1));
- ARROW_RETURN_NOT_OK(builder.ReserveData(data_buff_alloc));
-
- RunLoop(
- cond, *out, [&](int64_t i) { builder.UnsafeAppend(left_data.data(), left_size); },
- [&](int64_t i) { builder.UnsafeAppend(right_data.data(), right_size); },
- [&]() { builder.UnsafeAppendNull(); });
- ARROW_ASSIGN_OR_RAISE(auto out_arr, builder.Finish());
-
- out->SetNullCount(out_arr->data()->null_count);
- out->buffers[0] = std::move(out_arr->data()->buffers[0]);
- out->buffers[1] = std::move(out_arr->data()->buffers[1]);
- out->buffers[2] = std::move(out_arr->data()->buffers[2]);
- return Status::OK();
- }
-
- template <typename HandleLeft, typename HandleRight, typename HandleNull>
- static void RunLoop(const ArrayData& cond, const ArrayData& output,
- HandleLeft&& handle_left, HandleRight&& handle_right,
- HandleNull&& handle_null) {
- const auto* cond_data = cond.buffers[1]->data();
-
- if (output.buffers[0]) { // output may have nulls
-      // The output validity buffer is allocated internally by the IfElseFunctor, so
-      // it has length cond.length and offset 0.
- const auto* out_valid = output.buffers[0]->data();
-
- for (int64_t i = 0; i < cond.length; i++) {
- if (BitUtil::GetBit(out_valid, i)) {
- BitUtil::GetBit(cond_data, cond.offset + i) ? handle_left(i) : handle_right(i);
- } else {
- handle_null();
- }
- }
- } else { // output is all valid (no nulls)
- for (int64_t i = 0; i < cond.length; i++) {
- BitUtil::GetBit(cond_data, cond.offset + i) ? handle_left(i) : handle_right(i);
- }
- }
- }
-};
-
-template <typename Type>
-struct IfElseFunctor<Type, enable_if_fixed_size_binary<Type>> {
- // A - Array, S - Scalar, X = Array/Scalar
-
- // SXX
- static Status Call(KernelContext* ctx, const BooleanScalar& cond, const Datum& left,
- const Datum& right, Datum* out) {
- ARROW_ASSIGN_OR_RAISE(auto byte_width, GetByteWidth(*left.type(), *right.type()));
- return RunIfElseScalar(
- cond, left, right, out,
- /*CopyArrayData*/
- [&](const ArrayData& valid_array, ArrayData* out_array) {
- std::memcpy(
- out_array->buffers[1]->mutable_data() + out_array->offset * byte_width,
- valid_array.buffers[1]->data() + valid_array.offset * byte_width,
- valid_array.length * byte_width);
- },
- /*BroadcastScalar*/
- [&](const Scalar& scalar, ArrayData* out_array) {
- const util::string_view& scalar_data =
- internal::UnboxScalar<FixedSizeBinaryType>::Unbox(scalar);
- uint8_t* start =
- out_array->buffers[1]->mutable_data() + out_array->offset * byte_width;
- for (int64_t i = 0; i < out_array->length; i++) {
- std::memcpy(start + i * byte_width, scalar_data.data(), scalar_data.size());
- }
- });
- }
-
- // AAA
- static Status Call(KernelContext* ctx, const ArrayData& cond, const ArrayData& left,
- const ArrayData& right, ArrayData* out) {
- ARROW_ASSIGN_OR_RAISE(auto byte_width, GetByteWidth(*left.type, *right.type));
- auto* out_values = out->buffers[1]->mutable_data() + out->offset * byte_width;
-
- // copy right data to out_buff
- const uint8_t* right_data = right.buffers[1]->data() + right.offset * byte_width;
- std::memcpy(out_values, right_data, right.length * byte_width);
-
- // selectively copy values from left data
- const uint8_t* left_data = left.buffers[1]->data() + left.offset * byte_width;
-
- RunIfElseLoop(cond, [&](int64_t data_offset, int64_t num_elems) {
- std::memcpy(out_values + data_offset * byte_width,
- left_data + data_offset * byte_width, num_elems * byte_width);
- });
-
- return Status::OK();
- }
-
- // ASA
- static Status Call(KernelContext* ctx, const ArrayData& cond, const Scalar& left,
- const ArrayData& right, ArrayData* out) {
- ARROW_ASSIGN_OR_RAISE(auto byte_width, GetByteWidth(*left.type, *right.type));
- auto* out_values = out->buffers[1]->mutable_data() + out->offset * byte_width;
-
- // copy right data to out_buff
- const uint8_t* right_data = right.buffers[1]->data() + right.offset * byte_width;
- std::memcpy(out_values, right_data, right.length * byte_width);
-
- // selectively copy values from left data
- const util::string_view& left_data =
- internal::UnboxScalar<FixedSizeBinaryType>::Unbox(left);
-
- RunIfElseLoop(cond, [&](int64_t data_offset, int64_t num_elems) {
- if (left_data.data()) {
- for (int64_t i = 0; i < num_elems; i++) {
- std::memcpy(out_values + (data_offset + i) * byte_width, left_data.data(),
- left_data.size());
- }
- }
- });
-
- return Status::OK();
- }
-
- // AAS
- static Status Call(KernelContext* ctx, const ArrayData& cond, const ArrayData& left,
- const Scalar& right, ArrayData* out) {
- ARROW_ASSIGN_OR_RAISE(auto byte_width, GetByteWidth(*left.type, *right.type));
- auto* out_values = out->buffers[1]->mutable_data() + out->offset * byte_width;
-
- // copy left data to out_buff
- const uint8_t* left_data = left.buffers[1]->data() + left.offset * byte_width;
- std::memcpy(out_values, left_data, left.length * byte_width);
-
- const util::string_view& right_data =
- internal::UnboxScalar<FixedSizeBinaryType>::Unbox(right);
-
- RunIfElseLoopInverted(cond, [&](int64_t data_offset, int64_t num_elems) {
- if (right_data.data()) {
- for (int64_t i = 0; i < num_elems; i++) {
- std::memcpy(out_values + (data_offset + i) * byte_width, right_data.data(),
- right_data.size());
- }
- }
- });
-
- return Status::OK();
- }
-
- // ASS
- static Status Call(KernelContext* ctx, const ArrayData& cond, const Scalar& left,
- const Scalar& right, ArrayData* out) {
- ARROW_ASSIGN_OR_RAISE(auto byte_width, GetByteWidth(*left.type, *right.type));
- auto* out_values = out->buffers[1]->mutable_data() + out->offset * byte_width;
-
- // copy right data to out_buff
- const util::string_view& right_data =
- internal::UnboxScalar<FixedSizeBinaryType>::Unbox(right);
- if (right_data.data()) {
- for (int64_t i = 0; i < cond.length; i++) {
- std::memcpy(out_values + i * byte_width, right_data.data(), right_data.size());
- }
- }
-
- // selectively copy values from left data
- const util::string_view& left_data =
- internal::UnboxScalar<FixedSizeBinaryType>::Unbox(left);
-
- RunIfElseLoop(cond, [&](int64_t data_offset, int64_t num_elems) {
- if (left_data.data()) {
- for (int64_t i = 0; i < num_elems; i++) {
- std::memcpy(out_values + (data_offset + i) * byte_width, left_data.data(),
- left_data.size());
- }
- }
- });
-
- return Status::OK();
- }
-
- static Result<int32_t> GetByteWidth(const DataType& left_type,
- const DataType& right_type) {
- int width = checked_cast<const FixedSizeBinaryType&>(left_type).byte_width();
- if (width == checked_cast<const FixedSizeBinaryType&>(right_type).byte_width()) {
- return width;
- } else {
- return Status::Invalid("FixedSizeBinaryType byte_widths should be equal");
- }
- }
-};
-
-template <typename Type, typename AllocateMem>
-struct ResolveIfElseExec {
- static Status Exec(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
- // cond is scalar
- if (batch[0].is_scalar()) {
- const auto& cond = batch[0].scalar_as<BooleanScalar>();
- return IfElseFunctor<Type>::Call(ctx, cond, batch[1], batch[2], out);
- }
-
- // cond is array. Use functors to sort things out
- ARROW_RETURN_NOT_OK(PromoteNullsVisitor<AllocateMem>(ctx, batch[0], batch[1],
- batch[2], out->mutable_array()));
-
- if (batch[1].kind() == Datum::ARRAY) {
- if (batch[2].kind() == Datum::ARRAY) { // AAA
- return IfElseFunctor<Type>::Call(ctx, *batch[0].array(), *batch[1].array(),
- *batch[2].array(), out->mutable_array());
- } else { // AAS
- return IfElseFunctor<Type>::Call(ctx, *batch[0].array(), *batch[1].array(),
- *batch[2].scalar(), out->mutable_array());
- }
- } else {
- if (batch[2].kind() == Datum::ARRAY) { // ASA
- return IfElseFunctor<Type>::Call(ctx, *batch[0].array(), *batch[1].scalar(),
- *batch[2].array(), out->mutable_array());
- } else { // ASS
- return IfElseFunctor<Type>::Call(ctx, *batch[0].array(), *batch[1].scalar(),
- *batch[2].scalar(), out->mutable_array());
- }
- }
- }
-};
-
-template <typename AllocateMem>
-struct ResolveIfElseExec<NullType, AllocateMem> {
- static Status Exec(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
- // if all are scalars, return a null scalar
- if (batch[0].is_scalar() && batch[1].is_scalar() && batch[2].is_scalar()) {
- *out = MakeNullScalar(null());
- } else {
- ARROW_ASSIGN_OR_RAISE(*out,
- MakeArrayOfNull(null(), batch.length, ctx->memory_pool()));
- }
- return Status::OK();
- }
-};
-
-struct IfElseFunction : ScalarFunction {
- using ScalarFunction::ScalarFunction;
-
- Result<const Kernel*> DispatchBest(std::vector<ValueDescr>* values) const override {
- RETURN_NOT_OK(CheckArity(*values));
-
- using arrow::compute::detail::DispatchExactImpl;
- if (auto kernel = DispatchExactImpl(this, *values)) return kernel;
-
- // if 0th descriptor is null, replace with bool
- if (values->at(0).type->id() == Type::NA) {
- values->at(0).type = boolean();
- }
-
-    // if_else's 0th descriptor is bool, so skip it
- std::vector<ValueDescr> values_copy(values->begin() + 1, values->end());
- internal::EnsureDictionaryDecoded(&values_copy);
- internal::ReplaceNullWithOtherType(&values_copy);
-
- if (auto type = internal::CommonNumeric(values_copy)) {
- internal::ReplaceTypes(type, &values_copy);
- }
-
- std::move(values_copy.begin(), values_copy.end(), values->begin() + 1);
-
- if (auto kernel = DispatchExactImpl(this, *values)) return kernel;
-
- return arrow::compute::detail::NoMatchingKernel(this, *values);
- }
-};
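-
-// (added note) DispatchBest above allows, e.g., if_else(cond, int32, float64)
-// to resolve by promoting both value arguments to float64 via CommonNumeric
-// before retrying the exact-kernel lookup.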
-
-void AddNullIfElseKernel(const std::shared_ptr<IfElseFunction>& scalar_function) {
- ScalarKernel kernel({boolean(), null(), null()}, null(),
- ResolveIfElseExec<NullType,
- /*AllocateMem=*/std::true_type>::Exec);
- kernel.null_handling = NullHandling::COMPUTED_NO_PREALLOCATE;
- kernel.mem_allocation = MemAllocation::NO_PREALLOCATE;
- kernel.can_write_into_slices = false;
-
- DCHECK_OK(scalar_function->AddKernel(std::move(kernel)));
-}
-
-void AddPrimitiveIfElseKernels(const std::shared_ptr<ScalarFunction>& scalar_function,
- const std::vector<std::shared_ptr<DataType>>& types) {
- for (auto&& type : types) {
- auto exec =
- internal::GenerateTypeAgnosticPrimitive<ResolveIfElseExec,
- /*AllocateMem=*/std::false_type>(*type);
- // cond array needs to be boolean always
- ScalarKernel kernel({boolean(), type, type}, type, exec);
- kernel.null_handling = NullHandling::COMPUTED_PREALLOCATE;
- kernel.mem_allocation = MemAllocation::PREALLOCATE;
- kernel.can_write_into_slices = true;
-
- DCHECK_OK(scalar_function->AddKernel(std::move(kernel)));
- }
-}
-
-void AddBinaryIfElseKernels(const std::shared_ptr<IfElseFunction>& scalar_function,
- const std::vector<std::shared_ptr<DataType>>& types) {
- for (auto&& type : types) {
- auto exec =
- internal::GenerateTypeAgnosticVarBinaryBase<ResolveIfElseExec,
- /*AllocateMem=*/std::true_type>(
- *type);
- // cond array needs to be boolean always
- ScalarKernel kernel({boolean(), type, type}, type, exec);
- kernel.null_handling = NullHandling::COMPUTED_NO_PREALLOCATE;
- kernel.mem_allocation = MemAllocation::NO_PREALLOCATE;
- kernel.can_write_into_slices = false;
-
- DCHECK_OK(scalar_function->AddKernel(std::move(kernel)));
- }
-}
-
-void AddFSBinaryIfElseKernel(const std::shared_ptr<IfElseFunction>& scalar_function) {
- // cond array needs to be boolean always
- ScalarKernel kernel(
- {boolean(), InputType(Type::FIXED_SIZE_BINARY), InputType(Type::FIXED_SIZE_BINARY)},
- OutputType([](KernelContext*, const std::vector<ValueDescr>& descrs) {
- return ValueDescr(descrs[1].type, ValueDescr::ANY);
- }),
- ResolveIfElseExec<FixedSizeBinaryType, /*AllocateMem=*/std::false_type>::Exec);
- kernel.null_handling = NullHandling::COMPUTED_PREALLOCATE;
- kernel.mem_allocation = MemAllocation::PREALLOCATE;
- kernel.can_write_into_slices = true;
-
- DCHECK_OK(scalar_function->AddKernel(std::move(kernel)));
-}
-
-// Helper to copy or broadcast fixed-width values between buffers.
-template <typename Type, typename Enable = void>
-struct CopyFixedWidth {};
-template <>
-struct CopyFixedWidth<BooleanType> {
- static void CopyScalar(const Scalar& scalar, const int64_t length,
- uint8_t* raw_out_values, const int64_t out_offset) {
- const bool value = UnboxScalar<BooleanType>::Unbox(scalar);
- BitUtil::SetBitsTo(raw_out_values, out_offset, length, value);
- }
- static void CopyArray(const DataType&, const uint8_t* in_values,
- const int64_t in_offset, const int64_t length,
- uint8_t* raw_out_values, const int64_t out_offset) {
- arrow::internal::CopyBitmap(in_values, in_offset, length, raw_out_values, out_offset);
- }
-};
-template <typename Type>
-struct CopyFixedWidth<Type, enable_if_number<Type>> {
- using CType = typename TypeTraits<Type>::CType;
- static void CopyScalar(const Scalar& scalar, const int64_t length,
- uint8_t* raw_out_values, const int64_t out_offset) {
- CType* out_values = reinterpret_cast<CType*>(raw_out_values);
- const CType value = UnboxScalar<Type>::Unbox(scalar);
- std::fill(out_values + out_offset, out_values + out_offset + length, value);
- }
- static void CopyArray(const DataType&, const uint8_t* in_values,
- const int64_t in_offset, const int64_t length,
- uint8_t* raw_out_values, const int64_t out_offset) {
- std::memcpy(raw_out_values + out_offset * sizeof(CType),
- in_values + in_offset * sizeof(CType), length * sizeof(CType));
- }
-};
-template <typename Type>
-struct CopyFixedWidth<Type, enable_if_same<Type, FixedSizeBinaryType>> {
- static void CopyScalar(const Scalar& values, const int64_t length,
- uint8_t* raw_out_values, const int64_t out_offset) {
- const int32_t width =
- checked_cast<const FixedSizeBinaryType&>(*values.type).byte_width();
- uint8_t* next = raw_out_values + (width * out_offset);
- const auto& scalar = checked_cast<const FixedSizeBinaryScalar&>(values);
-    // The scalar may have a null value buffer
- if (!scalar.value) {
- std::memset(next, 0x00, width * length);
- } else {
- DCHECK_EQ(scalar.value->size(), width);
- for (int i = 0; i < length; i++) {
- std::memcpy(next, scalar.value->data(), width);
- next += width;
- }
- }
- }
- static void CopyArray(const DataType& type, const uint8_t* in_values,
- const int64_t in_offset, const int64_t length,
- uint8_t* raw_out_values, const int64_t out_offset) {
- const int32_t width = checked_cast<const FixedSizeBinaryType&>(type).byte_width();
- uint8_t* next = raw_out_values + (width * out_offset);
- std::memcpy(next, in_values + in_offset * width, length * width);
- }
-};
-template <typename Type>
-struct CopyFixedWidth<Type, enable_if_decimal<Type>> {
- using ScalarType = typename TypeTraits<Type>::ScalarType;
- static void CopyScalar(const Scalar& values, const int64_t length,
- uint8_t* raw_out_values, const int64_t out_offset) {
- const int32_t width =
- checked_cast<const FixedSizeBinaryType&>(*values.type).byte_width();
- uint8_t* next = raw_out_values + (width * out_offset);
- const auto& scalar = checked_cast<const ScalarType&>(values);
- const auto value = scalar.value.ToBytes();
- for (int i = 0; i < length; i++) {
- std::memcpy(next, value.data(), width);
- next += width;
- }
- }
- static void CopyArray(const DataType& type, const uint8_t* in_values,
- const int64_t in_offset, const int64_t length,
- uint8_t* raw_out_values, const int64_t out_offset) {
- const int32_t width = checked_cast<const FixedSizeBinaryType&>(type).byte_width();
- uint8_t* next = raw_out_values + (width * out_offset);
- std::memcpy(next, in_values + in_offset * width, length * width);
- }
-};
-// Copy fixed-width values from a scalar/array datum into an output values buffer
-template <typename Type>
-void CopyValues(const Datum& in_values, const int64_t in_offset, const int64_t length,
- uint8_t* out_valid, uint8_t* out_values, const int64_t out_offset) {
- if (in_values.is_scalar()) {
- const auto& scalar = *in_values.scalar();
- if (out_valid) {
- BitUtil::SetBitsTo(out_valid, out_offset, length, scalar.is_valid);
- }
- CopyFixedWidth<Type>::CopyScalar(scalar, length, out_values, out_offset);
- } else {
- const ArrayData& array = *in_values.array();
- if (out_valid) {
- if (array.MayHaveNulls()) {
- if (length == 1) {
- // CopyBitmap is slow for short runs
- BitUtil::SetBitTo(
- out_valid, out_offset,
- BitUtil::GetBit(array.buffers[0]->data(), array.offset + in_offset));
- } else {
- arrow::internal::CopyBitmap(array.buffers[0]->data(), array.offset + in_offset,
- length, out_valid, out_offset);
- }
- } else {
- BitUtil::SetBitsTo(out_valid, out_offset, length, true);
- }
- }
- CopyFixedWidth<Type>::CopyArray(*array.type, array.buffers[1]->data(),
- array.offset + in_offset, length, out_values,
- out_offset);
- }
-}
-
-// Specialized helper to copy a single value from a source array. Allows avoiding
-// repeatedly calling MayHaveNulls and Buffer::data() which have internal checks that
-// add up when called in a loop.
-template <typename Type>
-void CopyOneArrayValue(const DataType& type, const uint8_t* in_valid,
- const uint8_t* in_values, const int64_t in_offset,
- uint8_t* out_valid, uint8_t* out_values,
- const int64_t out_offset) {
- if (out_valid) {
- BitUtil::SetBitTo(out_valid, out_offset,
- !in_valid || BitUtil::GetBit(in_valid, in_offset));
- }
- CopyFixedWidth<Type>::CopyArray(type, in_values, in_offset, /*length=*/1, out_values,
- out_offset);
-}
-
-struct CaseWhenFunction : ScalarFunction {
- using ScalarFunction::ScalarFunction;
-
- Result<const Kernel*> DispatchBest(std::vector<ValueDescr>* values) const override {
-    // The first argument is a struct of booleans, where the number of fields in the
- // struct is either equal to the number of other arguments or is one less.
- RETURN_NOT_OK(CheckArity(*values));
- EnsureDictionaryDecoded(values);
- auto first_type = (*values)[0].type;
- if (first_type->id() != Type::STRUCT) {
- return Status::TypeError("case_when: first argument must be STRUCT, not ",
- *first_type);
- }
- auto num_fields = static_cast<size_t>(first_type->num_fields());
- if (num_fields < values->size() - 2 || num_fields >= values->size()) {
- return Status::Invalid(
- "case_when: number of struct fields must be equal to or one less than count of "
- "remaining arguments (",
- values->size() - 1, "), got: ", first_type->num_fields());
- }
- for (const auto& field : first_type->fields()) {
- if (field->type()->id() != Type::BOOL) {
- return Status::TypeError(
- "case_when: all fields of first argument must be BOOL, but ", field->name(),
- " was of type: ", *field->type());
- }
- }
-
- if (auto type = CommonNumeric(values->data() + 1, values->size() - 1)) {
- for (auto it = values->begin() + 1; it != values->end(); it++) {
- it->type = type;
- }
- }
- if (auto kernel = DispatchExactImpl(this, *values)) return kernel;
- return arrow::compute::detail::NoMatchingKernel(this, *values);
- }
-};
-
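-// Illustrative call shape (added note): the first argument is a struct of
-// boolean condition arrays, followed by one value argument per field, with an
-// optional trailing value acting as the ELSE branch, e.g.
-//   CallFunction("case_when", {conds_struct, v1, v2, v_else})
-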
-// Implement a 'case when' (SQL)/'select' (NumPy) function for any scalar conditions
-template <typename Type>
-Status ExecScalarCaseWhen(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
- const auto& conds = checked_cast<const StructScalar&>(*batch.values[0].scalar());
- if (!conds.is_valid) {
- return Status::Invalid("cond struct must not be null");
- }
- Datum result;
- for (size_t i = 0; i < batch.values.size() - 1; i++) {
- if (i < conds.value.size()) {
- const Scalar& cond = *conds.value[i];
- if (cond.is_valid && internal::UnboxScalar<BooleanType>::Unbox(cond)) {
- result = batch[i + 1];
- break;
- }
- } else {
- // ELSE clause
- result = batch[i + 1];
- break;
- }
- }
- if (out->is_scalar()) {
- *out = result.is_scalar() ? result.scalar() : MakeNullScalar(out->type());
- return Status::OK();
- }
- ArrayData* output = out->mutable_array();
- if (!result.is_value()) {
- // All conditions false, no 'else' argument
- result = MakeNullScalar(out->type());
- }
- CopyValues<Type>(result, /*in_offset=*/0, batch.length,
- output->GetMutableValues<uint8_t>(0, 0),
- output->GetMutableValues<uint8_t>(1, 0), output->offset);
- return Status::OK();
-}
-
-// Implement 'case when' for any mix of scalar/array arguments for any fixed-width type,
-// given helper functions to copy data from a source array to a target array
-template <typename Type>
-Status ExecArrayCaseWhen(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
- const auto& conds_array = *batch.values[0].array();
- if (conds_array.GetNullCount() > 0) {
- return Status::Invalid("cond struct must not have top-level nulls");
- }
- ArrayData* output = out->mutable_array();
- const int64_t out_offset = output->offset;
- const auto num_value_args = batch.values.size() - 1;
- const bool have_else_arg =
- static_cast<size_t>(conds_array.type->num_fields()) < num_value_args;
- uint8_t* out_valid = output->buffers[0]->mutable_data();
- uint8_t* out_values = output->buffers[1]->mutable_data();
- if (have_else_arg) {
- // Copy 'else' value into output
- CopyValues<Type>(batch.values.back(), /*in_offset=*/0, batch.length, out_valid,
- out_values, out_offset);
- } else {
- // There's no 'else' argument, so we should have an all-null validity bitmap
- BitUtil::SetBitsTo(out_valid, out_offset, batch.length, false);
- }
-
- // Allocate a temporary bitmap to determine which elements still need setting.
- ARROW_ASSIGN_OR_RAISE(auto mask_buffer, ctx->AllocateBitmap(batch.length));
- uint8_t* mask = mask_buffer->mutable_data();
- std::memset(mask, 0xFF, mask_buffer->size());
-
- // Then iterate through each argument in turn and set elements.
- for (size_t i = 0; i < batch.values.size() - (have_else_arg ? 2 : 1); i++) {
- const ArrayData& cond_array = *conds_array.child_data[i];
- const int64_t cond_offset = conds_array.offset + cond_array.offset;
- const uint8_t* cond_values = cond_array.buffers[1]->data();
- const Datum& values_datum = batch[i + 1];
- int64_t offset = 0;
-
- if (cond_array.GetNullCount() == 0) {
-      // cond has no nulls: visit the mask & cond bitmaps simultaneously
- BinaryBitBlockCounter counter(mask, /*start_offset=*/0, cond_values, cond_offset,
- batch.length);
- while (offset < batch.length) {
- const auto block = counter.NextAndWord();
- if (block.AllSet()) {
- CopyValues<Type>(values_datum, offset, block.length, out_valid, out_values,
- out_offset + offset);
- BitUtil::SetBitsTo(mask, offset, block.length, false);
- } else if (block.popcount) {
- for (int64_t j = 0; j < block.length; ++j) {
- if (BitUtil::GetBit(mask, offset + j) &&
- BitUtil::GetBit(cond_values, cond_offset + offset + j)) {
- CopyValues<Type>(values_datum, offset + j, /*length=*/1, out_valid,
- out_values, out_offset + offset + j);
- BitUtil::SetBitTo(mask, offset + j, false);
- }
- }
- }
- offset += block.length;
- }
- } else {
- // Visit mask & cond bitmap & cond validity
- const uint8_t* cond_valid = cond_array.buffers[0]->data();
- Bitmap bitmaps[3] = {{mask, /*offset=*/0, batch.length},
- {cond_values, cond_offset, batch.length},
- {cond_valid, cond_offset, batch.length}};
- Bitmap::VisitWords(bitmaps, [&](std::array<uint64_t, 3> words) {
- const uint64_t word = words[0] & words[1] & words[2];
- const int64_t block_length = std::min<int64_t>(64, batch.length - offset);
- if (word == std::numeric_limits<uint64_t>::max()) {
- CopyValues<Type>(values_datum, offset, block_length, out_valid, out_values,
- out_offset + offset);
- BitUtil::SetBitsTo(mask, offset, block_length, false);
- } else if (word) {
- for (int64_t j = 0; j < block_length; ++j) {
- if (BitUtil::GetBit(mask, offset + j) &&
- BitUtil::GetBit(cond_valid, cond_offset + offset + j) &&
- BitUtil::GetBit(cond_values, cond_offset + offset + j)) {
- CopyValues<Type>(values_datum, offset + j, /*length=*/1, out_valid,
- out_values, out_offset + offset + j);
- BitUtil::SetBitTo(mask, offset + j, false);
- }
- }
- }
- });
- }
- }
- if (!have_else_arg) {
- // Need to initialize any remaining null slots (uninitialized memory)
- BitBlockCounter counter(mask, /*offset=*/0, batch.length);
- int64_t offset = 0;
- auto bit_width = checked_cast<const FixedWidthType&>(*out->type()).bit_width();
- auto byte_width = BitUtil::BytesForBits(bit_width);
- while (offset < batch.length) {
- const auto block = counter.NextWord();
- if (block.AllSet()) {
- if (bit_width == 1) {
- BitUtil::SetBitsTo(out_values, out_offset + offset, block.length, false);
- } else {
- std::memset(out_values + (out_offset + offset) * byte_width, 0x00,
- byte_width * block.length);
- }
- } else if (!block.NoneSet()) {
- for (int64_t j = 0; j < block.length; ++j) {
- if (BitUtil::GetBit(out_valid, out_offset + offset + j)) continue;
- if (bit_width == 1) {
- BitUtil::ClearBit(out_values, out_offset + offset + j);
- } else {
- std::memset(out_values + (out_offset + offset + j) * byte_width, 0x00,
- byte_width);
- }
- }
- }
- offset += block.length;
- }
- }
- return Status::OK();
-}
-
-template <typename Type, typename Enable = void>
-struct CaseWhenFunctor {
- static Status Exec(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
- if (batch.values[0].is_array()) {
- return ExecArrayCaseWhen<Type>(ctx, batch, out);
- }
- return ExecScalarCaseWhen<Type>(ctx, batch, out);
- }
-};
-
-template <>
-struct CaseWhenFunctor<NullType> {
- static Status Exec(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
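-    // Nothing to compute: for NullType the output is entirely null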
- return Status::OK();
- }
-};
-
-struct CoalesceFunction : ScalarFunction {
- using ScalarFunction::ScalarFunction;
-
- Result<const Kernel*> DispatchBest(std::vector<ValueDescr>* values) const override {
- RETURN_NOT_OK(CheckArity(*values));
- using arrow::compute::detail::DispatchExactImpl;
- if (auto kernel = DispatchExactImpl(this, *values)) return kernel;
- EnsureDictionaryDecoded(values);
- if (auto type = CommonNumeric(*values)) {
- ReplaceTypes(type, values);
- }
- if (auto kernel = DispatchExactImpl(this, *values)) return kernel;
- return arrow::compute::detail::NoMatchingKernel(this, *values);
- }
-};
-
-// Implement a 'coalesce' (SQL) operator for any number of scalar inputs
-Status ExecScalarCoalesce(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
- for (const auto& datum : batch.values) {
- if (datum.scalar()->is_valid) {
- *out = datum;
- break;
- }
- }
- return Status::OK();
-}
-
-// Helper: copy from a source datum into all null slots of the output
-template <typename Type>
-void CopyValuesAllValid(Datum source, uint8_t* out_valid, uint8_t* out_values,
- const int64_t out_offset, const int64_t length) {
- BitBlockCounter counter(out_valid, out_offset, length);
- int64_t offset = 0;
- while (offset < length) {
- const auto block = counter.NextWord();
- if (block.NoneSet()) {
- CopyValues<Type>(source, offset, block.length, out_valid, out_values,
- out_offset + offset);
- } else if (!block.AllSet()) {
- for (int64_t j = 0; j < block.length; ++j) {
- if (!BitUtil::GetBit(out_valid, out_offset + offset + j)) {
- CopyValues<Type>(source, offset + j, 1, out_valid, out_values,
- out_offset + offset + j);
- }
- }
- }
- offset += block.length;
- }
-}
-
-// Helper: zero the values buffer of the output wherever the slot is null
-void InitializeNullSlots(const DataType& type, uint8_t* out_valid, uint8_t* out_values,
- const int64_t out_offset, const int64_t length) {
- BitBlockCounter counter(out_valid, out_offset, length);
- int64_t offset = 0;
- auto bit_width = checked_cast<const FixedWidthType&>(type).bit_width();
- auto byte_width = BitUtil::BytesForBits(bit_width);
- while (offset < length) {
- const auto block = counter.NextWord();
- if (block.NoneSet()) {
- if (bit_width == 1) {
- BitUtil::SetBitsTo(out_values, out_offset + offset, block.length, false);
- } else {
- std::memset(out_values + (out_offset + offset) * byte_width, 0x00,
- byte_width * block.length);
- }
- } else if (!block.AllSet()) {
- for (int64_t j = 0; j < block.length; ++j) {
- if (BitUtil::GetBit(out_valid, out_offset + offset + j)) continue;
- if (bit_width == 1) {
- BitUtil::ClearBit(out_values, out_offset + offset + j);
- } else {
- std::memset(out_values + (out_offset + offset + j) * byte_width, 0x00,
- byte_width);
- }
- }
- }
- offset += block.length;
- }
-}
-
-// Implement 'coalesce' for any mix of scalar/array arguments for any fixed-width type
-template <typename Type>
-Status ExecArrayCoalesce(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
- ArrayData* output = out->mutable_array();
- const int64_t out_offset = output->offset;
- // Use output validity buffer as mask to decide what values to copy
- uint8_t* out_valid = output->buffers[0]->mutable_data();
-  // Clear output validity bitmap - no values are set initially
- BitUtil::SetBitsTo(out_valid, out_offset, batch.length, false);
- uint8_t* out_values = output->buffers[1]->mutable_data();
-
- for (const auto& datum : batch.values) {
- if ((datum.is_scalar() && datum.scalar()->is_valid) ||
- (datum.is_array() && !datum.array()->MayHaveNulls())) {
- // Valid scalar, or all-valid array
- CopyValuesAllValid<Type>(datum, out_valid, out_values, out_offset, batch.length);
- break;
- } else if (datum.is_array()) {
- // Array with nulls
- const ArrayData& arr = *datum.array();
- const DataType& type = *datum.type();
- const uint8_t* in_valid = arr.buffers[0]->data();
- const uint8_t* in_values = arr.buffers[1]->data();
- BinaryBitBlockCounter counter(in_valid, arr.offset, out_valid, out_offset,
- batch.length);
- int64_t offset = 0;
- while (offset < batch.length) {
- const auto block = counter.NextAndNotWord();
- if (block.AllSet()) {
- CopyValues<Type>(datum, offset, block.length, out_valid, out_values,
- out_offset + offset);
- } else if (block.popcount) {
- for (int64_t j = 0; j < block.length; ++j) {
- if (!BitUtil::GetBit(out_valid, out_offset + offset + j) &&
- BitUtil::GetBit(in_valid, arr.offset + offset + j)) {
- // This version lets us avoid calling MayHaveNulls() on every iteration
- // (which does an atomic load and can add up)
- CopyOneArrayValue<Type>(type, in_valid, in_values, arr.offset + offset + j,
- out_valid, out_values, out_offset + offset + j);
- }
- }
- }
- offset += block.length;
- }
- }
- }
-
- // Initialize any remaining null slots (uninitialized memory)
- InitializeNullSlots(*out->type(), out_valid, out_values, out_offset, batch.length);
- return Status::OK();
-}
-
-template <typename Type, typename Enable = void>
-struct CoalesceFunctor {
- static Status Exec(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
- for (const auto& datum : batch.values) {
- if (datum.is_array()) {
- return ExecArrayCoalesce<Type>(ctx, batch, out);
- }
- }
- return ExecScalarCoalesce(ctx, batch, out);
- }
-};
-
-template <>
-struct CoalesceFunctor<NullType> {
- static Status Exec(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
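-    // Nothing to compute: coalescing all-null inputs yields an all-null output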
- return Status::OK();
- }
-};
-
-template <typename Type>
-struct CoalesceFunctor<Type, enable_if_base_binary<Type>> {
- using offset_type = typename Type::offset_type;
- using BuilderType = typename TypeTraits<Type>::BuilderType;
- static Status Exec(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
- for (const auto& datum : batch.values) {
- if (datum.is_array()) {
- return ExecArray(ctx, batch, out);
- }
- }
- return ExecScalarCoalesce(ctx, batch, out);
- }
-
- static Status ExecArray(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
-    // Special case: if the first argument is a non-null scalar or an all-valid
-    // array, return it directly
- for (const auto& datum : batch.values) {
- if (datum.is_scalar()) {
- if (!datum.scalar()->is_valid) continue;
- ARROW_ASSIGN_OR_RAISE(
- *out, MakeArrayFromScalar(*datum.scalar(), batch.length, ctx->memory_pool()));
- return Status::OK();
- } else if (datum.is_array() && !datum.array()->MayHaveNulls()) {
- *out = datum;
- return Status::OK();
- }
- break;
- }
- ArrayData* output = out->mutable_array();
- BuilderType builder(batch[0].type(), ctx->memory_pool());
- RETURN_NOT_OK(builder.Reserve(batch.length));
- for (int64_t i = 0; i < batch.length; i++) {
- bool set = false;
- for (const auto& datum : batch.values) {
- if (datum.is_scalar()) {
- if (datum.scalar()->is_valid) {
- RETURN_NOT_OK(builder.Append(UnboxScalar<Type>::Unbox(*datum.scalar())));
- set = true;
- break;
- }
- } else {
- const ArrayData& source = *datum.array();
- if (!source.MayHaveNulls() ||
- BitUtil::GetBit(source.buffers[0]->data(), source.offset + i)) {
- const uint8_t* data = source.buffers[2]->data();
- const offset_type* offsets = source.GetValues<offset_type>(1);
- const offset_type offset0 = offsets[i];
- const offset_type offset1 = offsets[i + 1];
- RETURN_NOT_OK(builder.Append(data + offset0, offset1 - offset0));
- set = true;
- break;
- }
- }
- }
- if (!set) RETURN_NOT_OK(builder.AppendNull());
- }
- ARROW_ASSIGN_OR_RAISE(auto temp_output, builder.Finish());
- *output = *temp_output->data();
- // Builder type != logical type due to GenerateTypeAgnosticVarBinaryBase
- output->type = batch[0].type();
- return Status::OK();
- }
-};
-
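-// Resolves the output descriptor for case_when: the type of the last argument,
-// broadcast to array shape if any argument is an array (via GetBroadcastShape)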
-Result<ValueDescr> LastType(KernelContext*, const std::vector<ValueDescr>& descrs) {
- ValueDescr result = descrs.back();
- result.shape = GetBroadcastShape(descrs);
- return result;
-}
-
-void AddCaseWhenKernel(const std::shared_ptr<CaseWhenFunction>& scalar_function,
- detail::GetTypeId get_id, ArrayKernelExec exec) {
- ScalarKernel kernel(
- KernelSignature::Make({InputType(Type::STRUCT), InputType(get_id.id)},
- OutputType(LastType),
- /*is_varargs=*/true),
- exec);
- kernel.null_handling = NullHandling::COMPUTED_PREALLOCATE;
- kernel.mem_allocation = MemAllocation::PREALLOCATE;
- kernel.can_write_into_slices = is_fixed_width(get_id.id);
- DCHECK_OK(scalar_function->AddKernel(std::move(kernel)));
-}
-
-void AddPrimitiveCaseWhenKernels(const std::shared_ptr<CaseWhenFunction>& scalar_function,
- const std::vector<std::shared_ptr<DataType>>& types) {
- for (auto&& type : types) {
- auto exec = GenerateTypeAgnosticPrimitive<CaseWhenFunctor>(*type);
- AddCaseWhenKernel(scalar_function, type, std::move(exec));
- }
-}
-
-void AddCoalesceKernel(const std::shared_ptr<ScalarFunction>& scalar_function,
- detail::GetTypeId get_id, ArrayKernelExec exec) {
- ScalarKernel kernel(KernelSignature::Make({InputType(get_id.id)}, OutputType(FirstType),
- /*is_varargs=*/true),
- exec);
- kernel.null_handling = NullHandling::COMPUTED_PREALLOCATE;
- kernel.mem_allocation = MemAllocation::PREALLOCATE;
- kernel.can_write_into_slices = is_fixed_width(get_id.id);
- DCHECK_OK(scalar_function->AddKernel(std::move(kernel)));
-}
-
-void AddPrimitiveCoalesceKernels(const std::shared_ptr<ScalarFunction>& scalar_function,
- const std::vector<std::shared_ptr<DataType>>& types) {
- for (auto&& type : types) {
- auto exec = GenerateTypeAgnosticPrimitive<CoalesceFunctor>(*type);
- AddCoalesceKernel(scalar_function, type, std::move(exec));
- }
-}
-
-const FunctionDoc if_else_doc{"Choose values based on a condition",
-                              ("`cond` must be a Boolean scalar or array.\n"
-                               "`left` and `right` must be scalars or arrays of "
-                               "the same type.\nNull values in `cond` will be "
-                               "promoted to the output."),
- {"cond", "left", "right"}};
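-
-// For instance (illustrative only): if_else([true, false, null], [1, 2, 3],
-// [4, 5, 6]) yields [1, 5, null]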
-
-const FunctionDoc case_when_doc{
- "Choose values based on multiple conditions",
- ("`cond` must be a struct of Boolean values. `cases` can be a mix "
- "of scalar and array arguments (of any type, but all must be the "
- "same type or castable to a common type), with either exactly one "
- "datum per child of `cond`, or one more `cases` than children of "
- "`cond` (in which case we have an \"else\" value).\n"
- "Each row of the output will be the corresponding value of the "
- "first datum in `cases` for which the corresponding child of `cond` "
- "is true, or otherwise the \"else\" value (if given), or null. "
- "Essentially, this implements a switch-case or if-else, if-else... "
- "statement."),
- {"cond", "*cases"}};
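-
-// For instance (illustrative only): case_when({c0: [true, false, false],
-// c1: [false, true, false]}, x, y, z) yields [x[0], y[1], z[2]], with z acting
-// as the "else" value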
-
-const FunctionDoc coalesce_doc{
- "Select the first non-null value in each slot",
- ("Each row of the output will be the value from the first corresponding input "
- "for which the value is not null. If all inputs are null in a row, the output "
- "will be null."),
- {"*values"}};
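-
-// For instance (illustrative only): coalesce([null, 1, null], [2, null, null])
-// yields [2, 1, null]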
-} // namespace
-
-void RegisterScalarIfElse(FunctionRegistry* registry) {
- {
- auto func =
- std::make_shared<IfElseFunction>("if_else", Arity::Ternary(), &if_else_doc);
-
- AddPrimitiveIfElseKernels(func, NumericTypes());
- AddPrimitiveIfElseKernels(func, TemporalTypes());
- AddPrimitiveIfElseKernels(func, {boolean(), day_time_interval(), month_interval()});
- AddNullIfElseKernel(func);
- AddBinaryIfElseKernels(func, BaseBinaryTypes());
- AddFSBinaryIfElseKernel(func);
- DCHECK_OK(registry->AddFunction(std::move(func)));
- }
- {
- auto func = std::make_shared<CaseWhenFunction>(
- "case_when", Arity::VarArgs(/*min_args=*/1), &case_when_doc);
- AddPrimitiveCaseWhenKernels(func, NumericTypes());
- AddPrimitiveCaseWhenKernels(func, TemporalTypes());
- AddPrimitiveCaseWhenKernels(
- func, {boolean(), null(), day_time_interval(), month_interval()});
- AddCaseWhenKernel(func, Type::FIXED_SIZE_BINARY,
- CaseWhenFunctor<FixedSizeBinaryType>::Exec);
- AddCaseWhenKernel(func, Type::DECIMAL128, CaseWhenFunctor<Decimal128Type>::Exec);
- AddCaseWhenKernel(func, Type::DECIMAL256, CaseWhenFunctor<Decimal256Type>::Exec);
- DCHECK_OK(registry->AddFunction(std::move(func)));
- }
- {
- auto func = std::make_shared<CoalesceFunction>(
- "coalesce", Arity::VarArgs(/*min_args=*/1), &coalesce_doc);
- AddPrimitiveCoalesceKernels(func, NumericTypes());
- AddPrimitiveCoalesceKernels(func, TemporalTypes());
- AddPrimitiveCoalesceKernels(
- func, {boolean(), null(), day_time_interval(), month_interval()});
- AddCoalesceKernel(func, Type::FIXED_SIZE_BINARY,
- CoalesceFunctor<FixedSizeBinaryType>::Exec);
- AddCoalesceKernel(func, Type::DECIMAL128, CoalesceFunctor<Decimal128Type>::Exec);
- AddCoalesceKernel(func, Type::DECIMAL256, CoalesceFunctor<Decimal256Type>::Exec);
- for (const auto& ty : BaseBinaryTypes()) {
- AddCoalesceKernel(func, ty, GenerateTypeAgnosticVarBinaryBase<CoalesceFunctor>(ty));
- }
- DCHECK_OK(registry->AddFunction(std::move(func)));
- }
-}
-
-} // namespace internal
-} // namespace compute
-} // namespace arrow
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include <arrow/compute/api.h>
+#include <arrow/compute/kernels/codegen_internal.h>
+#include <arrow/compute/util_internal.h>
+#include <arrow/util/bit_block_counter.h>
+#include <arrow/util/bitmap.h>
+#include <arrow/util/bitmap_ops.h>
+#include <arrow/util/bitmap_reader.h>
+
+namespace arrow {
+using internal::BitBlockCount;
+using internal::BitBlockCounter;
+using internal::Bitmap;
+using internal::BitmapWordReader;
+
+namespace compute {
+namespace internal {
+
+namespace {
+
+constexpr uint64_t kAllNull = 0;
+constexpr uint64_t kAllValid = ~kAllNull;
+
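+// Returns kAllValid/kAllNull when a datum's validity is constant (a scalar, an
+// all-null array, or an array without nulls); otherwise returns nullopt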
+util::optional<uint64_t> GetConstantValidityWord(const Datum& data) {
+ if (data.is_scalar()) {
+ return data.scalar()->is_valid ? kAllValid : kAllNull;
+ }
+
+ if (data.array()->null_count == data.array()->length) return kAllNull;
+
+ if (!data.array()->MayHaveNulls()) return kAllValid;
+
+ // no constant validity word available
+ return {};
+}
+
+inline Bitmap GetBitmap(const Datum& datum, int i) {
+ if (datum.is_scalar()) return {};
+ const ArrayData& a = *datum.array();
+ return Bitmap{a.buffers[i], a.offset, a.length};
+}
+
+// If the condition is null, the output is null; otherwise, validity is taken from
+// the selected argument,
+// i.e. cond.valid & (cond.data & left.valid | ~cond.data & right.valid)
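+// For example, when cond.valid=1 and cond.data=1 the output validity equals
+// left.valid; when cond.valid=0 the output is null regardless of the other words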
+template <typename AllocateNullBitmap>
+Status PromoteNullsVisitor(KernelContext* ctx, const Datum& cond_d, const Datum& left_d,
+ const Datum& right_d, ArrayData* output) {
+ auto cond_const = GetConstantValidityWord(cond_d);
+ auto left_const = GetConstantValidityWord(left_d);
+ auto right_const = GetConstantValidityWord(right_d);
+
+ enum { COND_CONST = 1, LEFT_CONST = 2, RIGHT_CONST = 4 };
+ auto flag = COND_CONST * cond_const.has_value() | LEFT_CONST * left_const.has_value() |
+ RIGHT_CONST * right_const.has_value();
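+  // e.g. flag == (LEFT_CONST | RIGHT_CONST) means only cond's validity varies
+  // per word, which selects the matching case in the switch below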
+
+ const ArrayData& cond = *cond_d.array();
+ // cond.data will always be available
+ Bitmap cond_data{cond.buffers[1], cond.offset, cond.length};
+ Bitmap cond_valid{cond.buffers[0], cond.offset, cond.length};
+ Bitmap left_valid = GetBitmap(left_d, 0);
+ Bitmap right_valid = GetBitmap(right_d, 0);
+
+ // cond.valid & (cond.data & left.valid | ~cond.data & right.valid)
+  // In the following cases, we don't need to allocate the out_valid bitmap
+
+ // if cond & left & right all ones, then output is all valid.
+ // if output validity buffer is already allocated (NullHandling::
+ // COMPUTED_PREALLOCATE) -> set all bits
+ // else, return nullptr
+ if (cond_const == kAllValid && left_const == kAllValid && right_const == kAllValid) {
+ if (AllocateNullBitmap::value) { // NullHandling::COMPUTED_NO_PREALLOCATE
+ output->buffers[0] = nullptr;
+ } else { // NullHandling::COMPUTED_PREALLOCATE
+ BitUtil::SetBitmap(output->buffers[0]->mutable_data(), output->offset,
+ output->length);
+ }
+ return Status::OK();
+ }
+
+ if (left_const == kAllValid && right_const == kAllValid) {
+ // if both left and right are valid, no need to calculate out_valid bitmap. Copy
+ // cond validity buffer
+ if (AllocateNullBitmap::value) { // NullHandling::COMPUTED_NO_PREALLOCATE
+      // if there's an offset, copy the bitmap (a bitmap cannot be zero-copy
+      // sliced at a bit offset)
+ if (cond.offset) {
+ ARROW_ASSIGN_OR_RAISE(
+ output->buffers[0],
+ arrow::internal::CopyBitmap(ctx->memory_pool(), cond.buffers[0]->data(),
+ cond.offset, cond.length));
+ } else { // just copy assign cond validity buffer
+ output->buffers[0] = cond.buffers[0];
+ }
+ } else { // NullHandling::COMPUTED_PREALLOCATE
+ arrow::internal::CopyBitmap(cond.buffers[0]->data(), cond.offset, cond.length,
+ output->buffers[0]->mutable_data(), output->offset);
+ }
+ return Status::OK();
+ }
+
+ // lambda function that will be used inside the visitor
+ auto apply = [&](uint64_t c_valid, uint64_t c_data, uint64_t l_valid,
+ uint64_t r_valid) {
+ return c_valid & ((c_data & l_valid) | (~c_data & r_valid));
+ };
+
+ if (AllocateNullBitmap::value) {
+    // The following cases require a separate out_valid buffer;
+    // COMPUTED_NO_PREALLOCATE will not have allocated one for it.
+ ARROW_ASSIGN_OR_RAISE(output->buffers[0], ctx->AllocateBitmap(cond.length));
+ }
+
+ std::array<Bitmap, 1> out_bitmaps{
+ Bitmap{output->buffers[0], output->offset, output->length}};
+
+ switch (flag) {
+ case COND_CONST | LEFT_CONST | RIGHT_CONST: {
+ std::array<Bitmap, 1> bitmaps{cond_data};
+ Bitmap::VisitWordsAndWrite(bitmaps, &out_bitmaps,
+ [&](const std::array<uint64_t, 1>& words_in,
+ std::array<uint64_t, 1>* word_out) {
+ word_out->at(0) = apply(*cond_const, words_in[0],
+ *left_const, *right_const);
+ });
+ break;
+ }
+ case LEFT_CONST | RIGHT_CONST: {
+ std::array<Bitmap, 2> bitmaps{cond_valid, cond_data};
+ Bitmap::VisitWordsAndWrite(bitmaps, &out_bitmaps,
+ [&](const std::array<uint64_t, 2>& words_in,
+ std::array<uint64_t, 1>* word_out) {
+ word_out->at(0) = apply(words_in[0], words_in[1],
+ *left_const, *right_const);
+ });
+ break;
+ }
+ case COND_CONST | RIGHT_CONST: {
+ // bitmaps[C_VALID], bitmaps[R_VALID] might be null; override to make it safe for
+ // Visit()
+ std::array<Bitmap, 2> bitmaps{cond_data, left_valid};
+ Bitmap::VisitWordsAndWrite(bitmaps, &out_bitmaps,
+ [&](const std::array<uint64_t, 2>& words_in,
+ std::array<uint64_t, 1>* word_out) {
+ word_out->at(0) = apply(*cond_const, words_in[0],
+ words_in[1], *right_const);
+ });
+ break;
+ }
+ case RIGHT_CONST: {
+ // bitmaps[R_VALID] might be null; override to make it safe for Visit()
+ std::array<Bitmap, 3> bitmaps{cond_valid, cond_data, left_valid};
+ Bitmap::VisitWordsAndWrite(bitmaps, &out_bitmaps,
+ [&](const std::array<uint64_t, 3>& words_in,
+ std::array<uint64_t, 1>* word_out) {
+ word_out->at(0) = apply(words_in[0], words_in[1],
+ words_in[2], *right_const);
+ });
+ break;
+ }
+ case COND_CONST | LEFT_CONST: {
+ // bitmaps[C_VALID], bitmaps[L_VALID] might be null; override to make it safe for
+ // Visit()
+ std::array<Bitmap, 2> bitmaps{cond_data, right_valid};
+ Bitmap::VisitWordsAndWrite(bitmaps, &out_bitmaps,
+ [&](const std::array<uint64_t, 2>& words_in,
+ std::array<uint64_t, 1>* word_out) {
+ word_out->at(0) = apply(*cond_const, words_in[0],
+ *left_const, words_in[1]);
+ });
+ break;
+ }
+ case LEFT_CONST: {
+ // bitmaps[L_VALID] might be null; override to make it safe for Visit()
+ std::array<Bitmap, 3> bitmaps{cond_valid, cond_data, right_valid};
+ Bitmap::VisitWordsAndWrite(bitmaps, &out_bitmaps,
+ [&](const std::array<uint64_t, 3>& words_in,
+ std::array<uint64_t, 1>* word_out) {
+ word_out->at(0) = apply(words_in[0], words_in[1],
+ *left_const, words_in[2]);
+ });
+ break;
+ }
+ case COND_CONST: {
+ // bitmaps[C_VALID] might be null; override to make it safe for Visit()
+ std::array<Bitmap, 3> bitmaps{cond_data, left_valid, right_valid};
+ Bitmap::VisitWordsAndWrite(bitmaps, &out_bitmaps,
+ [&](const std::array<uint64_t, 3>& words_in,
+ std::array<uint64_t, 1>* word_out) {
+ word_out->at(0) = apply(*cond_const, words_in[0],
+ words_in[1], words_in[2]);
+ });
+ break;
+ }
+ case 0: {
+ std::array<Bitmap, 4> bitmaps{cond_valid, cond_data, left_valid, right_valid};
+ Bitmap::VisitWordsAndWrite(bitmaps, &out_bitmaps,
+ [&](const std::array<uint64_t, 4>& words_in,
+ std::array<uint64_t, 1>* word_out) {
+ word_out->at(0) = apply(words_in[0], words_in[1],
+ words_in[2], words_in[3]);
+ });
+ break;
+ }
+ }
+ return Status::OK();
+}
+
+using Word = uint64_t;
+static constexpr int64_t word_len = sizeof(Word) * 8;
+
+/// Runs the main if_else loop. Here, it is expected that the right data has already
+/// been copied to the output.
+/// `invert` inverts the meaning of cond.data: when set to `true`, the condition
+/// buffer is treated as inverted before calling the handle_block function.
+/// This is useful when left is an array and right is a scalar: rather than
+/// copying data from the right to the output, we can copy left data to the output
+/// and invert the cond data to fill in the right values. Filling with a scalar is
+/// presumed to be more efficient than filling with an array.
+///
+/// `HandleBlock` has the signature:
+///   [](int64_t offset, int64_t length){...}
+/// It should copy `length` elements from the source array to the output array,
+/// starting at `offset` in both arrays.
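+///
+/// For example (an illustrative sketch mirroring the numeric functor below):
+///   [&](int64_t data_offset, int64_t num_elems) {
+///     std::memcpy(out_values + data_offset, left_data + data_offset,
+///                 num_elems * sizeof(T));
+///   }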
+template <typename HandleBlock, bool invert = false>
+void RunIfElseLoop(const ArrayData& cond, const HandleBlock& handle_block) {
+ int64_t data_offset = 0;
+ int64_t bit_offset = cond.offset;
+ const auto* cond_data = cond.buffers[1]->data(); // this is a BoolArray
+
+ BitmapWordReader<Word> cond_reader(cond_data, cond.offset, cond.length);
+
+ constexpr Word pickAll = invert ? 0 : UINT64_MAX;
+ constexpr Word pickNone = ~pickAll;
+
+ int64_t cnt = cond_reader.words();
+ while (cnt--) {
+ Word word = cond_reader.NextWord();
+
+ if (word == pickAll) {
+ handle_block(data_offset, word_len);
+ } else if (word != pickNone) {
+ for (int64_t i = 0; i < word_len; ++i) {
+ if (BitUtil::GetBit(cond_data, bit_offset + i) != invert) {
+ handle_block(data_offset + i, 1);
+ }
+ }
+ }
+ data_offset += word_len;
+ bit_offset += word_len;
+ }
+
+ constexpr uint8_t pickAllByte = invert ? 0 : UINT8_MAX;
+ // byte bit-wise inversion is int-wide. Hence XOR with 0xff
+ constexpr uint8_t pickNoneByte = pickAllByte ^ 0xff;
+
+ cnt = cond_reader.trailing_bytes();
+ while (cnt--) {
+ int valid_bits;
+ uint8_t byte = cond_reader.NextTrailingByte(valid_bits);
+
+ if (byte == pickAllByte && valid_bits == 8) {
+ handle_block(data_offset, 8);
+ } else if (byte != pickNoneByte) {
+ for (int i = 0; i < valid_bits; ++i) {
+ if (BitUtil::GetBit(cond_data, bit_offset + i) != invert) {
+ handle_block(data_offset + i, 1);
+ }
+ }
+ }
+ data_offset += 8;
+ bit_offset += 8;
+ }
+}
+
+template <typename HandleBlock>
+void RunIfElseLoopInverted(const ArrayData& cond, const HandleBlock& handle_block) {
+ RunIfElseLoop<HandleBlock, true>(cond, handle_block);
+}
+
+/// Runs if_else when cond is a scalar. Two helper functions are required:
+/// 1. CopyArrayData, 2. BroadcastScalar
+template <typename CopyArrayData, typename BroadcastScalar>
+Status RunIfElseScalar(const BooleanScalar& cond, const Datum& left, const Datum& right,
+ Datum* out, const CopyArrayData& copy_array_data,
+ const BroadcastScalar& broadcast_scalar) {
+ if (left.is_scalar() && right.is_scalar()) { // output will be a scalar
+ if (cond.is_valid) {
+ *out = cond.value ? left.scalar() : right.scalar();
+ } else {
+ *out = MakeNullScalar(left.type());
+ }
+ return Status::OK();
+ }
+
+  // either left or right is an array, so the output is always an array
+ const std::shared_ptr<ArrayData>& out_array = out->array();
+ if (!cond.is_valid) {
+ // cond is null; output is all null --> clear validity buffer
+ BitUtil::ClearBitmap(out_array->buffers[0]->mutable_data(), out_array->offset,
+ out_array->length);
+ return Status::OK();
+ }
+
+ // cond is a non-null scalar
+ const auto& valid_data = cond.value ? left : right;
+ if (valid_data.is_array()) {
+ // valid_data is an array. Hence copy data to the output buffers
+ const auto& valid_array = valid_data.array();
+ if (valid_array->MayHaveNulls()) {
+ arrow::internal::CopyBitmap(
+ valid_array->buffers[0]->data(), valid_array->offset, valid_array->length,
+ out_array->buffers[0]->mutable_data(), out_array->offset);
+ } else { // validity buffer is nullptr --> set all bits
+ BitUtil::SetBitmap(out_array->buffers[0]->mutable_data(), out_array->offset,
+ out_array->length);
+ }
+ copy_array_data(*valid_array, out_array.get());
+ return Status::OK();
+
+ } else { // valid data is scalar
+ // valid data is a scalar that needs to be broadcasted
+ const auto& valid_scalar = *valid_data.scalar();
+ if (valid_scalar.is_valid) { // if the scalar is non-null, broadcast
+ BitUtil::SetBitmap(out_array->buffers[0]->mutable_data(), out_array->offset,
+ out_array->length);
+ broadcast_scalar(*valid_data.scalar(), out_array.get());
+ } else { // scalar is null, clear the output validity buffer
+ BitUtil::ClearBitmap(out_array->buffers[0]->mutable_data(), out_array->offset,
+ out_array->length);
+ }
+ return Status::OK();
+ }
+}
+
+template <typename Type, typename Enable = void>
+struct IfElseFunctor {};
+
+// Only number types need to be handled for fixed-size primitive data types, because
+// internal::GenerateTypeAgnosticPrimitive forwards types to the corresponding
+// unsigned int type
+template <typename Type>
+struct IfElseFunctor<Type, enable_if_number<Type>> {
+ using T = typename TypeTraits<Type>::CType;
+ // A - Array, S - Scalar, X = Array/Scalar
+
+ // SXX
+ static Status Call(KernelContext* ctx, const BooleanScalar& cond, const Datum& left,
+ const Datum& right, Datum* out) {
+ return RunIfElseScalar(
+ cond, left, right, out,
+ /*CopyArrayData*/
+ [&](const ArrayData& valid_array, ArrayData* out_array) {
+ std::memcpy(out_array->GetMutableValues<T>(1), valid_array.GetValues<T>(1),
+ valid_array.length * sizeof(T));
+ },
+ /*BroadcastScalar*/
+ [&](const Scalar& scalar, ArrayData* out_array) {
+ T scalar_data = internal::UnboxScalar<Type>::Unbox(scalar);
+ std::fill(out_array->GetMutableValues<T>(1),
+ out_array->GetMutableValues<T>(1) + out_array->length, scalar_data);
+ });
+ }
+
+ // AAA
+ static Status Call(KernelContext* ctx, const ArrayData& cond, const ArrayData& left,
+ const ArrayData& right, ArrayData* out) {
+ T* out_values = out->template GetMutableValues<T>(1);
+
+ // copy right data to out_buff
+ const T* right_data = right.GetValues<T>(1);
+ std::memcpy(out_values, right_data, right.length * sizeof(T));
+
+ // selectively copy values from left data
+ const T* left_data = left.GetValues<T>(1);
+
+ RunIfElseLoop(cond, [&](int64_t data_offset, int64_t num_elems) {
+ std::memcpy(out_values + data_offset, left_data + data_offset,
+ num_elems * sizeof(T));
+ });
+
+ return Status::OK();
+ }
+
+ // ASA
+ static Status Call(KernelContext* ctx, const ArrayData& cond, const Scalar& left,
+ const ArrayData& right, ArrayData* out) {
+ T* out_values = out->template GetMutableValues<T>(1);
+
+ // copy right data to out_buff
+ const T* right_data = right.GetValues<T>(1);
+ std::memcpy(out_values, right_data, right.length * sizeof(T));
+
+ // selectively copy values from left data
+ T left_data = internal::UnboxScalar<Type>::Unbox(left);
+
+ RunIfElseLoop(cond, [&](int64_t data_offset, int64_t num_elems) {
+ std::fill(out_values + data_offset, out_values + data_offset + num_elems,
+ left_data);
+ });
+
+ return Status::OK();
+ }
+
+ // AAS
+ static Status Call(KernelContext* ctx, const ArrayData& cond, const ArrayData& left,
+ const Scalar& right, ArrayData* out) {
+ T* out_values = out->template GetMutableValues<T>(1);
+
+ // copy left data to out_buff
+ const T* left_data = left.GetValues<T>(1);
+ std::memcpy(out_values, left_data, left.length * sizeof(T));
+
+ T right_data = internal::UnboxScalar<Type>::Unbox(right);
+
+ RunIfElseLoopInverted(cond, [&](int64_t data_offset, int64_t num_elems) {
+ std::fill(out_values + data_offset, out_values + data_offset + num_elems,
+ right_data);
+ });
+
+ return Status::OK();
+ }
+
+ // ASS
+ static Status Call(KernelContext* ctx, const ArrayData& cond, const Scalar& left,
+ const Scalar& right, ArrayData* out) {
+ T* out_values = out->template GetMutableValues<T>(1);
+
+ // copy right data to out_buff
+ T right_data = internal::UnboxScalar<Type>::Unbox(right);
+ std::fill(out_values, out_values + cond.length, right_data);
+
+ // selectively copy values from left data
+ T left_data = internal::UnboxScalar<Type>::Unbox(left);
+ RunIfElseLoop(cond, [&](int64_t data_offset, int64_t num_elems) {
+ std::fill(out_values + data_offset, out_values + data_offset + num_elems,
+ left_data);
+ });
+
+ return Status::OK();
+ }
+};
+
+template <typename Type>
+struct IfElseFunctor<Type, enable_if_boolean<Type>> {
+ // A - Array, S - Scalar, X = Array/Scalar
+
+ // SXX
+ static Status Call(KernelContext* ctx, const BooleanScalar& cond, const Datum& left,
+ const Datum& right, Datum* out) {
+ return RunIfElseScalar(
+ cond, left, right, out,
+ /*CopyArrayData*/
+ [&](const ArrayData& valid_array, ArrayData* out_array) {
+ arrow::internal::CopyBitmap(
+ valid_array.buffers[1]->data(), valid_array.offset, valid_array.length,
+ out_array->buffers[1]->mutable_data(), out_array->offset);
+ },
+ /*BroadcastScalar*/
+ [&](const Scalar& scalar, ArrayData* out_array) {
+ bool scalar_data = internal::UnboxScalar<Type>::Unbox(scalar);
+ BitUtil::SetBitsTo(out_array->buffers[1]->mutable_data(), out_array->offset,
+ out_array->length, scalar_data);
+ });
+ }
+
+ // AAA
+ static Status Call(KernelContext* ctx, const ArrayData& cond, const ArrayData& left,
+ const ArrayData& right, ArrayData* out) {
+ // out_buff = right & ~cond
+ const auto& out_buf = out->buffers[1];
+ arrow::internal::BitmapAndNot(right.buffers[1]->data(), right.offset,
+ cond.buffers[1]->data(), cond.offset, cond.length,
+ out->offset, out_buf->mutable_data());
+
+ // out_buff = left & cond
+ ARROW_ASSIGN_OR_RAISE(std::shared_ptr<Buffer> temp_buf,
+ arrow::internal::BitmapAnd(
+ ctx->memory_pool(), left.buffers[1]->data(), left.offset,
+ cond.buffers[1]->data(), cond.offset, cond.length, 0));
+
+ arrow::internal::BitmapOr(out_buf->data(), out->offset, temp_buf->data(), 0,
+ cond.length, out->offset, out_buf->mutable_data());
+
+ return Status::OK();
+ }
+
+ // ASA
+ static Status Call(KernelContext* ctx, const ArrayData& cond, const Scalar& left,
+ const ArrayData& right, ArrayData* out) {
+ // out_buff = right & ~cond
+ const auto& out_buf = out->buffers[1];
+ arrow::internal::BitmapAndNot(right.buffers[1]->data(), right.offset,
+ cond.buffers[1]->data(), cond.offset, cond.length,
+ out->offset, out_buf->mutable_data());
+
+ // out_buff = left & cond
+ bool left_data = internal::UnboxScalar<BooleanType>::Unbox(left);
+ if (left_data) {
+ arrow::internal::BitmapOr(out_buf->data(), out->offset, cond.buffers[1]->data(),
+ cond.offset, cond.length, out->offset,
+ out_buf->mutable_data());
+ }
+
+ return Status::OK();
+ }
+
+ // AAS
+ static Status Call(KernelContext* ctx, const ArrayData& cond, const ArrayData& left,
+ const Scalar& right, ArrayData* out) {
+ // out_buff = left & cond
+ const auto& out_buf = out->buffers[1];
+ arrow::internal::BitmapAnd(left.buffers[1]->data(), left.offset,
+ cond.buffers[1]->data(), cond.offset, cond.length,
+ out->offset, out_buf->mutable_data());
+
+ bool right_data = internal::UnboxScalar<BooleanType>::Unbox(right);
+
+ // out_buff = left & cond | right & ~cond
+ if (right_data) {
+ arrow::internal::BitmapOrNot(out_buf->data(), out->offset, cond.buffers[1]->data(),
+ cond.offset, cond.length, out->offset,
+ out_buf->mutable_data());
+ }
+
+ return Status::OK();
+ }
+
+ // ASS
+ static Status Call(KernelContext* ctx, const ArrayData& cond, const Scalar& left,
+ const Scalar& right, ArrayData* out) {
+ bool left_data = internal::UnboxScalar<BooleanType>::Unbox(left);
+ bool right_data = internal::UnboxScalar<BooleanType>::Unbox(right);
+
+ const auto& out_buf = out->buffers[1];
+
+ // out_buf = left & cond | right & ~cond
+ if (left_data) {
+ if (right_data) {
+ // out_buf = ones
+ BitUtil::SetBitmap(out_buf->mutable_data(), out->offset, cond.length);
+ } else {
+ // out_buf = cond
+ arrow::internal::CopyBitmap(cond.buffers[1]->data(), cond.offset, cond.length,
+ out_buf->mutable_data(), out->offset);
+ }
+ } else {
+ if (right_data) {
+ // out_buf = ~cond
+ arrow::internal::InvertBitmap(cond.buffers[1]->data(), cond.offset, cond.length,
+ out_buf->mutable_data(), out->offset);
+ } else {
+ // out_buf = zeros
+ BitUtil::ClearBitmap(out_buf->mutable_data(), out->offset, cond.length);
+ }
+ }
+
+ return Status::OK();
+ }
+};
+
+template <typename Type>
+struct IfElseFunctor<Type, enable_if_base_binary<Type>> {
+ using OffsetType = typename TypeTraits<Type>::OffsetType::c_type;
+ using ArrayType = typename TypeTraits<Type>::ArrayType;
+ using BuilderType = typename TypeTraits<Type>::BuilderType;
+
+ // A - Array, S - Scalar, X = Array/Scalar
+
+ // SXX
+ static Status Call(KernelContext* ctx, const BooleanScalar& cond, const Datum& left,
+ const Datum& right, Datum* out) {
+ if (left.is_scalar() && right.is_scalar()) {
+ if (cond.is_valid) {
+ *out = cond.value ? left.scalar() : right.scalar();
+ } else {
+ *out = MakeNullScalar(left.type());
+ }
+ return Status::OK();
+ }
+ // either left or right is an array. Output is always an array
+ int64_t out_arr_len = std::max(left.length(), right.length());
+ if (!cond.is_valid) {
+ // cond is null; just create a null array
+      ARROW_ASSIGN_OR_RAISE(
+          *out, MakeArrayOfNull(left.type(), out_arr_len, ctx->memory_pool()));
+ return Status::OK();
+ }
+
+ const auto& valid_data = cond.value ? left : right;
+ if (valid_data.is_array()) {
+ *out = valid_data;
+ } else {
+ // valid data is a scalar that needs to be broadcasted
+ ARROW_ASSIGN_OR_RAISE(*out, MakeArrayFromScalar(*valid_data.scalar(), out_arr_len,
+ ctx->memory_pool()));
+ }
+ return Status::OK();
+ }
+
+ // AAA
+ static Status Call(KernelContext* ctx, const ArrayData& cond, const ArrayData& left,
+ const ArrayData& right, ArrayData* out) {
+ const auto* left_offsets = left.GetValues<OffsetType>(1);
+ const uint8_t* left_data = left.buffers[2]->data();
+ const auto* right_offsets = right.GetValues<OffsetType>(1);
+ const uint8_t* right_data = right.buffers[2]->data();
+
+ // allocate data buffer conservatively
+ int64_t data_buff_alloc = left_offsets[left.length] - left_offsets[0] +
+ right_offsets[right.length] - right_offsets[0];
+
+ BuilderType builder(ctx->memory_pool());
+ ARROW_RETURN_NOT_OK(builder.Reserve(cond.length + 1));
+ ARROW_RETURN_NOT_OK(builder.ReserveData(data_buff_alloc));
+
+ RunLoop(
+ cond, *out,
+ [&](int64_t i) {
+ builder.UnsafeAppend(left_data + left_offsets[i],
+ left_offsets[i + 1] - left_offsets[i]);
+ },
+ [&](int64_t i) {
+ builder.UnsafeAppend(right_data + right_offsets[i],
+ right_offsets[i + 1] - right_offsets[i]);
+ },
+ [&]() { builder.UnsafeAppendNull(); });
+ ARROW_ASSIGN_OR_RAISE(auto out_arr, builder.Finish());
+
+ out->SetNullCount(out_arr->data()->null_count);
+ out->buffers[0] = std::move(out_arr->data()->buffers[0]);
+ out->buffers[1] = std::move(out_arr->data()->buffers[1]);
+ out->buffers[2] = std::move(out_arr->data()->buffers[2]);
+ return Status::OK();
+ }
+
+ // ASA
+ static Status Call(KernelContext* ctx, const ArrayData& cond, const Scalar& left,
+ const ArrayData& right, ArrayData* out) {
+ util::string_view left_data = internal::UnboxScalar<Type>::Unbox(left);
+ auto left_size = static_cast<OffsetType>(left_data.size());
+
+ const auto* right_offsets = right.GetValues<OffsetType>(1);
+ const uint8_t* right_data = right.buffers[2]->data();
+
+ // allocate data buffer conservatively
+ int64_t data_buff_alloc =
+ left_size * cond.length + right_offsets[right.length] - right_offsets[0];
+
+ BuilderType builder(ctx->memory_pool());
+ ARROW_RETURN_NOT_OK(builder.Reserve(cond.length + 1));
+ ARROW_RETURN_NOT_OK(builder.ReserveData(data_buff_alloc));
+
+ RunLoop(
+ cond, *out, [&](int64_t i) { builder.UnsafeAppend(left_data.data(), left_size); },
+ [&](int64_t i) {
+ builder.UnsafeAppend(right_data + right_offsets[i],
+ right_offsets[i + 1] - right_offsets[i]);
+ },
+ [&]() { builder.UnsafeAppendNull(); });
+ ARROW_ASSIGN_OR_RAISE(auto out_arr, builder.Finish());
+
+ out->SetNullCount(out_arr->data()->null_count);
+ out->buffers[0] = std::move(out_arr->data()->buffers[0]);
+ out->buffers[1] = std::move(out_arr->data()->buffers[1]);
+ out->buffers[2] = std::move(out_arr->data()->buffers[2]);
+ return Status::OK();
+ }
+
+ // AAS
+ static Status Call(KernelContext* ctx, const ArrayData& cond, const ArrayData& left,
+ const Scalar& right, ArrayData* out) {
+ const auto* left_offsets = left.GetValues<OffsetType>(1);
+ const uint8_t* left_data = left.buffers[2]->data();
+
+ util::string_view right_data = internal::UnboxScalar<Type>::Unbox(right);
+ auto right_size = static_cast<OffsetType>(right_data.size());
+
+ // allocate data buffer conservatively
+ int64_t data_buff_alloc =
+ right_size * cond.length + left_offsets[left.length] - left_offsets[0];
+
+ BuilderType builder(ctx->memory_pool());
+ ARROW_RETURN_NOT_OK(builder.Reserve(cond.length + 1));
+ ARROW_RETURN_NOT_OK(builder.ReserveData(data_buff_alloc));
+
+ RunLoop(
+ cond, *out,
+ [&](int64_t i) {
+ builder.UnsafeAppend(left_data + left_offsets[i],
+ left_offsets[i + 1] - left_offsets[i]);
+ },
+ [&](int64_t i) { builder.UnsafeAppend(right_data.data(), right_size); },
+ [&]() { builder.UnsafeAppendNull(); });
+ ARROW_ASSIGN_OR_RAISE(auto out_arr, builder.Finish());
+
+ out->SetNullCount(out_arr->data()->null_count);
+ out->buffers[0] = std::move(out_arr->data()->buffers[0]);
+ out->buffers[1] = std::move(out_arr->data()->buffers[1]);
+ out->buffers[2] = std::move(out_arr->data()->buffers[2]);
+ return Status::OK();
+ }
+
+ // ASS
+ static Status Call(KernelContext* ctx, const ArrayData& cond, const Scalar& left,
+ const Scalar& right, ArrayData* out) {
+ util::string_view left_data = internal::UnboxScalar<Type>::Unbox(left);
+ auto left_size = static_cast<OffsetType>(left_data.size());
+
+ util::string_view right_data = internal::UnboxScalar<Type>::Unbox(right);
+ auto right_size = static_cast<OffsetType>(right_data.size());
+
+ // allocate data buffer conservatively
+ int64_t data_buff_alloc = std::max(right_size, left_size) * cond.length;
+ BuilderType builder(ctx->memory_pool());
+ ARROW_RETURN_NOT_OK(builder.Reserve(cond.length + 1));
+ ARROW_RETURN_NOT_OK(builder.ReserveData(data_buff_alloc));
+
+ RunLoop(
+ cond, *out, [&](int64_t i) { builder.UnsafeAppend(left_data.data(), left_size); },
+ [&](int64_t i) { builder.UnsafeAppend(right_data.data(), right_size); },
+ [&]() { builder.UnsafeAppendNull(); });
+ ARROW_ASSIGN_OR_RAISE(auto out_arr, builder.Finish());
+
+ out->SetNullCount(out_arr->data()->null_count);
+ out->buffers[0] = std::move(out_arr->data()->buffers[0]);
+ out->buffers[1] = std::move(out_arr->data()->buffers[1]);
+ out->buffers[2] = std::move(out_arr->data()->buffers[2]);
+ return Status::OK();
+ }
+
+ template <typename HandleLeft, typename HandleRight, typename HandleNull>
+ static void RunLoop(const ArrayData& cond, const ArrayData& output,
+ HandleLeft&& handle_left, HandleRight&& handle_right,
+ HandleNull&& handle_null) {
+ const auto* cond_data = cond.buffers[1]->data();
+
+ if (output.buffers[0]) { // output may have nulls
+      // The output validity buffer is allocated internally by the IfElseFunctor,
+      // so it is cond.length long with zero offset.
+ const auto* out_valid = output.buffers[0]->data();
+
+ for (int64_t i = 0; i < cond.length; i++) {
+ if (BitUtil::GetBit(out_valid, i)) {
+ BitUtil::GetBit(cond_data, cond.offset + i) ? handle_left(i) : handle_right(i);
+ } else {
+ handle_null();
+ }
+ }
+ } else { // output is all valid (no nulls)
+ for (int64_t i = 0; i < cond.length; i++) {
+ BitUtil::GetBit(cond_data, cond.offset + i) ? handle_left(i) : handle_right(i);
+ }
+ }
+ }
+};
+
+template <typename Type>
+struct IfElseFunctor<Type, enable_if_fixed_size_binary<Type>> {
+ // A - Array, S - Scalar, X = Array/Scalar
+
+ // SXX
+ static Status Call(KernelContext* ctx, const BooleanScalar& cond, const Datum& left,
+ const Datum& right, Datum* out) {
+ ARROW_ASSIGN_OR_RAISE(auto byte_width, GetByteWidth(*left.type(), *right.type()));
+ return RunIfElseScalar(
+ cond, left, right, out,
+ /*CopyArrayData*/
+ [&](const ArrayData& valid_array, ArrayData* out_array) {
+ std::memcpy(
+ out_array->buffers[1]->mutable_data() + out_array->offset * byte_width,
+ valid_array.buffers[1]->data() + valid_array.offset * byte_width,
+ valid_array.length * byte_width);
+ },
+ /*BroadcastScalar*/
+ [&](const Scalar& scalar, ArrayData* out_array) {
+ const util::string_view& scalar_data =
+ internal::UnboxScalar<FixedSizeBinaryType>::Unbox(scalar);
+ uint8_t* start =
+ out_array->buffers[1]->mutable_data() + out_array->offset * byte_width;
+ for (int64_t i = 0; i < out_array->length; i++) {
+ std::memcpy(start + i * byte_width, scalar_data.data(), scalar_data.size());
+ }
+ });
+ }
+
+ // AAA
+ static Status Call(KernelContext* ctx, const ArrayData& cond, const ArrayData& left,
+ const ArrayData& right, ArrayData* out) {
+ ARROW_ASSIGN_OR_RAISE(auto byte_width, GetByteWidth(*left.type, *right.type));
+ auto* out_values = out->buffers[1]->mutable_data() + out->offset * byte_width;
+
+ // copy right data to out_buff
+ const uint8_t* right_data = right.buffers[1]->data() + right.offset * byte_width;
+ std::memcpy(out_values, right_data, right.length * byte_width);
+
+ // selectively copy values from left data
+ const uint8_t* left_data = left.buffers[1]->data() + left.offset * byte_width;
+
+ RunIfElseLoop(cond, [&](int64_t data_offset, int64_t num_elems) {
+ std::memcpy(out_values + data_offset * byte_width,
+ left_data + data_offset * byte_width, num_elems * byte_width);
+ });
+
+ return Status::OK();
+ }
+
+ // ASA
+ static Status Call(KernelContext* ctx, const ArrayData& cond, const Scalar& left,
+ const ArrayData& right, ArrayData* out) {
+ ARROW_ASSIGN_OR_RAISE(auto byte_width, GetByteWidth(*left.type, *right.type));
+ auto* out_values = out->buffers[1]->mutable_data() + out->offset * byte_width;
+
+ // copy right data to out_buff
+ const uint8_t* right_data = right.buffers[1]->data() + right.offset * byte_width;
+ std::memcpy(out_values, right_data, right.length * byte_width);
+
+ // selectively copy values from left data
+ const util::string_view& left_data =
+ internal::UnboxScalar<FixedSizeBinaryType>::Unbox(left);
+
+ RunIfElseLoop(cond, [&](int64_t data_offset, int64_t num_elems) {
+ if (left_data.data()) {
+ for (int64_t i = 0; i < num_elems; i++) {
+ std::memcpy(out_values + (data_offset + i) * byte_width, left_data.data(),
+ left_data.size());
+ }
+ }
+ });
+
+ return Status::OK();
+ }
+
+ // AAS
+ static Status Call(KernelContext* ctx, const ArrayData& cond, const ArrayData& left,
+ const Scalar& right, ArrayData* out) {
+ ARROW_ASSIGN_OR_RAISE(auto byte_width, GetByteWidth(*left.type, *right.type));
+ auto* out_values = out->buffers[1]->mutable_data() + out->offset * byte_width;
+
+ // copy left data to out_buff
+ const uint8_t* left_data = left.buffers[1]->data() + left.offset * byte_width;
+ std::memcpy(out_values, left_data, left.length * byte_width);
+
+ const util::string_view& right_data =
+ internal::UnboxScalar<FixedSizeBinaryType>::Unbox(right);
+
+ RunIfElseLoopInverted(cond, [&](int64_t data_offset, int64_t num_elems) {
+ if (right_data.data()) {
+ for (int64_t i = 0; i < num_elems; i++) {
+ std::memcpy(out_values + (data_offset + i) * byte_width, right_data.data(),
+ right_data.size());
+ }
+ }
+ });
+
+ return Status::OK();
+ }
+
+ // ASS
+ static Status Call(KernelContext* ctx, const ArrayData& cond, const Scalar& left,
+ const Scalar& right, ArrayData* out) {
+ ARROW_ASSIGN_OR_RAISE(auto byte_width, GetByteWidth(*left.type, *right.type));
+ auto* out_values = out->buffers[1]->mutable_data() + out->offset * byte_width;
+
+ // copy right data to out_buff
+ const util::string_view& right_data =
+ internal::UnboxScalar<FixedSizeBinaryType>::Unbox(right);
+ if (right_data.data()) {
+ for (int64_t i = 0; i < cond.length; i++) {
+ std::memcpy(out_values + i * byte_width, right_data.data(), right_data.size());
+ }
+ }
+
+ // selectively copy values from left data
+ const util::string_view& left_data =
+ internal::UnboxScalar<FixedSizeBinaryType>::Unbox(left);
+
+ RunIfElseLoop(cond, [&](int64_t data_offset, int64_t num_elems) {
+ if (left_data.data()) {
+ for (int64_t i = 0; i < num_elems; i++) {
+ std::memcpy(out_values + (data_offset + i) * byte_width, left_data.data(),
+ left_data.size());
+ }
+ }
+ });
+
+ return Status::OK();
+ }
+
+ static Result<int32_t> GetByteWidth(const DataType& left_type,
+ const DataType& right_type) {
+ int width = checked_cast<const FixedSizeBinaryType&>(left_type).byte_width();
+ if (width == checked_cast<const FixedSizeBinaryType&>(right_type).byte_width()) {
+ return width;
+ } else {
+ return Status::Invalid("FixedSizeBinaryType byte_widths should be equal");
+ }
+ }
+};
+
+template <typename Type, typename AllocateMem>
+struct ResolveIfElseExec {
+ static Status Exec(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
+ // cond is scalar
+ if (batch[0].is_scalar()) {
+ const auto& cond = batch[0].scalar_as<BooleanScalar>();
+ return IfElseFunctor<Type>::Call(ctx, cond, batch[1], batch[2], out);
+ }
+
+    // cond is an array; promote nulls, then dispatch on the shapes of left and right
+ ARROW_RETURN_NOT_OK(PromoteNullsVisitor<AllocateMem>(ctx, batch[0], batch[1],
+ batch[2], out->mutable_array()));
+
+ if (batch[1].kind() == Datum::ARRAY) {
+ if (batch[2].kind() == Datum::ARRAY) { // AAA
+ return IfElseFunctor<Type>::Call(ctx, *batch[0].array(), *batch[1].array(),
+ *batch[2].array(), out->mutable_array());
+ } else { // AAS
+ return IfElseFunctor<Type>::Call(ctx, *batch[0].array(), *batch[1].array(),
+ *batch[2].scalar(), out->mutable_array());
+ }
+ } else {
+ if (batch[2].kind() == Datum::ARRAY) { // ASA
+ return IfElseFunctor<Type>::Call(ctx, *batch[0].array(), *batch[1].scalar(),
+ *batch[2].array(), out->mutable_array());
+ } else { // ASS
+ return IfElseFunctor<Type>::Call(ctx, *batch[0].array(), *batch[1].scalar(),
+ *batch[2].scalar(), out->mutable_array());
+ }
+ }
+ }
+};
+
+template <typename AllocateMem>
+struct ResolveIfElseExec<NullType, AllocateMem> {
+ static Status Exec(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
+ // if all are scalars, return a null scalar
+ if (batch[0].is_scalar() && batch[1].is_scalar() && batch[2].is_scalar()) {
+ *out = MakeNullScalar(null());
+ } else {
+ ARROW_ASSIGN_OR_RAISE(*out,
+ MakeArrayOfNull(null(), batch.length, ctx->memory_pool()));
+ }
+ return Status::OK();
+ }
+};
+
+struct IfElseFunction : ScalarFunction {
+ using ScalarFunction::ScalarFunction;
+
+ Result<const Kernel*> DispatchBest(std::vector<ValueDescr>* values) const override {
+ RETURN_NOT_OK(CheckArity(*values));
+
+ using arrow::compute::detail::DispatchExactImpl;
+ if (auto kernel = DispatchExactImpl(this, *values)) return kernel;
+
+ // if 0th descriptor is null, replace with bool
+ if (values->at(0).type->id() == Type::NA) {
+ values->at(0).type = boolean();
+ }
+
+    // the 0th descriptor of if_else is always bool, so skip it
+ std::vector<ValueDescr> values_copy(values->begin() + 1, values->end());
+ internal::EnsureDictionaryDecoded(&values_copy);
+ internal::ReplaceNullWithOtherType(&values_copy);
+
+ if (auto type = internal::CommonNumeric(values_copy)) {
+ internal::ReplaceTypes(type, &values_copy);
+ }
+
+ std::move(values_copy.begin(), values_copy.end(), values->begin() + 1);
+
+ if (auto kernel = DispatchExactImpl(this, *values)) return kernel;
+
+ return arrow::compute::detail::NoMatchingKernel(this, *values);
+ }
+};
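+
+// For instance (illustrative only): dispatching if_else on (boolean, int32,
+// float64) promotes the value arguments via CommonNumeric and selects the
+// float64 kernel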
+
+void AddNullIfElseKernel(const std::shared_ptr<IfElseFunction>& scalar_function) {
+ ScalarKernel kernel({boolean(), null(), null()}, null(),
+ ResolveIfElseExec<NullType,
+ /*AllocateMem=*/std::true_type>::Exec);
+ kernel.null_handling = NullHandling::COMPUTED_NO_PREALLOCATE;
+ kernel.mem_allocation = MemAllocation::NO_PREALLOCATE;
+ kernel.can_write_into_slices = false;
+
+ DCHECK_OK(scalar_function->AddKernel(std::move(kernel)));
+}
+
+void AddPrimitiveIfElseKernels(const std::shared_ptr<ScalarFunction>& scalar_function,
+ const std::vector<std::shared_ptr<DataType>>& types) {
+ for (auto&& type : types) {
+ auto exec =
+ internal::GenerateTypeAgnosticPrimitive<ResolveIfElseExec,
+ /*AllocateMem=*/std::false_type>(*type);
+ // cond array needs to be boolean always
+ ScalarKernel kernel({boolean(), type, type}, type, exec);
+ kernel.null_handling = NullHandling::COMPUTED_PREALLOCATE;
+ kernel.mem_allocation = MemAllocation::PREALLOCATE;
+ kernel.can_write_into_slices = true;
+
+ DCHECK_OK(scalar_function->AddKernel(std::move(kernel)));
+ }
+}
+
+void AddBinaryIfElseKernels(const std::shared_ptr<IfElseFunction>& scalar_function,
+ const std::vector<std::shared_ptr<DataType>>& types) {
+ for (auto&& type : types) {
+ auto exec =
+ internal::GenerateTypeAgnosticVarBinaryBase<ResolveIfElseExec,
+ /*AllocateMem=*/std::true_type>(
+ *type);
+ // cond array needs to be boolean always
+ ScalarKernel kernel({boolean(), type, type}, type, exec);
+ kernel.null_handling = NullHandling::COMPUTED_NO_PREALLOCATE;
+ kernel.mem_allocation = MemAllocation::NO_PREALLOCATE;
+ kernel.can_write_into_slices = false;
+
+ DCHECK_OK(scalar_function->AddKernel(std::move(kernel)));
+ }
+}
+
+void AddFSBinaryIfElseKernel(const std::shared_ptr<IfElseFunction>& scalar_function) {
+ // cond array needs to be boolean always
+ ScalarKernel kernel(
+ {boolean(), InputType(Type::FIXED_SIZE_BINARY), InputType(Type::FIXED_SIZE_BINARY)},
+ OutputType([](KernelContext*, const std::vector<ValueDescr>& descrs) {
+ return ValueDescr(descrs[1].type, ValueDescr::ANY);
+ }),
+ ResolveIfElseExec<FixedSizeBinaryType, /*AllocateMem=*/std::false_type>::Exec);
+ kernel.null_handling = NullHandling::COMPUTED_PREALLOCATE;
+ kernel.mem_allocation = MemAllocation::PREALLOCATE;
+ kernel.can_write_into_slices = true;
+
+ DCHECK_OK(scalar_function->AddKernel(std::move(kernel)));
+}
+
+// Helper to copy or broadcast fixed-width values between buffers.
+template <typename Type, typename Enable = void>
+struct CopyFixedWidth {};
+template <>
+struct CopyFixedWidth<BooleanType> {
+ static void CopyScalar(const Scalar& scalar, const int64_t length,
+ uint8_t* raw_out_values, const int64_t out_offset) {
+ const bool value = UnboxScalar<BooleanType>::Unbox(scalar);
+ BitUtil::SetBitsTo(raw_out_values, out_offset, length, value);
+ }
+ static void CopyArray(const DataType&, const uint8_t* in_values,
+ const int64_t in_offset, const int64_t length,
+ uint8_t* raw_out_values, const int64_t out_offset) {
+ arrow::internal::CopyBitmap(in_values, in_offset, length, raw_out_values, out_offset);
+ }
+};
+template <typename Type>
+struct CopyFixedWidth<Type, enable_if_number<Type>> {
+ using CType = typename TypeTraits<Type>::CType;
+ static void CopyScalar(const Scalar& scalar, const int64_t length,
+ uint8_t* raw_out_values, const int64_t out_offset) {
+ CType* out_values = reinterpret_cast<CType*>(raw_out_values);
+ const CType value = UnboxScalar<Type>::Unbox(scalar);
+ std::fill(out_values + out_offset, out_values + out_offset + length, value);
+ }
+ static void CopyArray(const DataType&, const uint8_t* in_values,
+ const int64_t in_offset, const int64_t length,
+ uint8_t* raw_out_values, const int64_t out_offset) {
+ std::memcpy(raw_out_values + out_offset * sizeof(CType),
+ in_values + in_offset * sizeof(CType), length * sizeof(CType));
+ }
+};
+template <typename Type>
+struct CopyFixedWidth<Type, enable_if_same<Type, FixedSizeBinaryType>> {
+ static void CopyScalar(const Scalar& values, const int64_t length,
+ uint8_t* raw_out_values, const int64_t out_offset) {
+ const int32_t width =
+ checked_cast<const FixedSizeBinaryType&>(*values.type).byte_width();
+ uint8_t* next = raw_out_values + (width * out_offset);
+ const auto& scalar = checked_cast<const FixedSizeBinaryScalar&>(values);
+ // Scalar may have null value buffer
+ if (!scalar.value) {
+ std::memset(next, 0x00, width * length);
+ } else {
+ DCHECK_EQ(scalar.value->size(), width);
+ for (int i = 0; i < length; i++) {
+ std::memcpy(next, scalar.value->data(), width);
+ next += width;
+ }
+ }
+ }
+ static void CopyArray(const DataType& type, const uint8_t* in_values,
+ const int64_t in_offset, const int64_t length,
+ uint8_t* raw_out_values, const int64_t out_offset) {
+ const int32_t width = checked_cast<const FixedSizeBinaryType&>(type).byte_width();
+ uint8_t* next = raw_out_values + (width * out_offset);
+ std::memcpy(next, in_values + in_offset * width, length * width);
+ }
+};
+template <typename Type>
+struct CopyFixedWidth<Type, enable_if_decimal<Type>> {
+ using ScalarType = typename TypeTraits<Type>::ScalarType;
+ static void CopyScalar(const Scalar& values, const int64_t length,
+ uint8_t* raw_out_values, const int64_t out_offset) {
+ const int32_t width =
+ checked_cast<const FixedSizeBinaryType&>(*values.type).byte_width();
+ uint8_t* next = raw_out_values + (width * out_offset);
+ const auto& scalar = checked_cast<const ScalarType&>(values);
+ const auto value = scalar.value.ToBytes();
+ for (int i = 0; i < length; i++) {
+ std::memcpy(next, value.data(), width);
+ next += width;
+ }
+ }
+ static void CopyArray(const DataType& type, const uint8_t* in_values,
+ const int64_t in_offset, const int64_t length,
+ uint8_t* raw_out_values, const int64_t out_offset) {
+ const int32_t width = checked_cast<const FixedSizeBinaryType&>(type).byte_width();
+ uint8_t* next = raw_out_values + (width * out_offset);
+ std::memcpy(next, in_values + in_offset * width, length * width);
+ }
+};
+// Copy fixed-width values from a scalar/array datum into an output values buffer
+template <typename Type>
+void CopyValues(const Datum& in_values, const int64_t in_offset, const int64_t length,
+ uint8_t* out_valid, uint8_t* out_values, const int64_t out_offset) {
+ if (in_values.is_scalar()) {
+ const auto& scalar = *in_values.scalar();
+ if (out_valid) {
+ BitUtil::SetBitsTo(out_valid, out_offset, length, scalar.is_valid);
+ }
+ CopyFixedWidth<Type>::CopyScalar(scalar, length, out_values, out_offset);
+ } else {
+ const ArrayData& array = *in_values.array();
+ if (out_valid) {
+ if (array.MayHaveNulls()) {
+ if (length == 1) {
+ // CopyBitmap is slow for short runs
+ BitUtil::SetBitTo(
+ out_valid, out_offset,
+ BitUtil::GetBit(array.buffers[0]->data(), array.offset + in_offset));
+ } else {
+ arrow::internal::CopyBitmap(array.buffers[0]->data(), array.offset + in_offset,
+ length, out_valid, out_offset);
+ }
+ } else {
+ BitUtil::SetBitsTo(out_valid, out_offset, length, true);
+ }
+ }
+ CopyFixedWidth<Type>::CopyArray(*array.type, array.buffers[1]->data(),
+ array.offset + in_offset, length, out_values,
+ out_offset);
+ }
+}
+
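+// Illustrative sketch (not part of the upstream sources): broadcasting a valid
+// int32 scalar into an 8-slot preallocated output with CopyValues. The buffer
+// names are hypothetical; in the kernels below they point into the output
+// ArrayData's validity and values buffers.
+//
+//   Datum value(std::make_shared<Int32Scalar>(42));
+//   // out_valid/out_values are mutable pointers into preallocated buffers
+//   CopyValues<Int32Type>(value, /*in_offset=*/0, /*length=*/8,
+//                         out_valid, out_values, /*out_offset=*/0);
+//   // => validity bits [0, 8) set to 1, values [0, 8) filled with 42
+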
+// Specialized helper to copy a single value from a source array. Allows avoiding
+// repeatedly calling MayHaveNulls and Buffer::data() which have internal checks that
+// add up when called in a loop.
+template <typename Type>
+void CopyOneArrayValue(const DataType& type, const uint8_t* in_valid,
+ const uint8_t* in_values, const int64_t in_offset,
+ uint8_t* out_valid, uint8_t* out_values,
+ const int64_t out_offset) {
+ if (out_valid) {
+ BitUtil::SetBitTo(out_valid, out_offset,
+ !in_valid || BitUtil::GetBit(in_valid, in_offset));
+ }
+ CopyFixedWidth<Type>::CopyArray(type, in_values, in_offset, /*length=*/1, out_values,
+ out_offset);
+}
+
+struct CaseWhenFunction : ScalarFunction {
+ using ScalarFunction::ScalarFunction;
+
+ Result<const Kernel*> DispatchBest(std::vector<ValueDescr>* values) const override {
+    // The first argument is a struct of booleans, where the number of fields in the
+ // struct is either equal to the number of other arguments or is one less.
+ RETURN_NOT_OK(CheckArity(*values));
+ EnsureDictionaryDecoded(values);
+ auto first_type = (*values)[0].type;
+ if (first_type->id() != Type::STRUCT) {
+ return Status::TypeError("case_when: first argument must be STRUCT, not ",
+ *first_type);
+ }
+ auto num_fields = static_cast<size_t>(first_type->num_fields());
+ if (num_fields < values->size() - 2 || num_fields >= values->size()) {
+ return Status::Invalid(
+ "case_when: number of struct fields must be equal to or one less than count of "
+ "remaining arguments (",
+ values->size() - 1, "), got: ", first_type->num_fields());
+ }
+ for (const auto& field : first_type->fields()) {
+ if (field->type()->id() != Type::BOOL) {
+ return Status::TypeError(
+ "case_when: all fields of first argument must be BOOL, but ", field->name(),
+ " was of type: ", *field->type());
+ }
+ }
+
+ if (auto type = CommonNumeric(values->data() + 1, values->size() - 1)) {
+ for (auto it = values->begin() + 1; it != values->end(); it++) {
+ it->type = type;
+ }
+ }
+ if (auto kernel = DispatchExactImpl(this, *values)) return kernel;
+ return arrow::compute::detail::NoMatchingKernel(this, *values);
+ }
+};
+
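+// Illustrative sketch (not part of the upstream sources): what DispatchBest
+// above accepts. `conds` is a struct-of-boolean datum; all names here are
+// hypothetical.
+//
+//   // conds: struct<a: bool, b: bool>, two cases plus one "else" value
+//   ARROW_ASSIGN_OR_RAISE(
+//       Datum result, CallFunction("case_when", {conds, case_a, case_b, otherwise}));
+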
+// Implement a 'case when' (SQL)/'select' (NumPy) function for any scalar conditions
+template <typename Type>
+Status ExecScalarCaseWhen(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
+ const auto& conds = checked_cast<const StructScalar&>(*batch.values[0].scalar());
+ if (!conds.is_valid) {
+ return Status::Invalid("cond struct must not be null");
+ }
+ Datum result;
+ for (size_t i = 0; i < batch.values.size() - 1; i++) {
+ if (i < conds.value.size()) {
+ const Scalar& cond = *conds.value[i];
+ if (cond.is_valid && internal::UnboxScalar<BooleanType>::Unbox(cond)) {
+ result = batch[i + 1];
+ break;
+ }
+ } else {
+ // ELSE clause
+ result = batch[i + 1];
+ break;
+ }
+ }
+ if (out->is_scalar()) {
+ *out = result.is_scalar() ? result.scalar() : MakeNullScalar(out->type());
+ return Status::OK();
+ }
+ ArrayData* output = out->mutable_array();
+ if (!result.is_value()) {
+ // All conditions false, no 'else' argument
+ result = MakeNullScalar(out->type());
+ }
+ CopyValues<Type>(result, /*in_offset=*/0, batch.length,
+ output->GetMutableValues<uint8_t>(0, 0),
+ output->GetMutableValues<uint8_t>(1, 0), output->offset);
+ return Status::OK();
+}
+
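+// Illustrative trace (not part of the upstream sources): with scalar conds
+// {false, true} and batch values {cond, x, y, z}, the loop above selects y
+// (the second case); with conds {false, false} it falls through to z, the
+// "else" argument; and with no "else" argument at all it yields a null scalar.
+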
+// Implement 'case when' for any mix of scalar/array arguments for any fixed-width type,
+// given helper functions to copy data from a source array to a target array
+template <typename Type>
+Status ExecArrayCaseWhen(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
+ const auto& conds_array = *batch.values[0].array();
+ if (conds_array.GetNullCount() > 0) {
+ return Status::Invalid("cond struct must not have top-level nulls");
+ }
+ ArrayData* output = out->mutable_array();
+ const int64_t out_offset = output->offset;
+ const auto num_value_args = batch.values.size() - 1;
+ const bool have_else_arg =
+ static_cast<size_t>(conds_array.type->num_fields()) < num_value_args;
+ uint8_t* out_valid = output->buffers[0]->mutable_data();
+ uint8_t* out_values = output->buffers[1]->mutable_data();
+ if (have_else_arg) {
+ // Copy 'else' value into output
+ CopyValues<Type>(batch.values.back(), /*in_offset=*/0, batch.length, out_valid,
+ out_values, out_offset);
+ } else {
+ // There's no 'else' argument, so we should have an all-null validity bitmap
+ BitUtil::SetBitsTo(out_valid, out_offset, batch.length, false);
+ }
+
+ // Allocate a temporary bitmap to determine which elements still need setting.
+ ARROW_ASSIGN_OR_RAISE(auto mask_buffer, ctx->AllocateBitmap(batch.length));
+ uint8_t* mask = mask_buffer->mutable_data();
+ std::memset(mask, 0xFF, mask_buffer->size());
+
+ // Then iterate through each argument in turn and set elements.
+ for (size_t i = 0; i < batch.values.size() - (have_else_arg ? 2 : 1); i++) {
+ const ArrayData& cond_array = *conds_array.child_data[i];
+ const int64_t cond_offset = conds_array.offset + cond_array.offset;
+ const uint8_t* cond_values = cond_array.buffers[1]->data();
+ const Datum& values_datum = batch[i + 1];
+ int64_t offset = 0;
+
+ if (cond_array.GetNullCount() == 0) {
+      // No validity buffer: visit the mask & cond bitmaps simultaneously
+ BinaryBitBlockCounter counter(mask, /*start_offset=*/0, cond_values, cond_offset,
+ batch.length);
+ while (offset < batch.length) {
+ const auto block = counter.NextAndWord();
+ if (block.AllSet()) {
+ CopyValues<Type>(values_datum, offset, block.length, out_valid, out_values,
+ out_offset + offset);
+ BitUtil::SetBitsTo(mask, offset, block.length, false);
+ } else if (block.popcount) {
+ for (int64_t j = 0; j < block.length; ++j) {
+ if (BitUtil::GetBit(mask, offset + j) &&
+ BitUtil::GetBit(cond_values, cond_offset + offset + j)) {
+ CopyValues<Type>(values_datum, offset + j, /*length=*/1, out_valid,
+ out_values, out_offset + offset + j);
+ BitUtil::SetBitTo(mask, offset + j, false);
+ }
+ }
+ }
+ offset += block.length;
+ }
+ } else {
+ // Visit mask & cond bitmap & cond validity
+ const uint8_t* cond_valid = cond_array.buffers[0]->data();
+ Bitmap bitmaps[3] = {{mask, /*offset=*/0, batch.length},
+ {cond_values, cond_offset, batch.length},
+ {cond_valid, cond_offset, batch.length}};
+ Bitmap::VisitWords(bitmaps, [&](std::array<uint64_t, 3> words) {
+ const uint64_t word = words[0] & words[1] & words[2];
+ const int64_t block_length = std::min<int64_t>(64, batch.length - offset);
+ if (word == std::numeric_limits<uint64_t>::max()) {
+ CopyValues<Type>(values_datum, offset, block_length, out_valid, out_values,
+ out_offset + offset);
+ BitUtil::SetBitsTo(mask, offset, block_length, false);
+ } else if (word) {
+ for (int64_t j = 0; j < block_length; ++j) {
+ if (BitUtil::GetBit(mask, offset + j) &&
+ BitUtil::GetBit(cond_valid, cond_offset + offset + j) &&
+ BitUtil::GetBit(cond_values, cond_offset + offset + j)) {
+ CopyValues<Type>(values_datum, offset + j, /*length=*/1, out_valid,
+ out_values, out_offset + offset + j);
+ BitUtil::SetBitTo(mask, offset + j, false);
+ }
+ }
+ }
+ });
+ }
+ }
+ if (!have_else_arg) {
+ // Need to initialize any remaining null slots (uninitialized memory)
+ BitBlockCounter counter(mask, /*offset=*/0, batch.length);
+ int64_t offset = 0;
+ auto bit_width = checked_cast<const FixedWidthType&>(*out->type()).bit_width();
+ auto byte_width = BitUtil::BytesForBits(bit_width);
+ while (offset < batch.length) {
+ const auto block = counter.NextWord();
+ if (block.AllSet()) {
+ if (bit_width == 1) {
+ BitUtil::SetBitsTo(out_values, out_offset + offset, block.length, false);
+ } else {
+ std::memset(out_values + (out_offset + offset) * byte_width, 0x00,
+ byte_width * block.length);
+ }
+ } else if (!block.NoneSet()) {
+ for (int64_t j = 0; j < block.length; ++j) {
+ if (BitUtil::GetBit(out_valid, out_offset + offset + j)) continue;
+ if (bit_width == 1) {
+ BitUtil::ClearBit(out_values, out_offset + offset + j);
+ } else {
+ std::memset(out_values + (out_offset + offset + j) * byte_width, 0x00,
+ byte_width);
+ }
+ }
+ }
+ offset += block.length;
+ }
+ }
+ return Status::OK();
+}
+
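+// Illustrative trace (not part of the upstream sources): for batch.length = 4,
+// conds struct<a, b> with a = [1,0,0,1], b = [1,1,0,0] and cases {x, y}:
+//   the mask starts as 1111; the pass over `a` copies x into slots 0 and 3
+//   (mask becomes 0110), the pass over `b` copies y into slot 1 (mask 0010);
+//   slot 2 is never set, stays null, and has its value bytes zeroed by the
+//   cleanup loop above.
+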
+template <typename Type, typename Enable = void>
+struct CaseWhenFunctor {
+ static Status Exec(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
+ if (batch.values[0].is_array()) {
+ return ExecArrayCaseWhen<Type>(ctx, batch, out);
+ }
+ return ExecScalarCaseWhen<Type>(ctx, batch, out);
+ }
+};
+
+template <>
+struct CaseWhenFunctor<NullType> {
+ static Status Exec(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
+ return Status::OK();
+ }
+};
+
+struct CoalesceFunction : ScalarFunction {
+ using ScalarFunction::ScalarFunction;
+
+ Result<const Kernel*> DispatchBest(std::vector<ValueDescr>* values) const override {
+ RETURN_NOT_OK(CheckArity(*values));
+ using arrow::compute::detail::DispatchExactImpl;
+ if (auto kernel = DispatchExactImpl(this, *values)) return kernel;
+ EnsureDictionaryDecoded(values);
+ if (auto type = CommonNumeric(*values)) {
+ ReplaceTypes(type, values);
+ }
+ if (auto kernel = DispatchExactImpl(this, *values)) return kernel;
+ return arrow::compute::detail::NoMatchingKernel(this, *values);
+ }
+};
+
+// Implement a 'coalesce' (SQL) operator for any number of scalar inputs
+Status ExecScalarCoalesce(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
+ for (const auto& datum : batch.values) {
+ if (datum.scalar()->is_valid) {
+ *out = datum;
+ break;
+ }
+ }
+ return Status::OK();
+}
+
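+// Illustrative sketch (not part of the upstream sources): with all-scalar
+// inputs, coalesce simply picks the first valid one; datum names are
+// hypothetical.
+//
+//   // null_i32 is a null int32 scalar; five and three are valid scalars
+//   ARROW_ASSIGN_OR_RAISE(Datum d, CallFunction("coalesce", {null_i32, five, three}));
+//   // d is the scalar 5; if every input is null, the loop above leaves the
+//   // preallocated null output untouched
+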
+// Helper: copy from a source datum into all null slots of the output
+template <typename Type>
+void CopyValuesAllValid(Datum source, uint8_t* out_valid, uint8_t* out_values,
+ const int64_t out_offset, const int64_t length) {
+ BitBlockCounter counter(out_valid, out_offset, length);
+ int64_t offset = 0;
+ while (offset < length) {
+ const auto block = counter.NextWord();
+ if (block.NoneSet()) {
+ CopyValues<Type>(source, offset, block.length, out_valid, out_values,
+ out_offset + offset);
+ } else if (!block.AllSet()) {
+ for (int64_t j = 0; j < block.length; ++j) {
+ if (!BitUtil::GetBit(out_valid, out_offset + offset + j)) {
+ CopyValues<Type>(source, offset + j, 1, out_valid, out_values,
+ out_offset + offset + j);
+ }
+ }
+ }
+ offset += block.length;
+ }
+}
+
+// Helper: zero the values buffer of the output wherever the slot is null
+void InitializeNullSlots(const DataType& type, uint8_t* out_valid, uint8_t* out_values,
+ const int64_t out_offset, const int64_t length) {
+ BitBlockCounter counter(out_valid, out_offset, length);
+ int64_t offset = 0;
+ auto bit_width = checked_cast<const FixedWidthType&>(type).bit_width();
+ auto byte_width = BitUtil::BytesForBits(bit_width);
+ while (offset < length) {
+ const auto block = counter.NextWord();
+ if (block.NoneSet()) {
+ if (bit_width == 1) {
+ BitUtil::SetBitsTo(out_values, out_offset + offset, block.length, false);
+ } else {
+ std::memset(out_values + (out_offset + offset) * byte_width, 0x00,
+ byte_width * block.length);
+ }
+ } else if (!block.AllSet()) {
+ for (int64_t j = 0; j < block.length; ++j) {
+ if (BitUtil::GetBit(out_valid, out_offset + offset + j)) continue;
+ if (bit_width == 1) {
+ BitUtil::ClearBit(out_values, out_offset + offset + j);
+ } else {
+ std::memset(out_values + (out_offset + offset + j) * byte_width, 0x00,
+ byte_width);
+ }
+ }
+ }
+ offset += block.length;
+ }
+}
+
+// Implement 'coalesce' for any mix of scalar/array arguments for any fixed-width type
+template <typename Type>
+Status ExecArrayCoalesce(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
+ ArrayData* output = out->mutable_array();
+ const int64_t out_offset = output->offset;
+ // Use output validity buffer as mask to decide what values to copy
+ uint8_t* out_valid = output->buffers[0]->mutable_data();
+ // Clear output buffer - no values are set initially
+ BitUtil::SetBitsTo(out_valid, out_offset, batch.length, false);
+ uint8_t* out_values = output->buffers[1]->mutable_data();
+
+ for (const auto& datum : batch.values) {
+ if ((datum.is_scalar() && datum.scalar()->is_valid) ||
+ (datum.is_array() && !datum.array()->MayHaveNulls())) {
+ // Valid scalar, or all-valid array
+ CopyValuesAllValid<Type>(datum, out_valid, out_values, out_offset, batch.length);
+ break;
+ } else if (datum.is_array()) {
+ // Array with nulls
+ const ArrayData& arr = *datum.array();
+ const DataType& type = *datum.type();
+ const uint8_t* in_valid = arr.buffers[0]->data();
+ const uint8_t* in_values = arr.buffers[1]->data();
+ BinaryBitBlockCounter counter(in_valid, arr.offset, out_valid, out_offset,
+ batch.length);
+ int64_t offset = 0;
+ while (offset < batch.length) {
+ const auto block = counter.NextAndNotWord();
+ if (block.AllSet()) {
+ CopyValues<Type>(datum, offset, block.length, out_valid, out_values,
+ out_offset + offset);
+ } else if (block.popcount) {
+ for (int64_t j = 0; j < block.length; ++j) {
+ if (!BitUtil::GetBit(out_valid, out_offset + offset + j) &&
+ BitUtil::GetBit(in_valid, arr.offset + offset + j)) {
+ // This version lets us avoid calling MayHaveNulls() on every iteration
+ // (which does an atomic load and can add up)
+ CopyOneArrayValue<Type>(type, in_valid, in_values, arr.offset + offset + j,
+ out_valid, out_values, out_offset + offset + j);
+ }
+ }
+ }
+ offset += block.length;
+ }
+ }
+ }
+
+ // Initialize any remaining null slots (uninitialized memory)
+ InitializeNullSlots(*out->type(), out_valid, out_values, out_offset, batch.length);
+ return Status::OK();
+}
+
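+// Illustrative sketch (not part of the upstream sources): element-wise
+// coalesce over arrays; the values shown are hypothetical.
+//
+//   // a = [null, 2, null], b = [1, null, null], c = [7, 7, 7]
+//   ARROW_ASSIGN_OR_RAISE(Datum d, CallFunction("coalesce", {a, b, c}));
+//   // d = [1, 2, 7]
+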
+template <typename Type, typename Enable = void>
+struct CoalesceFunctor {
+ static Status Exec(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
+ for (const auto& datum : batch.values) {
+ if (datum.is_array()) {
+ return ExecArrayCoalesce<Type>(ctx, batch, out);
+ }
+ }
+ return ExecScalarCoalesce(ctx, batch, out);
+ }
+};
+
+template <>
+struct CoalesceFunctor<NullType> {
+ static Status Exec(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
+ return Status::OK();
+ }
+};
+
+template <typename Type>
+struct CoalesceFunctor<Type, enable_if_base_binary<Type>> {
+ using offset_type = typename Type::offset_type;
+ using BuilderType = typename TypeTraits<Type>::BuilderType;
+ static Status Exec(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
+ for (const auto& datum : batch.values) {
+ if (datum.is_array()) {
+ return ExecArray(ctx, batch, out);
+ }
+ }
+ return ExecScalarCoalesce(ctx, batch, out);
+ }
+
+ static Status ExecArray(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
+ // Special case: grab any leading non-null scalar or array arguments
+ for (const auto& datum : batch.values) {
+ if (datum.is_scalar()) {
+ if (!datum.scalar()->is_valid) continue;
+ ARROW_ASSIGN_OR_RAISE(
+ *out, MakeArrayFromScalar(*datum.scalar(), batch.length, ctx->memory_pool()));
+ return Status::OK();
+ } else if (datum.is_array() && !datum.array()->MayHaveNulls()) {
+ *out = datum;
+ return Status::OK();
+ }
+ break;
+ }
+ ArrayData* output = out->mutable_array();
+ BuilderType builder(batch[0].type(), ctx->memory_pool());
+ RETURN_NOT_OK(builder.Reserve(batch.length));
+ for (int64_t i = 0; i < batch.length; i++) {
+ bool set = false;
+ for (const auto& datum : batch.values) {
+ if (datum.is_scalar()) {
+ if (datum.scalar()->is_valid) {
+ RETURN_NOT_OK(builder.Append(UnboxScalar<Type>::Unbox(*datum.scalar())));
+ set = true;
+ break;
+ }
+ } else {
+ const ArrayData& source = *datum.array();
+ if (!source.MayHaveNulls() ||
+ BitUtil::GetBit(source.buffers[0]->data(), source.offset + i)) {
+ const uint8_t* data = source.buffers[2]->data();
+ const offset_type* offsets = source.GetValues<offset_type>(1);
+ const offset_type offset0 = offsets[i];
+ const offset_type offset1 = offsets[i + 1];
+ RETURN_NOT_OK(builder.Append(data + offset0, offset1 - offset0));
+ set = true;
+ break;
+ }
+ }
+ }
+ if (!set) RETURN_NOT_OK(builder.AppendNull());
+ }
+ ARROW_ASSIGN_OR_RAISE(auto temp_output, builder.Finish());
+ *output = *temp_output->data();
+ // Builder type != logical type due to GenerateTypeAgnosticVarBinaryBase
+ output->type = batch[0].type();
+ return Status::OK();
+ }
+};
+
+Result<ValueDescr> LastType(KernelContext*, const std::vector<ValueDescr>& descrs) {
+ ValueDescr result = descrs.back();
+ result.shape = GetBroadcastShape(descrs);
+ return result;
+}
+
+void AddCaseWhenKernel(const std::shared_ptr<CaseWhenFunction>& scalar_function,
+ detail::GetTypeId get_id, ArrayKernelExec exec) {
+ ScalarKernel kernel(
+ KernelSignature::Make({InputType(Type::STRUCT), InputType(get_id.id)},
+ OutputType(LastType),
+ /*is_varargs=*/true),
+ exec);
+ kernel.null_handling = NullHandling::COMPUTED_PREALLOCATE;
+ kernel.mem_allocation = MemAllocation::PREALLOCATE;
+ kernel.can_write_into_slices = is_fixed_width(get_id.id);
+ DCHECK_OK(scalar_function->AddKernel(std::move(kernel)));
+}
+
+void AddPrimitiveCaseWhenKernels(const std::shared_ptr<CaseWhenFunction>& scalar_function,
+ const std::vector<std::shared_ptr<DataType>>& types) {
+ for (auto&& type : types) {
+ auto exec = GenerateTypeAgnosticPrimitive<CaseWhenFunctor>(*type);
+ AddCaseWhenKernel(scalar_function, type, std::move(exec));
+ }
+}
+
+void AddCoalesceKernel(const std::shared_ptr<ScalarFunction>& scalar_function,
+ detail::GetTypeId get_id, ArrayKernelExec exec) {
+ ScalarKernel kernel(KernelSignature::Make({InputType(get_id.id)}, OutputType(FirstType),
+ /*is_varargs=*/true),
+ exec);
+ kernel.null_handling = NullHandling::COMPUTED_PREALLOCATE;
+ kernel.mem_allocation = MemAllocation::PREALLOCATE;
+ kernel.can_write_into_slices = is_fixed_width(get_id.id);
+ DCHECK_OK(scalar_function->AddKernel(std::move(kernel)));
+}
+
+void AddPrimitiveCoalesceKernels(const std::shared_ptr<ScalarFunction>& scalar_function,
+ const std::vector<std::shared_ptr<DataType>>& types) {
+ for (auto&& type : types) {
+ auto exec = GenerateTypeAgnosticPrimitive<CoalesceFunctor>(*type);
+ AddCoalesceKernel(scalar_function, type, std::move(exec));
+ }
+}
+
+const FunctionDoc if_else_doc{"Choose values based on a condition",
+ ("`cond` must be a Boolean scalar/ array. \n`left` or "
+ "`right` must be of the same type scalar/ array.\n"
+ "`null` values in `cond` will be promoted to the"
+ " output."),
+ {"cond", "left", "right"}};
+
+const FunctionDoc case_when_doc{
+ "Choose values based on multiple conditions",
+ ("`cond` must be a struct of Boolean values. `cases` can be a mix "
+ "of scalar and array arguments (of any type, but all must be the "
+ "same type or castable to a common type), with either exactly one "
+ "datum per child of `cond`, or one more `cases` than children of "
+ "`cond` (in which case we have an \"else\" value).\n"
+ "Each row of the output will be the corresponding value of the "
+ "first datum in `cases` for which the corresponding child of `cond` "
+ "is true, or otherwise the \"else\" value (if given), or null. "
+ "Essentially, this implements a switch-case or if-else, if-else... "
+ "statement."),
+ {"cond", "*cases"}};
+
+const FunctionDoc coalesce_doc{
+ "Select the first non-null value in each slot",
+ ("Each row of the output will be the value from the first corresponding input "
+ "for which the value is not null. If all inputs are null in a row, the output "
+ "will be null."),
+ {"*values"}};
+} // namespace
+
+void RegisterScalarIfElse(FunctionRegistry* registry) {
+ {
+ auto func =
+ std::make_shared<IfElseFunction>("if_else", Arity::Ternary(), &if_else_doc);
+
+ AddPrimitiveIfElseKernels(func, NumericTypes());
+ AddPrimitiveIfElseKernels(func, TemporalTypes());
+ AddPrimitiveIfElseKernels(func, {boolean(), day_time_interval(), month_interval()});
+ AddNullIfElseKernel(func);
+ AddBinaryIfElseKernels(func, BaseBinaryTypes());
+ AddFSBinaryIfElseKernel(func);
+ DCHECK_OK(registry->AddFunction(std::move(func)));
+ }
+ {
+ auto func = std::make_shared<CaseWhenFunction>(
+ "case_when", Arity::VarArgs(/*min_args=*/1), &case_when_doc);
+ AddPrimitiveCaseWhenKernels(func, NumericTypes());
+ AddPrimitiveCaseWhenKernels(func, TemporalTypes());
+ AddPrimitiveCaseWhenKernels(
+ func, {boolean(), null(), day_time_interval(), month_interval()});
+ AddCaseWhenKernel(func, Type::FIXED_SIZE_BINARY,
+ CaseWhenFunctor<FixedSizeBinaryType>::Exec);
+ AddCaseWhenKernel(func, Type::DECIMAL128, CaseWhenFunctor<Decimal128Type>::Exec);
+ AddCaseWhenKernel(func, Type::DECIMAL256, CaseWhenFunctor<Decimal256Type>::Exec);
+ DCHECK_OK(registry->AddFunction(std::move(func)));
+ }
+ {
+ auto func = std::make_shared<CoalesceFunction>(
+ "coalesce", Arity::VarArgs(/*min_args=*/1), &coalesce_doc);
+ AddPrimitiveCoalesceKernels(func, NumericTypes());
+ AddPrimitiveCoalesceKernels(func, TemporalTypes());
+ AddPrimitiveCoalesceKernels(
+ func, {boolean(), null(), day_time_interval(), month_interval()});
+ AddCoalesceKernel(func, Type::FIXED_SIZE_BINARY,
+ CoalesceFunctor<FixedSizeBinaryType>::Exec);
+ AddCoalesceKernel(func, Type::DECIMAL128, CoalesceFunctor<Decimal128Type>::Exec);
+ AddCoalesceKernel(func, Type::DECIMAL256, CoalesceFunctor<Decimal256Type>::Exec);
+ for (const auto& ty : BaseBinaryTypes()) {
+ AddCoalesceKernel(func, ty, GenerateTypeAgnosticVarBinaryBase<CoalesceFunctor>(ty));
+ }
+ DCHECK_OK(registry->AddFunction(std::move(func)));
+ }
+}
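+
+// Illustrative sketch (not part of the upstream sources): once registered,
+// these functions are reachable through the generic compute entry point:
+//
+//   // cond = [true, false, null], left = [1, 2, 3], right = [10, 20, 30]
+//   ARROW_ASSIGN_OR_RAISE(Datum d, CallFunction("if_else", {cond, left, right}));
+//   // d = [1, 20, null]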
+
+} // namespace internal
+} // namespace compute
+} // namespace arrow
diff --git a/contrib/libs/apache/arrow/cpp/src/arrow/compute/kernels/scalar_nested.cc b/contrib/libs/apache/arrow/cpp/src/arrow/compute/kernels/scalar_nested.cc
index 7810577b1fe..e9f0696c8fd 100644
--- a/contrib/libs/apache/arrow/cpp/src/arrow/compute/kernels/scalar_nested.cc
+++ b/contrib/libs/apache/arrow/cpp/src/arrow/compute/kernels/scalar_nested.cc
@@ -18,7 +18,7 @@
// Vector kernels involving nested types
#include "arrow/array/array_base.h"
-#include "arrow/compute/api_scalar.h"
+#include "arrow/compute/api_scalar.h"
#include "arrow/compute/kernels/common.h"
#include "arrow/result.h"
#include "arrow/util/bit_block_counter.h"
@@ -29,7 +29,7 @@ namespace internal {
namespace {
template <typename Type, typename offset_type = typename Type::offset_type>
-Status ListValueLength(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
+Status ListValueLength(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
using ScalarType = typename TypeTraits<Type>::ScalarType;
using OffsetScalarType = typename TypeTraits<Type>::OffsetScalarType;
@@ -51,131 +51,131 @@ Status ListValueLength(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
static_cast<offset_type>(arg0.value->length());
}
}
-
- return Status::OK();
+
+ return Status::OK();
+}
+
+const FunctionDoc list_value_length_doc{
+ "Compute list lengths",
+ ("`lists` must have a list-like type.\n"
+ "For each non-null value in `lists`, its length is emitted.\n"
+ "Null values emit a null in the output."),
+ {"lists"}};
+
+Result<ValueDescr> MakeStructResolve(KernelContext* ctx,
+ const std::vector<ValueDescr>& descrs) {
+ auto names = OptionsWrapper<MakeStructOptions>::Get(ctx).field_names;
+ auto nullable = OptionsWrapper<MakeStructOptions>::Get(ctx).field_nullability;
+ auto metadata = OptionsWrapper<MakeStructOptions>::Get(ctx).field_metadata;
+
+ if (names.size() == 0) {
+ names.resize(descrs.size());
+ nullable.resize(descrs.size(), true);
+ metadata.resize(descrs.size(), nullptr);
+ int i = 0;
+ for (auto& name : names) {
+ name = std::to_string(i++);
+ }
+ } else if (names.size() != descrs.size() || nullable.size() != descrs.size() ||
+ metadata.size() != descrs.size()) {
+ return Status::Invalid("make_struct() was passed ", descrs.size(), " arguments but ",
+ names.size(), " field names, ", nullable.size(),
+ " nullability bits, and ", metadata.size(),
+ " metadata dictionaries.");
+ }
+
+ size_t i = 0;
+ FieldVector fields(descrs.size());
+
+ ValueDescr::Shape shape = ValueDescr::SCALAR;
+ for (const ValueDescr& descr : descrs) {
+ if (descr.shape != ValueDescr::SCALAR) {
+ shape = ValueDescr::ARRAY;
+ } else {
+ switch (descr.type->id()) {
+ case Type::EXTENSION:
+ case Type::DENSE_UNION:
+ case Type::SPARSE_UNION:
+ return Status::NotImplemented("Broadcasting scalars of type ", *descr.type);
+ default:
+ break;
+ }
+ }
+
+ fields[i] =
+ field(std::move(names[i]), descr.type, nullable[i], std::move(metadata[i]));
+ ++i;
+ }
+
+ return ValueDescr{struct_(std::move(fields)), shape};
+}
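+
+// Illustrative sketch (not part of the upstream sources): with no field names
+// in MakeStructOptions, the resolver above numbers the fields "0", "1", ...;
+// explicit names can be supplied instead (datum names are hypothetical):
+//
+//   MakeStructOptions opts({"x", "y"});
+//   ARROW_ASSIGN_OR_RAISE(Datum s, CallFunction("make_struct", {a, b}, &opts));
+//   // s has type struct<x: ..., y: ...>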
+
+Status MakeStructExec(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
+ ARROW_ASSIGN_OR_RAISE(auto descr, MakeStructResolve(ctx, batch.GetDescriptors()));
+
+ for (int i = 0; i < batch.num_values(); ++i) {
+ const auto& field = checked_cast<const StructType&>(*descr.type).field(i);
+ if (batch[i].null_count() > 0 && !field->nullable()) {
+ return Status::Invalid("Output field ", field, " (#", i,
+ ") does not allow nulls but the corresponding "
+ "argument was not entirely valid.");
+ }
+ }
+
+ if (descr.shape == ValueDescr::SCALAR) {
+ ScalarVector scalars(batch.num_values());
+ for (int i = 0; i < batch.num_values(); ++i) {
+ scalars[i] = batch[i].scalar();
+ }
+
+ *out =
+ Datum(std::make_shared<StructScalar>(std::move(scalars), std::move(descr.type)));
+ return Status::OK();
+ }
+
+ ArrayVector arrays(batch.num_values());
+ for (int i = 0; i < batch.num_values(); ++i) {
+ if (batch[i].is_array()) {
+ arrays[i] = batch[i].make_array();
+ continue;
+ }
+
+ ARROW_ASSIGN_OR_RAISE(arrays[i], MakeArrayFromScalar(*batch[i].scalar(), batch.length,
+ ctx->memory_pool()));
+ }
+
+ *out = std::make_shared<StructArray>(descr.type, batch.length, std::move(arrays));
+ return Status::OK();
}
-const FunctionDoc list_value_length_doc{
- "Compute list lengths",
- ("`lists` must have a list-like type.\n"
- "For each non-null value in `lists`, its length is emitted.\n"
- "Null values emit a null in the output."),
- {"lists"}};
-
-Result<ValueDescr> MakeStructResolve(KernelContext* ctx,
- const std::vector<ValueDescr>& descrs) {
- auto names = OptionsWrapper<MakeStructOptions>::Get(ctx).field_names;
- auto nullable = OptionsWrapper<MakeStructOptions>::Get(ctx).field_nullability;
- auto metadata = OptionsWrapper<MakeStructOptions>::Get(ctx).field_metadata;
-
- if (names.size() == 0) {
- names.resize(descrs.size());
- nullable.resize(descrs.size(), true);
- metadata.resize(descrs.size(), nullptr);
- int i = 0;
- for (auto& name : names) {
- name = std::to_string(i++);
- }
- } else if (names.size() != descrs.size() || nullable.size() != descrs.size() ||
- metadata.size() != descrs.size()) {
- return Status::Invalid("make_struct() was passed ", descrs.size(), " arguments but ",
- names.size(), " field names, ", nullable.size(),
- " nullability bits, and ", metadata.size(),
- " metadata dictionaries.");
- }
-
- size_t i = 0;
- FieldVector fields(descrs.size());
-
- ValueDescr::Shape shape = ValueDescr::SCALAR;
- for (const ValueDescr& descr : descrs) {
- if (descr.shape != ValueDescr::SCALAR) {
- shape = ValueDescr::ARRAY;
- } else {
- switch (descr.type->id()) {
- case Type::EXTENSION:
- case Type::DENSE_UNION:
- case Type::SPARSE_UNION:
- return Status::NotImplemented("Broadcasting scalars of type ", *descr.type);
- default:
- break;
- }
- }
-
- fields[i] =
- field(std::move(names[i]), descr.type, nullable[i], std::move(metadata[i]));
- ++i;
- }
-
- return ValueDescr{struct_(std::move(fields)), shape};
-}
-
-Status MakeStructExec(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
- ARROW_ASSIGN_OR_RAISE(auto descr, MakeStructResolve(ctx, batch.GetDescriptors()));
-
- for (int i = 0; i < batch.num_values(); ++i) {
- const auto& field = checked_cast<const StructType&>(*descr.type).field(i);
- if (batch[i].null_count() > 0 && !field->nullable()) {
- return Status::Invalid("Output field ", field, " (#", i,
- ") does not allow nulls but the corresponding "
- "argument was not entirely valid.");
- }
- }
-
- if (descr.shape == ValueDescr::SCALAR) {
- ScalarVector scalars(batch.num_values());
- for (int i = 0; i < batch.num_values(); ++i) {
- scalars[i] = batch[i].scalar();
- }
-
- *out =
- Datum(std::make_shared<StructScalar>(std::move(scalars), std::move(descr.type)));
- return Status::OK();
- }
-
- ArrayVector arrays(batch.num_values());
- for (int i = 0; i < batch.num_values(); ++i) {
- if (batch[i].is_array()) {
- arrays[i] = batch[i].make_array();
- continue;
- }
-
- ARROW_ASSIGN_OR_RAISE(arrays[i], MakeArrayFromScalar(*batch[i].scalar(), batch.length,
- ctx->memory_pool()));
- }
-
- *out = std::make_shared<StructArray>(descr.type, batch.length, std::move(arrays));
- return Status::OK();
-}
-
-const FunctionDoc make_struct_doc{"Wrap Arrays into a StructArray",
- ("Names of the StructArray's fields are\n"
- "specified through MakeStructOptions."),
- {"*args"},
- "MakeStructOptions"};
-
+const FunctionDoc make_struct_doc{"Wrap Arrays into a StructArray",
+ ("Names of the StructArray's fields are\n"
+ "specified through MakeStructOptions."),
+ {"*args"},
+ "MakeStructOptions"};
+
} // namespace
void RegisterScalarNested(FunctionRegistry* registry) {
- auto list_value_length = std::make_shared<ScalarFunction>(
- "list_value_length", Arity::Unary(), &list_value_length_doc);
+ auto list_value_length = std::make_shared<ScalarFunction>(
+ "list_value_length", Arity::Unary(), &list_value_length_doc);
DCHECK_OK(list_value_length->AddKernel({InputType(Type::LIST)}, int32(),
ListValueLength<ListType>));
DCHECK_OK(list_value_length->AddKernel({InputType(Type::LARGE_LIST)}, int64(),
ListValueLength<LargeListType>));
DCHECK_OK(registry->AddFunction(std::move(list_value_length)));
-
- static MakeStructOptions kDefaultMakeStructOptions;
- auto make_struct_function = std::make_shared<ScalarFunction>(
- "make_struct", Arity::VarArgs(), &make_struct_doc, &kDefaultMakeStructOptions);
-
- ScalarKernel kernel{KernelSignature::Make({InputType{}}, OutputType{MakeStructResolve},
- /*is_varargs=*/true),
- MakeStructExec, OptionsWrapper<MakeStructOptions>::Init};
- kernel.null_handling = NullHandling::OUTPUT_NOT_NULL;
- kernel.mem_allocation = MemAllocation::NO_PREALLOCATE;
- DCHECK_OK(make_struct_function->AddKernel(std::move(kernel)));
- DCHECK_OK(registry->AddFunction(std::move(make_struct_function)));
+
+ static MakeStructOptions kDefaultMakeStructOptions;
+ auto make_struct_function = std::make_shared<ScalarFunction>(
+ "make_struct", Arity::VarArgs(), &make_struct_doc, &kDefaultMakeStructOptions);
+
+ ScalarKernel kernel{KernelSignature::Make({InputType{}}, OutputType{MakeStructResolve},
+ /*is_varargs=*/true),
+ MakeStructExec, OptionsWrapper<MakeStructOptions>::Init};
+ kernel.null_handling = NullHandling::OUTPUT_NOT_NULL;
+ kernel.mem_allocation = MemAllocation::NO_PREALLOCATE;
+ DCHECK_OK(make_struct_function->AddKernel(std::move(kernel)));
+ DCHECK_OK(registry->AddFunction(std::move(make_struct_function)));
}
} // namespace internal
diff --git a/contrib/libs/apache/arrow/cpp/src/arrow/compute/kernels/scalar_set_lookup.cc b/contrib/libs/apache/arrow/cpp/src/arrow/compute/kernels/scalar_set_lookup.cc
index 8fe28aae920..3e2e95e5401 100644
--- a/contrib/libs/apache/arrow/cpp/src/arrow/compute/kernels/scalar_set_lookup.cc
+++ b/contrib/libs/apache/arrow/cpp/src/arrow/compute/kernels/scalar_set_lookup.cc
@@ -18,9 +18,9 @@
#include "arrow/array/array_base.h"
#include "arrow/array/builder_primitive.h"
#include "arrow/compute/api_scalar.h"
-#include "arrow/compute/cast.h"
+#include "arrow/compute/cast.h"
#include "arrow/compute/kernels/common.h"
-#include "arrow/compute/kernels/util_internal.h"
+#include "arrow/compute/kernels/util_internal.h"
#include "arrow/util/bit_util.h"
#include "arrow/util/bitmap_writer.h"
#include "arrow/util/hashing.h"
@@ -37,68 +37,68 @@ namespace {
template <typename Type>
struct SetLookupState : public KernelState {
- explicit SetLookupState(MemoryPool* pool) : lookup_table(pool, 0) {}
+ explicit SetLookupState(MemoryPool* pool) : lookup_table(pool, 0) {}
Status Init(const SetLookupOptions& options) {
- if (options.value_set.kind() == Datum::ARRAY) {
- const ArrayData& value_set = *options.value_set.array();
- memo_index_to_value_index.reserve(value_set.length);
- RETURN_NOT_OK(AddArrayValueSet(options, *options.value_set.array()));
- } else if (options.value_set.kind() == Datum::CHUNKED_ARRAY) {
- const ChunkedArray& value_set = *options.value_set.chunked_array();
- memo_index_to_value_index.reserve(value_set.length());
- int64_t offset = 0;
- for (const std::shared_ptr<Array>& chunk : value_set.chunks()) {
- RETURN_NOT_OK(AddArrayValueSet(options, *chunk->data(), offset));
- offset += chunk->length();
- }
- } else {
- return Status::Invalid("value_set should be an array or chunked array");
- }
- if (!options.skip_nulls && lookup_table.GetNull() >= 0) {
- null_index = memo_index_to_value_index[lookup_table.GetNull()];
- }
- return Status::OK();
- }
-
- Status AddArrayValueSet(const SetLookupOptions& options, const ArrayData& data,
- int64_t start_index = 0) {
+ if (options.value_set.kind() == Datum::ARRAY) {
+ const ArrayData& value_set = *options.value_set.array();
+ memo_index_to_value_index.reserve(value_set.length);
+ RETURN_NOT_OK(AddArrayValueSet(options, *options.value_set.array()));
+ } else if (options.value_set.kind() == Datum::CHUNKED_ARRAY) {
+ const ChunkedArray& value_set = *options.value_set.chunked_array();
+ memo_index_to_value_index.reserve(value_set.length());
+ int64_t offset = 0;
+ for (const std::shared_ptr<Array>& chunk : value_set.chunks()) {
+ RETURN_NOT_OK(AddArrayValueSet(options, *chunk->data(), offset));
+ offset += chunk->length();
+ }
+ } else {
+ return Status::Invalid("value_set should be an array or chunked array");
+ }
+ if (!options.skip_nulls && lookup_table.GetNull() >= 0) {
+ null_index = memo_index_to_value_index[lookup_table.GetNull()];
+ }
+ return Status::OK();
+ }
+
+ Status AddArrayValueSet(const SetLookupOptions& options, const ArrayData& data,
+ int64_t start_index = 0) {
using T = typename GetViewType<Type>::T;
- int32_t index = static_cast<int32_t>(start_index);
+ int32_t index = static_cast<int32_t>(start_index);
auto visit_valid = [&](T v) {
- const auto memo_size = static_cast<int32_t>(memo_index_to_value_index.size());
+ const auto memo_size = static_cast<int32_t>(memo_index_to_value_index.size());
int32_t unused_memo_index;
- auto on_found = [&](int32_t memo_index) { DCHECK_LT(memo_index, memo_size); };
- auto on_not_found = [&](int32_t memo_index) {
- DCHECK_EQ(memo_index, memo_size);
- memo_index_to_value_index.push_back(index);
- };
- RETURN_NOT_OK(lookup_table.GetOrInsert(
- v, std::move(on_found), std::move(on_not_found), &unused_memo_index));
- ++index;
- return Status::OK();
+ auto on_found = [&](int32_t memo_index) { DCHECK_LT(memo_index, memo_size); };
+ auto on_not_found = [&](int32_t memo_index) {
+ DCHECK_EQ(memo_index, memo_size);
+ memo_index_to_value_index.push_back(index);
+ };
+ RETURN_NOT_OK(lookup_table.GetOrInsert(
+ v, std::move(on_found), std::move(on_not_found), &unused_memo_index));
+ ++index;
+ return Status::OK();
};
auto visit_null = [&]() {
- const auto memo_size = static_cast<int32_t>(memo_index_to_value_index.size());
- auto on_found = [&](int32_t memo_index) { DCHECK_LT(memo_index, memo_size); };
- auto on_not_found = [&](int32_t memo_index) {
- DCHECK_EQ(memo_index, memo_size);
- memo_index_to_value_index.push_back(index);
- };
- lookup_table.GetOrInsertNull(std::move(on_found), std::move(on_not_found));
- ++index;
+ const auto memo_size = static_cast<int32_t>(memo_index_to_value_index.size());
+ auto on_found = [&](int32_t memo_index) { DCHECK_LT(memo_index, memo_size); };
+ auto on_not_found = [&](int32_t memo_index) {
+ DCHECK_EQ(memo_index, memo_size);
+ memo_index_to_value_index.push_back(index);
+ };
+ lookup_table.GetOrInsertNull(std::move(on_found), std::move(on_not_found));
+ ++index;
return Status::OK();
};
-
- return VisitArrayDataInline<Type>(data, visit_valid, visit_null);
+
+ return VisitArrayDataInline<Type>(data, visit_valid, visit_null);
}
using MemoTable = typename HashTraits<Type>::MemoTableType;
MemoTable lookup_table;
- // When there are duplicates in value_set, the MemoTable indices must
- // be mapped back to indices in the value_set.
- std::vector<int32_t> memo_index_to_value_index;
- int32_t null_index = -1;
+ // When there are duplicates in value_set, the MemoTable indices must
+ // be mapped back to indices in the value_set.
+ std::vector<int32_t> memo_index_to_value_index;
+ int32_t null_index = -1;
};
template <>
@@ -106,11 +106,11 @@ struct SetLookupState<NullType> : public KernelState {
explicit SetLookupState(MemoryPool*) {}
Status Init(const SetLookupOptions& options) {
- value_set_has_null = (options.value_set.length() > 0) && !options.skip_nulls;
+ value_set_has_null = (options.value_set.length() > 0) && !options.skip_nulls;
return Status::OK();
}
- bool value_set_has_null;
+ bool value_set_has_null;
};
// TODO: Put this concept somewhere reusable
@@ -140,20 +140,20 @@ struct UnsignedIntType<8> {
// Constructing the type requires a type parameter
struct InitStateVisitor {
KernelContext* ctx;
- SetLookupOptions options;
- const std::shared_ptr<DataType>& arg_type;
+ SetLookupOptions options;
+ const std::shared_ptr<DataType>& arg_type;
std::unique_ptr<KernelState> result;
- InitStateVisitor(KernelContext* ctx, const KernelInitArgs& args)
- : ctx(ctx),
- options(*checked_cast<const SetLookupOptions*>(args.options)),
- arg_type(args.inputs[0].type) {}
+ InitStateVisitor(KernelContext* ctx, const KernelInitArgs& args)
+ : ctx(ctx),
+ options(*checked_cast<const SetLookupOptions*>(args.options)),
+ arg_type(args.inputs[0].type) {}
template <typename Type>
Status Init() {
using StateType = SetLookupState<Type>;
result.reset(new StateType(ctx->exec_context()->memory_pool()));
- return static_cast<StateType*>(result.get())->Init(options);
+ return static_cast<StateType*>(result.get())->Init(options);
}
Status Visit(const DataType&) { return Init<NullType>(); }
@@ -177,26 +177,26 @@ struct InitStateVisitor {
// Handle Decimal128Type, FixedSizeBinaryType
Status Visit(const FixedSizeBinaryType& type) { return Init<FixedSizeBinaryType>(); }
- Result<std::unique_ptr<KernelState>> GetResult() {
- if (!options.value_set.type()->Equals(arg_type)) {
- ARROW_ASSIGN_OR_RAISE(
- options.value_set,
- Cast(options.value_set, CastOptions::Safe(arg_type), ctx->exec_context()));
- }
-
- RETURN_NOT_OK(VisitTypeInline(*arg_type, this));
- return std::move(result);
+ Result<std::unique_ptr<KernelState>> GetResult() {
+ if (!options.value_set.type()->Equals(arg_type)) {
+ ARROW_ASSIGN_OR_RAISE(
+ options.value_set,
+ Cast(options.value_set, CastOptions::Safe(arg_type), ctx->exec_context()));
+ }
+
+ RETURN_NOT_OK(VisitTypeInline(*arg_type, this));
+ return std::move(result);
}
};
-Result<std::unique_ptr<KernelState>> InitSetLookup(KernelContext* ctx,
- const KernelInitArgs& args) {
- if (args.options == nullptr) {
- return Status::Invalid(
- "Attempted to call a set lookup function without SetLookupOptions");
- }
-
- return InitStateVisitor{ctx, args}.GetResult();
+Result<std::unique_ptr<KernelState>> InitSetLookup(KernelContext* ctx,
+ const KernelInitArgs& args) {
+ if (args.options == nullptr) {
+ return Status::Invalid(
+ "Attempted to call a set lookup function without SetLookupOptions");
+ }
+
+ return InitStateVisitor{ctx, args}.GetResult();
}
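+
+// Illustrative sketch (not part of the upstream sources): set lookup functions
+// receive their value set through SetLookupOptions, e.g.
+//
+//   SetLookupOptions options(/*value_set=*/needles, /*skip_nulls=*/false);
+//   ARROW_ASSIGN_OR_RAISE(Datum mask, CallFunction("is_in", {haystack}, &options));
+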
struct IndexInVisitor {
@@ -208,18 +208,18 @@ struct IndexInVisitor {
IndexInVisitor(KernelContext* ctx, const ArrayData& data, Datum* out)
: ctx(ctx), data(data), out(out), builder(ctx->exec_context()->memory_pool()) {}
- Status Visit(const DataType& type) {
- DCHECK_EQ(type.id(), Type::NA);
+ Status Visit(const DataType& type) {
+ DCHECK_EQ(type.id(), Type::NA);
const auto& state = checked_cast<const SetLookupState<NullType>&>(*ctx->state());
if (data.length != 0) {
- // skip_nulls is honored for consistency with other types
- if (state.value_set_has_null) {
+ // skip_nulls is honored for consistency with other types
+ if (state.value_set_has_null) {
RETURN_NOT_OK(this->builder.Reserve(data.length));
for (int64_t i = 0; i < data.length; ++i) {
this->builder.UnsafeAppend(0);
}
- } else {
- RETURN_NOT_OK(this->builder.AppendNulls(data.length));
+ } else {
+ RETURN_NOT_OK(this->builder.AppendNulls(data.length));
}
}
return Status::OK();
@@ -238,16 +238,16 @@ struct IndexInVisitor {
int32_t index = state.lookup_table.Get(v);
if (index != -1) {
// matching needle; output index from value_set
- this->builder.UnsafeAppend(state.memo_index_to_value_index[index]);
+ this->builder.UnsafeAppend(state.memo_index_to_value_index[index]);
} else {
// no matching needle; output null
this->builder.UnsafeAppendNull();
}
},
[&]() {
- if (state.null_index != -1) {
+ if (state.null_index != -1) {
// value_set included null
- this->builder.UnsafeAppend(state.null_index);
+ this->builder.UnsafeAppend(state.null_index);
} else {
// value_set does not include null; output null
this->builder.UnsafeAppendNull();
@@ -290,13 +290,13 @@ struct IndexInVisitor {
}
};
-Status ExecIndexIn(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
- return IndexInVisitor(ctx, *batch[0].array(), out).Execute();
+Status ExecIndexIn(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
+ return IndexInVisitor(ctx, *batch[0].array(), out).Execute();
}
// ----------------------------------------------------------------------
-// IsIn writes the results into a preallocated boolean data bitmap
+// IsIn writes the results into a preallocated boolean data bitmap
struct IsInVisitor {
KernelContext* ctx;
const ArrayData& data;
@@ -305,13 +305,13 @@ struct IsInVisitor {
IsInVisitor(KernelContext* ctx, const ArrayData& data, Datum* out)
: ctx(ctx), data(data), out(out) {}
- Status Visit(const DataType& type) {
- DCHECK_EQ(type.id(), Type::NA);
+ Status Visit(const DataType& type) {
+ DCHECK_EQ(type.id(), Type::NA);
const auto& state = checked_cast<const SetLookupState<NullType>&>(*ctx->state());
ArrayData* output = out->mutable_array();
- // skip_nulls is honored for consistency with other types
- BitUtil::SetBitsTo(output->buffers[1]->mutable_data(), output->offset, output->length,
- state.value_set_has_null);
+ // skip_nulls is honored for consistency with other types
+ BitUtil::SetBitsTo(output->buffers[1]->mutable_data(), output->offset, output->length,
+ state.value_set_has_null);
return Status::OK();
}
@@ -323,7 +323,7 @@ struct IsInVisitor {
FirstTimeBitmapWriter writer(output->buffers[1]->mutable_data(), output->offset,
output->length);
-
+
VisitArrayDataInline<Type>(
this->data,
[&](T v) {
@@ -335,11 +335,11 @@ struct IsInVisitor {
writer.Next();
},
[&]() {
- if (state.null_index != -1) {
- writer.Set();
- } else {
- writer.Clear();
- }
+ if (state.null_index != -1) {
+ writer.Set();
+ } else {
+ writer.Clear();
+ }
writer.Next();
});
writer.Finish();
@@ -370,8 +370,8 @@ struct IsInVisitor {
Status Execute() { return VisitTypeInline(*data.type, this); }
};
-Status ExecIsIn(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
- return IsInVisitor(ctx, *batch[0].array(), out).Execute();
+Status ExecIsIn(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
+ return IsInVisitor(ctx, *batch[0].array(), out).Execute();
}
// Unary set lookup kernels available for the following input types
@@ -408,8 +408,8 @@ void AddBasicSetLookupKernels(ScalarKernel kernel,
// Enables calling is_in with CallFunction as though it were binary.
class IsInMetaBinary : public MetaFunction {
public:
- IsInMetaBinary()
- : MetaFunction("is_in_meta_binary", Arity::Binary(), /*doc=*/nullptr) {}
+ IsInMetaBinary()
+ : MetaFunction("is_in_meta_binary", Arity::Binary(), /*doc=*/nullptr) {}
Result<Datum> ExecuteImpl(const std::vector<Datum>& args,
const FunctionOptions* options,
@@ -424,8 +424,8 @@ class IsInMetaBinary : public MetaFunction {
// Enables calling index_in with CallFunction as though it were binary.
class IndexInMetaBinary : public MetaFunction {
public:
- IndexInMetaBinary()
- : MetaFunction("index_in_meta_binary", Arity::Binary(), /*doc=*/nullptr) {}
+ IndexInMetaBinary()
+ : MetaFunction("index_in_meta_binary", Arity::Binary(), /*doc=*/nullptr) {}
Result<Datum> ExecuteImpl(const std::vector<Datum>& args,
const FunctionOptions* options,
@@ -437,46 +437,46 @@ class IndexInMetaBinary : public MetaFunction {
}
};
-struct SetLookupFunction : ScalarFunction {
- using ScalarFunction::ScalarFunction;
-
- Result<const Kernel*> DispatchBest(std::vector<ValueDescr>* values) const override {
- EnsureDictionaryDecoded(values);
- return DispatchExact(*values);
- }
-};
-
-const FunctionDoc is_in_doc{
- "Find each element in a set of values",
- ("For each element in `values`, return true if it is found in a given\n"
- "set of values, false otherwise.\n"
- "The set of values to look for must be given in SetLookupOptions.\n"
- "By default, nulls are matched against the value set, this can be\n"
- "changed in SetLookupOptions."),
- {"values"},
- "SetLookupOptions"};
-
-const FunctionDoc index_in_doc{
- "Return index of each element in a set of values",
- ("For each element in `values`, return its index in a given set of\n"
- "values, or null if it is not found there.\n"
- "The set of values to look for must be given in SetLookupOptions.\n"
- "By default, nulls are matched against the value set, this can be\n"
- "changed in SetLookupOptions."),
- {"values"},
- "SetLookupOptions"};
-
+struct SetLookupFunction : ScalarFunction {
+ using ScalarFunction::ScalarFunction;
+
+ Result<const Kernel*> DispatchBest(std::vector<ValueDescr>* values) const override {
+ EnsureDictionaryDecoded(values);
+ return DispatchExact(*values);
+ }
+};
+
+const FunctionDoc is_in_doc{
+ "Find each element in a set of values",
+ ("For each element in `values`, return true if it is found in a given\n"
+ "set of values, false otherwise.\n"
+ "The set of values to look for must be given in SetLookupOptions.\n"
+ "By default, nulls are matched against the value set, this can be\n"
+ "changed in SetLookupOptions."),
+ {"values"},
+ "SetLookupOptions"};
+
+const FunctionDoc index_in_doc{
+ "Return index of each element in a set of values",
+ ("For each element in `values`, return its index in a given set of\n"
+ "values, or null if it is not found there.\n"
+ "The set of values to look for must be given in SetLookupOptions.\n"
+ "By default, nulls are matched against the value set, this can be\n"
+ "changed in SetLookupOptions."),
+ {"values"},
+ "SetLookupOptions"};
+
} // namespace
void RegisterScalarSetLookup(FunctionRegistry* registry) {
- // IsIn writes its boolean output into preallocated memory
+ // IsIn writes its boolean output into preallocated memory
{
ScalarKernel isin_base;
isin_base.init = InitSetLookup;
- isin_base.exec =
- TrivialScalarUnaryAsArraysExec(ExecIsIn, NullHandling::OUTPUT_NOT_NULL);
- isin_base.null_handling = NullHandling::OUTPUT_NOT_NULL;
- auto is_in = std::make_shared<SetLookupFunction>("is_in", Arity::Unary(), &is_in_doc);
+ isin_base.exec =
+ TrivialScalarUnaryAsArraysExec(ExecIsIn, NullHandling::OUTPUT_NOT_NULL);
+ isin_base.null_handling = NullHandling::OUTPUT_NOT_NULL;
+ auto is_in = std::make_shared<SetLookupFunction>("is_in", Arity::Unary(), &is_in_doc);
AddBasicSetLookupKernels(isin_base, /*output_type=*/boolean(), is_in.get());
@@ -491,12 +491,12 @@ void RegisterScalarSetLookup(FunctionRegistry* registry) {
{
ScalarKernel index_in_base;
index_in_base.init = InitSetLookup;
- index_in_base.exec = TrivialScalarUnaryAsArraysExec(
- ExecIndexIn, NullHandling::COMPUTED_NO_PREALLOCATE);
+ index_in_base.exec = TrivialScalarUnaryAsArraysExec(
+ ExecIndexIn, NullHandling::COMPUTED_NO_PREALLOCATE);
index_in_base.null_handling = NullHandling::COMPUTED_NO_PREALLOCATE;
index_in_base.mem_allocation = MemAllocation::NO_PREALLOCATE;
- auto index_in =
- std::make_shared<SetLookupFunction>("index_in", Arity::Unary(), &index_in_doc);
+ auto index_in =
+ std::make_shared<SetLookupFunction>("index_in", Arity::Unary(), &index_in_doc);
AddBasicSetLookupKernels(index_in_base, /*output_type=*/int32(), index_in.get());
diff --git a/contrib/libs/apache/arrow/cpp/src/arrow/compute/kernels/scalar_string.cc b/contrib/libs/apache/arrow/cpp/src/arrow/compute/kernels/scalar_string.cc
index ce37b089b6f..ab0a490eeb3 100644
--- a/contrib/libs/apache/arrow/cpp/src/arrow/compute/kernels/scalar_string.cc
+++ b/contrib/libs/apache/arrow/cpp/src/arrow/compute/kernels/scalar_string.cc
@@ -17,55 +17,55 @@
#include <algorithm>
#include <cctype>
-#include <iterator>
+#include <iterator>
#include <string>
#ifdef ARROW_WITH_UTF8PROC
#include <utf8proc.h>
#endif
-#ifdef ARROW_WITH_RE2
-#include <re2/re2.h>
-#endif
-
-#include "arrow/array/builder_binary.h"
-#include "arrow/array/builder_nested.h"
-#include "arrow/buffer_builder.h"
-
-#include "arrow/builder.h"
+#ifdef ARROW_WITH_RE2
+#include <re2/re2.h>
+#endif
+
+#include "arrow/array/builder_binary.h"
+#include "arrow/array/builder_nested.h"
+#include "arrow/buffer_builder.h"
+
+#include "arrow/builder.h"
#include "arrow/compute/api_scalar.h"
#include "arrow/compute/kernels/common.h"
-#include "arrow/util/checked_cast.h"
+#include "arrow/util/checked_cast.h"
#include "arrow/util/utf8.h"
#include "arrow/util/value_parsing.h"
-#include "arrow/visitor_inline.h"
+#include "arrow/visitor_inline.h"
namespace arrow {
-
-using internal::checked_cast;
-
+
+using internal::checked_cast;
+
namespace compute {
namespace internal {
namespace {
-#ifdef ARROW_WITH_RE2
-util::string_view ToStringView(re2::StringPiece piece) {
- return {piece.data(), piece.length()};
-}
-
-re2::StringPiece ToStringPiece(util::string_view view) {
- return {view.data(), view.length()};
-}
-
-Status RegexStatus(const RE2& regex) {
- if (!regex.ok()) {
- return Status::Invalid("Invalid regular expression: ", regex.error());
- }
- return Status::OK();
-}
-#endif
-
+#ifdef ARROW_WITH_RE2
+util::string_view ToStringView(re2::StringPiece piece) {
+ return {piece.data(), piece.length()};
+}
+
+re2::StringPiece ToStringPiece(util::string_view view) {
+ return {view.data(), view.length()};
+}
+
+Status RegexStatus(const RE2& regex) {
+ if (!regex.ok()) {
+ return Status::Invalid("Invalid regular expression: ", regex.error());
+ }
+ return Status::OK();
+}
+#endif
+
// Code units in the range [a-z] can only be an encoding of an ascii
// character/codepoint, not the 2nd, 3rd or 4th code unit (byte) of a different
// codepoint. This is guaranteed by the non-overlap design of the unicode standard. (see
@@ -88,20 +88,20 @@ static inline bool IsAsciiCharacter(T character) {
struct BinaryLength {
template <typename OutValue, typename Arg0Value = util::string_view>
- static OutValue Call(KernelContext*, Arg0Value val, Status*) {
+ static OutValue Call(KernelContext*, Arg0Value val, Status*) {
return static_cast<OutValue>(val.size());
}
};
-struct Utf8Length {
- template <typename OutValue, typename Arg0Value = util::string_view>
- static OutValue Call(KernelContext*, Arg0Value val, Status*) {
- auto str = reinterpret_cast<const uint8_t*>(val.data());
- auto strlen = val.size();
- return static_cast<OutValue>(util::UTF8Length(str, str + strlen));
- }
-};
-
+struct Utf8Length {
+ template <typename OutValue, typename Arg0Value = util::string_view>
+ static OutValue Call(KernelContext*, Arg0Value val, Status*) {
+ auto str = reinterpret_cast<const uint8_t*>(val.data());
+ auto strlen = val.size();
+ return static_cast<OutValue>(util::UTF8Length(str, str + strlen));
+ }
+};
+
#ifdef ARROW_WITH_UTF8PROC
// Direct lookup tables for unicode properties
@@ -124,239 +124,239 @@ void EnsureLookupTablesFilled() {
});
}
-#else
-
-void EnsureLookupTablesFilled() {}
-
-#endif // ARROW_WITH_UTF8PROC
-
-constexpr int64_t kTransformError = -1;
-
-struct StringTransformBase {
- virtual Status PreExec(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
- return Status::OK();
- }
-
- // Return the maximum total size of the output in codeunits (i.e. bytes)
- // given input characteristics.
- virtual int64_t MaxCodeunits(int64_t ninputs, int64_t input_ncodeunits) {
- return input_ncodeunits;
- }
-
- virtual Status InvalidStatus() {
- return Status::Invalid("Invalid UTF8 sequence in input");
- }
-
- // Derived classes should also define this method:
- // int64_t Transform(const uint8_t* input, int64_t input_string_ncodeunits,
- // uint8_t* output);
-};
-
-template <typename Type, typename StringTransform>
-struct StringTransformExecBase {
+#else
+
+void EnsureLookupTablesFilled() {}
+
+#endif // ARROW_WITH_UTF8PROC
+
+constexpr int64_t kTransformError = -1;
+
+struct StringTransformBase {
+ virtual Status PreExec(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
+ return Status::OK();
+ }
+
+ // Return the maximum total size of the output in codeunits (i.e. bytes)
+ // given input characteristics.
+ virtual int64_t MaxCodeunits(int64_t ninputs, int64_t input_ncodeunits) {
+ return input_ncodeunits;
+ }
+
+ virtual Status InvalidStatus() {
+ return Status::Invalid("Invalid UTF8 sequence in input");
+ }
+
+ // Derived classes should also define this method:
+ // int64_t Transform(const uint8_t* input, int64_t input_string_ncodeunits,
+ // uint8_t* output);
+};
+
+template <typename Type, typename StringTransform>
+struct StringTransformExecBase {
using offset_type = typename Type::offset_type;
using ArrayType = typename TypeTraits<Type>::ArrayType;
- static Status Execute(KernelContext* ctx, StringTransform* transform,
- const ExecBatch& batch, Datum* out) {
- if (batch[0].kind() == Datum::ARRAY) {
- return ExecArray(ctx, transform, batch[0].array(), out);
+ static Status Execute(KernelContext* ctx, StringTransform* transform,
+ const ExecBatch& batch, Datum* out) {
+ if (batch[0].kind() == Datum::ARRAY) {
+ return ExecArray(ctx, transform, batch[0].array(), out);
}
- DCHECK_EQ(batch[0].kind(), Datum::SCALAR);
- return ExecScalar(ctx, transform, batch[0].scalar(), out);
+ DCHECK_EQ(batch[0].kind(), Datum::SCALAR);
+ return ExecScalar(ctx, transform, batch[0].scalar(), out);
}
- static Status ExecArray(KernelContext* ctx, StringTransform* transform,
- const std::shared_ptr<ArrayData>& data, Datum* out) {
- ArrayType input(data);
- ArrayData* output = out->mutable_array();
+ static Status ExecArray(KernelContext* ctx, StringTransform* transform,
+ const std::shared_ptr<ArrayData>& data, Datum* out) {
+ ArrayType input(data);
+ ArrayData* output = out->mutable_array();
- const int64_t input_ncodeunits = input.total_values_length();
- const int64_t input_nstrings = input.length();
+ const int64_t input_ncodeunits = input.total_values_length();
+ const int64_t input_nstrings = input.length();
- const int64_t output_ncodeunits_max =
- transform->MaxCodeunits(input_nstrings, input_ncodeunits);
- if (output_ncodeunits_max > std::numeric_limits<offset_type>::max()) {
- return Status::CapacityError(
- "Result might not fit in a 32bit utf8 array, convert to large_utf8");
- }
+ const int64_t output_ncodeunits_max =
+ transform->MaxCodeunits(input_nstrings, input_ncodeunits);
+ if (output_ncodeunits_max > std::numeric_limits<offset_type>::max()) {
+ return Status::CapacityError(
+ "Result might not fit in a 32bit utf8 array, convert to large_utf8");
+ }
- ARROW_ASSIGN_OR_RAISE(auto values_buffer, ctx->Allocate(output_ncodeunits_max));
- output->buffers[2] = values_buffer;
+ ARROW_ASSIGN_OR_RAISE(auto values_buffer, ctx->Allocate(output_ncodeunits_max));
+ output->buffers[2] = values_buffer;
- // String offsets are preallocated
- offset_type* output_string_offsets = output->GetMutableValues<offset_type>(1);
- uint8_t* output_str = output->buffers[2]->mutable_data();
- offset_type output_ncodeunits = 0;
+ // String offsets are preallocated
+ offset_type* output_string_offsets = output->GetMutableValues<offset_type>(1);
+ uint8_t* output_str = output->buffers[2]->mutable_data();
+ offset_type output_ncodeunits = 0;
- output_string_offsets[0] = 0;
- for (int64_t i = 0; i < input_nstrings; i++) {
- if (!input.IsNull(i)) {
+ output_string_offsets[0] = 0;
+ for (int64_t i = 0; i < input_nstrings; i++) {
+ if (!input.IsNull(i)) {
offset_type input_string_ncodeunits;
- const uint8_t* input_string = input.GetValue(i, &input_string_ncodeunits);
- auto encoded_nbytes = static_cast<offset_type>(transform->Transform(
- input_string, input_string_ncodeunits, output_str + output_ncodeunits));
- if (encoded_nbytes < 0) {
- return transform->InvalidStatus();
+ const uint8_t* input_string = input.GetValue(i, &input_string_ncodeunits);
+ auto encoded_nbytes = static_cast<offset_type>(transform->Transform(
+ input_string, input_string_ncodeunits, output_str + output_ncodeunits));
+ if (encoded_nbytes < 0) {
+ return transform->InvalidStatus();
}
output_ncodeunits += encoded_nbytes;
}
- output_string_offsets[i + 1] = output_ncodeunits;
- }
- DCHECK_LE(output_ncodeunits, output_ncodeunits_max);
-
- // Trim the codepoint buffer, since we allocated too much
- return values_buffer->Resize(output_ncodeunits, /*shrink_to_fit=*/true);
- }
-
- static Status ExecScalar(KernelContext* ctx, StringTransform* transform,
- const std::shared_ptr<Scalar>& scalar, Datum* out) {
- const auto& input = checked_cast<const BaseBinaryScalar&>(*scalar);
- if (!input.is_valid) {
- return Status::OK();
+ output_string_offsets[i + 1] = output_ncodeunits;
+ }
+ DCHECK_LE(output_ncodeunits, output_ncodeunits_max);
+
+ // Trim the codepoint buffer, since we allocated too much
+ return values_buffer->Resize(output_ncodeunits, /*shrink_to_fit=*/true);
+ }
+
+ static Status ExecScalar(KernelContext* ctx, StringTransform* transform,
+ const std::shared_ptr<Scalar>& scalar, Datum* out) {
+ const auto& input = checked_cast<const BaseBinaryScalar&>(*scalar);
+ if (!input.is_valid) {
+ return Status::OK();
+ }
+ auto* result = checked_cast<BaseBinaryScalar*>(out->scalar().get());
+ result->is_valid = true;
+ const int64_t data_nbytes = static_cast<int64_t>(input.value->size());
+
+ const int64_t output_ncodeunits_max = transform->MaxCodeunits(1, data_nbytes);
+ if (output_ncodeunits_max > std::numeric_limits<offset_type>::max()) {
+ return Status::CapacityError(
+ "Result might not fit in a 32bit utf8 array, convert to large_utf8");
}
- auto* result = checked_cast<BaseBinaryScalar*>(out->scalar().get());
- result->is_valid = true;
- const int64_t data_nbytes = static_cast<int64_t>(input.value->size());
-
- const int64_t output_ncodeunits_max = transform->MaxCodeunits(1, data_nbytes);
- if (output_ncodeunits_max > std::numeric_limits<offset_type>::max()) {
- return Status::CapacityError(
- "Result might not fit in a 32bit utf8 array, convert to large_utf8");
- }
- ARROW_ASSIGN_OR_RAISE(auto value_buffer, ctx->Allocate(output_ncodeunits_max));
- result->value = value_buffer;
- auto encoded_nbytes = static_cast<offset_type>(transform->Transform(
- input.value->data(), data_nbytes, value_buffer->mutable_data()));
- if (encoded_nbytes < 0) {
- return transform->InvalidStatus();
- }
- DCHECK_LE(encoded_nbytes, output_ncodeunits_max);
- return value_buffer->Resize(encoded_nbytes, /*shrink_to_fit=*/true);
+ ARROW_ASSIGN_OR_RAISE(auto value_buffer, ctx->Allocate(output_ncodeunits_max));
+ result->value = value_buffer;
+ auto encoded_nbytes = static_cast<offset_type>(transform->Transform(
+ input.value->data(), data_nbytes, value_buffer->mutable_data()));
+ if (encoded_nbytes < 0) {
+ return transform->InvalidStatus();
+ }
+ DCHECK_LE(encoded_nbytes, output_ncodeunits_max);
+ return value_buffer->Resize(encoded_nbytes, /*shrink_to_fit=*/true);
}
};
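[Editor's note] The pair of templates above is the core of the one-string-in, one-string-out kernel machinery: MaxCodeunits supplies an upper bound used for a single output allocation, and Transform writes into that buffer, returning the number of bytes written or kTransformError. A minimal sketch of a transform written against this contract (the struct name is hypothetical, not part of the file):

    // Hypothetical example: copies each string through unchanged.
    // The inherited MaxCodeunits (output <= input bytes) is already exact here.
    struct IdentityTransform : public StringTransformBase {
      int64_t Transform(const uint8_t* input, int64_t input_string_ncodeunits,
                        uint8_t* output) {
        std::copy(input, input + input_string_ncodeunits, output);
        return input_string_ncodeunits;  // bytes actually written
      }
    };
    template <typename Type>
    using Identity = StringTransformExec<Type, IdentityTransform>;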
-template <typename Type, typename StringTransform>
-struct StringTransformExec : public StringTransformExecBase<Type, StringTransform> {
- using StringTransformExecBase<Type, StringTransform>::Execute;
-
- static Status Exec(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
- StringTransform transform;
- RETURN_NOT_OK(transform.PreExec(ctx, batch, out));
- return Execute(ctx, &transform, batch, out);
- }
-};
-
-template <typename Type, typename StringTransform>
-struct StringTransformExecWithState
- : public StringTransformExecBase<Type, StringTransform> {
- using State = typename StringTransform::State;
- using StringTransformExecBase<Type, StringTransform>::Execute;
-
- static Status Exec(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
- StringTransform transform(State::Get(ctx));
- RETURN_NOT_OK(transform.PreExec(ctx, batch, out));
- return Execute(ctx, &transform, batch, out);
- }
-};
-
-#ifdef ARROW_WITH_UTF8PROC
-
-template <typename CodepointTransform>
-struct StringTransformCodepoint : public StringTransformBase {
- Status PreExec(KernelContext* ctx, const ExecBatch& batch, Datum* out) override {
- EnsureLookupTablesFilled();
- return Status::OK();
- }
-
- int64_t MaxCodeunits(int64_t ninputs, int64_t input_ncodeunits) override {
- return CodepointTransform::MaxCodeunits(ninputs, input_ncodeunits);
- }
-
- int64_t Transform(const uint8_t* input, int64_t input_string_ncodeunits,
- uint8_t* output) {
- uint8_t* output_start = output;
- if (ARROW_PREDICT_FALSE(
- !arrow::util::UTF8Transform(input, input + input_string_ncodeunits, &output,
- CodepointTransform::TransformCodepoint))) {
- return kTransformError;
- }
- return output - output_start;
- }
-};
-
-// struct CaseMappingMixin {
-struct CaseMappingTransform {
- static int64_t MaxCodeunits(int64_t ninputs, int64_t input_ncodeunits) {
-    // Section 5.18 of the Unicode spec claims that the number of codepoints for case
-    // mapping can grow by a factor of 3, which would mean a factor of 3 in bytes too.
-    // However, since we don't support all casings (SpecialCasing.txt), the growth
-    // in bytes is actually at most 3/2 (as covered by the unittest).
- // Note that rounding down the 3/2 is ok, since only codepoints encoded by
- // two code units (even) can grow to 3 code units.
- return static_cast<int64_t>(input_ncodeunits) * 3 / 2;
- }
-};
-
-struct UTF8UpperTransform : public CaseMappingTransform {
- static uint32_t TransformCodepoint(uint32_t codepoint) {
+template <typename Type, typename StringTransform>
+struct StringTransformExec : public StringTransformExecBase<Type, StringTransform> {
+ using StringTransformExecBase<Type, StringTransform>::Execute;
+
+ static Status Exec(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
+ StringTransform transform;
+ RETURN_NOT_OK(transform.PreExec(ctx, batch, out));
+ return Execute(ctx, &transform, batch, out);
+ }
+};
+
+template <typename Type, typename StringTransform>
+struct StringTransformExecWithState
+ : public StringTransformExecBase<Type, StringTransform> {
+ using State = typename StringTransform::State;
+ using StringTransformExecBase<Type, StringTransform>::Execute;
+
+ static Status Exec(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
+ StringTransform transform(State::Get(ctx));
+ RETURN_NOT_OK(transform.PreExec(ctx, batch, out));
+ return Execute(ctx, &transform, batch, out);
+ }
+};
+
+#ifdef ARROW_WITH_UTF8PROC
+
+template <typename CodepointTransform>
+struct StringTransformCodepoint : public StringTransformBase {
+ Status PreExec(KernelContext* ctx, const ExecBatch& batch, Datum* out) override {
+ EnsureLookupTablesFilled();
+ return Status::OK();
+ }
+
+ int64_t MaxCodeunits(int64_t ninputs, int64_t input_ncodeunits) override {
+ return CodepointTransform::MaxCodeunits(ninputs, input_ncodeunits);
+ }
+
+ int64_t Transform(const uint8_t* input, int64_t input_string_ncodeunits,
+ uint8_t* output) {
+ uint8_t* output_start = output;
+ if (ARROW_PREDICT_FALSE(
+ !arrow::util::UTF8Transform(input, input + input_string_ncodeunits, &output,
+ CodepointTransform::TransformCodepoint))) {
+ return kTransformError;
+ }
+ return output - output_start;
+ }
+};
+
+// struct CaseMappingMixin {
+struct CaseMappingTransform {
+ static int64_t MaxCodeunits(int64_t ninputs, int64_t input_ncodeunits) {
+    // Section 5.18 of the Unicode spec claims that the number of codepoints for case
+    // mapping can grow by a factor of 3, which would mean a factor of 3 in bytes too.
+    // However, since we don't support all casings (SpecialCasing.txt), the growth
+    // in bytes is actually at most 3/2 (as covered by the unittest).
+ // Note that rounding down the 3/2 is ok, since only codepoints encoded by
+ // two code units (even) can grow to 3 code units.
+ return static_cast<int64_t>(input_ncodeunits) * 3 / 2;
+ }
+};
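[Editor's note] A concrete instance of the 3/2 bound (an illustration relying on standard Unicode case mappings, not anything specific to this file): U+0250 "ɐ" takes two UTF-8 bytes and uppercases to U+2C6F "Ɐ", which takes three.

    // n such characters: input_ncodeunits = 2*n
    // bound = 2*n * 3 / 2 = 3*n bytes, exactly the output size in this case.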
+
+struct UTF8UpperTransform : public CaseMappingTransform {
+ static uint32_t TransformCodepoint(uint32_t codepoint) {
return codepoint <= kMaxCodepointLookup ? lut_upper_codepoint[codepoint]
: utf8proc_toupper(codepoint);
}
};
template <typename Type>
-using UTF8Upper = StringTransformExec<Type, StringTransformCodepoint<UTF8UpperTransform>>;
-
-struct UTF8LowerTransform : public CaseMappingTransform {
+using UTF8Upper = StringTransformExec<Type, StringTransformCodepoint<UTF8UpperTransform>>;
+
+struct UTF8LowerTransform : public CaseMappingTransform {
static uint32_t TransformCodepoint(uint32_t codepoint) {
return codepoint <= kMaxCodepointLookup ? lut_lower_codepoint[codepoint]
: utf8proc_tolower(codepoint);
}
};
-template <typename Type>
-using UTF8Lower = StringTransformExec<Type, StringTransformCodepoint<UTF8LowerTransform>>;
+template <typename Type>
+using UTF8Lower = StringTransformExec<Type, StringTransformCodepoint<UTF8LowerTransform>>;
#endif // ARROW_WITH_UTF8PROC
-struct AsciiReverseTransform : public StringTransformBase {
- int64_t Transform(const uint8_t* input, int64_t input_string_ncodeunits,
- uint8_t* output) {
- uint8_t utf8_char_found = 0;
- for (int64_t i = 0; i < input_string_ncodeunits; i++) {
-      // record any non-ASCII byte (high bit set) in utf8_char_found
- utf8_char_found |= input[i] & 0x80;
- output[input_string_ncodeunits - i - 1] = input[i];
- }
- return utf8_char_found ? kTransformError : input_string_ncodeunits;
- }
-
- Status InvalidStatus() override {
- return Status::Invalid("Non-ASCII sequence in input");
- }
-};
-
-template <typename Type>
-using AsciiReverse = StringTransformExec<Type, AsciiReverseTransform>;
-
-struct Utf8ReverseTransform : public StringTransformBase {
- int64_t Transform(const uint8_t* input, int64_t input_string_ncodeunits,
- uint8_t* output) {
- int64_t i = 0;
- while (i < input_string_ncodeunits) {
- int64_t char_end = std::min(i + util::ValidUtf8CodepointByteSize(input + i),
- input_string_ncodeunits);
- std::copy(input + i, input + char_end, output + input_string_ncodeunits - char_end);
- i = char_end;
- }
- return input_string_ncodeunits;
- }
-};
-
-template <typename Type>
-using Utf8Reverse = StringTransformExec<Type, Utf8ReverseTransform>;
-
+struct AsciiReverseTransform : public StringTransformBase {
+ int64_t Transform(const uint8_t* input, int64_t input_string_ncodeunits,
+ uint8_t* output) {
+ uint8_t utf8_char_found = 0;
+ for (int64_t i = 0; i < input_string_ncodeunits; i++) {
+      // record any non-ASCII byte (high bit set) in utf8_char_found
+ utf8_char_found |= input[i] & 0x80;
+ output[input_string_ncodeunits - i - 1] = input[i];
+ }
+ return utf8_char_found ? kTransformError : input_string_ncodeunits;
+ }
+
+ Status InvalidStatus() override {
+ return Status::Invalid("Non-ASCII sequence in input");
+ }
+};
+
+template <typename Type>
+using AsciiReverse = StringTransformExec<Type, AsciiReverseTransform>;
+
+struct Utf8ReverseTransform : public StringTransformBase {
+ int64_t Transform(const uint8_t* input, int64_t input_string_ncodeunits,
+ uint8_t* output) {
+ int64_t i = 0;
+ while (i < input_string_ncodeunits) {
+ int64_t char_end = std::min(i + util::ValidUtf8CodepointByteSize(input + i),
+ input_string_ncodeunits);
+ std::copy(input + i, input + char_end, output + input_string_ncodeunits - char_end);
+ i = char_end;
+ }
+ return input_string_ncodeunits;
+ }
+};
+
+template <typename Type>
+using Utf8Reverse = StringTransformExec<Type, Utf8ReverseTransform>;
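[Editor's note] Utf8ReverseTransform reverses codepoint-by-codepoint rather than byte-by-byte so that multi-byte sequences survive intact; a naive byte reversal would emit invalid UTF-8. A small illustration:

    // "abé" is 61 62 C3 A9 in UTF-8.
    //   codepoint reversal (this kernel): C3 A9 62 61 = "éba"   (valid UTF-8)
    //   naive byte reversal:              A9 C3 62 61           (invalid UTF-8)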
+
using TransformFunc = std::function<void(const uint8_t*, int64_t, uint8_t*)>;
// Transform a buffer of offsets to one which begins with 0 and has same
@@ -378,8 +378,8 @@ Status GetShiftedOffsets(KernelContext* ctx, const Buffer& input_buffer, int64_t
// Apply `transform` to input character data - this function cannot change the
// length
template <typename Type>
-Status StringDataTransform(KernelContext* ctx, const ExecBatch& batch,
- TransformFunc transform, Datum* out) {
+Status StringDataTransform(KernelContext* ctx, const ExecBatch& batch,
+ TransformFunc transform, Datum* out) {
using ArrayType = typename TypeTraits<Type>::ArrayType;
using offset_type = typename Type::offset_type;
@@ -395,13 +395,13 @@ Status StringDataTransform(KernelContext* ctx, const ExecBatch& batch,
} else {
DCHECK(input.buffers[1]);
// We must allocate new space for the offsets and shift the existing offsets
- RETURN_NOT_OK(GetShiftedOffsets<offset_type>(ctx, *input.buffers[1], input.offset,
- input.length, &out_arr->buffers[1]));
+ RETURN_NOT_OK(GetShiftedOffsets<offset_type>(ctx, *input.buffers[1], input.offset,
+ input.length, &out_arr->buffers[1]));
}
// Allocate space for output data
int64_t data_nbytes = input_boxed.total_values_length();
- RETURN_NOT_OK(ctx->Allocate(data_nbytes).Value(&out_arr->buffers[2]));
+ RETURN_NOT_OK(ctx->Allocate(data_nbytes).Value(&out_arr->buffers[2]));
if (input.length > 0) {
transform(input.buffers[2]->data() + input_boxed.value_offset(0), data_nbytes,
out_arr->buffers[2]->mutable_data());
@@ -412,13 +412,13 @@ Status StringDataTransform(KernelContext* ctx, const ExecBatch& batch,
if (input.is_valid) {
result->is_valid = true;
int64_t data_nbytes = input.value->size();
- RETURN_NOT_OK(ctx->Allocate(data_nbytes).Value(&result->value));
+ RETURN_NOT_OK(ctx->Allocate(data_nbytes).Value(&result->value));
transform(input.value->data(), data_nbytes, result->value->mutable_data());
}
- out->value = result;
+ out->value = result;
}
-
- return Status::OK();
+
+ return Status::OK();
}
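[Editor's note] StringDataTransform reuses the input offsets (shifting them first when the array has a non-zero offset) and rewrites only the character data, which is why it can express only length-preserving transforms such as ASCII case mapping. A hedged usage sketch through the compute API, assuming the kernels below are registered under names like "ascii_upper" as elsewhere in Arrow's registry:

    #include <arrow/api.h>
    #include <arrow/compute/api.h>

    arrow::Result<arrow::Datum> UpperAscii(const std::shared_ptr<arrow::Array>& arr) {
      // No options needed; nulls propagate to the output.
      return arrow::compute::CallFunction("ascii_upper", {arr});
    }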
void TransformAsciiUpper(const uint8_t* input, int64_t length, uint8_t* output) {
@@ -427,8 +427,8 @@ void TransformAsciiUpper(const uint8_t* input, int64_t length, uint8_t* output)
template <typename Type>
struct AsciiUpper {
- static Status Exec(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
- return StringDataTransform<Type>(ctx, batch, TransformAsciiUpper, out);
+ static Status Exec(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
+ return StringDataTransform<Type>(ctx, batch, TransformAsciiUpper, out);
}
};
@@ -438,8 +438,8 @@ void TransformAsciiLower(const uint8_t* input, int64_t length, uint8_t* output)
template <typename Type>
struct AsciiLower {
- static Status Exec(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
- return StringDataTransform<Type>(ctx, batch, TransformAsciiLower, out);
+ static Status Exec(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
+ return StringDataTransform<Type>(ctx, batch, TransformAsciiLower, out);
}
};
@@ -473,881 +473,881 @@ void StringBoolTransform(KernelContext* ctx, const ExecBatch& batch,
static_cast<offset_type>(input.value->size())};
transform(offsets.data(), input.value->data(), 1, /*output_offset=*/0,
&result_value);
- out->value = std::make_shared<BooleanScalar>(result_value > 0);
+ out->value = std::make_shared<BooleanScalar>(result_value > 0);
}
}
}
-using MatchSubstringState = OptionsWrapper<MatchSubstringOptions>;
-
-// This is an implementation of the Knuth-Morris-Pratt algorithm
-struct PlainSubstringMatcher {
- const MatchSubstringOptions& options_;
- std::vector<int64_t> prefix_table;
-
- static Result<std::unique_ptr<PlainSubstringMatcher>> Make(
- const MatchSubstringOptions& options) {
- // Should be handled by partial template specialization below
- DCHECK(!options.ignore_case);
- return ::arrow::internal::make_unique<PlainSubstringMatcher>(options);
- }
-
- explicit PlainSubstringMatcher(const MatchSubstringOptions& options)
- : options_(options) {
- // Phase 1: Build the prefix table
- const auto pattern_length = options_.pattern.size();
- prefix_table.resize(pattern_length + 1, /*value=*/0);
- int64_t prefix_length = -1;
- prefix_table[0] = -1;
- for (size_t pos = 0; pos < pattern_length; ++pos) {
- // The prefix cannot be expanded, reset.
- while (prefix_length >= 0 &&
- options_.pattern[pos] != options_.pattern[prefix_length]) {
- prefix_length = prefix_table[prefix_length];
- }
- prefix_length++;
- prefix_table[pos + 1] = prefix_length;
+using MatchSubstringState = OptionsWrapper<MatchSubstringOptions>;
+
+// This is an implementation of the Knuth-Morris-Pratt algorithm
+struct PlainSubstringMatcher {
+ const MatchSubstringOptions& options_;
+ std::vector<int64_t> prefix_table;
+
+ static Result<std::unique_ptr<PlainSubstringMatcher>> Make(
+ const MatchSubstringOptions& options) {
+ // Should be handled by partial template specialization below
+ DCHECK(!options.ignore_case);
+ return ::arrow::internal::make_unique<PlainSubstringMatcher>(options);
+ }
+
+ explicit PlainSubstringMatcher(const MatchSubstringOptions& options)
+ : options_(options) {
+ // Phase 1: Build the prefix table
+ const auto pattern_length = options_.pattern.size();
+ prefix_table.resize(pattern_length + 1, /*value=*/0);
+ int64_t prefix_length = -1;
+ prefix_table[0] = -1;
+ for (size_t pos = 0; pos < pattern_length; ++pos) {
+ // The prefix cannot be expanded, reset.
+ while (prefix_length >= 0 &&
+ options_.pattern[pos] != options_.pattern[prefix_length]) {
+ prefix_length = prefix_table[prefix_length];
+ }
+ prefix_length++;
+ prefix_table[pos + 1] = prefix_length;
}
}
- int64_t Find(util::string_view current) const {
- // Phase 2: Find the prefix in the data
- const auto pattern_length = options_.pattern.size();
+ int64_t Find(util::string_view current) const {
+ // Phase 2: Find the prefix in the data
+ const auto pattern_length = options_.pattern.size();
int64_t pattern_pos = 0;
- int64_t pos = 0;
- if (pattern_length == 0) return 0;
- for (const auto c : current) {
- while ((pattern_pos >= 0) && (options_.pattern[pattern_pos] != c)) {
+ int64_t pos = 0;
+ if (pattern_length == 0) return 0;
+ for (const auto c : current) {
+ while ((pattern_pos >= 0) && (options_.pattern[pattern_pos] != c)) {
pattern_pos = prefix_table[pattern_pos];
}
pattern_pos++;
- if (static_cast<size_t>(pattern_pos) == pattern_length) {
- return pos + 1 - pattern_length;
+ if (static_cast<size_t>(pattern_pos) == pattern_length) {
+ return pos + 1 - pattern_length;
}
- pos++;
+ pos++;
}
- return -1;
- }
-
- bool Match(util::string_view current) const { return Find(current) >= 0; }
-};
-
-struct PlainStartsWithMatcher {
- const MatchSubstringOptions& options_;
-
- explicit PlainStartsWithMatcher(const MatchSubstringOptions& options)
- : options_(options) {}
-
- static Result<std::unique_ptr<PlainStartsWithMatcher>> Make(
- const MatchSubstringOptions& options) {
- // Should be handled by partial template specialization below
- DCHECK(!options.ignore_case);
- return ::arrow::internal::make_unique<PlainStartsWithMatcher>(options);
- }
-
- bool Match(util::string_view current) const {
- // string_view::starts_with is C++20
- return current.substr(0, options_.pattern.size()) == options_.pattern;
- }
-};
-
-struct PlainEndsWithMatcher {
- const MatchSubstringOptions& options_;
-
- explicit PlainEndsWithMatcher(const MatchSubstringOptions& options)
- : options_(options) {}
-
- static Result<std::unique_ptr<PlainEndsWithMatcher>> Make(
- const MatchSubstringOptions& options) {
- // Should be handled by partial template specialization below
- DCHECK(!options.ignore_case);
- return ::arrow::internal::make_unique<PlainEndsWithMatcher>(options);
- }
-
- bool Match(util::string_view current) const {
- // string_view::ends_with is C++20
- return current.size() >= options_.pattern.size() &&
- current.substr(current.size() - options_.pattern.size(),
- options_.pattern.size()) == options_.pattern;
- }
-};
-
-#ifdef ARROW_WITH_RE2
-struct RegexSubstringMatcher {
- const MatchSubstringOptions& options_;
- const RE2 regex_match_;
-
- static Result<std::unique_ptr<RegexSubstringMatcher>> Make(
- const MatchSubstringOptions& options, bool literal = false) {
- auto matcher =
- ::arrow::internal::make_unique<RegexSubstringMatcher>(options, literal);
- RETURN_NOT_OK(RegexStatus(matcher->regex_match_));
- return std::move(matcher);
- }
-
- explicit RegexSubstringMatcher(const MatchSubstringOptions& options,
- bool literal = false)
- : options_(options),
- regex_match_(options_.pattern, MakeRE2Options(options, literal)) {}
-
- bool Match(util::string_view current) const {
- auto piece = re2::StringPiece(current.data(), current.length());
- return re2::RE2::PartialMatch(piece, regex_match_);
- }
-
- static RE2::RE2::Options MakeRE2Options(const MatchSubstringOptions& options,
- bool literal) {
- RE2::RE2::Options re2_options(RE2::Quiet);
- re2_options.set_case_sensitive(!options.ignore_case);
- re2_options.set_literal(literal);
- return re2_options;
- }
-};
-#endif
-
-template <typename Type, typename Matcher>
-struct MatchSubstringImpl {
+ return -1;
+ }
+
+ bool Match(util::string_view current) const { return Find(current) >= 0; }
+};
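[Editor's note] A worked example of the two KMP phases above, for pattern "abab":

    // prefix_table = [-1, 0, 0, 1, 2]
    // Find("cabab"): the 'c' mismatch resets pattern_pos via the table; the
    // next four characters then match in order, so Find returns 5 - 4 = 1,
    // the 0-based byte index of the first occurrence.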
+
+struct PlainStartsWithMatcher {
+ const MatchSubstringOptions& options_;
+
+ explicit PlainStartsWithMatcher(const MatchSubstringOptions& options)
+ : options_(options) {}
+
+ static Result<std::unique_ptr<PlainStartsWithMatcher>> Make(
+ const MatchSubstringOptions& options) {
+ // Should be handled by partial template specialization below
+ DCHECK(!options.ignore_case);
+ return ::arrow::internal::make_unique<PlainStartsWithMatcher>(options);
+ }
+
+ bool Match(util::string_view current) const {
+ // string_view::starts_with is C++20
+ return current.substr(0, options_.pattern.size()) == options_.pattern;
+ }
+};
+
+struct PlainEndsWithMatcher {
+ const MatchSubstringOptions& options_;
+
+ explicit PlainEndsWithMatcher(const MatchSubstringOptions& options)
+ : options_(options) {}
+
+ static Result<std::unique_ptr<PlainEndsWithMatcher>> Make(
+ const MatchSubstringOptions& options) {
+ // Should be handled by partial template specialization below
+ DCHECK(!options.ignore_case);
+ return ::arrow::internal::make_unique<PlainEndsWithMatcher>(options);
+ }
+
+ bool Match(util::string_view current) const {
+ // string_view::ends_with is C++20
+ return current.size() >= options_.pattern.size() &&
+ current.substr(current.size() - options_.pattern.size(),
+ options_.pattern.size()) == options_.pattern;
+ }
+};
+
+#ifdef ARROW_WITH_RE2
+struct RegexSubstringMatcher {
+ const MatchSubstringOptions& options_;
+ const RE2 regex_match_;
+
+ static Result<std::unique_ptr<RegexSubstringMatcher>> Make(
+ const MatchSubstringOptions& options, bool literal = false) {
+ auto matcher =
+ ::arrow::internal::make_unique<RegexSubstringMatcher>(options, literal);
+ RETURN_NOT_OK(RegexStatus(matcher->regex_match_));
+ return std::move(matcher);
+ }
+
+ explicit RegexSubstringMatcher(const MatchSubstringOptions& options,
+ bool literal = false)
+ : options_(options),
+ regex_match_(options_.pattern, MakeRE2Options(options, literal)) {}
+
+ bool Match(util::string_view current) const {
+ auto piece = re2::StringPiece(current.data(), current.length());
+ return re2::RE2::PartialMatch(piece, regex_match_);
+ }
+
+ static RE2::RE2::Options MakeRE2Options(const MatchSubstringOptions& options,
+ bool literal) {
+ RE2::RE2::Options re2_options(RE2::Quiet);
+ re2_options.set_case_sensitive(!options.ignore_case);
+ re2_options.set_literal(literal);
+ return re2_options;
+ }
+};
+#endif
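[Editor's note] MakeRE2Options is reused by the find/count kernels below as well; literal=true makes RE2 treat the pattern as plain bytes, which is how case-insensitive "plain" matching is implemented without a second code path. A standalone illustration of the same options (not code from this file):

    #include <re2/re2.h>

    re2::RE2::Options opts(re2::RE2::Quiet);
    opts.set_case_sensitive(false);  // ignore_case
    opts.set_literal(true);          // "a.b" matches the three bytes 'a' '.' 'b'
    re2::RE2 re("a.b", opts);
    bool hit = re2::RE2::PartialMatch("xA.bY", re);  // true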
+
+template <typename Type, typename Matcher>
+struct MatchSubstringImpl {
using offset_type = typename Type::offset_type;
-
- static Status Exec(KernelContext* ctx, const ExecBatch& batch, Datum* out,
- const Matcher* matcher) {
+
+ static Status Exec(KernelContext* ctx, const ExecBatch& batch, Datum* out,
+ const Matcher* matcher) {
StringBoolTransform<Type>(
ctx, batch,
- [&matcher](const void* raw_offsets, const uint8_t* data, int64_t length,
- int64_t output_offset, uint8_t* output) {
- const offset_type* offsets = reinterpret_cast<const offset_type*>(raw_offsets);
- FirstTimeBitmapWriter bitmap_writer(output, output_offset, length);
- for (int64_t i = 0; i < length; ++i) {
- const char* current_data = reinterpret_cast<const char*>(data + offsets[i]);
- int64_t current_length = offsets[i + 1] - offsets[i];
- if (matcher->Match(util::string_view(current_data, current_length))) {
- bitmap_writer.Set();
- }
- bitmap_writer.Next();
- }
- bitmap_writer.Finish();
+ [&matcher](const void* raw_offsets, const uint8_t* data, int64_t length,
+ int64_t output_offset, uint8_t* output) {
+ const offset_type* offsets = reinterpret_cast<const offset_type*>(raw_offsets);
+ FirstTimeBitmapWriter bitmap_writer(output, output_offset, length);
+ for (int64_t i = 0; i < length; ++i) {
+ const char* current_data = reinterpret_cast<const char*>(data + offsets[i]);
+ int64_t current_length = offsets[i + 1] - offsets[i];
+ if (matcher->Match(util::string_view(current_data, current_length))) {
+ bitmap_writer.Set();
+ }
+ bitmap_writer.Next();
+ }
+ bitmap_writer.Finish();
},
out);
- return Status::OK();
+ return Status::OK();
+ }
+};
+
+template <typename Type, typename Matcher>
+struct MatchSubstring {
+ static Status Exec(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
+ // TODO Cache matcher across invocations (for regex compilation)
+ ARROW_ASSIGN_OR_RAISE(auto matcher, Matcher::Make(MatchSubstringState::Get(ctx)));
+ return MatchSubstringImpl<Type, Matcher>::Exec(ctx, batch, out, matcher.get());
+ }
+};
+
+template <typename Type>
+struct MatchSubstring<Type, PlainSubstringMatcher> {
+ static Status Exec(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
+ auto options = MatchSubstringState::Get(ctx);
+ if (options.ignore_case) {
+#ifdef ARROW_WITH_RE2
+ ARROW_ASSIGN_OR_RAISE(auto matcher,
+ RegexSubstringMatcher::Make(options, /*literal=*/true));
+ return MatchSubstringImpl<Type, RegexSubstringMatcher>::Exec(ctx, batch, out,
+ matcher.get());
+#else
+ return Status::NotImplemented("ignore_case requires RE2");
+#endif
+ }
+ ARROW_ASSIGN_OR_RAISE(auto matcher, PlainSubstringMatcher::Make(options));
+ return MatchSubstringImpl<Type, PlainSubstringMatcher>::Exec(ctx, batch, out,
+ matcher.get());
+ }
+};
+
+template <typename Type>
+struct MatchSubstring<Type, PlainStartsWithMatcher> {
+ static Status Exec(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
+ auto options = MatchSubstringState::Get(ctx);
+ if (options.ignore_case) {
+#ifdef ARROW_WITH_RE2
+ MatchSubstringOptions converted_options = options;
+ converted_options.pattern = "^" + RE2::QuoteMeta(options.pattern);
+ ARROW_ASSIGN_OR_RAISE(auto matcher, RegexSubstringMatcher::Make(converted_options));
+ return MatchSubstringImpl<Type, RegexSubstringMatcher>::Exec(ctx, batch, out,
+ matcher.get());
+#else
+ return Status::NotImplemented("ignore_case requires RE2");
+#endif
+ }
+ ARROW_ASSIGN_OR_RAISE(auto matcher, PlainStartsWithMatcher::Make(options));
+ return MatchSubstringImpl<Type, PlainStartsWithMatcher>::Exec(ctx, batch, out,
+ matcher.get());
+ }
+};
+
+template <typename Type>
+struct MatchSubstring<Type, PlainEndsWithMatcher> {
+ static Status Exec(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
+ auto options = MatchSubstringState::Get(ctx);
+ if (options.ignore_case) {
+#ifdef ARROW_WITH_RE2
+ MatchSubstringOptions converted_options = options;
+ converted_options.pattern = RE2::QuoteMeta(options.pattern) + "$";
+ ARROW_ASSIGN_OR_RAISE(auto matcher, RegexSubstringMatcher::Make(converted_options));
+ return MatchSubstringImpl<Type, RegexSubstringMatcher>::Exec(ctx, batch, out,
+ matcher.get());
+#else
+ return Status::NotImplemented("ignore_case requires RE2");
+#endif
+ }
+ ARROW_ASSIGN_OR_RAISE(auto matcher, PlainEndsWithMatcher::Make(options));
+ return MatchSubstringImpl<Type, PlainEndsWithMatcher>::Exec(ctx, batch, out,
+ matcher.get());
}
};
-template <typename Type, typename Matcher>
-struct MatchSubstring {
- static Status Exec(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
- // TODO Cache matcher across invocations (for regex compilation)
- ARROW_ASSIGN_OR_RAISE(auto matcher, Matcher::Make(MatchSubstringState::Get(ctx)));
- return MatchSubstringImpl<Type, Matcher>::Exec(ctx, batch, out, matcher.get());
- }
-};
-
-template <typename Type>
-struct MatchSubstring<Type, PlainSubstringMatcher> {
- static Status Exec(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
- auto options = MatchSubstringState::Get(ctx);
- if (options.ignore_case) {
-#ifdef ARROW_WITH_RE2
- ARROW_ASSIGN_OR_RAISE(auto matcher,
- RegexSubstringMatcher::Make(options, /*literal=*/true));
- return MatchSubstringImpl<Type, RegexSubstringMatcher>::Exec(ctx, batch, out,
- matcher.get());
-#else
- return Status::NotImplemented("ignore_case requires RE2");
-#endif
- }
- ARROW_ASSIGN_OR_RAISE(auto matcher, PlainSubstringMatcher::Make(options));
- return MatchSubstringImpl<Type, PlainSubstringMatcher>::Exec(ctx, batch, out,
- matcher.get());
- }
-};
-
-template <typename Type>
-struct MatchSubstring<Type, PlainStartsWithMatcher> {
- static Status Exec(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
- auto options = MatchSubstringState::Get(ctx);
- if (options.ignore_case) {
-#ifdef ARROW_WITH_RE2
- MatchSubstringOptions converted_options = options;
- converted_options.pattern = "^" + RE2::QuoteMeta(options.pattern);
- ARROW_ASSIGN_OR_RAISE(auto matcher, RegexSubstringMatcher::Make(converted_options));
- return MatchSubstringImpl<Type, RegexSubstringMatcher>::Exec(ctx, batch, out,
- matcher.get());
-#else
- return Status::NotImplemented("ignore_case requires RE2");
-#endif
- }
- ARROW_ASSIGN_OR_RAISE(auto matcher, PlainStartsWithMatcher::Make(options));
- return MatchSubstringImpl<Type, PlainStartsWithMatcher>::Exec(ctx, batch, out,
- matcher.get());
- }
-};
-
-template <typename Type>
-struct MatchSubstring<Type, PlainEndsWithMatcher> {
- static Status Exec(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
- auto options = MatchSubstringState::Get(ctx);
- if (options.ignore_case) {
-#ifdef ARROW_WITH_RE2
- MatchSubstringOptions converted_options = options;
- converted_options.pattern = RE2::QuoteMeta(options.pattern) + "$";
- ARROW_ASSIGN_OR_RAISE(auto matcher, RegexSubstringMatcher::Make(converted_options));
- return MatchSubstringImpl<Type, RegexSubstringMatcher>::Exec(ctx, batch, out,
- matcher.get());
-#else
- return Status::NotImplemented("ignore_case requires RE2");
-#endif
- }
- ARROW_ASSIGN_OR_RAISE(auto matcher, PlainEndsWithMatcher::Make(options));
- return MatchSubstringImpl<Type, PlainEndsWithMatcher>::Exec(ctx, batch, out,
- matcher.get());
- }
-};
-
-const FunctionDoc match_substring_doc(
- "Match strings against literal pattern",
- ("For each string in `strings`, emit true iff it contains a given pattern.\n"
- "Null inputs emit null. The pattern must be given in MatchSubstringOptions. "
- "If ignore_case is set, only simple case folding is performed."),
- {"strings"}, "MatchSubstringOptions");
-
-const FunctionDoc starts_with_doc(
- "Check if strings start with a literal pattern",
- ("For each string in `strings`, emit true iff it starts with a given pattern.\n"
- "Null inputs emit null. The pattern must be given in MatchSubstringOptions. "
- "If ignore_case is set, only simple case folding is performed."),
- {"strings"}, "MatchSubstringOptions");
-
-const FunctionDoc ends_with_doc(
- "Check if strings end with a literal pattern",
- ("For each string in `strings`, emit true iff it ends with a given pattern.\n"
- "Null inputs emit null. The pattern must be given in MatchSubstringOptions. "
- "If ignore_case is set, only simple case folding is performed."),
- {"strings"}, "MatchSubstringOptions");
-
-#ifdef ARROW_WITH_RE2
-const FunctionDoc match_substring_regex_doc(
- "Match strings against regex pattern",
- ("For each string in `strings`, emit true iff it matches a given pattern at any "
- "position.\n"
- "Null inputs emit null. The pattern must be given in MatchSubstringOptions. "
- "If ignore_case is set, only simple case folding is performed."),
- {"strings"}, "MatchSubstringOptions");
-
-// SQL LIKE match
-
-/// Convert a SQL-style LIKE pattern (using '%' and '_') into a regex pattern
-std::string MakeLikeRegex(const MatchSubstringOptions& options) {
- // Allow . to match \n
- std::string like_pattern = "(?s:^";
- like_pattern.reserve(options.pattern.size() + 7);
- bool escaped = false;
- for (const char c : options.pattern) {
- if (!escaped && c == '%') {
- like_pattern.append(".*");
- } else if (!escaped && c == '_') {
- like_pattern.append(".");
- } else if (!escaped && c == '\\') {
- escaped = true;
- } else {
- switch (c) {
- case '.':
- case '?':
- case '+':
- case '*':
- case '^':
- case '$':
- case '\\':
- case '[':
- case '{':
- case '(':
- case ')':
- case '|': {
- like_pattern.push_back('\\');
- like_pattern.push_back(c);
- escaped = false;
- break;
- }
- default: {
- like_pattern.push_back(c);
- escaped = false;
- break;
- }
- }
- }
- }
- like_pattern.append("$)");
- return like_pattern;
-}
-
-// Evaluate a SQL-like LIKE pattern by translating it to a regexp or
-// substring search as appropriate. See what Apache Impala does:
-// https://github.com/apache/impala/blob/9c38568657d62b6f6d7b10aa1c721ba843374dd8/be/src/exprs/like-predicate.cc
-template <typename StringType>
-struct MatchLike {
- static Status Exec(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
-    // NOTE: keep these constants function-local to avoid compiling the regexes at startup
- // A LIKE pattern matching this regex can be translated into a substring search.
- static const RE2 kLikePatternIsSubstringMatch(R"(%+([^%_]*[^\\%_])?%+)");
- // A LIKE pattern matching this regex can be translated into a prefix search.
- static const RE2 kLikePatternIsStartsWith(R"(([^%_]*[^\\%_])?%+)");
- // A LIKE pattern matching this regex can be translated into a suffix search.
- static const RE2 kLikePatternIsEndsWith(R"(%+([^%_]*))");
-
- auto original_options = MatchSubstringState::Get(ctx);
- auto original_state = ctx->state();
-
- Status status;
- std::string pattern;
- if (!original_options.ignore_case &&
- re2::RE2::FullMatch(original_options.pattern, kLikePatternIsSubstringMatch,
- &pattern)) {
- MatchSubstringOptions converted_options{pattern, original_options.ignore_case};
- MatchSubstringState converted_state(converted_options);
- ctx->SetState(&converted_state);
- status = MatchSubstring<StringType, PlainSubstringMatcher>::Exec(ctx, batch, out);
- } else if (!original_options.ignore_case &&
- re2::RE2::FullMatch(original_options.pattern, kLikePatternIsStartsWith,
- &pattern)) {
- MatchSubstringOptions converted_options{pattern, original_options.ignore_case};
- MatchSubstringState converted_state(converted_options);
- ctx->SetState(&converted_state);
- status = MatchSubstring<StringType, PlainStartsWithMatcher>::Exec(ctx, batch, out);
- } else if (!original_options.ignore_case &&
- re2::RE2::FullMatch(original_options.pattern, kLikePatternIsEndsWith,
- &pattern)) {
- MatchSubstringOptions converted_options{pattern, original_options.ignore_case};
- MatchSubstringState converted_state(converted_options);
- ctx->SetState(&converted_state);
- status = MatchSubstring<StringType, PlainEndsWithMatcher>::Exec(ctx, batch, out);
- } else {
- MatchSubstringOptions converted_options{MakeLikeRegex(original_options),
- original_options.ignore_case};
- MatchSubstringState converted_state(converted_options);
- ctx->SetState(&converted_state);
- status = MatchSubstring<StringType, RegexSubstringMatcher>::Exec(ctx, batch, out);
- }
- ctx->SetState(original_state);
- return status;
- }
-};
-
-const FunctionDoc match_like_doc(
- "Match strings against SQL-style LIKE pattern",
-    ("For each string in `strings`, emit true iff it fully matches a given "
-     "pattern. That is, '%' will match any number of characters, '_' will "
- "match exactly one character, and any other character matches itself. To "
- "match a literal '%', '_', or '\\', precede the character with a backslash.\n"
- "Null inputs emit null. The pattern must be given in MatchSubstringOptions."),
- {"strings"}, "MatchSubstringOptions");
-
-#endif
-
+const FunctionDoc match_substring_doc(
+ "Match strings against literal pattern",
+ ("For each string in `strings`, emit true iff it contains a given pattern.\n"
+ "Null inputs emit null. The pattern must be given in MatchSubstringOptions. "
+ "If ignore_case is set, only simple case folding is performed."),
+ {"strings"}, "MatchSubstringOptions");
+
+const FunctionDoc starts_with_doc(
+ "Check if strings start with a literal pattern",
+ ("For each string in `strings`, emit true iff it starts with a given pattern.\n"
+ "Null inputs emit null. The pattern must be given in MatchSubstringOptions. "
+ "If ignore_case is set, only simple case folding is performed."),
+ {"strings"}, "MatchSubstringOptions");
+
+const FunctionDoc ends_with_doc(
+ "Check if strings end with a literal pattern",
+ ("For each string in `strings`, emit true iff it ends with a given pattern.\n"
+ "Null inputs emit null. The pattern must be given in MatchSubstringOptions. "
+ "If ignore_case is set, only simple case folding is performed."),
+ {"strings"}, "MatchSubstringOptions");
+
+#ifdef ARROW_WITH_RE2
+const FunctionDoc match_substring_regex_doc(
+ "Match strings against regex pattern",
+ ("For each string in `strings`, emit true iff it matches a given pattern at any "
+ "position.\n"
+ "Null inputs emit null. The pattern must be given in MatchSubstringOptions. "
+ "If ignore_case is set, only simple case folding is performed."),
+ {"strings"}, "MatchSubstringOptions");
+
+// SQL LIKE match
+
+/// Convert a SQL-style LIKE pattern (using '%' and '_') into a regex pattern
+std::string MakeLikeRegex(const MatchSubstringOptions& options) {
+ // Allow . to match \n
+ std::string like_pattern = "(?s:^";
+ like_pattern.reserve(options.pattern.size() + 7);
+ bool escaped = false;
+ for (const char c : options.pattern) {
+ if (!escaped && c == '%') {
+ like_pattern.append(".*");
+ } else if (!escaped && c == '_') {
+ like_pattern.append(".");
+ } else if (!escaped && c == '\\') {
+ escaped = true;
+ } else {
+ switch (c) {
+ case '.':
+ case '?':
+ case '+':
+ case '*':
+ case '^':
+ case '$':
+ case '\\':
+ case '[':
+ case '{':
+ case '(':
+ case ')':
+ case '|': {
+ like_pattern.push_back('\\');
+ like_pattern.push_back(c);
+ escaped = false;
+ break;
+ }
+ default: {
+ like_pattern.push_back(c);
+ escaped = false;
+ break;
+ }
+ }
+ }
+ }
+ like_pattern.append("$)");
+ return like_pattern;
+}
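[Editor's note] Two worked translations of MakeLikeRegex:

    // "foo%bar_"  ->  "(?s:^foo.*bar.$)"
    // "100\%"     ->  "(?s:^100%$)"    (the escaped '%' is matched literally)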
+
+// Evaluate a SQL-like LIKE pattern by translating it to a regexp or
+// substring search as appropriate. See what Apache Impala does:
+// https://github.com/apache/impala/blob/9c38568657d62b6f6d7b10aa1c721ba843374dd8/be/src/exprs/like-predicate.cc
+template <typename StringType>
+struct MatchLike {
+ static Status Exec(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
+    // NOTE: keep these constants function-local to avoid compiling the regexes at startup
+ // A LIKE pattern matching this regex can be translated into a substring search.
+ static const RE2 kLikePatternIsSubstringMatch(R"(%+([^%_]*[^\\%_])?%+)");
+ // A LIKE pattern matching this regex can be translated into a prefix search.
+ static const RE2 kLikePatternIsStartsWith(R"(([^%_]*[^\\%_])?%+)");
+ // A LIKE pattern matching this regex can be translated into a suffix search.
+ static const RE2 kLikePatternIsEndsWith(R"(%+([^%_]*))");
+
+ auto original_options = MatchSubstringState::Get(ctx);
+ auto original_state = ctx->state();
+
+ Status status;
+ std::string pattern;
+ if (!original_options.ignore_case &&
+ re2::RE2::FullMatch(original_options.pattern, kLikePatternIsSubstringMatch,
+ &pattern)) {
+ MatchSubstringOptions converted_options{pattern, original_options.ignore_case};
+ MatchSubstringState converted_state(converted_options);
+ ctx->SetState(&converted_state);
+ status = MatchSubstring<StringType, PlainSubstringMatcher>::Exec(ctx, batch, out);
+ } else if (!original_options.ignore_case &&
+ re2::RE2::FullMatch(original_options.pattern, kLikePatternIsStartsWith,
+ &pattern)) {
+ MatchSubstringOptions converted_options{pattern, original_options.ignore_case};
+ MatchSubstringState converted_state(converted_options);
+ ctx->SetState(&converted_state);
+ status = MatchSubstring<StringType, PlainStartsWithMatcher>::Exec(ctx, batch, out);
+ } else if (!original_options.ignore_case &&
+ re2::RE2::FullMatch(original_options.pattern, kLikePatternIsEndsWith,
+ &pattern)) {
+ MatchSubstringOptions converted_options{pattern, original_options.ignore_case};
+ MatchSubstringState converted_state(converted_options);
+ ctx->SetState(&converted_state);
+ status = MatchSubstring<StringType, PlainEndsWithMatcher>::Exec(ctx, batch, out);
+ } else {
+ MatchSubstringOptions converted_options{MakeLikeRegex(original_options),
+ original_options.ignore_case};
+ MatchSubstringState converted_state(converted_options);
+ ctx->SetState(&converted_state);
+ status = MatchSubstring<StringType, RegexSubstringMatcher>::Exec(ctx, batch, out);
+ }
+ ctx->SetState(original_state);
+ return status;
+ }
+};
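[Editor's note] The effect of the three classifier regexes on a few representative patterns (derived from the code above, not an exhaustive specification):

    // "%needle%"  ->  PlainSubstringMatcher  on "needle"
    // "needle%"   ->  PlainStartsWithMatcher on "needle"
    // "%needle"   ->  PlainEndsWithMatcher   on "needle"
    // "ne%dle"    ->  RegexSubstringMatcher  on "(?s:^ne.*dle$)"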
+
+const FunctionDoc match_like_doc(
+ "Match strings against SQL-style LIKE pattern",
+    ("For each string in `strings`, emit true iff it fully matches a given "
+     "pattern. That is, '%' will match any number of characters, '_' will "
+ "match exactly one character, and any other character matches itself. To "
+ "match a literal '%', '_', or '\\', precede the character with a backslash.\n"
+ "Null inputs emit null. The pattern must be given in MatchSubstringOptions."),
+ {"strings"}, "MatchSubstringOptions");
+
+#endif
+
void AddMatchSubstring(FunctionRegistry* registry) {
- {
- auto func = std::make_shared<ScalarFunction>("match_substring", Arity::Unary(),
- &match_substring_doc);
- auto exec_32 = MatchSubstring<StringType, PlainSubstringMatcher>::Exec;
- auto exec_64 = MatchSubstring<LargeStringType, PlainSubstringMatcher>::Exec;
- DCHECK_OK(func->AddKernel({utf8()}, boolean(), exec_32, MatchSubstringState::Init));
- DCHECK_OK(
- func->AddKernel({large_utf8()}, boolean(), exec_64, MatchSubstringState::Init));
- DCHECK_OK(registry->AddFunction(std::move(func)));
- }
- {
- auto func = std::make_shared<ScalarFunction>("starts_with", Arity::Unary(),
- &match_substring_doc);
- auto exec_32 = MatchSubstring<StringType, PlainStartsWithMatcher>::Exec;
- auto exec_64 = MatchSubstring<LargeStringType, PlainStartsWithMatcher>::Exec;
- DCHECK_OK(func->AddKernel({utf8()}, boolean(), exec_32, MatchSubstringState::Init));
- DCHECK_OK(
- func->AddKernel({large_utf8()}, boolean(), exec_64, MatchSubstringState::Init));
- DCHECK_OK(registry->AddFunction(std::move(func)));
- }
- {
- auto func = std::make_shared<ScalarFunction>("ends_with", Arity::Unary(),
- &match_substring_doc);
- auto exec_32 = MatchSubstring<StringType, PlainEndsWithMatcher>::Exec;
- auto exec_64 = MatchSubstring<LargeStringType, PlainEndsWithMatcher>::Exec;
- DCHECK_OK(func->AddKernel({utf8()}, boolean(), exec_32, MatchSubstringState::Init));
- DCHECK_OK(
- func->AddKernel({large_utf8()}, boolean(), exec_64, MatchSubstringState::Init));
- DCHECK_OK(registry->AddFunction(std::move(func)));
- }
-#ifdef ARROW_WITH_RE2
- {
- auto func = std::make_shared<ScalarFunction>("match_substring_regex", Arity::Unary(),
- &match_substring_regex_doc);
- auto exec_32 = MatchSubstring<StringType, RegexSubstringMatcher>::Exec;
- auto exec_64 = MatchSubstring<LargeStringType, RegexSubstringMatcher>::Exec;
- DCHECK_OK(func->AddKernel({utf8()}, boolean(), exec_32, MatchSubstringState::Init));
- DCHECK_OK(
- func->AddKernel({large_utf8()}, boolean(), exec_64, MatchSubstringState::Init));
- DCHECK_OK(registry->AddFunction(std::move(func)));
- }
- {
- auto func =
- std::make_shared<ScalarFunction>("match_like", Arity::Unary(), &match_like_doc);
- auto exec_32 = MatchLike<StringType>::Exec;
- auto exec_64 = MatchLike<LargeStringType>::Exec;
- DCHECK_OK(func->AddKernel({utf8()}, boolean(), exec_32, MatchSubstringState::Init));
- DCHECK_OK(
- func->AddKernel({large_utf8()}, boolean(), exec_64, MatchSubstringState::Init));
- DCHECK_OK(registry->AddFunction(std::move(func)));
- }
-#endif
-}
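[Editor's note] A hedged usage sketch of the functions registered above ("starts_with" and MatchSubstringOptions are as registered in this file; the helper is illustrative):

    #include <arrow/api.h>
    #include <arrow/compute/api.h>

    arrow::Result<arrow::Datum> StartsWithFoo(
        const std::shared_ptr<arrow::Array>& strings) {
      arrow::compute::MatchSubstringOptions opts("foo", /*ignore_case=*/false);
      return arrow::compute::CallFunction("starts_with", {strings}, &opts);
    }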
-
-// Substring find - lfind/index/etc.
-
-struct FindSubstring {
- const PlainSubstringMatcher matcher_;
-
- explicit FindSubstring(PlainSubstringMatcher matcher) : matcher_(std::move(matcher)) {}
-
- template <typename OutValue, typename... Ignored>
- OutValue Call(KernelContext*, util::string_view val, Status*) const {
- return static_cast<OutValue>(matcher_.Find(val));
- }
-};
-
-#ifdef ARROW_WITH_RE2
-struct FindSubstringRegex {
- std::unique_ptr<RE2> regex_match_;
-
- explicit FindSubstringRegex(const MatchSubstringOptions& options,
- bool literal = false) {
- std::string regex = "(";
- regex.reserve(options.pattern.length() + 2);
- regex += literal ? RE2::QuoteMeta(options.pattern) : options.pattern;
- regex += ")";
- regex_match_.reset(new RE2(std::move(regex), RegexSubstringMatcher::MakeRE2Options(
- options, /*literal=*/false)));
- }
-
- template <typename OutValue, typename... Ignored>
- OutValue Call(KernelContext*, util::string_view val, Status*) const {
- re2::StringPiece piece(val.data(), val.length());
- re2::StringPiece match;
- if (re2::RE2::PartialMatch(piece, *regex_match_, &match)) {
- return static_cast<OutValue>(match.data() - piece.data());
- }
- return -1;
- }
-};
-#endif
-
-template <typename InputType>
-struct FindSubstringExec {
- using OffsetType = typename TypeTraits<InputType>::OffsetType;
- static Status Exec(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
- const MatchSubstringOptions& options = MatchSubstringState::Get(ctx);
- if (options.ignore_case) {
-#ifdef ARROW_WITH_RE2
- applicator::ScalarUnaryNotNullStateful<OffsetType, InputType, FindSubstringRegex>
- kernel{FindSubstringRegex(options, /*literal=*/true)};
- return kernel.Exec(ctx, batch, out);
-#endif
- return Status::NotImplemented("ignore_case requires RE2");
- }
- applicator::ScalarUnaryNotNullStateful<OffsetType, InputType, FindSubstring> kernel{
- FindSubstring(PlainSubstringMatcher(options))};
- return kernel.Exec(ctx, batch, out);
- }
-};
-
-const FunctionDoc find_substring_doc(
- "Find first occurrence of substring",
- ("For each string in `strings`, emit the index of the first occurrence of the given "
- "pattern, or -1 if not found.\n"
- "Null inputs emit null. The pattern must be given in MatchSubstringOptions."),
- {"strings"}, "MatchSubstringOptions");
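[Editor's note] Behavioral sketch of the kernel above, following the KMP Find semantics (indices are byte offsets into the string):

    // find_substring("mississippi", pattern="ss")  ->  2
    // find_substring("banana",      pattern="xy")  ->  -1   (not found)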
-
-#ifdef ARROW_WITH_RE2
-template <typename InputType>
-struct FindSubstringRegexExec {
- using OffsetType = typename TypeTraits<InputType>::OffsetType;
- static Status Exec(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
- const MatchSubstringOptions& options = MatchSubstringState::Get(ctx);
- applicator::ScalarUnaryNotNullStateful<OffsetType, InputType, FindSubstringRegex>
- kernel{FindSubstringRegex(options, /*literal=*/false)};
- return kernel.Exec(ctx, batch, out);
- }
-};
-
-const FunctionDoc find_substring_regex_doc(
- "Find location of first match of regex pattern",
- ("For each string in `strings`, emit the index of the first match of the given "
- "pattern, or -1 if not found.\n"
- "Null inputs emit null. The pattern must be given in MatchSubstringOptions."),
- {"strings"}, "MatchSubstringOptions");
-#endif
-
-void AddFindSubstring(FunctionRegistry* registry) {
- {
- auto func = std::make_shared<ScalarFunction>("find_substring", Arity::Unary(),
- &find_substring_doc);
- for (const auto& ty : BaseBinaryTypes()) {
- auto offset_type = offset_bit_width(ty->id()) == 64 ? int64() : int32();
- DCHECK_OK(func->AddKernel({ty}, offset_type,
- GenerateTypeAgnosticVarBinaryBase<FindSubstringExec>(ty),
- MatchSubstringState::Init));
- }
- DCHECK_OK(registry->AddFunction(std::move(func)));
- }
-#ifdef ARROW_WITH_RE2
- {
- auto func = std::make_shared<ScalarFunction>("find_substring_regex", Arity::Unary(),
- &find_substring_regex_doc);
- for (const auto& ty : BaseBinaryTypes()) {
- auto offset_type = offset_bit_width(ty->id()) == 64 ? int64() : int32();
- DCHECK_OK(
- func->AddKernel({ty}, offset_type,
- GenerateTypeAgnosticVarBinaryBase<FindSubstringRegexExec>(ty),
- MatchSubstringState::Init));
- }
- DCHECK_OK(registry->AddFunction(std::move(func)));
- }
-#endif
-}
-
-// Substring count
-
-struct CountSubstring {
- const PlainSubstringMatcher matcher_;
-
- explicit CountSubstring(PlainSubstringMatcher matcher) : matcher_(std::move(matcher)) {}
-
- template <typename OutValue, typename... Ignored>
- OutValue Call(KernelContext*, util::string_view val, Status*) const {
- OutValue count = 0;
- uint64_t start = 0;
- const auto pattern_size = std::max<uint64_t>(1, matcher_.options_.pattern.size());
- while (start <= val.size()) {
- const int64_t index = matcher_.Find(val.substr(start));
- if (index >= 0) {
- count++;
- start += index + pattern_size;
- } else {
- break;
- }
- }
- return count;
- }
-};
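[Editor's note] The counting is non-overlapping: start jumps past each match by index + pattern_size. A worked example:

    // count_substring("aaaa", pattern="aa")  ->  2
    // (matches at 0 and 2; the overlapping match at 1 is skipped)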
-
-#ifdef ARROW_WITH_RE2
-struct CountSubstringRegex {
- std::unique_ptr<RE2> regex_match_;
-
- explicit CountSubstringRegex(const MatchSubstringOptions& options, bool literal = false)
- : regex_match_(new RE2(options.pattern,
- RegexSubstringMatcher::MakeRE2Options(options, literal))) {}
-
- static Result<CountSubstringRegex> Make(const MatchSubstringOptions& options,
- bool literal = false) {
- CountSubstringRegex counter(options, literal);
- RETURN_NOT_OK(RegexStatus(*counter.regex_match_));
- return std::move(counter);
- }
-
- template <typename OutValue, typename... Ignored>
- OutValue Call(KernelContext*, util::string_view val, Status*) const {
- OutValue count = 0;
- re2::StringPiece input(val.data(), val.size());
- auto last_size = input.size();
- while (re2::RE2::FindAndConsume(&input, *regex_match_)) {
- count++;
- if (last_size == input.size()) {
- // 0-length match
- if (input.size() > 0) {
- input.remove_prefix(1);
- } else {
- break;
- }
- }
- last_size = input.size();
- }
- return count;
- }
-};
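[Editor's note] The last_size bookkeeping above guards against zero-length regex matches: if FindAndConsume matched but consumed nothing, one byte is dropped manually so the loop always makes progress.

    // e.g. pattern "a*" against "bb": every match is empty, so remove_prefix(1)
    // advances past each position instead of looping on the same spot.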
-
-template <typename InputType>
-struct CountSubstringRegexExec {
- using OffsetType = typename TypeTraits<InputType>::OffsetType;
- static Status Exec(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
- const MatchSubstringOptions& options = MatchSubstringState::Get(ctx);
- ARROW_ASSIGN_OR_RAISE(auto counter, CountSubstringRegex::Make(options));
- applicator::ScalarUnaryNotNullStateful<OffsetType, InputType, CountSubstringRegex>
- kernel{std::move(counter)};
- return kernel.Exec(ctx, batch, out);
- }
-};
-#endif
-
-template <typename InputType>
-struct CountSubstringExec {
- using OffsetType = typename TypeTraits<InputType>::OffsetType;
- static Status Exec(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
- const MatchSubstringOptions& options = MatchSubstringState::Get(ctx);
- if (options.ignore_case) {
-#ifdef ARROW_WITH_RE2
- ARROW_ASSIGN_OR_RAISE(auto counter,
- CountSubstringRegex::Make(options, /*literal=*/true));
- applicator::ScalarUnaryNotNullStateful<OffsetType, InputType, CountSubstringRegex>
- kernel{std::move(counter)};
- return kernel.Exec(ctx, batch, out);
-#else
- return Status::NotImplemented("ignore_case requires RE2");
-#endif
- }
- applicator::ScalarUnaryNotNullStateful<OffsetType, InputType, CountSubstring> kernel{
- CountSubstring(PlainSubstringMatcher(options))};
- return kernel.Exec(ctx, batch, out);
- }
-};
-
-const FunctionDoc count_substring_doc(
- "Count occurrences of substring",
- ("For each string in `strings`, emit the number of occurrences of the given "
- "pattern.\n"
- "Null inputs emit null. The pattern must be given in MatchSubstringOptions."),
- {"strings"}, "MatchSubstringOptions");
-
-#ifdef ARROW_WITH_RE2
-const FunctionDoc count_substring_regex_doc(
-    "Count occurrences of regex pattern",
- ("For each string in `strings`, emit the number of occurrences of the given "
- "regex pattern.\n"
- "Null inputs emit null. The pattern must be given in MatchSubstringOptions."),
- {"strings"}, "MatchSubstringOptions");
-#endif
-
-void AddCountSubstring(FunctionRegistry* registry) {
- {
- auto func = std::make_shared<ScalarFunction>("count_substring", Arity::Unary(),
- &count_substring_doc);
- for (const auto& ty : BaseBinaryTypes()) {
- auto offset_type = offset_bit_width(ty->id()) == 64 ? int64() : int32();
- DCHECK_OK(func->AddKernel({ty}, offset_type,
- GenerateTypeAgnosticVarBinaryBase<CountSubstringExec>(ty),
- MatchSubstringState::Init));
- }
- DCHECK_OK(registry->AddFunction(std::move(func)));
- }
-#ifdef ARROW_WITH_RE2
- {
- auto func = std::make_shared<ScalarFunction>("count_substring_regex", Arity::Unary(),
- &count_substring_regex_doc);
- for (const auto& ty : BaseBinaryTypes()) {
- auto offset_type = offset_bit_width(ty->id()) == 64 ? int64() : int32();
- DCHECK_OK(
- func->AddKernel({ty}, offset_type,
- GenerateTypeAgnosticVarBinaryBase<CountSubstringRegexExec>(ty),
- MatchSubstringState::Init));
- }
- DCHECK_OK(registry->AddFunction(std::move(func)));
- }
-#endif
-}
-
-// Slicing
-
-struct SliceTransformBase : public StringTransformBase {
- using State = OptionsWrapper<SliceOptions>;
-
- const SliceOptions* options;
-
- Status PreExec(KernelContext* ctx, const ExecBatch& batch, Datum* out) override {
- options = &State::Get(ctx);
- if (options->step == 0) {
- return Status::Invalid("Slice step cannot be zero");
- }
- return Status::OK();
- }
-};
-
-struct SliceCodeunitsTransform : SliceTransformBase {
- int64_t MaxCodeunits(int64_t ninputs, int64_t input_ncodeunits) override {
- const SliceOptions& opt = *this->options;
- if ((opt.start >= 0) != (opt.stop >= 0)) {
- // If start and stop don't have the same sign, we can't guess an upper bound
- // on the resulting slice lengths, so return a worst case estimate.
- return input_ncodeunits;
- }
- int64_t max_slice_codepoints = (opt.stop - opt.start + opt.step - 1) / opt.step;
- // The maximum UTF8 byte size of a codepoint is 4
- return std::min(input_ncodeunits,
- 4 * ninputs * std::max<int64_t>(0, max_slice_codepoints));
- }
-
- int64_t Transform(const uint8_t* input, int64_t input_string_ncodeunits,
- uint8_t* output) {
- if (options->step >= 1) {
- return SliceForward(input, input_string_ncodeunits, output);
- }
- return SliceBackward(input, input_string_ncodeunits, output);
- }
-
-#define RETURN_IF_UTF8_ERROR(expr) \
- do { \
- if (ARROW_PREDICT_FALSE(!expr)) { \
- return kTransformError; \
- } \
- } while (0)
-
- int64_t SliceForward(const uint8_t* input, int64_t input_string_ncodeunits,
- uint8_t* output) {
- // Slice in forward order (step > 0)
- const SliceOptions& opt = *this->options;
- const uint8_t* begin = input;
- const uint8_t* end = input + input_string_ncodeunits;
- const uint8_t* begin_sliced = begin;
- const uint8_t* end_sliced = end;
-
- // First, compute begin_sliced and end_sliced
- if (opt.start >= 0) {
- // start counting from the left
- RETURN_IF_UTF8_ERROR(
- arrow::util::UTF8AdvanceCodepoints(begin, end, &begin_sliced, opt.start));
- if (opt.stop > opt.start) {
- // continue counting from begin_sliced
- const int64_t length = opt.stop - opt.start;
- RETURN_IF_UTF8_ERROR(
- arrow::util::UTF8AdvanceCodepoints(begin_sliced, end, &end_sliced, length));
- } else if (opt.stop < 0) {
-        // or from the end (end_sliced will never end up before begin_sliced)
- RETURN_IF_UTF8_ERROR(arrow::util::UTF8AdvanceCodepointsReverse(
- begin_sliced, end, &end_sliced, -opt.stop));
- } else {
- // zero length slice
- return 0;
- }
- } else {
- // start counting from the right
- RETURN_IF_UTF8_ERROR(arrow::util::UTF8AdvanceCodepointsReverse(
- begin, end, &begin_sliced, -opt.start));
- if (opt.stop > 0) {
- // continue counting from the left, we cannot start from begin_sliced because we
- // don't know how many codepoints are between begin and begin_sliced
- RETURN_IF_UTF8_ERROR(
- arrow::util::UTF8AdvanceCodepoints(begin, end, &end_sliced, opt.stop));
- // and therefore we also need this check
- if (end_sliced <= begin_sliced) {
- // zero length slice
- return 0;
- }
- } else if ((opt.stop < 0) && (opt.stop > opt.start)) {
- // stop is negative, but larger than start, so we count again from the right
- // in some cases we can optimize this, depending on the shortest path (from end
- // or begin_sliced), but begin_sliced and opt.start can be 'out of sync',
- // for instance when start=-100 and the string length is only 10.
- RETURN_IF_UTF8_ERROR(arrow::util::UTF8AdvanceCodepointsReverse(
- begin_sliced, end, &end_sliced, -opt.stop));
- } else {
- // zero length slice
- return 0;
- }
- }
-
- // Second, copy computed slice to output
- DCHECK(begin_sliced <= end_sliced);
- if (opt.step == 1) {
- // fast case, where we can simply finish with a memcpy
- std::copy(begin_sliced, end_sliced, output);
- return end_sliced - begin_sliced;
- }
- uint8_t* dest = output;
- const uint8_t* i = begin_sliced;
-
- while (i < end_sliced) {
- uint32_t codepoint = 0;
- // write a single codepoint
- RETURN_IF_UTF8_ERROR(arrow::util::UTF8Decode(&i, &codepoint));
- dest = arrow::util::UTF8Encode(dest, codepoint);
- // and skip the remainder
- int64_t skips = opt.step - 1;
- while ((skips--) && (i < end_sliced)) {
- RETURN_IF_UTF8_ERROR(arrow::util::UTF8Decode(&i, &codepoint));
- }
- }
- return dest - output;
- }
-
- int64_t SliceBackward(const uint8_t* input, int64_t input_string_ncodeunits,
- uint8_t* output) {
- // Slice in reverse order (step < 0)
- const SliceOptions& opt = *this->options;
- const uint8_t* begin = input;
- const uint8_t* end = input + input_string_ncodeunits;
- const uint8_t* begin_sliced = begin;
- const uint8_t* end_sliced = end;
-
- // Serious +1 -1 kung fu because begin_sliced and end_sliced act like
- // reverse iterators.
- if (opt.start >= 0) {
- // +1 because begin_sliced acts as the end of a reverse iterator
- RETURN_IF_UTF8_ERROR(
- arrow::util::UTF8AdvanceCodepoints(begin, end, &begin_sliced, opt.start + 1));
- } else {
- // -1 because start=-1 means the last codepoint, i.e. 0 advances from the end
- RETURN_IF_UTF8_ERROR(arrow::util::UTF8AdvanceCodepointsReverse(
- begin, end, &begin_sliced, -opt.start - 1));
- }
- // make it point at the last codeunit of the preceding codepoint
- begin_sliced--;
-
- // similar to opt.start
- if (opt.stop >= 0) {
- RETURN_IF_UTF8_ERROR(
- arrow::util::UTF8AdvanceCodepoints(begin, end, &end_sliced, opt.stop + 1));
- } else {
- RETURN_IF_UTF8_ERROR(arrow::util::UTF8AdvanceCodepointsReverse(
- begin, end, &end_sliced, -opt.stop - 1));
- }
- end_sliced--;
-
- // Copy computed slice to output
- uint8_t* dest = output;
- const uint8_t* i = begin_sliced;
- while (i > end_sliced) {
- uint32_t codepoint = 0;
- // write a single codepoint
- RETURN_IF_UTF8_ERROR(arrow::util::UTF8DecodeReverse(&i, &codepoint));
- dest = arrow::util::UTF8Encode(dest, codepoint);
- // and skip the remainder
- int64_t skips = -opt.step - 1;
- while ((skips--) && (i > end_sliced)) {
- RETURN_IF_UTF8_ERROR(arrow::util::UTF8DecodeReverse(&i, &codepoint));
- }
- }
- return dest - output;
- }
-
-#undef RETURN_IF_UTF8_ERROR
-};
-
-template <typename Type>
-using SliceCodeunits = StringTransformExec<Type, SliceCodeunitsTransform>;
-
-const FunctionDoc utf8_slice_codeunits_doc(
- "Slice string",
- ("For each string in `strings`, slice into a substring defined by\n"
- "(`start`, `stop`, `step`) as given by `SliceOptions`, where `start` is\n"
- "inclusive and `stop` is exclusive, both measured in codeunits. If `step` is\n"
- "negative, the string is traversed in reverse order. A `step` of zero is\n"
- "considered an error.\n"
- "Null inputs emit null."),
- {"strings"}, "SliceOptions");
-
-void AddSlice(FunctionRegistry* registry) {
- auto func = std::make_shared<ScalarFunction>("utf8_slice_codeunits", Arity::Unary(),
- &utf8_slice_codeunits_doc);
- using t32 = SliceCodeunits<StringType>;
- using t64 = SliceCodeunits<LargeStringType>;
+ {
+ auto func = std::make_shared<ScalarFunction>("match_substring", Arity::Unary(),
+ &match_substring_doc);
+ auto exec_32 = MatchSubstring<StringType, PlainSubstringMatcher>::Exec;
+ auto exec_64 = MatchSubstring<LargeStringType, PlainSubstringMatcher>::Exec;
+ DCHECK_OK(func->AddKernel({utf8()}, boolean(), exec_32, MatchSubstringState::Init));
+ DCHECK_OK(
+ func->AddKernel({large_utf8()}, boolean(), exec_64, MatchSubstringState::Init));
+ DCHECK_OK(registry->AddFunction(std::move(func)));
+ }
+ {
+ auto func = std::make_shared<ScalarFunction>("starts_with", Arity::Unary(),
+ &match_substring_doc);
+ auto exec_32 = MatchSubstring<StringType, PlainStartsWithMatcher>::Exec;
+ auto exec_64 = MatchSubstring<LargeStringType, PlainStartsWithMatcher>::Exec;
+ DCHECK_OK(func->AddKernel({utf8()}, boolean(), exec_32, MatchSubstringState::Init));
+ DCHECK_OK(
+ func->AddKernel({large_utf8()}, boolean(), exec_64, MatchSubstringState::Init));
+ DCHECK_OK(registry->AddFunction(std::move(func)));
+ }
+ {
+ auto func = std::make_shared<ScalarFunction>("ends_with", Arity::Unary(),
+ &match_substring_doc);
+ auto exec_32 = MatchSubstring<StringType, PlainEndsWithMatcher>::Exec;
+ auto exec_64 = MatchSubstring<LargeStringType, PlainEndsWithMatcher>::Exec;
+ DCHECK_OK(func->AddKernel({utf8()}, boolean(), exec_32, MatchSubstringState::Init));
+ DCHECK_OK(
+ func->AddKernel({large_utf8()}, boolean(), exec_64, MatchSubstringState::Init));
+ DCHECK_OK(registry->AddFunction(std::move(func)));
+ }
+#ifdef ARROW_WITH_RE2
+ {
+ auto func = std::make_shared<ScalarFunction>("match_substring_regex", Arity::Unary(),
+ &match_substring_regex_doc);
+ auto exec_32 = MatchSubstring<StringType, RegexSubstringMatcher>::Exec;
+ auto exec_64 = MatchSubstring<LargeStringType, RegexSubstringMatcher>::Exec;
+ DCHECK_OK(func->AddKernel({utf8()}, boolean(), exec_32, MatchSubstringState::Init));
+ DCHECK_OK(
+ func->AddKernel({large_utf8()}, boolean(), exec_64, MatchSubstringState::Init));
+ DCHECK_OK(registry->AddFunction(std::move(func)));
+ }
+ {
+ auto func =
+ std::make_shared<ScalarFunction>("match_like", Arity::Unary(), &match_like_doc);
+ auto exec_32 = MatchLike<StringType>::Exec;
+ auto exec_64 = MatchLike<LargeStringType>::Exec;
+ DCHECK_OK(func->AddKernel({utf8()}, boolean(), exec_32, MatchSubstringState::Init));
+ DCHECK_OK(
+ func->AddKernel({large_utf8()}, boolean(), exec_64, MatchSubstringState::Init));
+ DCHECK_OK(registry->AddFunction(std::move(func)));
+ }
+#endif
+}
+
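+// Usage sketch (editorial note, not from the upstream sources): once
+// registered, the predicates above are reachable through the generic
+// compute entry point. Assumes arrow::compute::CallFunction and
+// MatchSubstringOptions from "arrow/compute/api.h".
+//
+//   #include "arrow/compute/api.h"
+//
+//   arrow::Result<arrow::Datum> MatchExample(
+//       const std::shared_ptr<arrow::Array>& strings) {
+//     arrow::compute::MatchSubstringOptions opts("foo", /*ignore_case=*/false);
+//     // Boolean output: true where "foo" occurs in the input string.
+//     // "starts_with" and "ends_with" accept the same options.
+//     return arrow::compute::CallFunction("match_substring", {strings}, &opts);
+//   }
+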
+// Substring find - lfind/index/etc.
+
+struct FindSubstring {
+ const PlainSubstringMatcher matcher_;
+
+ explicit FindSubstring(PlainSubstringMatcher matcher) : matcher_(std::move(matcher)) {}
+
+ template <typename OutValue, typename... Ignored>
+ OutValue Call(KernelContext*, util::string_view val, Status*) const {
+ return static_cast<OutValue>(matcher_.Find(val));
+ }
+};
+
+#ifdef ARROW_WITH_RE2
+struct FindSubstringRegex {
+ std::unique_ptr<RE2> regex_match_;
+
+ explicit FindSubstringRegex(const MatchSubstringOptions& options,
+ bool literal = false) {
+ std::string regex = "(";
+ regex.reserve(options.pattern.length() + 2);
+ regex += literal ? RE2::QuoteMeta(options.pattern) : options.pattern;
+ regex += ")";
+ regex_match_.reset(new RE2(std::move(regex), RegexSubstringMatcher::MakeRE2Options(
+ options, /*literal=*/false)));
+ }
+
+ template <typename OutValue, typename... Ignored>
+ OutValue Call(KernelContext*, util::string_view val, Status*) const {
+ re2::StringPiece piece(val.data(), val.length());
+ re2::StringPiece match;
+ if (re2::RE2::PartialMatch(piece, *regex_match_, &match)) {
+ return static_cast<OutValue>(match.data() - piece.data());
+ }
+ return -1;
+ }
+};
+#endif
+
+template <typename InputType>
+struct FindSubstringExec {
+ using OffsetType = typename TypeTraits<InputType>::OffsetType;
+ static Status Exec(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
+ const MatchSubstringOptions& options = MatchSubstringState::Get(ctx);
+ if (options.ignore_case) {
+#ifdef ARROW_WITH_RE2
+ applicator::ScalarUnaryNotNullStateful<OffsetType, InputType, FindSubstringRegex>
+ kernel{FindSubstringRegex(options, /*literal=*/true)};
+ return kernel.Exec(ctx, batch, out);
+#endif
+ return Status::NotImplemented("ignore_case requires RE2");
+ }
+ applicator::ScalarUnaryNotNullStateful<OffsetType, InputType, FindSubstring> kernel{
+ FindSubstring(PlainSubstringMatcher(options))};
+ return kernel.Exec(ctx, batch, out);
+ }
+};
+
+const FunctionDoc find_substring_doc(
+ "Find first occurrence of substring",
+ ("For each string in `strings`, emit the index of the first occurrence of the given "
+ "pattern, or -1 if not found.\n"
+ "Null inputs emit null. The pattern must be given in MatchSubstringOptions."),
+ {"strings"}, "MatchSubstringOptions");
+
+#ifdef ARROW_WITH_RE2
+template <typename InputType>
+struct FindSubstringRegexExec {
+ using OffsetType = typename TypeTraits<InputType>::OffsetType;
+ static Status Exec(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
+ const MatchSubstringOptions& options = MatchSubstringState::Get(ctx);
+ applicator::ScalarUnaryNotNullStateful<OffsetType, InputType, FindSubstringRegex>
+ kernel{FindSubstringRegex(options, /*literal=*/false)};
+ return kernel.Exec(ctx, batch, out);
+ }
+};
+
+const FunctionDoc find_substring_regex_doc(
+ "Find location of first match of regex pattern",
+ ("For each string in `strings`, emit the index of the first match of the given "
+ "pattern, or -1 if not found.\n"
+ "Null inputs emit null. The pattern must be given in MatchSubstringOptions."),
+ {"strings"}, "MatchSubstringOptions");
+#endif
+
+void AddFindSubstring(FunctionRegistry* registry) {
+ {
+ auto func = std::make_shared<ScalarFunction>("find_substring", Arity::Unary(),
+ &find_substring_doc);
+ for (const auto& ty : BaseBinaryTypes()) {
+ auto offset_type = offset_bit_width(ty->id()) == 64 ? int64() : int32();
+ DCHECK_OK(func->AddKernel({ty}, offset_type,
+ GenerateTypeAgnosticVarBinaryBase<FindSubstringExec>(ty),
+ MatchSubstringState::Init));
+ }
+ DCHECK_OK(registry->AddFunction(std::move(func)));
+ }
+#ifdef ARROW_WITH_RE2
+ {
+ auto func = std::make_shared<ScalarFunction>("find_substring_regex", Arity::Unary(),
+ &find_substring_regex_doc);
+ for (const auto& ty : BaseBinaryTypes()) {
+ auto offset_type = offset_bit_width(ty->id()) == 64 ? int64() : int32();
+ DCHECK_OK(
+ func->AddKernel({ty}, offset_type,
+ GenerateTypeAgnosticVarBinaryBase<FindSubstringRegexExec>(ty),
+ MatchSubstringState::Init));
+ }
+ DCHECK_OK(registry->AddFunction(std::move(func)));
+ }
+#endif
+}
+
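+// Usage sketch (editorial note; assumes arrow::compute::CallFunction from
+// "arrow/compute/api.h"):
+//
+//   arrow::Result<arrow::Datum> FindExample(
+//       const std::shared_ptr<arrow::Array>& strings) {
+//     arrow::compute::MatchSubstringOptions opts("ab");
+//     // Per-string offset of the first occurrence, or -1 when absent:
+//     // ["abc", "cab", "xyz"] -> [0, 1, -1]. The output is int32 for utf8
+//     // and int64 for large_utf8, per the kernels registered above.
+//     return arrow::compute::CallFunction("find_substring", {strings}, &opts);
+//   }
+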
+// Substring count
+
+struct CountSubstring {
+ const PlainSubstringMatcher matcher_;
+
+ explicit CountSubstring(PlainSubstringMatcher matcher) : matcher_(std::move(matcher)) {}
+
+ template <typename OutValue, typename... Ignored>
+ OutValue Call(KernelContext*, util::string_view val, Status*) const {
+ OutValue count = 0;
+ uint64_t start = 0;
+ const auto pattern_size = std::max<uint64_t>(1, matcher_.options_.pattern.size());
+ while (start <= val.size()) {
+ const int64_t index = matcher_.Find(val.substr(start));
+ if (index >= 0) {
+ count++;
+ start += index + pattern_size;
+ } else {
+ break;
+ }
+ }
+ return count;
+ }
+};
+
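+// Worked example of the scan above (editorial note): matches are counted
+// without overlap, since `start` jumps past each full match, so counting
+// "aa" in "aaaa" yields 2 rather than 3. For an empty pattern the advance
+// is clamped to one code unit, so it matches at every position plus the
+// end: N+1 hits for an N-byte string, matching Python's str.count("").
+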
+#ifdef ARROW_WITH_RE2
+struct CountSubstringRegex {
+ std::unique_ptr<RE2> regex_match_;
+
+ explicit CountSubstringRegex(const MatchSubstringOptions& options, bool literal = false)
+ : regex_match_(new RE2(options.pattern,
+ RegexSubstringMatcher::MakeRE2Options(options, literal))) {}
+
+ static Result<CountSubstringRegex> Make(const MatchSubstringOptions& options,
+ bool literal = false) {
+ CountSubstringRegex counter(options, literal);
+ RETURN_NOT_OK(RegexStatus(*counter.regex_match_));
+ return std::move(counter);
+ }
+
+ template <typename OutValue, typename... Ignored>
+ OutValue Call(KernelContext*, util::string_view val, Status*) const {
+ OutValue count = 0;
+ re2::StringPiece input(val.data(), val.size());
+ auto last_size = input.size();
+ while (re2::RE2::FindAndConsume(&input, *regex_match_)) {
+ count++;
+ if (last_size == input.size()) {
+ // 0-length match
+ if (input.size() > 0) {
+ input.remove_prefix(1);
+ } else {
+ break;
+ }
+ }
+ last_size = input.size();
+ }
+ return count;
+ }
+};
+
+template <typename InputType>
+struct CountSubstringRegexExec {
+ using OffsetType = typename TypeTraits<InputType>::OffsetType;
+ static Status Exec(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
+ const MatchSubstringOptions& options = MatchSubstringState::Get(ctx);
+ ARROW_ASSIGN_OR_RAISE(auto counter, CountSubstringRegex::Make(options));
+ applicator::ScalarUnaryNotNullStateful<OffsetType, InputType, CountSubstringRegex>
+ kernel{std::move(counter)};
+ return kernel.Exec(ctx, batch, out);
+ }
+};
+#endif
+
+template <typename InputType>
+struct CountSubstringExec {
+ using OffsetType = typename TypeTraits<InputType>::OffsetType;
+ static Status Exec(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
+ const MatchSubstringOptions& options = MatchSubstringState::Get(ctx);
+ if (options.ignore_case) {
+#ifdef ARROW_WITH_RE2
+ ARROW_ASSIGN_OR_RAISE(auto counter,
+ CountSubstringRegex::Make(options, /*literal=*/true));
+ applicator::ScalarUnaryNotNullStateful<OffsetType, InputType, CountSubstringRegex>
+ kernel{std::move(counter)};
+ return kernel.Exec(ctx, batch, out);
+#else
+ return Status::NotImplemented("ignore_case requires RE2");
+#endif
+ }
+ applicator::ScalarUnaryNotNullStateful<OffsetType, InputType, CountSubstring> kernel{
+ CountSubstring(PlainSubstringMatcher(options))};
+ return kernel.Exec(ctx, batch, out);
+ }
+};
+
+const FunctionDoc count_substring_doc(
+ "Count occurrences of substring",
+ ("For each string in `strings`, emit the number of occurrences of the given "
+ "pattern.\n"
+ "Null inputs emit null. The pattern must be given in MatchSubstringOptions."),
+ {"strings"}, "MatchSubstringOptions");
+
+#ifdef ARROW_WITH_RE2
+const FunctionDoc count_substring_regex_doc(
+ "Count occurrences of substring",
+ ("For each string in `strings`, emit the number of occurrences of the given "
+ "regex pattern.\n"
+ "Null inputs emit null. The pattern must be given in MatchSubstringOptions."),
+ {"strings"}, "MatchSubstringOptions");
+#endif
+
+void AddCountSubstring(FunctionRegistry* registry) {
+ {
+ auto func = std::make_shared<ScalarFunction>("count_substring", Arity::Unary(),
+ &count_substring_doc);
+ for (const auto& ty : BaseBinaryTypes()) {
+ auto offset_type = offset_bit_width(ty->id()) == 64 ? int64() : int32();
+ DCHECK_OK(func->AddKernel({ty}, offset_type,
+ GenerateTypeAgnosticVarBinaryBase<CountSubstringExec>(ty),
+ MatchSubstringState::Init));
+ }
+ DCHECK_OK(registry->AddFunction(std::move(func)));
+ }
+#ifdef ARROW_WITH_RE2
+ {
+ auto func = std::make_shared<ScalarFunction>("count_substring_regex", Arity::Unary(),
+ &count_substring_regex_doc);
+ for (const auto& ty : BaseBinaryTypes()) {
+ auto offset_type = offset_bit_width(ty->id()) == 64 ? int64() : int32();
+ DCHECK_OK(
+ func->AddKernel({ty}, offset_type,
+ GenerateTypeAgnosticVarBinaryBase<CountSubstringRegexExec>(ty),
+ MatchSubstringState::Init));
+ }
+ DCHECK_OK(registry->AddFunction(std::move(func)));
+ }
+#endif
+}
+
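+// Usage sketch (editorial note; assumes arrow::compute::CallFunction from
+// "arrow/compute/api.h"):
+//
+//   arrow::Result<arrow::Datum> CountExample(
+//       const std::shared_ptr<arrow::Array>& strings) {
+//     arrow::compute::MatchSubstringOptions opts("na");
+//     // ["banana", null] -> [2, null]; the count type follows the input's
+//     // offset width (int32 for utf8, int64 for large_utf8).
+//     return arrow::compute::CallFunction("count_substring", {strings}, &opts);
+//   }
+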
+// Slicing
+
+struct SliceTransformBase : public StringTransformBase {
+ using State = OptionsWrapper<SliceOptions>;
+
+ const SliceOptions* options;
+
+ Status PreExec(KernelContext* ctx, const ExecBatch& batch, Datum* out) override {
+ options = &State::Get(ctx);
+ if (options->step == 0) {
+ return Status::Invalid("Slice step cannot be zero");
+ }
+ return Status::OK();
+ }
+};
+
+struct SliceCodeunitsTransform : SliceTransformBase {
+ int64_t MaxCodeunits(int64_t ninputs, int64_t input_ncodeunits) override {
+ const SliceOptions& opt = *this->options;
+ if ((opt.start >= 0) != (opt.stop >= 0)) {
+ // If start and stop don't have the same sign, we can't guess an upper bound
+ // on the resulting slice lengths, so return a worst case estimate.
+ return input_ncodeunits;
+ }
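+ // Ceiling division in integer arithmetic: at most
+ // ceil((stop - start) / step) codepoints can be selected per slice.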
+ int64_t max_slice_codepoints = (opt.stop - opt.start + opt.step - 1) / opt.step;
+ // The maximum UTF8 byte size of a codepoint is 4
+ return std::min(input_ncodeunits,
+ 4 * ninputs * std::max<int64_t>(0, max_slice_codepoints));
+ }
+
+ int64_t Transform(const uint8_t* input, int64_t input_string_ncodeunits,
+ uint8_t* output) {
+ if (options->step >= 1) {
+ return SliceForward(input, input_string_ncodeunits, output);
+ }
+ return SliceBackward(input, input_string_ncodeunits, output);
+ }
+
+#define RETURN_IF_UTF8_ERROR(expr) \
+ do { \
+ if (ARROW_PREDICT_FALSE(!expr)) { \
+ return kTransformError; \
+ } \
+ } while (0)
+
+ int64_t SliceForward(const uint8_t* input, int64_t input_string_ncodeunits,
+ uint8_t* output) {
+ // Slice in forward order (step > 0)
+ const SliceOptions& opt = *this->options;
+ const uint8_t* begin = input;
+ const uint8_t* end = input + input_string_ncodeunits;
+ const uint8_t* begin_sliced = begin;
+ const uint8_t* end_sliced = end;
+
+ // First, compute begin_sliced and end_sliced
+ if (opt.start >= 0) {
+ // start counting from the left
+ RETURN_IF_UTF8_ERROR(
+ arrow::util::UTF8AdvanceCodepoints(begin, end, &begin_sliced, opt.start));
+ if (opt.stop > opt.start) {
+ // continue counting from begin_sliced
+ const int64_t length = opt.stop - opt.start;
+ RETURN_IF_UTF8_ERROR(
+ arrow::util::UTF8AdvanceCodepoints(begin_sliced, end, &end_sliced, length));
+ } else if (opt.stop < 0) {
+ // or count from the end (but we will never need to go before begin_sliced)
+ RETURN_IF_UTF8_ERROR(arrow::util::UTF8AdvanceCodepointsReverse(
+ begin_sliced, end, &end_sliced, -opt.stop));
+ } else {
+ // zero length slice
+ return 0;
+ }
+ } else {
+ // start counting from the right
+ RETURN_IF_UTF8_ERROR(arrow::util::UTF8AdvanceCodepointsReverse(
+ begin, end, &begin_sliced, -opt.start));
+ if (opt.stop > 0) {
+ // continue counting from the left; we cannot start from begin_sliced because we
+ // don't know how many codepoints lie between begin and begin_sliced
+ RETURN_IF_UTF8_ERROR(
+ arrow::util::UTF8AdvanceCodepoints(begin, end, &end_sliced, opt.stop));
+ // and therefore we also need this check
+ if (end_sliced <= begin_sliced) {
+ // zero length slice
+ return 0;
+ }
+ } else if ((opt.stop < 0) && (opt.stop > opt.start)) {
+ // stop is negative, but larger than start, so we count again from the right
+ // in some cases we can optimize this, depending on the shortest path (from end
+ // or begin_sliced), but begin_sliced and opt.start can be 'out of sync',
+ // for instance when start=-100 and the string length is only 10.
+ RETURN_IF_UTF8_ERROR(arrow::util::UTF8AdvanceCodepointsReverse(
+ begin_sliced, end, &end_sliced, -opt.stop));
+ } else {
+ // zero length slice
+ return 0;
+ }
+ }
+
+ // Second, copy computed slice to output
+ DCHECK(begin_sliced <= end_sliced);
+ if (opt.step == 1) {
+ // fast case, where we can simply finish with a memcpy
+ std::copy(begin_sliced, end_sliced, output);
+ return end_sliced - begin_sliced;
+ }
+ uint8_t* dest = output;
+ const uint8_t* i = begin_sliced;
+
+ while (i < end_sliced) {
+ uint32_t codepoint = 0;
+ // write a single codepoint
+ RETURN_IF_UTF8_ERROR(arrow::util::UTF8Decode(&i, &codepoint));
+ dest = arrow::util::UTF8Encode(dest, codepoint);
+ // and skip the remainder
+ int64_t skips = opt.step - 1;
+ while ((skips--) && (i < end_sliced)) {
+ RETURN_IF_UTF8_ERROR(arrow::util::UTF8Decode(&i, &codepoint));
+ }
+ }
+ return dest - output;
+ }
+
+ int64_t SliceBackward(const uint8_t* input, int64_t input_string_ncodeunits,
+ uint8_t* output) {
+ // Slice in reverse order (step < 0)
+ const SliceOptions& opt = *this->options;
+ const uint8_t* begin = input;
+ const uint8_t* end = input + input_string_ncodeunits;
+ const uint8_t* begin_sliced = begin;
+ const uint8_t* end_sliced = end;
+
+ // Serious +1 -1 kung fu because begin_sliced and end_sliced act like
+ // reverse iterators.
+ if (opt.start >= 0) {
+ // +1 because begin_sliced acts as the end of a reverse iterator
+ RETURN_IF_UTF8_ERROR(
+ arrow::util::UTF8AdvanceCodepoints(begin, end, &begin_sliced, opt.start + 1));
+ } else {
+ // -1 because start=-1 means the last codepoint, i.e. 0 advances from the end
+ RETURN_IF_UTF8_ERROR(arrow::util::UTF8AdvanceCodepointsReverse(
+ begin, end, &begin_sliced, -opt.start - 1));
+ }
+ // make it point at the last codeunit of the preceding codepoint
+ begin_sliced--;
+
+ // similar to opt.start
+ if (opt.stop >= 0) {
+ RETURN_IF_UTF8_ERROR(
+ arrow::util::UTF8AdvanceCodepoints(begin, end, &end_sliced, opt.stop + 1));
+ } else {
+ RETURN_IF_UTF8_ERROR(arrow::util::UTF8AdvanceCodepointsReverse(
+ begin, end, &end_sliced, -opt.stop - 1));
+ }
+ end_sliced--;
+
+ // Copy computed slice to output
+ uint8_t* dest = output;
+ const uint8_t* i = begin_sliced;
+ while (i > end_sliced) {
+ uint32_t codepoint = 0;
+ // write a single codepoint
+ RETURN_IF_UTF8_ERROR(arrow::util::UTF8DecodeReverse(&i, &codepoint));
+ dest = arrow::util::UTF8Encode(dest, codepoint);
+ // and skip the remainder
+ int64_t skips = -opt.step - 1;
+ while ((skips--) && (i > end_sliced)) {
+ RETURN_IF_UTF8_ERROR(arrow::util::UTF8DecodeReverse(&i, &codepoint));
+ }
+ }
+ return dest - output;
+ }
+
+#undef RETURN_IF_UTF8_ERROR
+};
+
+template <typename Type>
+using SliceCodeunits = StringTransformExec<Type, SliceCodeunitsTransform>;
+
+const FunctionDoc utf8_slice_codeunits_doc(
+ "Slice string",
+ ("For each string in `strings`, slice into a substring defined by\n"
+ "(`start`, `stop`, `step`) as given by `SliceOptions`, where `start` is\n"
+ "inclusive and `stop` is exclusive, both measured in codeunits. If `step` is\n"
+ "negative, the string is traversed in reverse order. A `step` of zero is\n"
+ "considered an error.\n"
+ "Null inputs emit null."),
+ {"strings"}, "SliceOptions");
+
+void AddSlice(FunctionRegistry* registry) {
+ auto func = std::make_shared<ScalarFunction>("utf8_slice_codeunits", Arity::Unary(),
+ &utf8_slice_codeunits_doc);
+ using t32 = SliceCodeunits<StringType>;
+ using t64 = SliceCodeunits<LargeStringType>;
DCHECK_OK(
- func->AddKernel({utf8()}, utf8(), t32::Exec, SliceCodeunitsTransform::State::Init));
- DCHECK_OK(func->AddKernel({large_utf8()}, large_utf8(), t64::Exec,
- SliceCodeunitsTransform::State::Init));
+ func->AddKernel({utf8()}, utf8(), t32::Exec, SliceCodeunitsTransform::State::Init));
+ DCHECK_OK(func->AddKernel({large_utf8()}, large_utf8(), t64::Exec,
+ SliceCodeunitsTransform::State::Init));
DCHECK_OK(registry->AddFunction(std::move(func)));
}
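// Usage sketch (editorial note; assumes arrow::compute::CallFunction and the
// public SliceOptions(start, stop, step) constructor):
//
//   arrow::Result<arrow::Datum> SliceExample(
//       const std::shared_ptr<arrow::Array>& strings) {
//     arrow::compute::SliceOptions opts(/*start=*/1, /*stop=*/4, /*step=*/1);
//     // "hello" -> "ell": start inclusive, stop exclusive, in codeunits.
//     // A negative step walks the string in reverse.
//     return arrow::compute::CallFunction("utf8_slice_codeunits",
//                                         {strings}, &opts);
//   }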
@@ -1496,8 +1496,8 @@ static inline bool IsPrintableCharacterAscii(uint8_t ascii_character) {
template <typename Derived, bool allow_empty = false>
struct CharacterPredicateUnicode {
- static bool Call(KernelContext*, const uint8_t* input, size_t input_string_ncodeunits,
- Status* st) {
+ static bool Call(KernelContext*, const uint8_t* input, size_t input_string_ncodeunits,
+ Status* st) {
if (allow_empty && input_string_ncodeunits == 0) {
return true;
}
@@ -1508,7 +1508,7 @@ struct CharacterPredicateUnicode {
any |= Derived::PredicateCharacterAny(codepoint);
return Derived::PredicateCharacterAll(codepoint);
}))) {
- *st = Status::Invalid("Invalid UTF8 sequence in input");
+ *st = Status::Invalid("Invalid UTF8 sequence in input");
return false;
}
return all & any;
@@ -1521,8 +1521,8 @@ struct CharacterPredicateUnicode {
template <typename Derived, bool allow_empty = false>
struct CharacterPredicateAscii {
- static bool Call(KernelContext*, const uint8_t* input, size_t input_string_ncodeunits,
- Status*) {
+ static bool Call(KernelContext*, const uint8_t* input, size_t input_string_ncodeunits,
+ Status*) {
if (allow_empty && input_string_ncodeunits == 0) {
return true;
}
@@ -1599,8 +1599,8 @@ struct IsNumericUnicode : CharacterPredicateUnicode<IsNumericUnicode> {
#endif
struct IsAscii {
- static bool Call(KernelContext*, const uint8_t* input,
- size_t input_string_nascii_characters, Status*) {
+ static bool Call(KernelContext*, const uint8_t* input,
+ size_t input_string_nascii_characters, Status*) {
return std::all_of(input, input + input_string_nascii_characters,
IsAsciiCharacter<uint8_t>);
}
@@ -1661,8 +1661,8 @@ struct IsSpaceAscii : CharacterPredicateAscii<IsSpaceAscii> {
#ifdef ARROW_WITH_UTF8PROC
struct IsTitleUnicode {
- static bool Call(KernelContext*, const uint8_t* input, size_t input_string_ncodeunits,
- Status* st) {
+ static bool Call(KernelContext*, const uint8_t* input, size_t input_string_ncodeunits,
+ Status* st) {
// rules:
// * 1: lower case follows cased
// * 2: upper case follows uncased
@@ -1689,7 +1689,7 @@ struct IsTitleUnicode {
return true;
});
if (!ARROW_PREDICT_TRUE(status)) {
- *st = Status::Invalid("Invalid UTF8 sequence in input");
+ *st = Status::Invalid("Invalid UTF8 sequence in input");
return false;
}
return rules_1_and_2 & rule_3;
@@ -1698,8 +1698,8 @@ struct IsTitleUnicode {
#endif
struct IsTitleAscii {
- static bool Call(KernelContext*, const uint8_t* input, size_t input_string_ncodeunits,
- Status*) {
+ static bool Call(KernelContext*, const uint8_t* input, size_t input_string_ncodeunits,
+ Status*) {
// rules:
// * 1: lower case follows cased
// * 2: upper case follows uncased
@@ -1758,1021 +1758,1021 @@ struct IsUpperAscii : CharacterPredicateAscii<IsUpperAscii> {
}
};
-// Splitting
-
-template <typename Options>
-struct SplitFinderBase {
- virtual Status PreExec(const Options& options) { return Status::OK(); }
-
- // Derived classes should also define these methods:
- // static bool Find(const uint8_t* begin, const uint8_t* end,
- // const uint8_t** separator_begin,
- // const uint8_t** separator_end,
- // const SplitPatternOptions& options);
- //
- // static bool FindReverse(const uint8_t* begin, const uint8_t* end,
- // const uint8_t** separator_begin,
- // const uint8_t** separator_end,
- // const SplitPatternOptions& options);
-};
-
-template <typename Type, typename ListType, typename SplitFinder,
- typename Options = typename SplitFinder::Options>
-struct SplitExec {
- using string_offset_type = typename Type::offset_type;
- using list_offset_type = typename ListType::offset_type;
- using ArrayType = typename TypeTraits<Type>::ArrayType;
- using ArrayListType = typename TypeTraits<ListType>::ArrayType;
- using ListScalarType = typename TypeTraits<ListType>::ScalarType;
- using ScalarType = typename TypeTraits<Type>::ScalarType;
- using BuilderType = typename TypeTraits<Type>::BuilderType;
- using ListOffsetsBuilderType = TypedBufferBuilder<list_offset_type>;
- using State = OptionsWrapper<Options>;
-
- // Keep the temporary storage across individual values to minimize reallocations
- std::vector<util::string_view> parts;
- Options options;
-
- explicit SplitExec(const Options& options) : options(options) {}
-
- static Status Exec(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
- return SplitExec{State::Get(ctx)}.Execute(ctx, batch, out);
- }
-
- Status Execute(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
- SplitFinder finder;
- RETURN_NOT_OK(finder.PreExec(options));
- if (batch[0].kind() == Datum::ARRAY) {
- return Execute(ctx, &finder, batch[0].array(), out);
- }
- DCHECK_EQ(batch[0].kind(), Datum::SCALAR);
- return Execute(ctx, &finder, batch[0].scalar(), out);
- }
-
- Status Execute(KernelContext* ctx, SplitFinder* finder,
- const std::shared_ptr<ArrayData>& data, Datum* out) {
- const ArrayType input(data);
-
- BuilderType builder(input.type(), ctx->memory_pool());
- // A slight overestimate of the data needed
- RETURN_NOT_OK(builder.ReserveData(input.total_values_length()));
- // The minimum amount of strings needed
- RETURN_NOT_OK(builder.Resize(input.length() - input.null_count()));
-
- ArrayData* output_list = out->mutable_array();
- // List offsets were preallocated
- auto* list_offsets = output_list->GetMutableValues<list_offset_type>(1);
- DCHECK_NE(list_offsets, nullptr);
- // Initial value
- *list_offsets++ = 0;
- for (int64_t i = 0; i < input.length(); ++i) {
- if (!input.IsNull(i)) {
- RETURN_NOT_OK(SplitString(input.GetView(i), finder, &builder));
- if (ARROW_PREDICT_FALSE(builder.length() >
- std::numeric_limits<list_offset_type>::max())) {
- return Status::CapacityError("List offset does not fit into 32 bit");
- }
- }
- *list_offsets++ = static_cast<list_offset_type>(builder.length());
- }
- // Assign string array to list child data
- std::shared_ptr<Array> string_array;
- RETURN_NOT_OK(builder.Finish(&string_array));
- output_list->child_data.push_back(string_array->data());
- return Status::OK();
- }
-
- Status Execute(KernelContext* ctx, SplitFinder* finder,
- const std::shared_ptr<Scalar>& scalar, Datum* out) {
- const auto& input = checked_cast<const ScalarType&>(*scalar);
- auto result = checked_cast<ListScalarType*>(out->scalar().get());
- if (input.is_valid) {
- result->is_valid = true;
- BuilderType builder(input.type, ctx->memory_pool());
- util::string_view s(*input.value);
- RETURN_NOT_OK(SplitString(s, finder, &builder));
- RETURN_NOT_OK(builder.Finish(&result->value));
- }
- return Status::OK();
- }
-
- Status SplitString(const util::string_view& s, SplitFinder* finder,
- BuilderType* builder) {
- const uint8_t* begin = reinterpret_cast<const uint8_t*>(s.data());
- const uint8_t* end = begin + s.length();
-
- int64_t max_splits = options.max_splits;
- // if there is no max_splits limit, reversing does not make sense (and is
- // probably less efficient), but it is useful for testing
- if (options.reverse) {
- // note that i points one past the 'current' position
- const uint8_t* i = end;
- // we will record the parts in reverse order
- parts.clear();
- if (max_splits > -1) {
- parts.reserve(max_splits + 1);
- }
- while (max_splits != 0) {
- const uint8_t *separator_begin, *separator_end;
- // find with whatever algo the part we will 'cut out'
- if (finder->FindReverse(begin, i, &separator_begin, &separator_end, options)) {
- parts.emplace_back(reinterpret_cast<const char*>(separator_end),
- i - separator_end);
- i = separator_begin;
- max_splits--;
- } else {
- // if we cannot find a separator, we're done
- break;
- }
- }
- parts.emplace_back(reinterpret_cast<const char*>(begin), i - begin);
- // now we do the copying
- for (auto it = parts.rbegin(); it != parts.rend(); ++it) {
- RETURN_NOT_OK(builder->Append(*it));
- }
- } else {
- const uint8_t* i = begin;
- while (max_splits != 0) {
- const uint8_t *separator_begin, *separator_end;
- // find with whatever algo the part we will 'cut out'
- if (finder->Find(i, end, &separator_begin, &separator_end, options)) {
- // the part till the beginning of the 'cut'
- RETURN_NOT_OK(
- builder->Append(i, static_cast<string_offset_type>(separator_begin - i)));
- i = separator_end;
- max_splits--;
- } else {
- // if we cannot find a separator, we're done
- break;
- }
- }
- // trailing part
- RETURN_NOT_OK(builder->Append(i, static_cast<string_offset_type>(end - i)));
- }
- return Status::OK();
- }
-};
-
-struct SplitPatternFinder : public SplitFinderBase<SplitPatternOptions> {
- using Options = SplitPatternOptions;
-
- Status PreExec(const SplitPatternOptions& options) override {
- if (options.pattern.length() == 0) {
- return Status::Invalid("Empty separator");
- }
- return Status::OK();
- }
-
- static bool Find(const uint8_t* begin, const uint8_t* end,
- const uint8_t** separator_begin, const uint8_t** separator_end,
- const SplitPatternOptions& options) {
- const uint8_t* pattern = reinterpret_cast<const uint8_t*>(options.pattern.c_str());
- const int64_t pattern_length = options.pattern.length();
- const uint8_t* i = begin;
- // this is O(n*m) complexity; we could use the Knuth-Morris-Pratt algorithm used in
- // the match kernel
- while ((i + pattern_length <= end)) {
- i = std::search(i, end, pattern, pattern + pattern_length);
- if (i != end) {
- *separator_begin = i;
- *separator_end = i + pattern_length;
- return true;
- }
- }
- return false;
- }
-
- static bool FindReverse(const uint8_t* begin, const uint8_t* end,
- const uint8_t** separator_begin, const uint8_t** separator_end,
- const SplitPatternOptions& options) {
- const uint8_t* pattern = reinterpret_cast<const uint8_t*>(options.pattern.c_str());
- const int64_t pattern_length = options.pattern.length();
- // this is O(n*m) complexity; we could use the Knuth-Morris-Pratt algorithm used in
- // the match kernel
- std::reverse_iterator<const uint8_t*> ri(end);
- std::reverse_iterator<const uint8_t*> rend(begin);
- std::reverse_iterator<const uint8_t*> pattern_rbegin(pattern + pattern_length);
- std::reverse_iterator<const uint8_t*> pattern_rend(pattern);
- while (begin <= ri.base() - pattern_length) {
- ri = std::search(ri, rend, pattern_rbegin, pattern_rend);
- if (ri != rend) {
- *separator_begin = ri.base() - pattern_length;
- *separator_end = ri.base();
- return true;
- }
- }
- return false;
- }
-};
-
-template <typename Type, typename ListType>
-using SplitPatternExec = SplitExec<Type, ListType, SplitPatternFinder>;
-
-const FunctionDoc split_pattern_doc(
- "Split string according to separator",
- ("Split each string according to the exact `pattern` defined in\n"
- "SplitPatternOptions. The output for each string input is a list\n"
- "of strings.\n"
- "\n"
- "The maximum number of splits and direction of splitting\n"
- "(forward, reverse) can optionally be defined in SplitPatternOptions."),
- {"strings"}, "SplitPatternOptions");
-
-const FunctionDoc ascii_split_whitespace_doc(
- "Split string according to any ASCII whitespace",
- ("Split each string according any non-zero length sequence of ASCII\n"
- "whitespace characters. The output for each string input is a list\n"
- "of strings.\n"
- "\n"
- "The maximum number of splits and direction of splitting\n"
- "(forward, reverse) can optionally be defined in SplitOptions."),
- {"strings"}, "SplitOptions");
-
-const FunctionDoc utf8_split_whitespace_doc(
- "Split string according to any Unicode whitespace",
- ("Split each string according any non-zero length sequence of Unicode\n"
- "whitespace characters. The output for each string input is a list\n"
- "of strings.\n"
- "\n"
- "The maximum number of splits and direction of splitting\n"
- "(forward, reverse) can optionally be defined in SplitOptions."),
- {"strings"}, "SplitOptions");
-
-void AddSplitPattern(FunctionRegistry* registry) {
- auto func = std::make_shared<ScalarFunction>("split_pattern", Arity::Unary(),
- &split_pattern_doc);
- using t32 = SplitPatternExec<StringType, ListType>;
- using t64 = SplitPatternExec<LargeStringType, ListType>;
- DCHECK_OK(func->AddKernel({utf8()}, {list(utf8())}, t32::Exec, t32::State::Init));
- DCHECK_OK(
- func->AddKernel({large_utf8()}, {list(large_utf8())}, t64::Exec, t64::State::Init));
- DCHECK_OK(registry->AddFunction(std::move(func)));
-}
-
-struct SplitWhitespaceAsciiFinder : public SplitFinderBase<SplitOptions> {
- using Options = SplitOptions;
-
- static bool Find(const uint8_t* begin, const uint8_t* end,
- const uint8_t** separator_begin, const uint8_t** separator_end,
- const SplitOptions& options) {
- const uint8_t* i = begin;
- while (i < end) {
- if (IsSpaceCharacterAscii(*i)) {
- *separator_begin = i;
- do {
- i++;
- } while (IsSpaceCharacterAscii(*i) && i < end);
- *separator_end = i;
- return true;
- }
- i++;
- }
- return false;
- }
-
- static bool FindReverse(const uint8_t* begin, const uint8_t* end,
- const uint8_t** separator_begin, const uint8_t** separator_end,
- const SplitOptions& options) {
- const uint8_t* i = end - 1;
- while ((i >= begin)) {
- if (IsSpaceCharacterAscii(*i)) {
- *separator_end = i + 1;
- do {
- i--;
- } while (IsSpaceCharacterAscii(*i) && i >= begin);
- *separator_begin = i + 1;
- return true;
- }
- i--;
- }
- return false;
- }
-};
-
-template <typename Type, typename ListType>
-using SplitWhitespaceAsciiExec = SplitExec<Type, ListType, SplitWhitespaceAsciiFinder>;
-
-void AddSplitWhitespaceAscii(FunctionRegistry* registry) {
- static const SplitOptions default_options{};
- auto func =
- std::make_shared<ScalarFunction>("ascii_split_whitespace", Arity::Unary(),
- &ascii_split_whitespace_doc, &default_options);
- using t32 = SplitWhitespaceAsciiExec<StringType, ListType>;
- using t64 = SplitWhitespaceAsciiExec<LargeStringType, ListType>;
- DCHECK_OK(func->AddKernel({utf8()}, {list(utf8())}, t32::Exec, t32::State::Init));
- DCHECK_OK(
- func->AddKernel({large_utf8()}, {list(large_utf8())}, t64::Exec, t64::State::Init));
- DCHECK_OK(registry->AddFunction(std::move(func)));
-}
-
-#ifdef ARROW_WITH_UTF8PROC
-struct SplitWhitespaceUtf8Finder : public SplitFinderBase<SplitOptions> {
- using Options = SplitOptions;
-
- Status PreExec(const SplitOptions& options) override {
- EnsureLookupTablesFilled();
- return Status::OK();
- }
-
- bool Find(const uint8_t* begin, const uint8_t* end, const uint8_t** separator_begin,
- const uint8_t** separator_end, const SplitOptions& options) {
- const uint8_t* i = begin;
- while ((i < end)) {
- uint32_t codepoint = 0;
- *separator_begin = i;
- if (ARROW_PREDICT_FALSE(!arrow::util::UTF8Decode(&i, &codepoint))) {
- return false;
- }
- if (IsSpaceCharacterUnicode(codepoint)) {
- do {
- *separator_end = i;
- if (ARROW_PREDICT_FALSE(!arrow::util::UTF8Decode(&i, &codepoint))) {
- return false;
- }
- } while (IsSpaceCharacterUnicode(codepoint) && i < end);
- return true;
- }
- }
- return false;
- }
-
- bool FindReverse(const uint8_t* begin, const uint8_t* end,
- const uint8_t** separator_begin, const uint8_t** separator_end,
- const SplitOptions& options) {
- const uint8_t* i = end - 1;
- while ((i >= begin)) {
- uint32_t codepoint = 0;
- *separator_end = i + 1;
- if (ARROW_PREDICT_FALSE(!arrow::util::UTF8DecodeReverse(&i, &codepoint))) {
- return false;
- }
- if (IsSpaceCharacterUnicode(codepoint)) {
- do {
- *separator_begin = i + 1;
- if (ARROW_PREDICT_FALSE(!arrow::util::UTF8DecodeReverse(&i, &codepoint))) {
- return false;
- }
- } while (IsSpaceCharacterUnicode(codepoint) && i >= begin);
- return true;
- }
- }
- return false;
- }
-};
-
-template <typename Type, typename ListType>
-using SplitWhitespaceUtf8Exec = SplitExec<Type, ListType, SplitWhitespaceUtf8Finder>;
-
-void AddSplitWhitespaceUTF8(FunctionRegistry* registry) {
- static const SplitOptions default_options{};
- auto func =
- std::make_shared<ScalarFunction>("utf8_split_whitespace", Arity::Unary(),
- &utf8_split_whitespace_doc, &default_options);
- using t32 = SplitWhitespaceUtf8Exec<StringType, ListType>;
- using t64 = SplitWhitespaceUtf8Exec<LargeStringType, ListType>;
- DCHECK_OK(func->AddKernel({utf8()}, {list(utf8())}, t32::Exec, t32::State::Init));
- DCHECK_OK(
- func->AddKernel({large_utf8()}, {list(large_utf8())}, t64::Exec, t64::State::Init));
- DCHECK_OK(registry->AddFunction(std::move(func)));
-}
-#endif // ARROW_WITH_UTF8PROC
-
-#ifdef ARROW_WITH_RE2
-struct SplitRegexFinder : public SplitFinderBase<SplitPatternOptions> {
- using Options = SplitPatternOptions;
-
- util::optional<RE2> regex_split;
-
- Status PreExec(const SplitPatternOptions& options) override {
- if (options.reverse) {
- return Status::NotImplemented("Cannot split in reverse with regex");
- }
- // RE2 does *not* give you the full match! Must wrap the regex in a capture group
- // There is FindAndConsume, but it would give only the end of the separator
- std::string pattern = "(";
- pattern.reserve(options.pattern.size() + 2);
- pattern += options.pattern;
- pattern += ')';
- regex_split.emplace(std::move(pattern));
- return RegexStatus(*regex_split);
- }
-
- bool Find(const uint8_t* begin, const uint8_t* end, const uint8_t** separator_begin,
- const uint8_t** separator_end, const SplitPatternOptions& options) {
- re2::StringPiece piece(reinterpret_cast<const char*>(begin),
- std::distance(begin, end));
- // "StringPiece is mutated to point to matched piece"
- re2::StringPiece result;
- if (!re2::RE2::PartialMatch(piece, *regex_split, &result)) {
- return false;
- }
- *separator_begin = reinterpret_cast<const uint8_t*>(result.data());
- *separator_end = reinterpret_cast<const uint8_t*>(result.data() + result.size());
- return true;
- }
-
- bool FindReverse(const uint8_t* begin, const uint8_t* end,
- const uint8_t** separator_begin, const uint8_t** separator_end,
- const SplitPatternOptions& options) {
- // Unsupported (see PreExec)
- return false;
- }
-};
-
-template <typename Type, typename ListType>
-using SplitRegexExec = SplitExec<Type, ListType, SplitRegexFinder>;
-
-const FunctionDoc split_pattern_regex_doc(
- "Split string according to regex pattern",
- ("Split each string according to the regex `pattern` defined in\n"
- "SplitPatternOptions. The output for each string input is a list\n"
- "of strings.\n"
- "\n"
- "The maximum number of splits and direction of splitting\n"
- "(forward, reverse) can optionally be defined in SplitPatternOptions."),
- {"strings"}, "SplitPatternOptions");
-
-void AddSplitRegex(FunctionRegistry* registry) {
- auto func = std::make_shared<ScalarFunction>("split_pattern_regex", Arity::Unary(),
- &split_pattern_regex_doc);
- using t32 = SplitRegexExec<StringType, ListType>;
- using t64 = SplitRegexExec<LargeStringType, ListType>;
- DCHECK_OK(func->AddKernel({utf8()}, {list(utf8())}, t32::Exec, t32::State::Init));
- DCHECK_OK(
- func->AddKernel({large_utf8()}, {list(large_utf8())}, t64::Exec, t64::State::Init));
- DCHECK_OK(registry->AddFunction(std::move(func)));
-}
-#endif // ARROW_WITH_RE2
-
-void AddSplit(FunctionRegistry* registry) {
- AddSplitPattern(registry);
- AddSplitWhitespaceAscii(registry);
-#ifdef ARROW_WITH_UTF8PROC
- AddSplitWhitespaceUTF8(registry);
-#endif
-#ifdef ARROW_WITH_RE2
- AddSplitRegex(registry);
-#endif
-}
-
+// Splitting
+
+template <typename Options>
+struct SplitFinderBase {
+ virtual Status PreExec(const Options& options) { return Status::OK(); }
+
+ // Derived classes should also define these methods:
+ // static bool Find(const uint8_t* begin, const uint8_t* end,
+ // const uint8_t** separator_begin,
+ // const uint8_t** separator_end,
+ // const SplitPatternOptions& options);
+ //
+ // static bool FindReverse(const uint8_t* begin, const uint8_t* end,
+ // const uint8_t** separator_begin,
+ // const uint8_t** separator_end,
+ // const SplitPatternOptions& options);
+};
+
+template <typename Type, typename ListType, typename SplitFinder,
+ typename Options = typename SplitFinder::Options>
+struct SplitExec {
+ using string_offset_type = typename Type::offset_type;
+ using list_offset_type = typename ListType::offset_type;
+ using ArrayType = typename TypeTraits<Type>::ArrayType;
+ using ArrayListType = typename TypeTraits<ListType>::ArrayType;
+ using ListScalarType = typename TypeTraits<ListType>::ScalarType;
+ using ScalarType = typename TypeTraits<Type>::ScalarType;
+ using BuilderType = typename TypeTraits<Type>::BuilderType;
+ using ListOffsetsBuilderType = TypedBufferBuilder<list_offset_type>;
+ using State = OptionsWrapper<Options>;
+
+ // Keep the temporary storage across individual values to minimize reallocations
+ std::vector<util::string_view> parts;
+ Options options;
+
+ explicit SplitExec(const Options& options) : options(options) {}
+
+ static Status Exec(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
+ return SplitExec{State::Get(ctx)}.Execute(ctx, batch, out);
+ }
+
+ Status Execute(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
+ SplitFinder finder;
+ RETURN_NOT_OK(finder.PreExec(options));
+ if (batch[0].kind() == Datum::ARRAY) {
+ return Execute(ctx, &finder, batch[0].array(), out);
+ }
+ DCHECK_EQ(batch[0].kind(), Datum::SCALAR);
+ return Execute(ctx, &finder, batch[0].scalar(), out);
+ }
+
+ Status Execute(KernelContext* ctx, SplitFinder* finder,
+ const std::shared_ptr<ArrayData>& data, Datum* out) {
+ const ArrayType input(data);
+
+ BuilderType builder(input.type(), ctx->memory_pool());
+ // A slight overestimate of the data needed
+ RETURN_NOT_OK(builder.ReserveData(input.total_values_length()));
+ // The minimum amount of strings needed
+ RETURN_NOT_OK(builder.Resize(input.length() - input.null_count()));
+
+ ArrayData* output_list = out->mutable_array();
+ // List offsets were preallocated
+ auto* list_offsets = output_list->GetMutableValues<list_offset_type>(1);
+ DCHECK_NE(list_offsets, nullptr);
+ // Initial value
+ *list_offsets++ = 0;
+ for (int64_t i = 0; i < input.length(); ++i) {
+ if (!input.IsNull(i)) {
+ RETURN_NOT_OK(SplitString(input.GetView(i), finder, &builder));
+ if (ARROW_PREDICT_FALSE(builder.length() >
+ std::numeric_limits<list_offset_type>::max())) {
+ return Status::CapacityError("List offset does not fit into 32 bit");
+ }
+ }
+ *list_offsets++ = static_cast<list_offset_type>(builder.length());
+ }
+ // Assign string array to list child data
+ std::shared_ptr<Array> string_array;
+ RETURN_NOT_OK(builder.Finish(&string_array));
+ output_list->child_data.push_back(string_array->data());
+ return Status::OK();
+ }
+
+ Status Execute(KernelContext* ctx, SplitFinder* finder,
+ const std::shared_ptr<Scalar>& scalar, Datum* out) {
+ const auto& input = checked_cast<const ScalarType&>(*scalar);
+ auto result = checked_cast<ListScalarType*>(out->scalar().get());
+ if (input.is_valid) {
+ result->is_valid = true;
+ BuilderType builder(input.type, ctx->memory_pool());
+ util::string_view s(*input.value);
+ RETURN_NOT_OK(SplitString(s, finder, &builder));
+ RETURN_NOT_OK(builder.Finish(&result->value));
+ }
+ return Status::OK();
+ }
+
+ Status SplitString(const util::string_view& s, SplitFinder* finder,
+ BuilderType* builder) {
+ const uint8_t* begin = reinterpret_cast<const uint8_t*>(s.data());
+ const uint8_t* end = begin + s.length();
+
+ int64_t max_splits = options.max_splits;
+ // if there is no max_splits limit, reversing does not make sense (and is
+ // probably less efficient), but it is useful for testing
+ if (options.reverse) {
+ // note that i points one past the 'current' position
+ const uint8_t* i = end;
+ // we will record the parts in reverse order
+ parts.clear();
+ if (max_splits > -1) {
+ parts.reserve(max_splits + 1);
+ }
+ while (max_splits != 0) {
+ const uint8_t *separator_begin, *separator_end;
+ // find with whatever algo the part we will 'cut out'
+ if (finder->FindReverse(begin, i, &separator_begin, &separator_end, options)) {
+ parts.emplace_back(reinterpret_cast<const char*>(separator_end),
+ i - separator_end);
+ i = separator_begin;
+ max_splits--;
+ } else {
+ // if we cannot find a separator, we're done
+ break;
+ }
+ }
+ parts.emplace_back(reinterpret_cast<const char*>(begin), i - begin);
+ // now we do the copying
+ for (auto it = parts.rbegin(); it != parts.rend(); ++it) {
+ RETURN_NOT_OK(builder->Append(*it));
+ }
+ } else {
+ const uint8_t* i = begin;
+ while (max_splits != 0) {
+ const uint8_t *separator_begin, *separator_end;
+ // find with whatever algo the part we will 'cut out'
+ if (finder->Find(i, end, &separator_begin, &separator_end, options)) {
+ // the part till the beginning of the 'cut'
+ RETURN_NOT_OK(
+ builder->Append(i, static_cast<string_offset_type>(separator_begin - i)));
+ i = separator_end;
+ max_splits--;
+ } else {
+ // if we cannot find a separator, we're done
+ break;
+ }
+ }
+ // trailing part
+ RETURN_NOT_OK(builder->Append(i, static_cast<string_offset_type>(end - i)));
+ }
+ return Status::OK();
+ }
+};
+
+struct SplitPatternFinder : public SplitFinderBase<SplitPatternOptions> {
+ using Options = SplitPatternOptions;
+
+ Status PreExec(const SplitPatternOptions& options) override {
+ if (options.pattern.length() == 0) {
+ return Status::Invalid("Empty separator");
+ }
+ return Status::OK();
+ }
+
+ static bool Find(const uint8_t* begin, const uint8_t* end,
+ const uint8_t** separator_begin, const uint8_t** separator_end,
+ const SplitPatternOptions& options) {
+ const uint8_t* pattern = reinterpret_cast<const uint8_t*>(options.pattern.c_str());
+ const int64_t pattern_length = options.pattern.length();
+ const uint8_t* i = begin;
+ // this is O(n*m) complexity; we could use the Knuth-Morris-Pratt algorithm used in
+ // the match kernel
+ while ((i + pattern_length <= end)) {
+ i = std::search(i, end, pattern, pattern + pattern_length);
+ if (i != end) {
+ *separator_begin = i;
+ *separator_end = i + pattern_length;
+ return true;
+ }
+ }
+ return false;
+ }
+
+ static bool FindReverse(const uint8_t* begin, const uint8_t* end,
+ const uint8_t** separator_begin, const uint8_t** separator_end,
+ const SplitPatternOptions& options) {
+ const uint8_t* pattern = reinterpret_cast<const uint8_t*>(options.pattern.c_str());
+ const int64_t pattern_length = options.pattern.length();
+ // this is O(n*m) complexity; we could use the Knuth-Morris-Pratt algorithm used in
+ // the match kernel
+ std::reverse_iterator<const uint8_t*> ri(end);
+ std::reverse_iterator<const uint8_t*> rend(begin);
+ std::reverse_iterator<const uint8_t*> pattern_rbegin(pattern + pattern_length);
+ std::reverse_iterator<const uint8_t*> pattern_rend(pattern);
+ while (begin <= ri.base() - pattern_length) {
+ ri = std::search(ri, rend, pattern_rbegin, pattern_rend);
+ if (ri != rend) {
+ *separator_begin = ri.base() - pattern_length;
+ *separator_end = ri.base();
+ return true;
+ }
+ }
+ return false;
+ }
+};
+
+template <typename Type, typename ListType>
+using SplitPatternExec = SplitExec<Type, ListType, SplitPatternFinder>;
+
+const FunctionDoc split_pattern_doc(
+ "Split string according to separator",
+ ("Split each string according to the exact `pattern` defined in\n"
+ "SplitPatternOptions. The output for each string input is a list\n"
+ "of strings.\n"
+ "\n"
+ "The maximum number of splits and direction of splitting\n"
+ "(forward, reverse) can optionally be defined in SplitPatternOptions."),
+ {"strings"}, "SplitPatternOptions");
+
+const FunctionDoc ascii_split_whitespace_doc(
+ "Split string according to any ASCII whitespace",
+ ("Split each string according any non-zero length sequence of ASCII\n"
+ "whitespace characters. The output for each string input is a list\n"
+ "of strings.\n"
+ "\n"
+ "The maximum number of splits and direction of splitting\n"
+ "(forward, reverse) can optionally be defined in SplitOptions."),
+ {"strings"}, "SplitOptions");
+
+const FunctionDoc utf8_split_whitespace_doc(
+ "Split string according to any Unicode whitespace",
+ ("Split each string according any non-zero length sequence of Unicode\n"
+ "whitespace characters. The output for each string input is a list\n"
+ "of strings.\n"
+ "\n"
+ "The maximum number of splits and direction of splitting\n"
+ "(forward, reverse) can optionally be defined in SplitOptions."),
+ {"strings"}, "SplitOptions");
+
+void AddSplitPattern(FunctionRegistry* registry) {
+ auto func = std::make_shared<ScalarFunction>("split_pattern", Arity::Unary(),
+ &split_pattern_doc);
+ using t32 = SplitPatternExec<StringType, ListType>;
+ using t64 = SplitPatternExec<LargeStringType, ListType>;
+ DCHECK_OK(func->AddKernel({utf8()}, {list(utf8())}, t32::Exec, t32::State::Init));
+ DCHECK_OK(
+ func->AddKernel({large_utf8()}, {list(large_utf8())}, t64::Exec, t64::State::Init));
+ DCHECK_OK(registry->AddFunction(std::move(func)));
+}
+
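+// Usage sketch (editorial note; assumes arrow::compute::CallFunction and the
+// public SplitPatternOptions(pattern, max_splits, reverse) constructor):
+//
+//   arrow::Result<arrow::Datum> SplitExample(
+//       const std::shared_ptr<arrow::Array>& strings) {
+//     arrow::compute::SplitPatternOptions opts("--", /*max_splits=*/1,
+//                                              /*reverse=*/true);
+//     // "a--b--c" -> ["a--b", "c"]: the single allowed split is taken at
+//     // the last separator; the result type is list(utf8).
+//     return arrow::compute::CallFunction("split_pattern", {strings}, &opts);
+//   }
+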
+struct SplitWhitespaceAsciiFinder : public SplitFinderBase<SplitOptions> {
+ using Options = SplitOptions;
+
+ static bool Find(const uint8_t* begin, const uint8_t* end,
+ const uint8_t** separator_begin, const uint8_t** separator_end,
+ const SplitOptions& options) {
+ const uint8_t* i = begin;
+ while (i < end) {
+ if (IsSpaceCharacterAscii(*i)) {
+ *separator_begin = i;
+ do {
+ i++;
+ } while (IsSpaceCharacterAscii(*i) && i < end);
+ *separator_end = i;
+ return true;
+ }
+ i++;
+ }
+ return false;
+ }
+
+ static bool FindReverse(const uint8_t* begin, const uint8_t* end,
+ const uint8_t** separator_begin, const uint8_t** separator_end,
+ const SplitOptions& options) {
+ const uint8_t* i = end - 1;
+ while ((i >= begin)) {
+ if (IsSpaceCharacterAscii(*i)) {
+ *separator_end = i + 1;
+ do {
+ i--;
+ } while (IsSpaceCharacterAscii(*i) && i >= begin);
+ *separator_begin = i + 1;
+ return true;
+ }
+ i--;
+ }
+ return false;
+ }
+};
+
+template <typename Type, typename ListType>
+using SplitWhitespaceAsciiExec = SplitExec<Type, ListType, SplitWhitespaceAsciiFinder>;
+
+void AddSplitWhitespaceAscii(FunctionRegistry* registry) {
+ static const SplitOptions default_options{};
+ auto func =
+ std::make_shared<ScalarFunction>("ascii_split_whitespace", Arity::Unary(),
+ &ascii_split_whitespace_doc, &default_options);
+ using t32 = SplitWhitespaceAsciiExec<StringType, ListType>;
+ using t64 = SplitWhitespaceAsciiExec<LargeStringType, ListType>;
+ DCHECK_OK(func->AddKernel({utf8()}, {list(utf8())}, t32::Exec, t32::State::Init));
+ DCHECK_OK(
+ func->AddKernel({large_utf8()}, {list(large_utf8())}, t64::Exec, t64::State::Init));
+ DCHECK_OK(registry->AddFunction(std::move(func)));
+}
+
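+// Behavioral note (editorial): SplitString always appends the part before
+// each separator plus a trailing part, so leading or trailing whitespace
+// runs produce empty strings, e.g. " a  b " -> ["", "a", "b", ""] (unlike
+// Python's str.split(), which drops them).
+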
+#ifdef ARROW_WITH_UTF8PROC
+struct SplitWhitespaceUtf8Finder : public SplitFinderBase<SplitOptions> {
+ using Options = SplitOptions;
+
+ Status PreExec(const SplitOptions& options) override {
+ EnsureLookupTablesFilled();
+ return Status::OK();
+ }
+
+ bool Find(const uint8_t* begin, const uint8_t* end, const uint8_t** separator_begin,
+ const uint8_t** separator_end, const SplitOptions& options) {
+ const uint8_t* i = begin;
+ while ((i < end)) {
+ uint32_t codepoint = 0;
+ *separator_begin = i;
+ if (ARROW_PREDICT_FALSE(!arrow::util::UTF8Decode(&i, &codepoint))) {
+ return false;
+ }
+ if (IsSpaceCharacterUnicode(codepoint)) {
+ do {
+ *separator_end = i;
+ if (ARROW_PREDICT_FALSE(!arrow::util::UTF8Decode(&i, &codepoint))) {
+ return false;
+ }
+ } while (IsSpaceCharacterUnicode(codepoint) && i < end);
+ return true;
+ }
+ }
+ return false;
+ }
+
+ bool FindReverse(const uint8_t* begin, const uint8_t* end,
+ const uint8_t** separator_begin, const uint8_t** separator_end,
+ const SplitOptions& options) {
+ const uint8_t* i = end - 1;
+ while ((i >= begin)) {
+ uint32_t codepoint = 0;
+ *separator_end = i + 1;
+ if (ARROW_PREDICT_FALSE(!arrow::util::UTF8DecodeReverse(&i, &codepoint))) {
+ return false;
+ }
+ if (IsSpaceCharacterUnicode(codepoint)) {
+ do {
+ *separator_begin = i + 1;
+ if (ARROW_PREDICT_FALSE(!arrow::util::UTF8DecodeReverse(&i, &codepoint))) {
+ return false;
+ }
+ } while (IsSpaceCharacterUnicode(codepoint) && i >= begin);
+ return true;
+ }
+ }
+ return false;
+ }
+};
+
+template <typename Type, typename ListType>
+using SplitWhitespaceUtf8Exec = SplitExec<Type, ListType, SplitWhitespaceUtf8Finder>;
+
+void AddSplitWhitespaceUTF8(FunctionRegistry* registry) {
+ static const SplitOptions default_options{};
+ auto func =
+ std::make_shared<ScalarFunction>("utf8_split_whitespace", Arity::Unary(),
+ &utf8_split_whitespace_doc, &default_options);
+ using t32 = SplitWhitespaceUtf8Exec<StringType, ListType>;
+ using t64 = SplitWhitespaceUtf8Exec<LargeStringType, ListType>;
+ DCHECK_OK(func->AddKernel({utf8()}, {list(utf8())}, t32::Exec, t32::State::Init));
+ DCHECK_OK(
+ func->AddKernel({large_utf8()}, {list(large_utf8())}, t64::Exec, t64::State::Init));
+ DCHECK_OK(registry->AddFunction(std::move(func)));
+}
+#endif // ARROW_WITH_UTF8PROC
+
+#ifdef ARROW_WITH_RE2
+struct SplitRegexFinder : public SplitFinderBase<SplitPatternOptions> {
+ using Options = SplitPatternOptions;
+
+ util::optional<RE2> regex_split;
+
+ Status PreExec(const SplitPatternOptions& options) override {
+ if (options.reverse) {
+ return Status::NotImplemented("Cannot split in reverse with regex");
+ }
+ // RE2 does *not* give you the full match! Must wrap the regex in a capture group
+ // There is FindAndConsume, but it would give only the end of the separator
+ std::string pattern = "(";
+ pattern.reserve(options.pattern.size() + 2);
+ pattern += options.pattern;
+ pattern += ')';
+ regex_split.emplace(std::move(pattern));
+ return RegexStatus(*regex_split);
+ }
+
+ bool Find(const uint8_t* begin, const uint8_t* end, const uint8_t** separator_begin,
+ const uint8_t** separator_end, const SplitPatternOptions& options) {
+ re2::StringPiece piece(reinterpret_cast<const char*>(begin),
+ std::distance(begin, end));
+ // "StringPiece is mutated to point to matched piece"
+ re2::StringPiece result;
+ if (!re2::RE2::PartialMatch(piece, *regex_split, &result)) {
+ return false;
+ }
+ *separator_begin = reinterpret_cast<const uint8_t*>(result.data());
+ *separator_end = reinterpret_cast<const uint8_t*>(result.data() + result.size());
+ return true;
+ }
+
+ bool FindReverse(const uint8_t* begin, const uint8_t* end,
+ const uint8_t** separator_begin, const uint8_t** separator_end,
+ const SplitPatternOptions& options) {
+ // Unsupported (see PreExec)
+ return false;
+ }
+};
+
+template <typename Type, typename ListType>
+using SplitRegexExec = SplitExec<Type, ListType, SplitRegexFinder>;
+
+const FunctionDoc split_pattern_regex_doc(
+ "Split string according to regex pattern",
+ ("Split each string according to the regex `pattern` defined in\n"
+ "SplitPatternOptions. The output for each string input is a list\n"
+ "of strings.\n"
+ "\n"
+     "The maximum number of splits can optionally be defined in\n"
+     "SplitPatternOptions; splitting in reverse is not supported."),
+ {"strings"}, "SplitPatternOptions");
+
+void AddSplitRegex(FunctionRegistry* registry) {
+ auto func = std::make_shared<ScalarFunction>("split_pattern_regex", Arity::Unary(),
+ &split_pattern_regex_doc);
+ using t32 = SplitRegexExec<StringType, ListType>;
+ using t64 = SplitRegexExec<LargeStringType, ListType>;
+ DCHECK_OK(func->AddKernel({utf8()}, {list(utf8())}, t32::Exec, t32::State::Init));
+ DCHECK_OK(
+ func->AddKernel({large_utf8()}, {list(large_utf8())}, t64::Exec, t64::State::Init));
+ DCHECK_OK(registry->AddFunction(std::move(func)));
+}
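+
+// A minimal usage sketch (assumes <arrow/compute/api.h>; `strings` and
+// `opts` are illustrative names):
+//
+//   arrow::compute::SplitPatternOptions opts("[0-9]+");
+//   ARROW_ASSIGN_OR_RAISE(
+//       arrow::Datum out,
+//       arrow::compute::CallFunction("split_pattern_regex", {strings}, &opts));
+//   // "a1b22c" -> ["a", "b", "c"]: every regex match acts as a separator.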
+#endif // ARROW_WITH_RE2
+
+void AddSplit(FunctionRegistry* registry) {
+ AddSplitPattern(registry);
+ AddSplitWhitespaceAscii(registry);
+#ifdef ARROW_WITH_UTF8PROC
+ AddSplitWhitespaceUTF8(registry);
+#endif
+#ifdef ARROW_WITH_RE2
+ AddSplitRegex(registry);
+#endif
+}
+
+// ----------------------------------------------------------------------
+// Replace substring (plain, regex)
+
+template <typename Type, typename Replacer>
+struct ReplaceSubString {
+ using ScalarType = typename TypeTraits<Type>::ScalarType;
+ using offset_type = typename Type::offset_type;
+ using ValueDataBuilder = TypedBufferBuilder<uint8_t>;
+ using OffsetBuilder = TypedBufferBuilder<offset_type>;
+ using State = OptionsWrapper<ReplaceSubstringOptions>;
+
+ static Status Exec(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
+ // TODO Cache replacer across invocations (for regex compilation)
+ ARROW_ASSIGN_OR_RAISE(auto replacer, Replacer::Make(State::Get(ctx)));
+ return Replace(ctx, batch, *replacer, out);
+ }
+
+ static Status Replace(KernelContext* ctx, const ExecBatch& batch,
+ const Replacer& replacer, Datum* out) {
+ ValueDataBuilder value_data_builder(ctx->memory_pool());
+ OffsetBuilder offset_builder(ctx->memory_pool());
+
+ if (batch[0].kind() == Datum::ARRAY) {
+ // We already know how many strings we have, so we can use Reserve/UnsafeAppend
+ RETURN_NOT_OK(offset_builder.Reserve(batch[0].array()->length + 1));
+ offset_builder.UnsafeAppend(0); // offsets start at 0
+
+ const ArrayData& input = *batch[0].array();
+ RETURN_NOT_OK(VisitArrayDataInline<Type>(
+ input,
+ [&](util::string_view s) {
+ RETURN_NOT_OK(replacer.ReplaceString(s, &value_data_builder));
+ offset_builder.UnsafeAppend(
+ static_cast<offset_type>(value_data_builder.length()));
+ return Status::OK();
+ },
+ [&]() {
+ // offset for null value
+ offset_builder.UnsafeAppend(
+ static_cast<offset_type>(value_data_builder.length()));
+ return Status::OK();
+ }));
+ ArrayData* output = out->mutable_array();
+ RETURN_NOT_OK(value_data_builder.Finish(&output->buffers[2]));
+ RETURN_NOT_OK(offset_builder.Finish(&output->buffers[1]));
+ } else {
+ const auto& input = checked_cast<const ScalarType&>(*batch[0].scalar());
+ auto result = std::make_shared<ScalarType>();
+ if (input.is_valid) {
+ util::string_view s = static_cast<util::string_view>(*input.value);
+ RETURN_NOT_OK(replacer.ReplaceString(s, &value_data_builder));
+ RETURN_NOT_OK(value_data_builder.Finish(&result->value));
+ result->is_valid = true;
+ }
+ out->value = result;
+ }
+
+ return Status::OK();
+ }
+};
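+
+// For example (an illustrative sketch of the buffer layout, not code from
+// this file): replacing "X" -> "YY" over the input array ["aXa", null, "b"]
+// appends value data "aYYa" then "b", and the offsets buffer becomes
+// [0, 4, 4, 5] -- a null advances no bytes but still emits an offset.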
+
+struct PlainSubStringReplacer {
+ const ReplaceSubstringOptions& options_;
+
+ static Result<std::unique_ptr<PlainSubStringReplacer>> Make(
+ const ReplaceSubstringOptions& options) {
+ return arrow::internal::make_unique<PlainSubStringReplacer>(options);
+ }
+
+ explicit PlainSubStringReplacer(const ReplaceSubstringOptions& options)
+ : options_(options) {}
+
+ Status ReplaceString(util::string_view s, TypedBufferBuilder<uint8_t>* builder) const {
+ const char* i = s.begin();
+ const char* end = s.end();
+ int64_t max_replacements = options_.max_replacements;
+ while ((i < end) && (max_replacements != 0)) {
+ const char* pos =
+ std::search(i, end, options_.pattern.begin(), options_.pattern.end());
+ if (pos == end) {
+ RETURN_NOT_OK(builder->Append(reinterpret_cast<const uint8_t*>(i),
+ static_cast<int64_t>(end - i)));
+ i = end;
+ } else {
+ // the string before the pattern
+ RETURN_NOT_OK(builder->Append(reinterpret_cast<const uint8_t*>(i),
+ static_cast<int64_t>(pos - i)));
+ // the replacement
+ RETURN_NOT_OK(
+ builder->Append(reinterpret_cast<const uint8_t*>(options_.replacement.data()),
+ options_.replacement.length()));
+ // skip pattern
+ i = pos + options_.pattern.length();
+ max_replacements--;
+ }
+ }
+ // if we exited early due to max_replacements, add the trailing part
+ return builder->Append(reinterpret_cast<const uint8_t*>(i),
+ static_cast<int64_t>(end - i));
+ }
+};
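+
+// A minimal usage sketch of the plain replacer through the registry
+// (assumes <arrow/compute/api.h>; `strings` and `opts` are illustrative):
+//
+//   arrow::compute::ReplaceSubstringOptions opts("na", "*",
+//                                                /*max_replacements=*/1);
+//   ARROW_ASSIGN_OR_RAISE(
+//       arrow::Datum out,
+//       arrow::compute::CallFunction("replace_substring", {strings}, &opts));
+//   // "banana" -> "ba*na": only the leftmost match is replaced because
+//   // max_replacements == 1; the trailing part is appended verbatim.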
+
+#ifdef ARROW_WITH_RE2
+struct RegexSubStringReplacer {
+ const ReplaceSubstringOptions& options_;
+ const RE2 regex_find_;
+ const RE2 regex_replacement_;
+
+ static Result<std::unique_ptr<RegexSubStringReplacer>> Make(
+ const ReplaceSubstringOptions& options) {
+ auto replacer = arrow::internal::make_unique<RegexSubStringReplacer>(options);
+
+ RETURN_NOT_OK(RegexStatus(replacer->regex_find_));
+ RETURN_NOT_OK(RegexStatus(replacer->regex_replacement_));
+
+ std::string replacement_error;
+ if (!replacer->regex_replacement_.CheckRewriteString(replacer->options_.replacement,
+ &replacement_error)) {
+ return Status::Invalid("Invalid replacement string: ",
+ std::move(replacement_error));
+ }
+
+ return std::move(replacer);
+ }
+
+ // Using RE2::FindAndConsume we can only find the pattern if it is a group, therefore
+ // we have 2 regexes, one with () around it, one without.
+ explicit RegexSubStringReplacer(const ReplaceSubstringOptions& options)
+ : options_(options),
+ regex_find_("(" + options_.pattern + ")", RE2::Quiet),
+ regex_replacement_(options_.pattern, RE2::Quiet) {}
+
+ Status ReplaceString(util::string_view s, TypedBufferBuilder<uint8_t>* builder) const {
+ re2::StringPiece replacement(options_.replacement);
+
+ if (options_.max_replacements == -1) {
+ std::string s_copy(s.to_string());
+ re2::RE2::GlobalReplace(&s_copy, regex_replacement_, replacement);
+ return builder->Append(reinterpret_cast<const uint8_t*>(s_copy.data()),
+ s_copy.length());
+ }
+
+ // Since RE2 does not have the concept of max_replacements, we have to do some work
+ // ourselves.
+ // We might do this faster similar to RE2::GlobalReplace using Match and Rewrite
+ const char* i = s.begin();
+ const char* end = s.end();
+ re2::StringPiece piece(s.data(), s.length());
+
+ int64_t max_replacements = options_.max_replacements;
+ while ((i < end) && (max_replacements != 0)) {
+ std::string found;
+ if (!re2::RE2::FindAndConsume(&piece, regex_find_, &found)) {
+ RETURN_NOT_OK(builder->Append(reinterpret_cast<const uint8_t*>(i),
+ static_cast<int64_t>(end - i)));
+ i = end;
+ } else {
+ // wind back to the beginning of the match
+ const char* pos = piece.begin() - found.length();
+ // the string before the pattern
+ RETURN_NOT_OK(builder->Append(reinterpret_cast<const uint8_t*>(i),
+ static_cast<int64_t>(pos - i)));
+ // replace the pattern in what we found
+ if (!re2::RE2::Replace(&found, regex_replacement_, replacement)) {
+ return Status::Invalid("Regex found, but replacement failed");
+ }
+ RETURN_NOT_OK(builder->Append(reinterpret_cast<const uint8_t*>(found.data()),
+ static_cast<int64_t>(found.length())));
+ // skip pattern
+ i = piece.begin();
+ max_replacements--;
+ }
+ }
+ // If we exited early due to max_replacements, add the trailing part
+ return builder->Append(reinterpret_cast<const uint8_t*>(i),
+ static_cast<int64_t>(end - i));
+ }
+};
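+
+// A minimal usage sketch with a backreference in the rewrite string
+// (assumes <arrow/compute/api.h>; `strings` and `opts` are illustrative):
+//
+//   arrow::compute::ReplaceSubstringOptions opts("(\\d+)", "<\\1>");
+//   ARROW_ASSIGN_OR_RAISE(
+//       arrow::Datum out,
+//       arrow::compute::CallFunction("replace_substring_regex", {strings},
+//                                    &opts));
+//   // "v1.2" -> "v<1>.<2>"; CheckRewriteString above rejects rewrites that
+//   // reference more groups than the pattern defines (e.g. "\\2" here).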
+#endif
+
+template <typename Type>
+using ReplaceSubStringPlain = ReplaceSubString<Type, PlainSubStringReplacer>;
+
+const FunctionDoc replace_substring_doc(
+ "Replace non-overlapping substrings that match pattern by replacement",
+ ("For each string in `strings`, replace non-overlapping substrings that match\n"
+ "`pattern` by `replacement`. If `max_replacements != -1`, it determines the\n"
+     "maximum number of replacements made, counting from the left. Null values emit\n"
+ "null."),
+ {"strings"}, "ReplaceSubstringOptions");
+
+#ifdef ARROW_WITH_RE2
+template <typename Type>
+using ReplaceSubStringRegex = ReplaceSubString<Type, RegexSubStringReplacer>;
+
+const FunctionDoc replace_substring_regex_doc(
+ "Replace non-overlapping substrings that match regex `pattern` by `replacement`",
+ ("For each string in `strings`, replace non-overlapping substrings that match the\n"
+ "regular expression `pattern` by `replacement` using the Google RE2 library.\n"
+     "If `max_replacements != -1`, it determines the maximum number of replacements\n"
+     "made, counting from the left. Note that if the pattern contains groups,\n"
+     "backreferencing can be used. Null values emit null."),
+ {"strings"}, "ReplaceSubstringOptions");
+#endif
+
+// ----------------------------------------------------------------------
+// Replace slice
+
+struct ReplaceSliceTransformBase : public StringTransformBase {
+ using State = OptionsWrapper<ReplaceSliceOptions>;
+
+ const ReplaceSliceOptions* options;
+
+ explicit ReplaceSliceTransformBase(const ReplaceSliceOptions& options)
+ : options{&options} {}
+
+ int64_t MaxCodeunits(int64_t ninputs, int64_t input_ncodeunits) override {
+ return ninputs * options->replacement.size() + input_ncodeunits;
+ }
+};
+
+struct BinaryReplaceSliceTransform : ReplaceSliceTransformBase {
+ using ReplaceSliceTransformBase::ReplaceSliceTransformBase;
+ int64_t Transform(const uint8_t* input, int64_t input_string_ncodeunits,
+ uint8_t* output) {
+ const auto& opts = *options;
+ int64_t before_slice = 0;
+ int64_t after_slice = 0;
+ uint8_t* output_start = output;
+
+ if (opts.start >= 0) {
+ // Count from left
+ before_slice = std::min<int64_t>(input_string_ncodeunits, opts.start);
+ } else {
+ // Count from right
+ before_slice = std::max<int64_t>(0, input_string_ncodeunits + opts.start);
+ }
+ // Mimic Pandas: if stop would be before start, treat as 0-length slice
+ if (opts.stop >= 0) {
+ // Count from left
+ after_slice =
+ std::min<int64_t>(input_string_ncodeunits, std::max(before_slice, opts.stop));
+ } else {
+ // Count from right
+ after_slice = std::max<int64_t>(before_slice, input_string_ncodeunits + opts.stop);
+ }
+ output = std::copy(input, input + before_slice, output);
+ output = std::copy(opts.replacement.begin(), opts.replacement.end(), output);
+ output = std::copy(input + after_slice, input + input_string_ncodeunits, output);
+ return output - output_start;
+ }
+};
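+
+// Worked examples of the byte-oriented slice logic above (illustrative):
+//   start=1, stop=3, replacement="XYZ": "hello" -> "h" + "XYZ" + "lo"
+//   start=-2, stop=-1:                  "hello" -> "hel" + replacement + "o"
+//   start=4, stop=2 (stop before start): a zero-length slice is replaced,
+//   i.e. the replacement is inserted at byte offset 4.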
+
+struct Utf8ReplaceSliceTransform : ReplaceSliceTransformBase {
+ using ReplaceSliceTransformBase::ReplaceSliceTransformBase;
+ int64_t Transform(const uint8_t* input, int64_t input_string_ncodeunits,
+ uint8_t* output) {
+ const auto& opts = *options;
+ const uint8_t* begin = input;
+ const uint8_t* end = input + input_string_ncodeunits;
+ const uint8_t *begin_sliced, *end_sliced;
+ uint8_t* output_start = output;
+
+ // Mimic Pandas: if stop would be before start, treat as 0-length slice
+ if (opts.start >= 0) {
+ // Count from left
+ if (!arrow::util::UTF8AdvanceCodepoints(begin, end, &begin_sliced, opts.start)) {
+ return kTransformError;
+ }
+      if (opts.stop > opts.start) {
+        // Continue counting from left
+        const int64_t length = opts.stop - opts.start;
+ if (!arrow::util::UTF8AdvanceCodepoints(begin_sliced, end, &end_sliced, length)) {
+ return kTransformError;
+ }
+ } else if (opts.stop < 0) {
+ // Count from right
+ if (!arrow::util::UTF8AdvanceCodepointsReverse(begin_sliced, end, &end_sliced,
+ -opts.stop)) {
+ return kTransformError;
+ }
+ } else {
+ // Zero-length slice
+ end_sliced = begin_sliced;
+ }
+ } else {
+ // Count from right
+ if (!arrow::util::UTF8AdvanceCodepointsReverse(begin, end, &begin_sliced,
+ -opts.start)) {
+ return kTransformError;
+ }
+ if (opts.stop >= 0) {
+ // Restart counting from left
+ if (!arrow::util::UTF8AdvanceCodepoints(begin, end, &end_sliced, opts.stop)) {
+ return kTransformError;
+ }
+ if (end_sliced <= begin_sliced) {
+ // Zero-length slice
+ end_sliced = begin_sliced;
+ }
+      } else if ((opts.stop < 0) && (opts.stop > opts.start)) {
+ // Count from right
+ if (!arrow::util::UTF8AdvanceCodepointsReverse(begin_sliced, end, &end_sliced,
+ -opts.stop)) {
+ return kTransformError;
+ }
+ } else {
+ // zero-length slice
+ end_sliced = begin_sliced;
+ }
+ }
+ output = std::copy(begin, begin_sliced, output);
+    output = std::copy(opts.replacement.begin(), opts.replacement.end(), output);
+ output = std::copy(end_sliced, end, output);
+ return output - output_start;
+ }
+};
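+
+// Worked example of the codepoint-oriented variant (illustrative): with
+// start=1, stop=2 and replacement "_", the 3-codepoint string "aéc" becomes
+// "a_c" -- "é" occupies two bytes in UTF8, but slicing advances by
+// codepoints, so exactly one character is replaced.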
+
+template <typename Type>
+using BinaryReplaceSlice =
+ StringTransformExecWithState<Type, BinaryReplaceSliceTransform>;
+template <typename Type>
+using Utf8ReplaceSlice = StringTransformExecWithState<Type, Utf8ReplaceSliceTransform>;
+
+const FunctionDoc binary_replace_slice_doc(
+ "Replace a slice of a binary string with `replacement`",
+    ("For each string in `strings`, replace a slice of the string defined by `start`\n"
+     "and `stop` with `replacement`. `start` is inclusive and `stop` is exclusive,\n"
+     "and both are measured in bytes.\n"
+ "Null values emit null."),
+ {"strings"}, "ReplaceSliceOptions");
+
+const FunctionDoc utf8_replace_slice_doc(
+ "Replace a slice of a string with `replacement`",
+    ("For each string in `strings`, replace a slice of the string defined by `start`\n"
+     "and `stop` with `replacement`. `start` is inclusive and `stop` is exclusive,\n"
+     "and both are measured in codepoints.\n"
+ "Null values emit null."),
+ {"strings"}, "ReplaceSliceOptions");
+
+void AddReplaceSlice(FunctionRegistry* registry) {
+ {
+ auto func = std::make_shared<ScalarFunction>("binary_replace_slice", Arity::Unary(),
+ &binary_replace_slice_doc);
+ for (const auto& ty : BaseBinaryTypes()) {
+ DCHECK_OK(func->AddKernel({ty}, ty,
+ GenerateTypeAgnosticVarBinaryBase<BinaryReplaceSlice>(ty),
+ ReplaceSliceTransformBase::State::Init));
+ }
+ DCHECK_OK(registry->AddFunction(std::move(func)));
+ }
+
+ {
+ auto func = std::make_shared<ScalarFunction>("utf8_replace_slice", Arity::Unary(),
+ &utf8_replace_slice_doc);
+ DCHECK_OK(func->AddKernel({utf8()}, utf8(), Utf8ReplaceSlice<StringType>::Exec,
+ ReplaceSliceTransformBase::State::Init));
+ DCHECK_OK(func->AddKernel({large_utf8()}, large_utf8(),
+ Utf8ReplaceSlice<LargeStringType>::Exec,
+ ReplaceSliceTransformBase::State::Init));
+ DCHECK_OK(registry->AddFunction(std::move(func)));
+ }
+}
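+
+// A minimal usage sketch for the kernels registered above (assumes
+// <arrow/compute/api.h>; `strings` and `opts` are illustrative):
+//
+//   arrow::compute::ReplaceSliceOptions opts(/*start=*/0, /*stop=*/2, "??");
+//   ARROW_ASSIGN_OR_RAISE(
+//       arrow::Datum out,
+//       arrow::compute::CallFunction("utf8_replace_slice", {strings}, &opts));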
+
+// ----------------------------------------------------------------------
+// Extract with regex
+
+#ifdef ARROW_WITH_RE2
+
+// TODO cache this once per ExtractRegexOptions
+struct ExtractRegexData {
+ // Use unique_ptr<> because RE2 is non-movable
+ std::unique_ptr<RE2> regex;
+ std::vector<std::string> group_names;
+
+ static Result<ExtractRegexData> Make(const ExtractRegexOptions& options) {
+ ExtractRegexData data(options.pattern);
+ RETURN_NOT_OK(RegexStatus(*data.regex));
+
+ const int group_count = data.regex->NumberOfCapturingGroups();
+ const auto& name_map = data.regex->CapturingGroupNames();
+ data.group_names.reserve(group_count);
+
+ for (int i = 0; i < group_count; i++) {
+ auto item = name_map.find(i + 1); // re2 starts counting from 1
+ if (item == name_map.end()) {
+ // XXX should we instead just create fields with an empty name?
+ return Status::Invalid("Regular expression contains unnamed groups");
+ }
+ data.group_names.emplace_back(item->second);
+ }
+ return std::move(data);
+ }
+
+ Result<ValueDescr> ResolveOutputType(const std::vector<ValueDescr>& args) const {
+ const auto& input_type = args[0].type;
+ if (input_type == nullptr) {
+ // No input type specified => propagate shape
+ return args[0];
+ }
+ // Input type is either String or LargeString and is also the type of each
+ // field in the output struct type.
+ DCHECK(input_type->id() == Type::STRING || input_type->id() == Type::LARGE_STRING);
+ FieldVector fields;
+ fields.reserve(group_names.size());
+ std::transform(group_names.begin(), group_names.end(), std::back_inserter(fields),
+ [&](const std::string& name) { return field(name, input_type); });
+ return struct_(std::move(fields));
+ }
+
+ private:
+ explicit ExtractRegexData(const std::string& pattern)
+ : regex(new RE2(pattern, RE2::Quiet)) {}
+};
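+
+// For example (illustrative): the pattern "(?P<letter>[ab])(?P<digit>\\d)"
+// yields group_names {"letter", "digit"}, and ResolveOutputType maps a
+// utf8() input to struct_({field("letter", utf8()), field("digit", utf8())}).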
+
+Result<ValueDescr> ResolveExtractRegexOutput(KernelContext* ctx,
+ const std::vector<ValueDescr>& args) {
+ using State = OptionsWrapper<ExtractRegexOptions>;
+ ExtractRegexOptions options = State::Get(ctx);
+ ARROW_ASSIGN_OR_RAISE(auto data, ExtractRegexData::Make(options));
+ return data.ResolveOutputType(args);
+}
+
+struct ExtractRegexBase {
+ const ExtractRegexData& data;
+ const int group_count;
+ std::vector<re2::StringPiece> found_values;
+ std::vector<re2::RE2::Arg> args;
+ std::vector<const re2::RE2::Arg*> args_pointers;
+ const re2::RE2::Arg** args_pointers_start;
+ const re2::RE2::Arg* null_arg = nullptr;
+
+ explicit ExtractRegexBase(const ExtractRegexData& data)
+ : data(data),
+ group_count(static_cast<int>(data.group_names.size())),
+ found_values(group_count) {
+ args.reserve(group_count);
+ args_pointers.reserve(group_count);
+
+ for (int i = 0; i < group_count; i++) {
+ args.emplace_back(&found_values[i]);
+ // Since we reserved capacity, we're guaranteed the pointer remains valid
+ args_pointers.push_back(&args[i]);
+ }
+ // Avoid null pointer if there is no capture group
+ args_pointers_start = (group_count > 0) ? args_pointers.data() : &null_arg;
+ }
+
+ bool Match(util::string_view s) {
+ return re2::RE2::PartialMatchN(ToStringPiece(s), *data.regex, args_pointers_start,
+ group_count);
+ }
+};
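+
+// For example (illustrative): given a pattern with two capture groups,
+// Match("x1") calls PartialMatchN with two bound RE2::Arg slots; on
+// success, found_values holds StringPieces into the input, e.g. {"x", "1"}.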
+
+template <typename Type>
+struct ExtractRegex : public ExtractRegexBase {
+ using ArrayType = typename TypeTraits<Type>::ArrayType;
+ using ScalarType = typename TypeTraits<Type>::ScalarType;
+ using BuilderType = typename TypeTraits<Type>::BuilderType;
+ using State = OptionsWrapper<ExtractRegexOptions>;
+
+ using ExtractRegexBase::ExtractRegexBase;
+
+ static Status Exec(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
+ ExtractRegexOptions options = State::Get(ctx);
+ ARROW_ASSIGN_OR_RAISE(auto data, ExtractRegexData::Make(options));
+ return ExtractRegex{data}.Extract(ctx, batch, out);
+ }
+
+ Status Extract(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
+ ARROW_ASSIGN_OR_RAISE(auto descr, data.ResolveOutputType(batch.GetDescriptors()));
+ DCHECK_NE(descr.type, nullptr);
+ const auto& type = descr.type;
+
+ if (batch[0].kind() == Datum::ARRAY) {
+ std::unique_ptr<ArrayBuilder> array_builder;
+ RETURN_NOT_OK(MakeBuilder(ctx->memory_pool(), type, &array_builder));
+ StructBuilder* struct_builder = checked_cast<StructBuilder*>(array_builder.get());
+
+ std::vector<BuilderType*> field_builders;
+ field_builders.reserve(group_count);
+ for (int i = 0; i < group_count; i++) {
+ field_builders.push_back(
+ checked_cast<BuilderType*>(struct_builder->field_builder(i)));
+ }
+
+ auto visit_null = [&]() { return struct_builder->AppendNull(); };
+ auto visit_value = [&](util::string_view s) {
+ if (Match(s)) {
+ for (int i = 0; i < group_count; i++) {
+ RETURN_NOT_OK(field_builders[i]->Append(ToStringView(found_values[i])));
+ }
+ return struct_builder->Append();
+ } else {
+ return struct_builder->AppendNull();
+ }
+ };
+ const ArrayData& input = *batch[0].array();
+ RETURN_NOT_OK(VisitArrayDataInline<Type>(input, visit_value, visit_null));
+
+ std::shared_ptr<Array> out_array;
+ RETURN_NOT_OK(struct_builder->Finish(&out_array));
+ *out = std::move(out_array);
+ } else {
+ const auto& input = checked_cast<const ScalarType&>(*batch[0].scalar());
+ auto result = std::make_shared<StructScalar>(type);
+ if (input.is_valid && Match(util::string_view(*input.value))) {
+ result->value.reserve(group_count);
+ for (int i = 0; i < group_count; i++) {
+ result->value.push_back(
+ std::make_shared<ScalarType>(found_values[i].as_string()));
+ }
+ result->is_valid = true;
+ } else {
+ result->is_valid = false;
+ }
+ out->value = std::move(result);
+ }
+
+ return Status::OK();
+ }
+};
+
+const FunctionDoc extract_regex_doc(
+ "Extract substrings captured by a regex pattern",
+ ("For each string in `strings`, match the regular expression and, if\n"
+ "successful, emit a struct with field names and values coming from the\n"
+ "regular expression's named capture groups. If the input is null or the\n"
+ "regular expression fails matching, a null output value is emitted.\n"
+ "\n"
+ "Regular expression matching is done using the Google RE2 library."),
+ {"strings"}, "ExtractRegexOptions");
+
+void AddExtractRegex(FunctionRegistry* registry) {
+ auto func = std::make_shared<ScalarFunction>("extract_regex", Arity::Unary(),
+ &extract_regex_doc);
+ using t32 = ExtractRegex<StringType>;
+ using t64 = ExtractRegex<LargeStringType>;
+ OutputType out_ty(ResolveExtractRegexOutput);
+ ScalarKernel kernel;
+
+ // Null values will be computed based on regex match or not
+ kernel.null_handling = NullHandling::COMPUTED_NO_PREALLOCATE;
+ kernel.mem_allocation = MemAllocation::NO_PREALLOCATE;
+ kernel.signature.reset(new KernelSignature({utf8()}, out_ty));
+ kernel.exec = t32::Exec;
+ kernel.init = t32::State::Init;
+ DCHECK_OK(func->AddKernel(kernel));
+ kernel.signature.reset(new KernelSignature({large_utf8()}, out_ty));
+ kernel.exec = t64::Exec;
+ kernel.init = t64::State::Init;
+ DCHECK_OK(func->AddKernel(kernel));
+
+ DCHECK_OK(registry->AddFunction(std::move(func)));
+}
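+
+// A minimal usage sketch (assumes <arrow/compute/api.h>; `strings` and
+// `opts` are illustrative names):
+//
+//   arrow::compute::ExtractRegexOptions opts("(?P<key>\\w+)=(?P<value>\\w+)");
+//   ARROW_ASSIGN_OR_RAISE(
+//       arrow::Datum out,
+//       arrow::compute::CallFunction("extract_regex", {strings}, &opts));
+//   // "a=1" -> {key: "a", value: "1"}; strings that do not match emit null.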
+#endif // ARROW_WITH_RE2
+
// ----------------------------------------------------------------------
-// Replace substring (plain, regex)
-
-template <typename Type, typename Replacer>
-struct ReplaceSubString {
- using ScalarType = typename TypeTraits<Type>::ScalarType;
- using offset_type = typename Type::offset_type;
- using ValueDataBuilder = TypedBufferBuilder<uint8_t>;
- using OffsetBuilder = TypedBufferBuilder<offset_type>;
- using State = OptionsWrapper<ReplaceSubstringOptions>;
-
- static Status Exec(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
- // TODO Cache replacer across invocations (for regex compilation)
- ARROW_ASSIGN_OR_RAISE(auto replacer, Replacer::Make(State::Get(ctx)));
- return Replace(ctx, batch, *replacer, out);
- }
-
- static Status Replace(KernelContext* ctx, const ExecBatch& batch,
- const Replacer& replacer, Datum* out) {
- ValueDataBuilder value_data_builder(ctx->memory_pool());
- OffsetBuilder offset_builder(ctx->memory_pool());
-
- if (batch[0].kind() == Datum::ARRAY) {
- // We already know how many strings we have, so we can use Reserve/UnsafeAppend
- RETURN_NOT_OK(offset_builder.Reserve(batch[0].array()->length + 1));
- offset_builder.UnsafeAppend(0); // offsets start at 0
-
- const ArrayData& input = *batch[0].array();
- RETURN_NOT_OK(VisitArrayDataInline<Type>(
- input,
- [&](util::string_view s) {
- RETURN_NOT_OK(replacer.ReplaceString(s, &value_data_builder));
- offset_builder.UnsafeAppend(
- static_cast<offset_type>(value_data_builder.length()));
- return Status::OK();
- },
- [&]() {
- // offset for null value
- offset_builder.UnsafeAppend(
- static_cast<offset_type>(value_data_builder.length()));
- return Status::OK();
- }));
- ArrayData* output = out->mutable_array();
- RETURN_NOT_OK(value_data_builder.Finish(&output->buffers[2]));
- RETURN_NOT_OK(offset_builder.Finish(&output->buffers[1]));
- } else {
- const auto& input = checked_cast<const ScalarType&>(*batch[0].scalar());
- auto result = std::make_shared<ScalarType>();
- if (input.is_valid) {
- util::string_view s = static_cast<util::string_view>(*input.value);
- RETURN_NOT_OK(replacer.ReplaceString(s, &value_data_builder));
- RETURN_NOT_OK(value_data_builder.Finish(&result->value));
- result->is_valid = true;
- }
- out->value = result;
- }
-
- return Status::OK();
- }
-};
-
-struct PlainSubStringReplacer {
- const ReplaceSubstringOptions& options_;
-
- static Result<std::unique_ptr<PlainSubStringReplacer>> Make(
- const ReplaceSubstringOptions& options) {
- return arrow::internal::make_unique<PlainSubStringReplacer>(options);
- }
-
- explicit PlainSubStringReplacer(const ReplaceSubstringOptions& options)
- : options_(options) {}
-
- Status ReplaceString(util::string_view s, TypedBufferBuilder<uint8_t>* builder) const {
- const char* i = s.begin();
- const char* end = s.end();
- int64_t max_replacements = options_.max_replacements;
- while ((i < end) && (max_replacements != 0)) {
- const char* pos =
- std::search(i, end, options_.pattern.begin(), options_.pattern.end());
- if (pos == end) {
- RETURN_NOT_OK(builder->Append(reinterpret_cast<const uint8_t*>(i),
- static_cast<int64_t>(end - i)));
- i = end;
- } else {
- // the string before the pattern
- RETURN_NOT_OK(builder->Append(reinterpret_cast<const uint8_t*>(i),
- static_cast<int64_t>(pos - i)));
- // the replacement
- RETURN_NOT_OK(
- builder->Append(reinterpret_cast<const uint8_t*>(options_.replacement.data()),
- options_.replacement.length()));
- // skip pattern
- i = pos + options_.pattern.length();
- max_replacements--;
- }
- }
- // if we exited early due to max_replacements, add the trailing part
- return builder->Append(reinterpret_cast<const uint8_t*>(i),
- static_cast<int64_t>(end - i));
- }
-};
-
-#ifdef ARROW_WITH_RE2
-struct RegexSubStringReplacer {
- const ReplaceSubstringOptions& options_;
- const RE2 regex_find_;
- const RE2 regex_replacement_;
-
- static Result<std::unique_ptr<RegexSubStringReplacer>> Make(
- const ReplaceSubstringOptions& options) {
- auto replacer = arrow::internal::make_unique<RegexSubStringReplacer>(options);
-
- RETURN_NOT_OK(RegexStatus(replacer->regex_find_));
- RETURN_NOT_OK(RegexStatus(replacer->regex_replacement_));
-
- std::string replacement_error;
- if (!replacer->regex_replacement_.CheckRewriteString(replacer->options_.replacement,
- &replacement_error)) {
- return Status::Invalid("Invalid replacement string: ",
- std::move(replacement_error));
- }
-
- return std::move(replacer);
- }
-
- // Using RE2::FindAndConsume we can only find the pattern if it is a group, therefore
- // we have 2 regexes, one with () around it, one without.
- explicit RegexSubStringReplacer(const ReplaceSubstringOptions& options)
- : options_(options),
- regex_find_("(" + options_.pattern + ")", RE2::Quiet),
- regex_replacement_(options_.pattern, RE2::Quiet) {}
-
- Status ReplaceString(util::string_view s, TypedBufferBuilder<uint8_t>* builder) const {
- re2::StringPiece replacement(options_.replacement);
-
- if (options_.max_replacements == -1) {
- std::string s_copy(s.to_string());
- re2::RE2::GlobalReplace(&s_copy, regex_replacement_, replacement);
- return builder->Append(reinterpret_cast<const uint8_t*>(s_copy.data()),
- s_copy.length());
- }
-
- // Since RE2 does not have the concept of max_replacements, we have to do some work
- // ourselves.
- // We might do this faster similar to RE2::GlobalReplace using Match and Rewrite
- const char* i = s.begin();
- const char* end = s.end();
- re2::StringPiece piece(s.data(), s.length());
-
- int64_t max_replacements = options_.max_replacements;
- while ((i < end) && (max_replacements != 0)) {
- std::string found;
- if (!re2::RE2::FindAndConsume(&piece, regex_find_, &found)) {
- RETURN_NOT_OK(builder->Append(reinterpret_cast<const uint8_t*>(i),
- static_cast<int64_t>(end - i)));
- i = end;
- } else {
- // wind back to the beginning of the match
- const char* pos = piece.begin() - found.length();
- // the string before the pattern
- RETURN_NOT_OK(builder->Append(reinterpret_cast<const uint8_t*>(i),
- static_cast<int64_t>(pos - i)));
- // replace the pattern in what we found
- if (!re2::RE2::Replace(&found, regex_replacement_, replacement)) {
- return Status::Invalid("Regex found, but replacement failed");
- }
- RETURN_NOT_OK(builder->Append(reinterpret_cast<const uint8_t*>(found.data()),
- static_cast<int64_t>(found.length())));
- // skip pattern
- i = piece.begin();
- max_replacements--;
- }
- }
- // If we exited early due to max_replacements, add the trailing part
- return builder->Append(reinterpret_cast<const uint8_t*>(i),
- static_cast<int64_t>(end - i));
- }
-};
-#endif
-
-template <typename Type>
-using ReplaceSubStringPlain = ReplaceSubString<Type, PlainSubStringReplacer>;
-
-const FunctionDoc replace_substring_doc(
- "Replace non-overlapping substrings that match pattern by replacement",
- ("For each string in `strings`, replace non-overlapping substrings that match\n"
- "`pattern` by `replacement`. If `max_replacements != -1`, it determines the\n"
-     "maximum number of replacements made, counting from the left. Null values emit\n"
- "null."),
- {"strings"}, "ReplaceSubstringOptions");
-
-#ifdef ARROW_WITH_RE2
-template <typename Type>
-using ReplaceSubStringRegex = ReplaceSubString<Type, RegexSubStringReplacer>;
-
-const FunctionDoc replace_substring_regex_doc(
- "Replace non-overlapping substrings that match regex `pattern` by `replacement`",
- ("For each string in `strings`, replace non-overlapping substrings that match the\n"
- "regular expression `pattern` by `replacement` using the Google RE2 library.\n"
-     "If `max_replacements != -1`, it determines the maximum number of replacements\n"
-     "made, counting from the left. Note that if the pattern contains groups,\n"
-     "backreferencing can be used. Null values emit null."),
- {"strings"}, "ReplaceSubstringOptions");
-#endif
-
-// ----------------------------------------------------------------------
-// Replace slice
-
-struct ReplaceSliceTransformBase : public StringTransformBase {
- using State = OptionsWrapper<ReplaceSliceOptions>;
-
- const ReplaceSliceOptions* options;
-
- explicit ReplaceSliceTransformBase(const ReplaceSliceOptions& options)
- : options{&options} {}
-
- int64_t MaxCodeunits(int64_t ninputs, int64_t input_ncodeunits) override {
- return ninputs * options->replacement.size() + input_ncodeunits;
- }
-};
-
-struct BinaryReplaceSliceTransform : ReplaceSliceTransformBase {
- using ReplaceSliceTransformBase::ReplaceSliceTransformBase;
- int64_t Transform(const uint8_t* input, int64_t input_string_ncodeunits,
- uint8_t* output) {
- const auto& opts = *options;
- int64_t before_slice = 0;
- int64_t after_slice = 0;
- uint8_t* output_start = output;
-
- if (opts.start >= 0) {
- // Count from left
- before_slice = std::min<int64_t>(input_string_ncodeunits, opts.start);
- } else {
- // Count from right
- before_slice = std::max<int64_t>(0, input_string_ncodeunits + opts.start);
- }
- // Mimic Pandas: if stop would be before start, treat as 0-length slice
- if (opts.stop >= 0) {
- // Count from left
- after_slice =
- std::min<int64_t>(input_string_ncodeunits, std::max(before_slice, opts.stop));
- } else {
- // Count from right
- after_slice = std::max<int64_t>(before_slice, input_string_ncodeunits + opts.stop);
- }
- output = std::copy(input, input + before_slice, output);
- output = std::copy(opts.replacement.begin(), opts.replacement.end(), output);
- output = std::copy(input + after_slice, input + input_string_ncodeunits, output);
- return output - output_start;
- }
-};
-
-struct Utf8ReplaceSliceTransform : ReplaceSliceTransformBase {
- using ReplaceSliceTransformBase::ReplaceSliceTransformBase;
- int64_t Transform(const uint8_t* input, int64_t input_string_ncodeunits,
- uint8_t* output) {
- const auto& opts = *options;
- const uint8_t* begin = input;
- const uint8_t* end = input + input_string_ncodeunits;
- const uint8_t *begin_sliced, *end_sliced;
- uint8_t* output_start = output;
-
- // Mimic Pandas: if stop would be before start, treat as 0-length slice
- if (opts.start >= 0) {
- // Count from left
- if (!arrow::util::UTF8AdvanceCodepoints(begin, end, &begin_sliced, opts.start)) {
- return kTransformError;
- }
-      if (opts.stop > opts.start) {
-        // Continue counting from left
-        const int64_t length = opts.stop - opts.start;
- if (!arrow::util::UTF8AdvanceCodepoints(begin_sliced, end, &end_sliced, length)) {
- return kTransformError;
- }
- } else if (opts.stop < 0) {
- // Count from right
- if (!arrow::util::UTF8AdvanceCodepointsReverse(begin_sliced, end, &end_sliced,
- -opts.stop)) {
- return kTransformError;
- }
- } else {
- // Zero-length slice
- end_sliced = begin_sliced;
- }
- } else {
- // Count from right
- if (!arrow::util::UTF8AdvanceCodepointsReverse(begin, end, &begin_sliced,
- -opts.start)) {
- return kTransformError;
- }
- if (opts.stop >= 0) {
- // Restart counting from left
- if (!arrow::util::UTF8AdvanceCodepoints(begin, end, &end_sliced, opts.stop)) {
- return kTransformError;
- }
- if (end_sliced <= begin_sliced) {
- // Zero-length slice
- end_sliced = begin_sliced;
- }
-      } else if ((opts.stop < 0) && (opts.stop > opts.start)) {
- // Count from right
- if (!arrow::util::UTF8AdvanceCodepointsReverse(begin_sliced, end, &end_sliced,
- -opts.stop)) {
- return kTransformError;
- }
- } else {
- // zero-length slice
- end_sliced = begin_sliced;
- }
- }
- output = std::copy(begin, begin_sliced, output);
-    output = std::copy(opts.replacement.begin(), opts.replacement.end(), output);
- output = std::copy(end_sliced, end, output);
- return output - output_start;
- }
-};
-
-template <typename Type>
-using BinaryReplaceSlice =
- StringTransformExecWithState<Type, BinaryReplaceSliceTransform>;
-template <typename Type>
-using Utf8ReplaceSlice = StringTransformExecWithState<Type, Utf8ReplaceSliceTransform>;
-
-const FunctionDoc binary_replace_slice_doc(
- "Replace a slice of a binary string with `replacement`",
-    ("For each string in `strings`, replace a slice of the string defined by `start`\n"
-     "and `stop` with `replacement`. `start` is inclusive and `stop` is exclusive,\n"
-     "and both are measured in bytes.\n"
- "Null values emit null."),
- {"strings"}, "ReplaceSliceOptions");
-
-const FunctionDoc utf8_replace_slice_doc(
- "Replace a slice of a string with `replacement`",
-    ("For each string in `strings`, replace a slice of the string defined by `start`\n"
-     "and `stop` with `replacement`. `start` is inclusive and `stop` is exclusive,\n"
-     "and both are measured in codepoints.\n"
- "Null values emit null."),
- {"strings"}, "ReplaceSliceOptions");
-
-void AddReplaceSlice(FunctionRegistry* registry) {
- {
- auto func = std::make_shared<ScalarFunction>("binary_replace_slice", Arity::Unary(),
- &binary_replace_slice_doc);
- for (const auto& ty : BaseBinaryTypes()) {
- DCHECK_OK(func->AddKernel({ty}, ty,
- GenerateTypeAgnosticVarBinaryBase<BinaryReplaceSlice>(ty),
- ReplaceSliceTransformBase::State::Init));
- }
- DCHECK_OK(registry->AddFunction(std::move(func)));
- }
-
- {
- auto func = std::make_shared<ScalarFunction>("utf8_replace_slice", Arity::Unary(),
- &utf8_replace_slice_doc);
- DCHECK_OK(func->AddKernel({utf8()}, utf8(), Utf8ReplaceSlice<StringType>::Exec,
- ReplaceSliceTransformBase::State::Init));
- DCHECK_OK(func->AddKernel({large_utf8()}, large_utf8(),
- Utf8ReplaceSlice<LargeStringType>::Exec,
- ReplaceSliceTransformBase::State::Init));
- DCHECK_OK(registry->AddFunction(std::move(func)));
- }
-}
-
-// ----------------------------------------------------------------------
-// Extract with regex
-
-#ifdef ARROW_WITH_RE2
-
-// TODO cache this once per ExtractRegexOptions
-struct ExtractRegexData {
- // Use unique_ptr<> because RE2 is non-movable
- std::unique_ptr<RE2> regex;
- std::vector<std::string> group_names;
-
- static Result<ExtractRegexData> Make(const ExtractRegexOptions& options) {
- ExtractRegexData data(options.pattern);
- RETURN_NOT_OK(RegexStatus(*data.regex));
-
- const int group_count = data.regex->NumberOfCapturingGroups();
- const auto& name_map = data.regex->CapturingGroupNames();
- data.group_names.reserve(group_count);
-
- for (int i = 0; i < group_count; i++) {
- auto item = name_map.find(i + 1); // re2 starts counting from 1
- if (item == name_map.end()) {
- // XXX should we instead just create fields with an empty name?
- return Status::Invalid("Regular expression contains unnamed groups");
- }
- data.group_names.emplace_back(item->second);
- }
- return std::move(data);
- }
-
- Result<ValueDescr> ResolveOutputType(const std::vector<ValueDescr>& args) const {
- const auto& input_type = args[0].type;
- if (input_type == nullptr) {
- // No input type specified => propagate shape
- return args[0];
- }
- // Input type is either String or LargeString and is also the type of each
- // field in the output struct type.
- DCHECK(input_type->id() == Type::STRING || input_type->id() == Type::LARGE_STRING);
- FieldVector fields;
- fields.reserve(group_names.size());
- std::transform(group_names.begin(), group_names.end(), std::back_inserter(fields),
- [&](const std::string& name) { return field(name, input_type); });
- return struct_(std::move(fields));
- }
-
- private:
- explicit ExtractRegexData(const std::string& pattern)
- : regex(new RE2(pattern, RE2::Quiet)) {}
-};
-
-Result<ValueDescr> ResolveExtractRegexOutput(KernelContext* ctx,
- const std::vector<ValueDescr>& args) {
- using State = OptionsWrapper<ExtractRegexOptions>;
- ExtractRegexOptions options = State::Get(ctx);
- ARROW_ASSIGN_OR_RAISE(auto data, ExtractRegexData::Make(options));
- return data.ResolveOutputType(args);
-}
-
-struct ExtractRegexBase {
- const ExtractRegexData& data;
- const int group_count;
- std::vector<re2::StringPiece> found_values;
- std::vector<re2::RE2::Arg> args;
- std::vector<const re2::RE2::Arg*> args_pointers;
- const re2::RE2::Arg** args_pointers_start;
- const re2::RE2::Arg* null_arg = nullptr;
-
- explicit ExtractRegexBase(const ExtractRegexData& data)
- : data(data),
- group_count(static_cast<int>(data.group_names.size())),
- found_values(group_count) {
- args.reserve(group_count);
- args_pointers.reserve(group_count);
-
- for (int i = 0; i < group_count; i++) {
- args.emplace_back(&found_values[i]);
- // Since we reserved capacity, we're guaranteed the pointer remains valid
- args_pointers.push_back(&args[i]);
- }
- // Avoid null pointer if there is no capture group
- args_pointers_start = (group_count > 0) ? args_pointers.data() : &null_arg;
- }
-
- bool Match(util::string_view s) {
- return re2::RE2::PartialMatchN(ToStringPiece(s), *data.regex, args_pointers_start,
- group_count);
- }
-};
-
-template <typename Type>
-struct ExtractRegex : public ExtractRegexBase {
- using ArrayType = typename TypeTraits<Type>::ArrayType;
- using ScalarType = typename TypeTraits<Type>::ScalarType;
- using BuilderType = typename TypeTraits<Type>::BuilderType;
- using State = OptionsWrapper<ExtractRegexOptions>;
-
- using ExtractRegexBase::ExtractRegexBase;
-
- static Status Exec(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
- ExtractRegexOptions options = State::Get(ctx);
- ARROW_ASSIGN_OR_RAISE(auto data, ExtractRegexData::Make(options));
- return ExtractRegex{data}.Extract(ctx, batch, out);
- }
-
- Status Extract(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
- ARROW_ASSIGN_OR_RAISE(auto descr, data.ResolveOutputType(batch.GetDescriptors()));
- DCHECK_NE(descr.type, nullptr);
- const auto& type = descr.type;
-
- if (batch[0].kind() == Datum::ARRAY) {
- std::unique_ptr<ArrayBuilder> array_builder;
- RETURN_NOT_OK(MakeBuilder(ctx->memory_pool(), type, &array_builder));
- StructBuilder* struct_builder = checked_cast<StructBuilder*>(array_builder.get());
-
- std::vector<BuilderType*> field_builders;
- field_builders.reserve(group_count);
- for (int i = 0; i < group_count; i++) {
- field_builders.push_back(
- checked_cast<BuilderType*>(struct_builder->field_builder(i)));
- }
-
- auto visit_null = [&]() { return struct_builder->AppendNull(); };
- auto visit_value = [&](util::string_view s) {
- if (Match(s)) {
- for (int i = 0; i < group_count; i++) {
- RETURN_NOT_OK(field_builders[i]->Append(ToStringView(found_values[i])));
- }
- return struct_builder->Append();
- } else {
- return struct_builder->AppendNull();
- }
- };
- const ArrayData& input = *batch[0].array();
- RETURN_NOT_OK(VisitArrayDataInline<Type>(input, visit_value, visit_null));
-
- std::shared_ptr<Array> out_array;
- RETURN_NOT_OK(struct_builder->Finish(&out_array));
- *out = std::move(out_array);
- } else {
- const auto& input = checked_cast<const ScalarType&>(*batch[0].scalar());
- auto result = std::make_shared<StructScalar>(type);
- if (input.is_valid && Match(util::string_view(*input.value))) {
- result->value.reserve(group_count);
- for (int i = 0; i < group_count; i++) {
- result->value.push_back(
- std::make_shared<ScalarType>(found_values[i].as_string()));
- }
- result->is_valid = true;
- } else {
- result->is_valid = false;
- }
- out->value = std::move(result);
- }
-
- return Status::OK();
- }
-};
-
-const FunctionDoc extract_regex_doc(
- "Extract substrings captured by a regex pattern",
- ("For each string in `strings`, match the regular expression and, if\n"
- "successful, emit a struct with field names and values coming from the\n"
- "regular expression's named capture groups. If the input is null or the\n"
- "regular expression fails matching, a null output value is emitted.\n"
- "\n"
- "Regular expression matching is done using the Google RE2 library."),
- {"strings"}, "ExtractRegexOptions");
-
-void AddExtractRegex(FunctionRegistry* registry) {
- auto func = std::make_shared<ScalarFunction>("extract_regex", Arity::Unary(),
- &extract_regex_doc);
- using t32 = ExtractRegex<StringType>;
- using t64 = ExtractRegex<LargeStringType>;
- OutputType out_ty(ResolveExtractRegexOutput);
- ScalarKernel kernel;
-
- // Null values will be computed based on regex match or not
- kernel.null_handling = NullHandling::COMPUTED_NO_PREALLOCATE;
- kernel.mem_allocation = MemAllocation::NO_PREALLOCATE;
- kernel.signature.reset(new KernelSignature({utf8()}, out_ty));
- kernel.exec = t32::Exec;
- kernel.init = t32::State::Init;
- DCHECK_OK(func->AddKernel(kernel));
- kernel.signature.reset(new KernelSignature({large_utf8()}, out_ty));
- kernel.exec = t64::Exec;
- kernel.init = t64::State::Init;
- DCHECK_OK(func->AddKernel(kernel));
-
- DCHECK_OK(registry->AddFunction(std::move(func)));
-}
-#endif // ARROW_WITH_RE2
-
-// ----------------------------------------------------------------------
// strptime string parsing
using StrptimeState = OptionsWrapper<StrptimeOptions>;
@@ -2782,11 +2782,11 @@ struct ParseStrptime {
: parser(TimestampParser::MakeStrptime(options.format)), unit(options.unit) {}
template <typename... Ignored>
- int64_t Call(KernelContext*, util::string_view val, Status* st) const {
+ int64_t Call(KernelContext*, util::string_view val, Status* st) const {
int64_t result = 0;
if (!(*parser)(val.data(), val.size(), unit, &result)) {
- *st = Status::Invalid("Failed to parse string: '", val, "' as a scalar of type ",
- TimestampType(unit).ToString());
+ *st = Status::Invalid("Failed to parse string: '", val, "' as a scalar of type ",
+ TimestampType(unit).ToString());
}
return result;
}
@@ -2796,7 +2796,7 @@ struct ParseStrptime {
};
template <typename InputType>
-Status StrptimeExec(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
+Status StrptimeExec(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
applicator::ScalarUnaryNotNullStateful<TimestampType, InputType, ParseStrptime> kernel{
ParseStrptime(StrptimeState::Get(ctx))};
return kernel.Exec(ctx, batch, out);
@@ -2810,471 +2810,471 @@ Result<ValueDescr> StrptimeResolve(KernelContext* ctx, const std::vector<ValueDe
return Status::Invalid("strptime does not provide default StrptimeOptions");
}
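+
+// A minimal usage sketch for strptime (assumes <arrow/compute/api.h>;
+// `strings` and `opts` are illustrative names):
+//
+//   arrow::compute::StrptimeOptions opts("%Y-%m-%d", arrow::TimeUnit::SECOND);
+//   ARROW_ASSIGN_OR_RAISE(
+//       arrow::Datum out,
+//       arrow::compute::CallFunction("strptime", {strings}, &opts));
+//   // "2021-06-01" parses to a timestamp(s) scalar; a non-null string that
+//   // fails to parse raises Invalid (see ParseStrptime::Call above).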
-// ----------------------------------------------------------------------
-// string padding
-
-template <bool PadLeft, bool PadRight>
-struct AsciiPadTransform : public StringTransformBase {
- using State = OptionsWrapper<PadOptions>;
-
- const PadOptions& options_;
-
- explicit AsciiPadTransform(const PadOptions& options) : options_(options) {}
-
- Status PreExec(KernelContext* ctx, const ExecBatch& batch, Datum* out) override {
- if (options_.padding.size() != 1) {
- return Status::Invalid("Padding must be one byte, got '", options_.padding, "'");
- }
- return Status::OK();
- }
-
- int64_t MaxCodeunits(int64_t ninputs, int64_t input_ncodeunits) override {
- // This is likely very overallocated but hard to do better without
- // actually looking at each string (because of strings that may be
- // longer than the given width)
- return input_ncodeunits + ninputs * options_.width;
- }
-
- int64_t Transform(const uint8_t* input, int64_t input_string_ncodeunits,
- uint8_t* output) {
- if (input_string_ncodeunits >= options_.width) {
- std::copy(input, input + input_string_ncodeunits, output);
- return input_string_ncodeunits;
- }
- const int64_t spaces = options_.width - input_string_ncodeunits;
- int64_t left = 0;
- int64_t right = 0;
- if (PadLeft && PadRight) {
- // If odd number of spaces, put the extra space on the right
- left = spaces / 2;
- right = spaces - left;
- } else if (PadLeft) {
- left = spaces;
- } else if (PadRight) {
- right = spaces;
- } else {
- DCHECK(false) << "unreachable";
- return 0;
- }
- std::fill(output, output + left, options_.padding[0]);
- output += left;
- output = std::copy(input, input + input_string_ncodeunits, output);
- std::fill(output, output + right, options_.padding[0]);
- return options_.width;
- }
-};
-
-template <bool PadLeft, bool PadRight>
-struct Utf8PadTransform : public StringTransformBase {
- using State = OptionsWrapper<PadOptions>;
-
- const PadOptions& options_;
-
- explicit Utf8PadTransform(const PadOptions& options) : options_(options) {}
-
- Status PreExec(KernelContext* ctx, const ExecBatch& batch, Datum* out) override {
- auto str = reinterpret_cast<const uint8_t*>(options_.padding.data());
- auto strlen = options_.padding.size();
- if (util::UTF8Length(str, str + strlen) != 1) {
- return Status::Invalid("Padding must be one codepoint, got '", options_.padding,
- "'");
- }
- return Status::OK();
- }
-
- int64_t MaxCodeunits(int64_t ninputs, int64_t input_ncodeunits) override {
- // This is likely very overallocated but hard to do better without
- // actually looking at each string (because of strings that may be
- // longer than the given width)
- // One codepoint may be up to 4 bytes
- return input_ncodeunits + 4 * ninputs * options_.width;
- }
-
- int64_t Transform(const uint8_t* input, int64_t input_string_ncodeunits,
- uint8_t* output) {
- const int64_t input_width = util::UTF8Length(input, input + input_string_ncodeunits);
- if (input_width >= options_.width) {
- std::copy(input, input + input_string_ncodeunits, output);
- return input_string_ncodeunits;
- }
- const int64_t spaces = options_.width - input_width;
- int64_t left = 0;
- int64_t right = 0;
- if (PadLeft && PadRight) {
- // If odd number of spaces, put the extra space on the right
- left = spaces / 2;
- right = spaces - left;
- } else if (PadLeft) {
- left = spaces;
- } else if (PadRight) {
- right = spaces;
- } else {
- DCHECK(false) << "unreachable";
- return 0;
- }
- uint8_t* start = output;
- while (left) {
- output = std::copy(options_.padding.begin(), options_.padding.end(), output);
- left--;
- }
- output = std::copy(input, input + input_string_ncodeunits, output);
- while (right) {
- output = std::copy(options_.padding.begin(), options_.padding.end(), output);
- right--;
- }
- return output - start;
- }
-};
-
-template <typename Type>
-using AsciiLPad = StringTransformExecWithState<Type, AsciiPadTransform<true, false>>;
-template <typename Type>
-using AsciiRPad = StringTransformExecWithState<Type, AsciiPadTransform<false, true>>;
-template <typename Type>
-using AsciiCenter = StringTransformExecWithState<Type, AsciiPadTransform<true, true>>;
-template <typename Type>
-using Utf8LPad = StringTransformExecWithState<Type, Utf8PadTransform<true, false>>;
-template <typename Type>
-using Utf8RPad = StringTransformExecWithState<Type, Utf8PadTransform<false, true>>;
-template <typename Type>
-using Utf8Center = StringTransformExecWithState<Type, Utf8PadTransform<true, true>>;
-
-// ----------------------------------------------------------------------
-// string trimming
-
-#ifdef ARROW_WITH_UTF8PROC
-
-template <bool TrimLeft, bool TrimRight>
-struct UTF8TrimWhitespaceTransform : public StringTransformBase {
- Status PreExec(KernelContext* ctx, const ExecBatch& batch, Datum* out) override {
- EnsureLookupTablesFilled();
- return Status::OK();
- }
-
- int64_t Transform(const uint8_t* input, int64_t input_string_ncodeunits,
- uint8_t* output) {
- const uint8_t* begin = input;
- const uint8_t* end = input + input_string_ncodeunits;
- const uint8_t* end_trimmed = end;
- const uint8_t* begin_trimmed = begin;
-
- auto predicate = [](uint32_t c) { return !IsSpaceCharacterUnicode(c); };
- if (TrimLeft && !ARROW_PREDICT_TRUE(
- arrow::util::UTF8FindIf(begin, end, predicate, &begin_trimmed))) {
- return kTransformError;
- }
- if (TrimRight && begin_trimmed < end) {
- if (!ARROW_PREDICT_TRUE(arrow::util::UTF8FindIfReverse(begin_trimmed, end,
- predicate, &end_trimmed))) {
- return kTransformError;
- }
- }
- std::copy(begin_trimmed, end_trimmed, output);
- return end_trimmed - begin_trimmed;
- }
-};
-
-template <typename Type>
-using UTF8TrimWhitespace =
- StringTransformExec<Type, UTF8TrimWhitespaceTransform<true, true>>;
-
-template <typename Type>
-using UTF8LTrimWhitespace =
- StringTransformExec<Type, UTF8TrimWhitespaceTransform<true, false>>;
-
-template <typename Type>
-using UTF8RTrimWhitespace =
- StringTransformExec<Type, UTF8TrimWhitespaceTransform<false, true>>;
-
-struct UTF8TrimState {
- TrimOptions options_;
- std::vector<bool> codepoints_;
- Status status_ = Status::OK();
-
- explicit UTF8TrimState(KernelContext* ctx, TrimOptions options)
- : options_(std::move(options)) {
- if (!ARROW_PREDICT_TRUE(
- arrow::util::UTF8ForEach(options_.characters, [&](uint32_t c) {
- codepoints_.resize(
- std::max(c + 1, static_cast<uint32_t>(codepoints_.size())));
- codepoints_.at(c) = true;
- }))) {
- status_ = Status::Invalid("Invalid UTF8 sequence in input");
- }
- }
-};
-
-template <bool TrimLeft, bool TrimRight>
-struct UTF8TrimTransform : public StringTransformBase {
- using State = KernelStateFromFunctionOptions<UTF8TrimState, TrimOptions>;
-
- const UTF8TrimState& state_;
-
- explicit UTF8TrimTransform(const UTF8TrimState& state) : state_(state) {}
-
- Status PreExec(KernelContext* ctx, const ExecBatch& batch, Datum* out) override {
- return state_.status_;
- }
-
- int64_t Transform(const uint8_t* input, int64_t input_string_ncodeunits,
- uint8_t* output) {
- const uint8_t* begin = input;
- const uint8_t* end = input + input_string_ncodeunits;
- const uint8_t* end_trimmed = end;
- const uint8_t* begin_trimmed = begin;
-
- auto predicate = [&](uint32_t c) { return !state_.codepoints_[c]; };
- if (TrimLeft && !ARROW_PREDICT_TRUE(
- arrow::util::UTF8FindIf(begin, end, predicate, &begin_trimmed))) {
- return kTransformError;
- }
- if (TrimRight && begin_trimmed < end) {
- if (!ARROW_PREDICT_TRUE(arrow::util::UTF8FindIfReverse(begin_trimmed, end,
- predicate, &end_trimmed))) {
- return kTransformError;
- }
- }
- std::copy(begin_trimmed, end_trimmed, output);
- return end_trimmed - begin_trimmed;
- }
-};
-
-template <typename Type>
-using UTF8Trim = StringTransformExecWithState<Type, UTF8TrimTransform<true, true>>;
-
-template <typename Type>
-using UTF8LTrim = StringTransformExecWithState<Type, UTF8TrimTransform<true, false>>;
-
-template <typename Type>
-using UTF8RTrim = StringTransformExecWithState<Type, UTF8TrimTransform<false, true>>;
-
-#endif
-
-template <bool TrimLeft, bool TrimRight>
-struct AsciiTrimWhitespaceTransform : public StringTransformBase {
- int64_t Transform(const uint8_t* input, int64_t input_string_ncodeunits,
- uint8_t* output) {
- const uint8_t* begin = input;
- const uint8_t* end = input + input_string_ncodeunits;
- const uint8_t* end_trimmed = end;
- const uint8_t* begin_trimmed = begin;
-
- auto predicate = [](unsigned char c) { return !IsSpaceCharacterAscii(c); };
- if (TrimLeft) {
- begin_trimmed = std::find_if(begin, end, predicate);
- }
- if (TrimRight && begin_trimmed < end) {
- std::reverse_iterator<const uint8_t*> rbegin(end);
- std::reverse_iterator<const uint8_t*> rend(begin_trimmed);
- end_trimmed = std::find_if(rbegin, rend, predicate).base();
- }
- std::copy(begin_trimmed, end_trimmed, output);
- return end_trimmed - begin_trimmed;
- }
-};
-
-template <typename Type>
-using AsciiTrimWhitespace =
- StringTransformExec<Type, AsciiTrimWhitespaceTransform<true, true>>;
-
-template <typename Type>
-using AsciiLTrimWhitespace =
- StringTransformExec<Type, AsciiTrimWhitespaceTransform<true, false>>;
-
-template <typename Type>
-using AsciiRTrimWhitespace =
- StringTransformExec<Type, AsciiTrimWhitespaceTransform<false, true>>;
-
-struct AsciiTrimState {
- TrimOptions options_;
- std::vector<bool> characters_;
-
- explicit AsciiTrimState(KernelContext* ctx, TrimOptions options)
- : options_(std::move(options)), characters_(256) {
- for (const auto c : options_.characters) {
- characters_[static_cast<unsigned char>(c)] = true;
- }
- }
-};
-
-template <bool TrimLeft, bool TrimRight>
-struct AsciiTrimTransform : public StringTransformBase {
- using State = KernelStateFromFunctionOptions<AsciiTrimState, TrimOptions>;
-
- const AsciiTrimState& state_;
-
- explicit AsciiTrimTransform(const AsciiTrimState& state) : state_(state) {}
-
- int64_t Transform(const uint8_t* input, int64_t input_string_ncodeunits,
- uint8_t* output) {
- const uint8_t* begin = input;
- const uint8_t* end = input + input_string_ncodeunits;
- const uint8_t* end_trimmed = end;
- const uint8_t* begin_trimmed = begin;
-
- auto predicate = [&](uint8_t c) { return !state_.characters_[c]; };
- if (TrimLeft) {
- begin_trimmed = std::find_if(begin, end, predicate);
- }
- if (TrimRight && begin_trimmed < end) {
- std::reverse_iterator<const uint8_t*> rbegin(end);
- std::reverse_iterator<const uint8_t*> rend(begin_trimmed);
- end_trimmed = std::find_if(rbegin, rend, predicate).base();
- }
- std::copy(begin_trimmed, end_trimmed, output);
- return end_trimmed - begin_trimmed;
- }
-};
-
-template <typename Type>
-using AsciiTrim = StringTransformExecWithState<Type, AsciiTrimTransform<true, true>>;
-
-template <typename Type>
-using AsciiLTrim = StringTransformExecWithState<Type, AsciiTrimTransform<true, false>>;
-
-template <typename Type>
-using AsciiRTrim = StringTransformExecWithState<Type, AsciiTrimTransform<false, true>>;
-
-const FunctionDoc utf8_center_doc(
- "Center strings by padding with a given character",
-    ("For each string in `strings`, emit a centered string by padding both sides\n"
-     "with the given UTF8 codepoint.\nNull values emit null."),
- {"strings"}, "PadOptions");
-
-const FunctionDoc utf8_lpad_doc(
- "Right-align strings by padding with a given character",
-    ("For each string in `strings`, emit a right-aligned string by prepending\n"
-     "the given UTF8 codepoint.\nNull values emit null."),
- {"strings"}, "PadOptions");
-
-const FunctionDoc utf8_rpad_doc(
- "Left-align strings by padding with a given character",
-    ("For each string in `strings`, emit a left-aligned string by appending\n"
-     "the given UTF8 codepoint.\nNull values emit null."),
- {"strings"}, "PadOptions");
-
-const FunctionDoc ascii_center_doc(
-    utf8_center_doc.summary + "",
- ("For each string in `strings`, emit a centered string by padding both sides \n"
- "with the given ASCII character.\nNull values emit null."),
- {"strings"}, "PadOptions");
-
-const FunctionDoc ascii_lpad_doc(
-    utf8_lpad_doc.summary + "",
- ("For each string in `strings`, emit a right-aligned string by prepending \n"
- "the given ASCII character.\nNull values emit null."),
- {"strings"}, "PadOptions");
-
-const FunctionDoc ascii_rpad_doc(
-    utf8_rpad_doc.summary + "",
- ("For each string in `strings`, emit a left-aligned string by appending \n"
- "the given ASCII character.\nNull values emit null."),
- {"strings"}, "PadOptions");
-
-const FunctionDoc utf8_trim_whitespace_doc(
- "Trim leading and trailing whitespace characters",
- ("For each string in `strings`, emit a string with leading and trailing whitespace\n"
- "characters removed, where whitespace characters are defined by the Unicode\n"
- "standard. Null values emit null."),
- {"strings"});
-
-const FunctionDoc utf8_ltrim_whitespace_doc(
- "Trim leading whitespace characters",
- ("For each string in `strings`, emit a string with leading whitespace\n"
- "characters removed, where whitespace characters are defined by the Unicode\n"
- "standard. Null values emit null."),
- {"strings"});
-
-const FunctionDoc utf8_rtrim_whitespace_doc(
- "Trim trailing whitespace characters",
- ("For each string in `strings`, emit a string with trailing whitespace\n"
- "characters removed, where whitespace characters are defined by the Unicode\n"
- "standard. Null values emit null."),
- {"strings"});
-
-const FunctionDoc ascii_trim_whitespace_doc(
- "Trim leading and trailing ASCII whitespace characters",
- ("For each string in `strings`, emit a string with leading and trailing ASCII\n"
- "whitespace characters removed. Use `utf8_trim_whitespace` to trim Unicode\n"
- "whitespace characters. Null values emit null."),
- {"strings"});
-
-const FunctionDoc ascii_ltrim_whitespace_doc(
- "Trim leading ASCII whitespace characters",
- ("For each string in `strings`, emit a string with leading ASCII whitespace\n"
- "characters removed. Use `utf8_ltrim_whitespace` to trim leading Unicode\n"
- "whitespace characters. Null values emit null."),
- {"strings"});
-
-const FunctionDoc ascii_rtrim_whitespace_doc(
- "Trim trailing ASCII whitespace characters",
- ("For each string in `strings`, emit a string with trailing ASCII whitespace\n"
- "characters removed. Use `utf8_rtrim_whitespace` to trim trailing Unicode\n"
- "whitespace characters. Null values emit null."),
- {"strings"});
-
-const FunctionDoc utf8_trim_doc(
- "Trim leading and trailing characters present in the `characters` arguments",
- ("For each string in `strings`, emit a string with leading and trailing\n"
- "characters removed that are present in the `characters` argument. Null values\n"
- "emit null."),
- {"strings"}, "TrimOptions");
-
-const FunctionDoc utf8_ltrim_doc(
- "Trim leading characters present in the `characters` arguments",
- ("For each string in `strings`, emit a string with leading\n"
- "characters removed that are present in the `characters` argument. Null values\n"
- "emit null."),
- {"strings"}, "TrimOptions");
-
-const FunctionDoc utf8_rtrim_doc(
- "Trim trailing characters present in the `characters` arguments",
- ("For each string in `strings`, emit a string with leading "
- "characters removed that are present in the `characters` argument. Null values\n"
- "emit null."),
- {"strings"}, "TrimOptions");
-
-const FunctionDoc ascii_trim_doc(
- utf8_trim_doc.summary + "",
- utf8_trim_doc.description +
- ("\nBoth the input string as the `characters` argument are interepreted as\n"
- "ASCII characters, to trim non-ASCII characters, use `utf8_trim`."),
- {"strings"}, "TrimOptions");
-
-const FunctionDoc ascii_ltrim_doc(
- utf8_ltrim_doc.summary + "",
- utf8_ltrim_doc.description +
- ("\nBoth the input string as the `characters` argument are interepreted as\n"
- "ASCII characters, to trim non-ASCII characters, use `utf8_trim`."),
- {"strings"}, "TrimOptions");
-
-const FunctionDoc ascii_rtrim_doc(
- utf8_rtrim_doc.summary + "",
- utf8_rtrim_doc.description +
- ("\nBoth the input string as the `characters` argument are interepreted as\n"
- "ASCII characters, to trim non-ASCII characters, use `utf8_trim`."),
- {"strings"}, "TrimOptions");
-
-const FunctionDoc strptime_doc(
- "Parse timestamps",
- ("For each string in `strings`, parse it as a timestamp.\n"
- "The timestamp unit and the expected string pattern must be given\n"
- "in StrptimeOptions. Null inputs emit null. If a non-null string\n"
- "fails parsing, an error is returned."),
- {"strings"}, "StrptimeOptions");
-
-const FunctionDoc binary_length_doc(
- "Compute string lengths",
- ("For each string in `strings`, emit the number of bytes. Null values emit null."),
- {"strings"});
-
-const FunctionDoc utf8_length_doc("Compute UTF8 string lengths",
- ("For each string in `strings`, emit the number of "
- "UTF8 characters. Null values emit null."),
- {"strings"});
-
+// ----------------------------------------------------------------------
+// string padding
+
+template <bool PadLeft, bool PadRight>
+struct AsciiPadTransform : public StringTransformBase {
+ using State = OptionsWrapper<PadOptions>;
+
+ const PadOptions& options_;
+
+ explicit AsciiPadTransform(const PadOptions& options) : options_(options) {}
+
+ Status PreExec(KernelContext* ctx, const ExecBatch& batch, Datum* out) override {
+ if (options_.padding.size() != 1) {
+ return Status::Invalid("Padding must be one byte, got '", options_.padding, "'");
+ }
+ return Status::OK();
+ }
+
+ int64_t MaxCodeunits(int64_t ninputs, int64_t input_ncodeunits) override {
+ // This is likely very overallocated but hard to do better without
+ // actually looking at each string (because of strings that may be
+ // longer than the given width)
+ return input_ncodeunits + ninputs * options_.width;
+ }
+
+ int64_t Transform(const uint8_t* input, int64_t input_string_ncodeunits,
+ uint8_t* output) {
+ if (input_string_ncodeunits >= options_.width) {
+ std::copy(input, input + input_string_ncodeunits, output);
+ return input_string_ncodeunits;
+ }
+ const int64_t spaces = options_.width - input_string_ncodeunits;
+ int64_t left = 0;
+ int64_t right = 0;
+ if (PadLeft && PadRight) {
+ // If odd number of spaces, put the extra space on the right
+ left = spaces / 2;
+ right = spaces - left;
+ } else if (PadLeft) {
+ left = spaces;
+ } else if (PadRight) {
+ right = spaces;
+ } else {
+ DCHECK(false) << "unreachable";
+ return 0;
+ }
+ std::fill(output, output + left, options_.padding[0]);
+ output += left;
+ output = std::copy(input, input + input_string_ncodeunits, output);
+ std::fill(output, output + right, options_.padding[0]);
+ return options_.width;
+ }
+};
+
+template <bool PadLeft, bool PadRight>
+struct Utf8PadTransform : public StringTransformBase {
+ using State = OptionsWrapper<PadOptions>;
+
+ const PadOptions& options_;
+
+ explicit Utf8PadTransform(const PadOptions& options) : options_(options) {}
+
+ Status PreExec(KernelContext* ctx, const ExecBatch& batch, Datum* out) override {
+ auto str = reinterpret_cast<const uint8_t*>(options_.padding.data());
+ auto strlen = options_.padding.size();
+ if (util::UTF8Length(str, str + strlen) != 1) {
+ return Status::Invalid("Padding must be one codepoint, got '", options_.padding,
+ "'");
+ }
+ return Status::OK();
+ }
+
+ int64_t MaxCodeunits(int64_t ninputs, int64_t input_ncodeunits) override {
+ // This is likely very overallocated but hard to do better without
+ // actually looking at each string (because of strings that may be
+ // longer than the given width)
+ // One codepoint may be up to 4 bytes
+ return input_ncodeunits + 4 * ninputs * options_.width;
+ }
+
+ int64_t Transform(const uint8_t* input, int64_t input_string_ncodeunits,
+ uint8_t* output) {
+ const int64_t input_width = util::UTF8Length(input, input + input_string_ncodeunits);
+ if (input_width >= options_.width) {
+ std::copy(input, input + input_string_ncodeunits, output);
+ return input_string_ncodeunits;
+ }
+ const int64_t spaces = options_.width - input_width;
+ int64_t left = 0;
+ int64_t right = 0;
+ if (PadLeft && PadRight) {
+ // If odd number of spaces, put the extra space on the right
+ left = spaces / 2;
+ right = spaces - left;
+ } else if (PadLeft) {
+ left = spaces;
+ } else if (PadRight) {
+ right = spaces;
+ } else {
+ DCHECK(false) << "unreachable";
+ return 0;
+ }
+ uint8_t* start = output;
+ while (left) {
+ output = std::copy(options_.padding.begin(), options_.padding.end(), output);
+ left--;
+ }
+ output = std::copy(input, input + input_string_ncodeunits, output);
+ while (right) {
+ output = std::copy(options_.padding.begin(), options_.padding.end(), output);
+ right--;
+ }
+ return output - start;
+ }
+};
+
+template <typename Type>
+using AsciiLPad = StringTransformExecWithState<Type, AsciiPadTransform<true, false>>;
+template <typename Type>
+using AsciiRPad = StringTransformExecWithState<Type, AsciiPadTransform<false, true>>;
+template <typename Type>
+using AsciiCenter = StringTransformExecWithState<Type, AsciiPadTransform<true, true>>;
+template <typename Type>
+using Utf8LPad = StringTransformExecWithState<Type, Utf8PadTransform<true, false>>;
+template <typename Type>
+using Utf8RPad = StringTransformExecWithState<Type, Utf8PadTransform<false, true>>;
+template <typename Type>
+using Utf8Center = StringTransformExecWithState<Type, Utf8PadTransform<true, true>>;
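+
+// Illustrative usage sketch (an assumption for exposition, not part of this
+// file): with the public compute API from arrow/compute/api.h and a Datum
+// `input` holding a utf8 array, these kernels are reachable by name:
+//
+//   arrow::compute::PadOptions options(/*width=*/5, /*padding=*/"*");
+//   ARROW_ASSIGN_OR_RAISE(
+//       arrow::Datum padded,
+//       arrow::compute::CallFunction("utf8_center", {input}, &options));
+//   // "ab" -> "*ab**": spaces = 5 - 2 = 3, left = spaces / 2 = 1, right = 2,
+//   // so the extra padding codepoint lands on the right.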
+
+// ----------------------------------------------------------------------
+// string trimming
+
+#ifdef ARROW_WITH_UTF8PROC
+
+template <bool TrimLeft, bool TrimRight>
+struct UTF8TrimWhitespaceTransform : public StringTransformBase {
+ Status PreExec(KernelContext* ctx, const ExecBatch& batch, Datum* out) override {
+ EnsureLookupTablesFilled();
+ return Status::OK();
+ }
+
+ int64_t Transform(const uint8_t* input, int64_t input_string_ncodeunits,
+ uint8_t* output) {
+ const uint8_t* begin = input;
+ const uint8_t* end = input + input_string_ncodeunits;
+ const uint8_t* end_trimmed = end;
+ const uint8_t* begin_trimmed = begin;
+
+ auto predicate = [](uint32_t c) { return !IsSpaceCharacterUnicode(c); };
+ if (TrimLeft && !ARROW_PREDICT_TRUE(
+ arrow::util::UTF8FindIf(begin, end, predicate, &begin_trimmed))) {
+ return kTransformError;
+ }
+ if (TrimRight && begin_trimmed < end) {
+ if (!ARROW_PREDICT_TRUE(arrow::util::UTF8FindIfReverse(begin_trimmed, end,
+ predicate, &end_trimmed))) {
+ return kTransformError;
+ }
+ }
+ std::copy(begin_trimmed, end_trimmed, output);
+ return end_trimmed - begin_trimmed;
+ }
+};
+
+template <typename Type>
+using UTF8TrimWhitespace =
+ StringTransformExec<Type, UTF8TrimWhitespaceTransform<true, true>>;
+
+template <typename Type>
+using UTF8LTrimWhitespace =
+ StringTransformExec<Type, UTF8TrimWhitespaceTransform<true, false>>;
+
+template <typename Type>
+using UTF8RTrimWhitespace =
+ StringTransformExec<Type, UTF8TrimWhitespaceTransform<false, true>>;
+
+struct UTF8TrimState {
+ TrimOptions options_;
+ std::vector<bool> codepoints_;
+ Status status_ = Status::OK();
+
+ explicit UTF8TrimState(KernelContext* ctx, TrimOptions options)
+ : options_(std::move(options)) {
+ if (!ARROW_PREDICT_TRUE(
+ arrow::util::UTF8ForEach(options_.characters, [&](uint32_t c) {
+ codepoints_.resize(
+ std::max(c + 1, static_cast<uint32_t>(codepoints_.size())));
+ codepoints_.at(c) = true;
+ }))) {
+ status_ = Status::Invalid("Invalid UTF8 sequence in input");
+ }
+ }
+};
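+
+// Design note: codepoints_ is a bitmap sized to the largest codepoint listed
+// in TrimOptions::characters; membership tests in the transform below are a
+// single indexed load, with out-of-range codepoints treated as not trimmed.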
+
+template <bool TrimLeft, bool TrimRight>
+struct UTF8TrimTransform : public StringTransformBase {
+ using State = KernelStateFromFunctionOptions<UTF8TrimState, TrimOptions>;
+
+ const UTF8TrimState& state_;
+
+ explicit UTF8TrimTransform(const UTF8TrimState& state) : state_(state) {}
+
+ Status PreExec(KernelContext* ctx, const ExecBatch& batch, Datum* out) override {
+ return state_.status_;
+ }
+
+ int64_t Transform(const uint8_t* input, int64_t input_string_ncodeunits,
+ uint8_t* output) {
+ const uint8_t* begin = input;
+ const uint8_t* end = input + input_string_ncodeunits;
+ const uint8_t* end_trimmed = end;
+ const uint8_t* begin_trimmed = begin;
+
+ auto predicate = [&](uint32_t c) { return !state_.codepoints_[c]; };
+ if (TrimLeft && !ARROW_PREDICT_TRUE(
+ arrow::util::UTF8FindIf(begin, end, predicate, &begin_trimmed))) {
+ return kTransformError;
+ }
+ if (TrimRight && begin_trimmed < end) {
+ if (!ARROW_PREDICT_TRUE(arrow::util::UTF8FindIfReverse(begin_trimmed, end,
+ predicate, &end_trimmed))) {
+ return kTransformError;
+ }
+ }
+ std::copy(begin_trimmed, end_trimmed, output);
+ return end_trimmed - begin_trimmed;
+ }
+};
+
+template <typename Type>
+using UTF8Trim = StringTransformExecWithState<Type, UTF8TrimTransform<true, true>>;
+
+template <typename Type>
+using UTF8LTrim = StringTransformExecWithState<Type, UTF8TrimTransform<true, false>>;
+
+template <typename Type>
+using UTF8RTrim = StringTransformExecWithState<Type, UTF8TrimTransform<false, true>>;
+
+#endif
+
+template <bool TrimLeft, bool TrimRight>
+struct AsciiTrimWhitespaceTransform : public StringTransformBase {
+ int64_t Transform(const uint8_t* input, int64_t input_string_ncodeunits,
+ uint8_t* output) {
+ const uint8_t* begin = input;
+ const uint8_t* end = input + input_string_ncodeunits;
+ const uint8_t* end_trimmed = end;
+ const uint8_t* begin_trimmed = begin;
+
+ auto predicate = [](unsigned char c) { return !IsSpaceCharacterAscii(c); };
+ if (TrimLeft) {
+ begin_trimmed = std::find_if(begin, end, predicate);
+ }
+ if (TrimRight && begin_trimmed < end) {
+ std::reverse_iterator<const uint8_t*> rbegin(end);
+ std::reverse_iterator<const uint8_t*> rend(begin_trimmed);
+ end_trimmed = std::find_if(rbegin, rend, predicate).base();
+ }
+ std::copy(begin_trimmed, end_trimmed, output);
+ return end_trimmed - begin_trimmed;
+ }
+};
+
+template <typename Type>
+using AsciiTrimWhitespace =
+ StringTransformExec<Type, AsciiTrimWhitespaceTransform<true, true>>;
+
+template <typename Type>
+using AsciiLTrimWhitespace =
+ StringTransformExec<Type, AsciiTrimWhitespaceTransform<true, false>>;
+
+template <typename Type>
+using AsciiRTrimWhitespace =
+ StringTransformExec<Type, AsciiTrimWhitespaceTransform<false, true>>;
+
+struct AsciiTrimState {
+ TrimOptions options_;
+ std::vector<bool> characters_;
+
+ explicit AsciiTrimState(KernelContext* ctx, TrimOptions options)
+ : options_(std::move(options)), characters_(256) {
+ for (const auto c : options_.characters) {
+ characters_[static_cast<unsigned char>(c)] = true;
+ }
+ }
+};
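+
+// Design note: characters_ is a 256-entry lookup table indexed by byte value,
+// so the trim predicate below costs one table load per input byte.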
+
+template <bool TrimLeft, bool TrimRight>
+struct AsciiTrimTransform : public StringTransformBase {
+ using State = KernelStateFromFunctionOptions<AsciiTrimState, TrimOptions>;
+
+ const AsciiTrimState& state_;
+
+ explicit AsciiTrimTransform(const AsciiTrimState& state) : state_(state) {}
+
+ int64_t Transform(const uint8_t* input, int64_t input_string_ncodeunits,
+ uint8_t* output) {
+ const uint8_t* begin = input;
+ const uint8_t* end = input + input_string_ncodeunits;
+ const uint8_t* end_trimmed = end;
+ const uint8_t* begin_trimmed = begin;
+
+ auto predicate = [&](uint8_t c) { return !state_.characters_[c]; };
+ if (TrimLeft) {
+ begin_trimmed = std::find_if(begin, end, predicate);
+ }
+ if (TrimRight && begin_trimmed < end) {
+ std::reverse_iterator<const uint8_t*> rbegin(end);
+ std::reverse_iterator<const uint8_t*> rend(begin_trimmed);
+ end_trimmed = std::find_if(rbegin, rend, predicate).base();
+ }
+ std::copy(begin_trimmed, end_trimmed, output);
+ return end_trimmed - begin_trimmed;
+ }
+};
+
+template <typename Type>
+using AsciiTrim = StringTransformExecWithState<Type, AsciiTrimTransform<true, true>>;
+
+template <typename Type>
+using AsciiLTrim = StringTransformExecWithState<Type, AsciiTrimTransform<true, false>>;
+
+template <typename Type>
+using AsciiRTrim = StringTransformExecWithState<Type, AsciiTrimTransform<false, true>>;
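+
+// Illustrative usage sketch (an assumption for exposition): the characters to
+// strip come in through TrimOptions, e.g.
+//
+//   arrow::compute::TrimOptions options("xy");
+//   ARROW_ASSIGN_OR_RAISE(
+//       arrow::Datum trimmed,
+//       arrow::compute::CallFunction("ascii_trim", {input}, &options));
+//   // "xxhixyx" -> "hi": every leading/trailing byte contained in "xy" is
+//   // removed; interior occurrences are untouched.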
+
+const FunctionDoc utf8_center_doc(
+ "Center strings by padding with a given character",
+ ("For each string in `strings`, emit a centered string by padding both sides \n"
+ "with the given UTF8 codeunit.\nNull values emit null."),
+ {"strings"}, "PadOptions");
+
+const FunctionDoc utf8_lpad_doc(
+ "Right-align strings by padding with a given character",
+ ("For each string in `strings`, emit a right-aligned string by prepending \n"
+ "the given UTF8 codeunit.\nNull values emit null."),
+ {"strings"}, "PadOptions");
+
+const FunctionDoc utf8_rpad_doc(
+ "Left-align strings by padding with a given character",
+ ("For each string in `strings`, emit a left-aligned string by appending \n"
+ "the given UTF8 codeunit.\nNull values emit null."),
+ {"strings"}, "PadOptions");
+
+const FunctionDoc ascii_center_doc(
+ utf8_center_doc.summary + "",
+ ("For each string in `strings`, emit a centered string by padding both sides\n"
+ "with the given ASCII character.\nNull values emit null."),
+ {"strings"}, "PadOptions");
+
+const FunctionDoc ascii_lpad_doc(
+ utf8_lpad_doc.summary + "",
+ ("For each string in `strings`, emit a right-aligned string by prepending\n"
+ "the given ASCII character.\nNull values emit null."),
+ {"strings"}, "PadOptions");
+
+const FunctionDoc ascii_rpad_doc(
+ utf8_rpad_doc.summary + "",
+ ("For each string in `strings`, emit a left-aligned string by appending\n"
+ "the given ASCII character.\nNull values emit null."),
+ {"strings"}, "PadOptions");
+
+const FunctionDoc utf8_trim_whitespace_doc(
+ "Trim leading and trailing whitespace characters",
+ ("For each string in `strings`, emit a string with leading and trailing whitespace\n"
+ "characters removed, where whitespace characters are defined by the Unicode\n"
+ "standard. Null values emit null."),
+ {"strings"});
+
+const FunctionDoc utf8_ltrim_whitespace_doc(
+ "Trim leading whitespace characters",
+ ("For each string in `strings`, emit a string with leading whitespace\n"
+ "characters removed, where whitespace characters are defined by the Unicode\n"
+ "standard. Null values emit null."),
+ {"strings"});
+
+const FunctionDoc utf8_rtrim_whitespace_doc(
+ "Trim trailing whitespace characters",
+ ("For each string in `strings`, emit a string with trailing whitespace\n"
+ "characters removed, where whitespace characters are defined by the Unicode\n"
+ "standard. Null values emit null."),
+ {"strings"});
+
+const FunctionDoc ascii_trim_whitespace_doc(
+ "Trim leading and trailing ASCII whitespace characters",
+ ("For each string in `strings`, emit a string with leading and trailing ASCII\n"
+ "whitespace characters removed. Use `utf8_trim_whitespace` to trim Unicode\n"
+ "whitespace characters. Null values emit null."),
+ {"strings"});
+
+const FunctionDoc ascii_ltrim_whitespace_doc(
+ "Trim leading ASCII whitespace characters",
+ ("For each string in `strings`, emit a string with leading ASCII whitespace\n"
+ "characters removed. Use `utf8_ltrim_whitespace` to trim leading Unicode\n"
+ "whitespace characters. Null values emit null."),
+ {"strings"});
+
+const FunctionDoc ascii_rtrim_whitespace_doc(
+ "Trim trailing ASCII whitespace characters",
+ ("For each string in `strings`, emit a string with trailing ASCII whitespace\n"
+ "characters removed. Use `utf8_rtrim_whitespace` to trim trailing Unicode\n"
+ "whitespace characters. Null values emit null."),
+ {"strings"});
+
+const FunctionDoc utf8_trim_doc(
+ "Trim leading and trailing characters present in the `characters` arguments",
+ ("For each string in `strings`, emit a string with leading and trailing\n"
+ "characters removed that are present in the `characters` argument. Null values\n"
+ "emit null."),
+ {"strings"}, "TrimOptions");
+
+const FunctionDoc utf8_ltrim_doc(
+ "Trim leading characters present in the `characters` arguments",
+ ("For each string in `strings`, emit a string with leading\n"
+ "characters removed that are present in the `characters` argument. Null values\n"
+ "emit null."),
+ {"strings"}, "TrimOptions");
+
+const FunctionDoc utf8_rtrim_doc(
+ "Trim trailing characters present in the `characters` arguments",
+ ("For each string in `strings`, emit a string with leading "
+ "characters removed that are present in the `characters` argument. Null values\n"
+ "emit null."),
+ {"strings"}, "TrimOptions");
+
+const FunctionDoc ascii_trim_doc(
+ utf8_trim_doc.summary + "",
+ utf8_trim_doc.description +
+ ("\nBoth the input string as the `characters` argument are interepreted as\n"
+ "ASCII characters, to trim non-ASCII characters, use `utf8_trim`."),
+ {"strings"}, "TrimOptions");
+
+const FunctionDoc ascii_ltrim_doc(
+ utf8_ltrim_doc.summary + "",
+ utf8_ltrim_doc.description +
+ ("\nBoth the input string as the `characters` argument are interepreted as\n"
+ "ASCII characters, to trim non-ASCII characters, use `utf8_trim`."),
+ {"strings"}, "TrimOptions");
+
+const FunctionDoc ascii_rtrim_doc(
+ utf8_rtrim_doc.summary + "",
+ utf8_rtrim_doc.description +
+ ("\nBoth the input string as the `characters` argument are interepreted as\n"
+ "ASCII characters, to trim non-ASCII characters, use `utf8_trim`."),
+ {"strings"}, "TrimOptions");
+
+const FunctionDoc strptime_doc(
+ "Parse timestamps",
+ ("For each string in `strings`, parse it as a timestamp.\n"
+ "The timestamp unit and the expected string pattern must be given\n"
+ "in StrptimeOptions. Null inputs emit null. If a non-null string\n"
+ "fails parsing, an error is returned."),
+ {"strings"}, "StrptimeOptions");
+
+const FunctionDoc binary_length_doc(
+ "Compute string lengths",
+ ("For each string in `strings`, emit the number of bytes. Null values emit null."),
+ {"strings"});
+
+const FunctionDoc utf8_length_doc("Compute UTF8 string lengths",
+ ("For each string in `strings`, emit the number of "
+ "UTF8 characters. Null values emit null."),
+ {"strings"});
+
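+// Illustrative usage sketch (an assumption for exposition): the pattern and
+// unit are mandatory and are passed through StrptimeOptions, e.g.
+//
+//   arrow::compute::StrptimeOptions options("%Y-%m-%d",
+//                                           arrow::TimeUnit::SECOND);
+//   ARROW_ASSIGN_OR_RAISE(
+//       arrow::Datum ts,
+//       arrow::compute::CallFunction("strptime", {input}, &options));
+//   // "2021-05-01" parses to a timestamp(s) value; a malformed non-null
+//   // input raises an error instead of emitting null.
+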
void AddStrptime(FunctionRegistry* registry) {
- auto func = std::make_shared<ScalarFunction>("strptime", Arity::Unary(), &strptime_doc);
+ auto func = std::make_shared<ScalarFunction>("strptime", Arity::Unary(), &strptime_doc);
DCHECK_OK(func->AddKernel({utf8()}, OutputType(StrptimeResolve),
StrptimeExec<StringType>, StrptimeState::Init));
DCHECK_OK(func->AddKernel({large_utf8()}, OutputType(StrptimeResolve),
@@ -3283,8 +3283,8 @@ void AddStrptime(FunctionRegistry* registry) {
}
void AddBinaryLength(FunctionRegistry* registry) {
- auto func = std::make_shared<ScalarFunction>("binary_length", Arity::Unary(),
- &binary_length_doc);
+ auto func = std::make_shared<ScalarFunction>("binary_length", Arity::Unary(),
+ &binary_length_doc);
ArrayKernelExec exec_offset_32 =
applicator::ScalarUnaryNotNull<Int32Type, StringType, BinaryLength>::Exec;
ArrayKernelExec exec_offset_64 =
@@ -3298,575 +3298,575 @@ void AddBinaryLength(FunctionRegistry* registry) {
DCHECK_OK(registry->AddFunction(std::move(func)));
}
-void AddUtf8Length(FunctionRegistry* registry) {
- auto func =
- std::make_shared<ScalarFunction>("utf8_length", Arity::Unary(), &utf8_length_doc);
-
- ArrayKernelExec exec_offset_32 =
- applicator::ScalarUnaryNotNull<Int32Type, StringType, Utf8Length>::Exec;
- DCHECK_OK(func->AddKernel({utf8()}, int32(), std::move(exec_offset_32)));
-
- ArrayKernelExec exec_offset_64 =
- applicator::ScalarUnaryNotNull<Int64Type, LargeStringType, Utf8Length>::Exec;
- DCHECK_OK(func->AddKernel({large_utf8()}, int64(), std::move(exec_offset_64)));
-
- DCHECK_OK(registry->AddFunction(std::move(func)));
-}
-
-template <typename BinaryType, typename ListType>
-struct BinaryJoin {
- using ArrayType = typename TypeTraits<BinaryType>::ArrayType;
- using ListArrayType = typename TypeTraits<ListType>::ArrayType;
- using ListScalarType = typename TypeTraits<ListType>::ScalarType;
- using ListOffsetType = typename ListArrayType::offset_type;
- using BuilderType = typename TypeTraits<BinaryType>::BuilderType;
-
- static Status Exec(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
- if (batch[0].kind() == Datum::SCALAR) {
- if (batch[1].kind() == Datum::SCALAR) {
- return ExecScalarScalar(ctx, *batch[0].scalar(), *batch[1].scalar(), out);
- }
- DCHECK_EQ(batch[1].kind(), Datum::ARRAY);
- return ExecScalarArray(ctx, *batch[0].scalar(), batch[1].array(), out);
- }
- DCHECK_EQ(batch[0].kind(), Datum::ARRAY);
- if (batch[1].kind() == Datum::SCALAR) {
- return ExecArrayScalar(ctx, batch[0].array(), *batch[1].scalar(), out);
- }
- DCHECK_EQ(batch[1].kind(), Datum::ARRAY);
- return ExecArrayArray(ctx, batch[0].array(), batch[1].array(), out);
- }
-
- struct ListScalarOffsetLookup {
- const ArrayType& values;
-
- int64_t GetStart(int64_t i) { return 0; }
- int64_t GetStop(int64_t i) { return values.length(); }
- bool IsNull(int64_t i) { return false; }
- };
-
- struct ListArrayOffsetLookup {
- explicit ListArrayOffsetLookup(const ListArrayType& lists)
- : lists_(lists), offsets_(lists.raw_value_offsets()) {}
-
- int64_t GetStart(int64_t i) { return offsets_[i]; }
- int64_t GetStop(int64_t i) { return offsets_[i + 1]; }
- bool IsNull(int64_t i) { return lists_.IsNull(i); }
-
- private:
- const ListArrayType& lists_;
- const ListOffsetType* offsets_;
- };
-
- struct SeparatorScalarLookup {
- const util::string_view separator;
-
- bool IsNull(int64_t i) { return false; }
- util::string_view GetView(int64_t i) { return separator; }
- };
-
- struct SeparatorArrayLookup {
- const ArrayType& separators;
-
- bool IsNull(int64_t i) { return separators.IsNull(i); }
- util::string_view GetView(int64_t i) { return separators.GetView(i); }
- };
-
- // Scalar, scalar -> scalar
- static Status ExecScalarScalar(KernelContext* ctx, const Scalar& left,
- const Scalar& right, Datum* out) {
- const auto& list = checked_cast<const ListScalarType&>(left);
- const auto& separator_scalar = checked_cast<const BaseBinaryScalar&>(right);
- if (!list.is_valid || !separator_scalar.is_valid) {
- return Status::OK();
- }
- util::string_view separator(*separator_scalar.value);
-
- const auto& strings = checked_cast<const ArrayType&>(*list.value);
- if (strings.null_count() > 0) {
- out->scalar()->is_valid = false;
- return Status::OK();
- }
-
- TypedBufferBuilder<uint8_t> builder(ctx->memory_pool());
- auto Append = [&](util::string_view value) {
- return builder.Append(reinterpret_cast<const uint8_t*>(value.data()),
- static_cast<int64_t>(value.size()));
- };
- if (strings.length() > 0) {
- auto data_length =
- strings.total_values_length() + (strings.length() - 1) * separator.length();
- RETURN_NOT_OK(builder.Reserve(data_length));
- RETURN_NOT_OK(Append(strings.GetView(0)));
- for (int64_t j = 1; j < strings.length(); j++) {
- RETURN_NOT_OK(Append(separator));
- RETURN_NOT_OK(Append(strings.GetView(j)));
- }
- }
- auto out_scalar = checked_cast<BaseBinaryScalar*>(out->scalar().get());
- return builder.Finish(&out_scalar->value);
- }
-
- // Scalar, array -> array
- static Status ExecScalarArray(KernelContext* ctx, const Scalar& left,
- const std::shared_ptr<ArrayData>& right, Datum* out) {
- const auto& list_scalar = checked_cast<const BaseListScalar&>(left);
- if (!list_scalar.is_valid) {
- ARROW_ASSIGN_OR_RAISE(
- auto nulls, MakeArrayOfNull(right->type, right->length, ctx->memory_pool()));
- *out = *nulls->data();
- return Status::OK();
- }
- const auto& strings = checked_cast<const ArrayType&>(*list_scalar.value);
- if (strings.null_count() != 0) {
- ARROW_ASSIGN_OR_RAISE(
- auto nulls, MakeArrayOfNull(right->type, right->length, ctx->memory_pool()));
- *out = *nulls->data();
- return Status::OK();
- }
- const ArrayType separators(right);
-
- BuilderType builder(ctx->memory_pool());
- RETURN_NOT_OK(builder.Reserve(separators.length()));
-
- // Presize data to avoid multiple reallocations when joining strings
- int64_t total_data_length = 0;
- const int64_t list_length = strings.length();
- if (list_length) {
- const int64_t string_length = strings.total_values_length();
- total_data_length +=
- string_length * (separators.length() - separators.null_count());
- for (int64_t i = 0; i < separators.length(); ++i) {
- if (separators.IsNull(i)) {
- continue;
- }
- total_data_length += (list_length - 1) * separators.value_length(i);
- }
- }
- RETURN_NOT_OK(builder.ReserveData(total_data_length));
-
- return JoinStrings(separators.length(), strings, ListScalarOffsetLookup{strings},
- SeparatorArrayLookup{separators}, &builder, out);
- }
-
- // Array, scalar -> array
- static Status ExecArrayScalar(KernelContext* ctx,
- const std::shared_ptr<ArrayData>& left,
- const Scalar& right, Datum* out) {
- const ListArrayType lists(left);
- const auto& separator_scalar = checked_cast<const BaseBinaryScalar&>(right);
-
- if (!separator_scalar.is_valid) {
- ARROW_ASSIGN_OR_RAISE(
- auto nulls,
- MakeArrayOfNull(lists.value_type(), lists.length(), ctx->memory_pool()));
- *out = *nulls->data();
- return Status::OK();
- }
-
- util::string_view separator(*separator_scalar.value);
- const auto& strings = checked_cast<const ArrayType&>(*lists.values());
- const auto list_offsets = lists.raw_value_offsets();
-
- BuilderType builder(ctx->memory_pool());
- RETURN_NOT_OK(builder.Reserve(lists.length()));
-
- // Presize data to avoid multiple reallocations when joining strings
- int64_t total_data_length = strings.total_values_length();
- for (int64_t i = 0; i < lists.length(); ++i) {
- const auto start = list_offsets[i], end = list_offsets[i + 1];
- if (end > start && !ValuesContainNull(strings, start, end)) {
- total_data_length += (end - start - 1) * separator.length();
- }
- }
- RETURN_NOT_OK(builder.ReserveData(total_data_length));
-
- return JoinStrings(lists.length(), strings, ListArrayOffsetLookup{lists},
- SeparatorScalarLookup{separator}, &builder, out);
- }
-
- // Array, array -> array
- static Status ExecArrayArray(KernelContext* ctx, const std::shared_ptr<ArrayData>& left,
- const std::shared_ptr<ArrayData>& right, Datum* out) {
- const ListArrayType lists(left);
- const auto& strings = checked_cast<const ArrayType&>(*lists.values());
- const auto list_offsets = lists.raw_value_offsets();
- const auto string_offsets = strings.raw_value_offsets();
- const ArrayType separators(right);
-
- BuilderType builder(ctx->memory_pool());
- RETURN_NOT_OK(builder.Reserve(lists.length()));
-
- // Presize data to avoid multiple reallocations when joining strings
- int64_t total_data_length = 0;
- for (int64_t i = 0; i < lists.length(); ++i) {
- if (separators.IsNull(i)) {
- continue;
- }
- const auto start = list_offsets[i], end = list_offsets[i + 1];
- if (end > start && !ValuesContainNull(strings, start, end)) {
- total_data_length += string_offsets[end] - string_offsets[start];
- total_data_length += (end - start - 1) * separators.value_length(i);
- }
- }
- RETURN_NOT_OK(builder.ReserveData(total_data_length));
-
- struct SeparatorLookup {
- const ArrayType& separators;
-
- bool IsNull(int64_t i) { return separators.IsNull(i); }
- util::string_view GetView(int64_t i) { return separators.GetView(i); }
- };
- return JoinStrings(lists.length(), strings, ListArrayOffsetLookup{lists},
- SeparatorArrayLookup{separators}, &builder, out);
- }
-
- template <typename ListOffsetLookup, typename SeparatorLookup>
- static Status JoinStrings(int64_t length, const ArrayType& strings,
- ListOffsetLookup&& list_offsets, SeparatorLookup&& separators,
- BuilderType* builder, Datum* out) {
- for (int64_t i = 0; i < length; ++i) {
- if (list_offsets.IsNull(i) || separators.IsNull(i)) {
- builder->UnsafeAppendNull();
- continue;
- }
- const auto j_start = list_offsets.GetStart(i), j_end = list_offsets.GetStop(i);
- if (j_start == j_end) {
- builder->UnsafeAppendEmptyValue();
- continue;
- }
- if (ValuesContainNull(strings, j_start, j_end)) {
- builder->UnsafeAppendNull();
- continue;
- }
- builder->UnsafeAppend(strings.GetView(j_start));
- for (int64_t j = j_start + 1; j < j_end; ++j) {
- builder->UnsafeExtendCurrent(separators.GetView(i));
- builder->UnsafeExtendCurrent(strings.GetView(j));
- }
- }
-
- std::shared_ptr<Array> string_array;
- RETURN_NOT_OK(builder->Finish(&string_array));
- *out = *string_array->data();
- // Correct the output type based on the input
- out->mutable_array()->type = strings.type();
- return Status::OK();
- }
-
- static bool ValuesContainNull(const ArrayType& values, int64_t start, int64_t end) {
- if (values.null_count() == 0) {
- return false;
- }
- for (int64_t i = start; i < end; ++i) {
- if (values.IsNull(i)) {
- return true;
- }
- }
- return false;
- }
-};
-
-using BinaryJoinElementWiseState = OptionsWrapper<JoinOptions>;
-
-template <typename Type>
-struct BinaryJoinElementWise {
- using ArrayType = typename TypeTraits<Type>::ArrayType;
- using BuilderType = typename TypeTraits<Type>::BuilderType;
- using offset_type = typename Type::offset_type;
-
- static Status Exec(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
- JoinOptions options = BinaryJoinElementWiseState::Get(ctx);
- // Last argument is the separator (for consistency with binary_join)
- if (std::all_of(batch.values.begin(), batch.values.end(),
- [](const Datum& d) { return d.is_scalar(); })) {
- return ExecOnlyScalar(ctx, options, batch, out);
- }
- return ExecContainingArrays(ctx, options, batch, out);
- }
-
- static Status ExecOnlyScalar(KernelContext* ctx, const JoinOptions& options,
- const ExecBatch& batch, Datum* out) {
- BaseBinaryScalar* output = checked_cast<BaseBinaryScalar*>(out->scalar().get());
- const size_t num_args = batch.values.size();
- if (num_args == 1) {
- // Only separator, no values
- ARROW_ASSIGN_OR_RAISE(output->value, ctx->Allocate(0));
- output->is_valid = batch.values[0].scalar()->is_valid;
- return Status::OK();
- }
-
- int64_t final_size = CalculateRowSize(options, batch, 0);
- if (final_size < 0) {
- ARROW_ASSIGN_OR_RAISE(output->value, ctx->Allocate(0));
- output->is_valid = false;
- return Status::OK();
- }
- ARROW_ASSIGN_OR_RAISE(output->value, ctx->Allocate(final_size));
- const auto separator = UnboxScalar<Type>::Unbox(*batch.values.back().scalar());
- uint8_t* buf = output->value->mutable_data();
- bool first = true;
- for (size_t i = 0; i < num_args - 1; i++) {
- const Scalar& scalar = *batch[i].scalar();
- util::string_view s;
- if (scalar.is_valid) {
- s = UnboxScalar<Type>::Unbox(scalar);
- } else {
- switch (options.null_handling) {
- case JoinOptions::EMIT_NULL:
- // Handled by CalculateRowSize
- DCHECK(false) << "unreachable";
- break;
- case JoinOptions::SKIP:
- continue;
- case JoinOptions::REPLACE:
- s = options.null_replacement;
- break;
- }
- }
- if (!first) {
- buf = std::copy(separator.begin(), separator.end(), buf);
- }
- first = false;
- buf = std::copy(s.begin(), s.end(), buf);
- }
- output->is_valid = true;
- DCHECK_EQ(final_size, buf - output->value->mutable_data());
- return Status::OK();
- }
-
- static Status ExecContainingArrays(KernelContext* ctx, const JoinOptions& options,
- const ExecBatch& batch, Datum* out) {
- // Presize data to avoid reallocations
- int64_t final_size = 0;
- for (int64_t i = 0; i < batch.length; i++) {
- auto size = CalculateRowSize(options, batch, i);
- if (size > 0) final_size += size;
- }
- BuilderType builder(ctx->memory_pool());
- RETURN_NOT_OK(builder.Reserve(batch.length));
- RETURN_NOT_OK(builder.ReserveData(final_size));
-
- std::vector<util::string_view> valid_cols(batch.values.size());
- for (size_t row = 0; row < static_cast<size_t>(batch.length); row++) {
- size_t num_valid = 0; // Not counting separator
- for (size_t col = 0; col < batch.values.size(); col++) {
- if (batch[col].is_scalar()) {
- const auto& scalar = *batch[col].scalar();
- if (scalar.is_valid) {
- valid_cols[col] = UnboxScalar<Type>::Unbox(scalar);
- if (col < batch.values.size() - 1) num_valid++;
- } else {
- valid_cols[col] = util::string_view();
- }
- } else {
- const ArrayData& array = *batch[col].array();
- if (!array.MayHaveNulls() ||
- BitUtil::GetBit(array.buffers[0]->data(), array.offset + row)) {
- const offset_type* offsets = array.GetValues<offset_type>(1);
- const uint8_t* data = array.GetValues<uint8_t>(2, /*absolute_offset=*/0);
- const int64_t length = offsets[row + 1] - offsets[row];
- valid_cols[col] = util::string_view(
- reinterpret_cast<const char*>(data + offsets[row]), length);
- if (col < batch.values.size() - 1) num_valid++;
- } else {
- valid_cols[col] = util::string_view();
- }
- }
- }
-
- if (!valid_cols.back().data()) {
- // Separator is null
- builder.UnsafeAppendNull();
- continue;
- } else if (batch.values.size() == 1) {
- // Only given separator
- builder.UnsafeAppendEmptyValue();
- continue;
- } else if (num_valid < batch.values.size() - 1) {
- // We had some nulls
- if (options.null_handling == JoinOptions::EMIT_NULL) {
- builder.UnsafeAppendNull();
- continue;
- }
- }
- const auto separator = valid_cols.back();
- bool first = true;
- for (size_t col = 0; col < batch.values.size() - 1; col++) {
- util::string_view value = valid_cols[col];
- if (!value.data()) {
- switch (options.null_handling) {
- case JoinOptions::EMIT_NULL:
- DCHECK(false) << "unreachable";
- break;
- case JoinOptions::SKIP:
- continue;
- case JoinOptions::REPLACE:
- value = options.null_replacement;
- break;
- }
- }
- if (first) {
- builder.UnsafeAppend(value);
- first = false;
- continue;
- }
- builder.UnsafeExtendCurrent(separator);
- builder.UnsafeExtendCurrent(value);
- }
- }
-
- std::shared_ptr<Array> string_array;
- RETURN_NOT_OK(builder.Finish(&string_array));
- *out = *string_array->data();
- out->mutable_array()->type = batch[0].type();
- DCHECK_EQ(batch.length, out->array()->length);
- DCHECK_EQ(final_size,
- checked_cast<const ArrayType&>(*string_array).total_values_length());
- return Status::OK();
- }
-
- // Compute the length of the output for the given position, or -1 if it would be null.
- static int64_t CalculateRowSize(const JoinOptions& options, const ExecBatch& batch,
- const int64_t index) {
- const auto num_args = batch.values.size();
- int64_t final_size = 0;
- int64_t num_non_null_args = 0;
- for (size_t i = 0; i < num_args; i++) {
- int64_t element_size = 0;
- bool valid = true;
- if (batch[i].is_scalar()) {
- const Scalar& scalar = *batch[i].scalar();
- valid = scalar.is_valid;
- element_size = UnboxScalar<Type>::Unbox(scalar).size();
- } else {
- const ArrayData& array = *batch[i].array();
- valid = !array.MayHaveNulls() ||
- BitUtil::GetBit(array.buffers[0]->data(), array.offset + index);
- const offset_type* offsets = array.GetValues<offset_type>(1);
- element_size = offsets[index + 1] - offsets[index];
- }
- if (i == num_args - 1) {
- if (!valid) return -1;
- if (num_non_null_args > 1) {
- // Add separator size (only if there were values to join)
- final_size += (num_non_null_args - 1) * element_size;
- }
- break;
- }
- if (!valid) {
- switch (options.null_handling) {
- case JoinOptions::EMIT_NULL:
- return -1;
- case JoinOptions::SKIP:
- continue;
- case JoinOptions::REPLACE:
- element_size = options.null_replacement.size();
- break;
- }
- }
- num_non_null_args++;
- final_size += element_size;
- }
- return final_size;
- }
-};
-
-const FunctionDoc binary_join_doc(
- "Join a list of strings together with a `separator` to form a single string",
- ("Insert `separator` between `list` elements, and concatenate them.\n"
- "Any null input and any null `list` element emits a null output.\n"),
- {"list", "separator"});
-
-const FunctionDoc binary_join_element_wise_doc(
- "Join string arguments into one, using the last argument as the separator",
- ("Insert the last argument of `strings` between the rest of the elements, "
- "and concatenate them.\n"
- "Any null separator element emits a null output. Null elements either "
- "emit a null (the default), are skipped, or replaced with a given string.\n"),
- {"*strings"}, "JoinOptions");
-
-const auto kDefaultJoinOptions = JoinOptions::Defaults();
-
-template <typename ListType>
-void AddBinaryJoinForListType(ScalarFunction* func) {
- for (const std::shared_ptr<DataType>& ty : BaseBinaryTypes()) {
- auto exec = GenerateTypeAgnosticVarBinaryBase<BinaryJoin, ListType>(*ty);
- auto list_ty = std::make_shared<ListType>(ty);
- DCHECK_OK(func->AddKernel({InputType(list_ty), InputType(ty)}, ty, exec));
- }
-}
-
-void AddBinaryJoin(FunctionRegistry* registry) {
- {
- auto func = std::make_shared<ScalarFunction>("binary_join", Arity::Binary(),
- &binary_join_doc);
- AddBinaryJoinForListType<ListType>(func.get());
- AddBinaryJoinForListType<LargeListType>(func.get());
- DCHECK_OK(registry->AddFunction(std::move(func)));
- }
- {
- auto func = std::make_shared<ScalarFunction>(
- "binary_join_element_wise", Arity::VarArgs(/*min_args=*/1),
- &binary_join_element_wise_doc, &kDefaultJoinOptions);
- for (const auto& ty : BaseBinaryTypes()) {
- ScalarKernel kernel{KernelSignature::Make({InputType(ty)}, ty, /*is_varargs=*/true),
- GenerateTypeAgnosticVarBinaryBase<BinaryJoinElementWise>(ty),
- BinaryJoinElementWiseState::Init};
- kernel.null_handling = NullHandling::COMPUTED_NO_PREALLOCATE;
- kernel.mem_allocation = MemAllocation::NO_PREALLOCATE;
- DCHECK_OK(func->AddKernel(std::move(kernel)));
- }
- DCHECK_OK(registry->AddFunction(std::move(func)));
- }
-}
-
+void AddUtf8Length(FunctionRegistry* registry) {
+ auto func =
+ std::make_shared<ScalarFunction>("utf8_length", Arity::Unary(), &utf8_length_doc);
+
+ ArrayKernelExec exec_offset_32 =
+ applicator::ScalarUnaryNotNull<Int32Type, StringType, Utf8Length>::Exec;
+ DCHECK_OK(func->AddKernel({utf8()}, int32(), std::move(exec_offset_32)));
+
+ ArrayKernelExec exec_offset_64 =
+ applicator::ScalarUnaryNotNull<Int64Type, LargeStringType, Utf8Length>::Exec;
+ DCHECK_OK(func->AddKernel({large_utf8()}, int64(), std::move(exec_offset_64)));
+
+ DCHECK_OK(registry->AddFunction(std::move(func)));
+}
+
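+// Illustrative note (an assumption for exposition): binary_length counts
+// bytes while utf8_length counts codepoints, so they disagree on non-ASCII
+// data:
+//
+//   // input: ["f\u00fcr"]  ("fur" with u-umlaut: 4 bytes, 3 codepoints)
+//   ARROW_ASSIGN_OR_RAISE(arrow::Datum nbytes,
+//                         arrow::compute::CallFunction("binary_length", {input}));
+//   ARROW_ASSIGN_OR_RAISE(arrow::Datum nchars,
+//                         arrow::compute::CallFunction("utf8_length", {input}));
+//   // nbytes -> [4], nchars -> [3]
+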
+template <typename BinaryType, typename ListType>
+struct BinaryJoin {
+ using ArrayType = typename TypeTraits<BinaryType>::ArrayType;
+ using ListArrayType = typename TypeTraits<ListType>::ArrayType;
+ using ListScalarType = typename TypeTraits<ListType>::ScalarType;
+ using ListOffsetType = typename ListArrayType::offset_type;
+ using BuilderType = typename TypeTraits<BinaryType>::BuilderType;
+
+ static Status Exec(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
+ if (batch[0].kind() == Datum::SCALAR) {
+ if (batch[1].kind() == Datum::SCALAR) {
+ return ExecScalarScalar(ctx, *batch[0].scalar(), *batch[1].scalar(), out);
+ }
+ DCHECK_EQ(batch[1].kind(), Datum::ARRAY);
+ return ExecScalarArray(ctx, *batch[0].scalar(), batch[1].array(), out);
+ }
+ DCHECK_EQ(batch[0].kind(), Datum::ARRAY);
+ if (batch[1].kind() == Datum::SCALAR) {
+ return ExecArrayScalar(ctx, batch[0].array(), *batch[1].scalar(), out);
+ }
+ DCHECK_EQ(batch[1].kind(), Datum::ARRAY);
+ return ExecArrayArray(ctx, batch[0].array(), batch[1].array(), out);
+ }
+
+ struct ListScalarOffsetLookup {
+ const ArrayType& values;
+
+ int64_t GetStart(int64_t i) { return 0; }
+ int64_t GetStop(int64_t i) { return values.length(); }
+ bool IsNull(int64_t i) { return false; }
+ };
+
+ struct ListArrayOffsetLookup {
+ explicit ListArrayOffsetLookup(const ListArrayType& lists)
+ : lists_(lists), offsets_(lists.raw_value_offsets()) {}
+
+ int64_t GetStart(int64_t i) { return offsets_[i]; }
+ int64_t GetStop(int64_t i) { return offsets_[i + 1]; }
+ bool IsNull(int64_t i) { return lists_.IsNull(i); }
+
+ private:
+ const ListArrayType& lists_;
+ const ListOffsetType* offsets_;
+ };
+
+ struct SeparatorScalarLookup {
+ const util::string_view separator;
+
+ bool IsNull(int64_t i) { return false; }
+ util::string_view GetView(int64_t i) { return separator; }
+ };
+
+ struct SeparatorArrayLookup {
+ const ArrayType& separators;
+
+ bool IsNull(int64_t i) { return separators.IsNull(i); }
+ util::string_view GetView(int64_t i) { return separators.GetView(i); }
+ };
+
+ // Scalar, scalar -> scalar
+ static Status ExecScalarScalar(KernelContext* ctx, const Scalar& left,
+ const Scalar& right, Datum* out) {
+ const auto& list = checked_cast<const ListScalarType&>(left);
+ const auto& separator_scalar = checked_cast<const BaseBinaryScalar&>(right);
+ if (!list.is_valid || !separator_scalar.is_valid) {
+ return Status::OK();
+ }
+ util::string_view separator(*separator_scalar.value);
+
+ const auto& strings = checked_cast<const ArrayType&>(*list.value);
+ if (strings.null_count() > 0) {
+ out->scalar()->is_valid = false;
+ return Status::OK();
+ }
+
+ TypedBufferBuilder<uint8_t> builder(ctx->memory_pool());
+ auto Append = [&](util::string_view value) {
+ return builder.Append(reinterpret_cast<const uint8_t*>(value.data()),
+ static_cast<int64_t>(value.size()));
+ };
+ if (strings.length() > 0) {
+ auto data_length =
+ strings.total_values_length() + (strings.length() - 1) * separator.length();
+ RETURN_NOT_OK(builder.Reserve(data_length));
+ RETURN_NOT_OK(Append(strings.GetView(0)));
+ for (int64_t j = 1; j < strings.length(); j++) {
+ RETURN_NOT_OK(Append(separator));
+ RETURN_NOT_OK(Append(strings.GetView(j)));
+ }
+ }
+ auto out_scalar = checked_cast<BaseBinaryScalar*>(out->scalar().get());
+ return builder.Finish(&out_scalar->value);
+ }
+
+ // Scalar, array -> array
+ static Status ExecScalarArray(KernelContext* ctx, const Scalar& left,
+ const std::shared_ptr<ArrayData>& right, Datum* out) {
+ const auto& list_scalar = checked_cast<const BaseListScalar&>(left);
+ if (!list_scalar.is_valid) {
+ ARROW_ASSIGN_OR_RAISE(
+ auto nulls, MakeArrayOfNull(right->type, right->length, ctx->memory_pool()));
+ *out = *nulls->data();
+ return Status::OK();
+ }
+ const auto& strings = checked_cast<const ArrayType&>(*list_scalar.value);
+ if (strings.null_count() != 0) {
+ ARROW_ASSIGN_OR_RAISE(
+ auto nulls, MakeArrayOfNull(right->type, right->length, ctx->memory_pool()));
+ *out = *nulls->data();
+ return Status::OK();
+ }
+ const ArrayType separators(right);
+
+ BuilderType builder(ctx->memory_pool());
+ RETURN_NOT_OK(builder.Reserve(separators.length()));
+
+ // Presize data to avoid multiple reallocations when joining strings
+ int64_t total_data_length = 0;
+ const int64_t list_length = strings.length();
+ if (list_length) {
+ const int64_t string_length = strings.total_values_length();
+ total_data_length +=
+ string_length * (separators.length() - separators.null_count());
+ for (int64_t i = 0; i < separators.length(); ++i) {
+ if (separators.IsNull(i)) {
+ continue;
+ }
+ total_data_length += (list_length - 1) * separators.value_length(i);
+ }
+ }
+ RETURN_NOT_OK(builder.ReserveData(total_data_length));
+
+ return JoinStrings(separators.length(), strings, ListScalarOffsetLookup{strings},
+ SeparatorArrayLookup{separators}, &builder, out);
+ }
+
+ // Array, scalar -> array
+ static Status ExecArrayScalar(KernelContext* ctx,
+ const std::shared_ptr<ArrayData>& left,
+ const Scalar& right, Datum* out) {
+ const ListArrayType lists(left);
+ const auto& separator_scalar = checked_cast<const BaseBinaryScalar&>(right);
+
+ if (!separator_scalar.is_valid) {
+ ARROW_ASSIGN_OR_RAISE(
+ auto nulls,
+ MakeArrayOfNull(lists.value_type(), lists.length(), ctx->memory_pool()));
+ *out = *nulls->data();
+ return Status::OK();
+ }
+
+ util::string_view separator(*separator_scalar.value);
+ const auto& strings = checked_cast<const ArrayType&>(*lists.values());
+ const auto list_offsets = lists.raw_value_offsets();
+
+ BuilderType builder(ctx->memory_pool());
+ RETURN_NOT_OK(builder.Reserve(lists.length()));
+
+ // Presize data to avoid multiple reallocations when joining strings
+ int64_t total_data_length = strings.total_values_length();
+ for (int64_t i = 0; i < lists.length(); ++i) {
+ const auto start = list_offsets[i], end = list_offsets[i + 1];
+ if (end > start && !ValuesContainNull(strings, start, end)) {
+ total_data_length += (end - start - 1) * separator.length();
+ }
+ }
+ RETURN_NOT_OK(builder.ReserveData(total_data_length));
+
+ return JoinStrings(lists.length(), strings, ListArrayOffsetLookup{lists},
+ SeparatorScalarLookup{separator}, &builder, out);
+ }
+
+ // Array, array -> array
+ static Status ExecArrayArray(KernelContext* ctx, const std::shared_ptr<ArrayData>& left,
+ const std::shared_ptr<ArrayData>& right, Datum* out) {
+ const ListArrayType lists(left);
+ const auto& strings = checked_cast<const ArrayType&>(*lists.values());
+ const auto list_offsets = lists.raw_value_offsets();
+ const auto string_offsets = strings.raw_value_offsets();
+ const ArrayType separators(right);
+
+ BuilderType builder(ctx->memory_pool());
+ RETURN_NOT_OK(builder.Reserve(lists.length()));
+
+ // Presize data to avoid multiple reallocations when joining strings
+ int64_t total_data_length = 0;
+ for (int64_t i = 0; i < lists.length(); ++i) {
+ if (separators.IsNull(i)) {
+ continue;
+ }
+ const auto start = list_offsets[i], end = list_offsets[i + 1];
+ if (end > start && !ValuesContainNull(strings, start, end)) {
+ total_data_length += string_offsets[end] - string_offsets[start];
+ total_data_length += (end - start - 1) * separators.value_length(i);
+ }
+ }
+ RETURN_NOT_OK(builder.ReserveData(total_data_length));
+
+ struct SeparatorLookup {
+ const ArrayType& separators;
+
+ bool IsNull(int64_t i) { return separators.IsNull(i); }
+ util::string_view GetView(int64_t i) { return separators.GetView(i); }
+ };
+ return JoinStrings(lists.length(), strings, ListArrayOffsetLookup{lists},
+ SeparatorArrayLookup{separators}, &builder, out);
+ }
+
+ template <typename ListOffsetLookup, typename SeparatorLookup>
+ static Status JoinStrings(int64_t length, const ArrayType& strings,
+ ListOffsetLookup&& list_offsets, SeparatorLookup&& separators,
+ BuilderType* builder, Datum* out) {
+ for (int64_t i = 0; i < length; ++i) {
+ if (list_offsets.IsNull(i) || separators.IsNull(i)) {
+ builder->UnsafeAppendNull();
+ continue;
+ }
+ const auto j_start = list_offsets.GetStart(i), j_end = list_offsets.GetStop(i);
+ if (j_start == j_end) {
+ builder->UnsafeAppendEmptyValue();
+ continue;
+ }
+ if (ValuesContainNull(strings, j_start, j_end)) {
+ builder->UnsafeAppendNull();
+ continue;
+ }
+ builder->UnsafeAppend(strings.GetView(j_start));
+ for (int64_t j = j_start + 1; j < j_end; ++j) {
+ builder->UnsafeExtendCurrent(separators.GetView(i));
+ builder->UnsafeExtendCurrent(strings.GetView(j));
+ }
+ }
+
+ std::shared_ptr<Array> string_array;
+ RETURN_NOT_OK(builder->Finish(&string_array));
+ *out = *string_array->data();
+ // Correct the output type based on the input
+ out->mutable_array()->type = strings.type();
+ return Status::OK();
+ }
+
+ static bool ValuesContainNull(const ArrayType& values, int64_t start, int64_t end) {
+ if (values.null_count() == 0) {
+ return false;
+ }
+ for (int64_t i = start; i < end; ++i) {
+ if (values.IsNull(i)) {
+ return true;
+ }
+ }
+ return false;
+ }
+};
+
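+// Illustrative usage sketch (an assumption for exposition): binary_join takes
+// a list-of-strings Datum and a separator Datum, e.g.
+//
+//   // lists:     [["a", "b", "c"], ["d", null], null]
+//   // separator: "-"
+//   ARROW_ASSIGN_OR_RAISE(
+//       arrow::Datum joined,
+//       arrow::compute::CallFunction("binary_join", {lists, separator}));
+//   // -> ["a-b-c", null, null]: a null list or a null list element emits null.
+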
+using BinaryJoinElementWiseState = OptionsWrapper<JoinOptions>;
+
+template <typename Type>
+struct BinaryJoinElementWise {
+ using ArrayType = typename TypeTraits<Type>::ArrayType;
+ using BuilderType = typename TypeTraits<Type>::BuilderType;
+ using offset_type = typename Type::offset_type;
+
+ static Status Exec(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
+ JoinOptions options = BinaryJoinElementWiseState::Get(ctx);
+ // Last argument is the separator (for consistency with binary_join)
+ if (std::all_of(batch.values.begin(), batch.values.end(),
+ [](const Datum& d) { return d.is_scalar(); })) {
+ return ExecOnlyScalar(ctx, options, batch, out);
+ }
+ return ExecContainingArrays(ctx, options, batch, out);
+ }
+
+ static Status ExecOnlyScalar(KernelContext* ctx, const JoinOptions& options,
+ const ExecBatch& batch, Datum* out) {
+ BaseBinaryScalar* output = checked_cast<BaseBinaryScalar*>(out->scalar().get());
+ const size_t num_args = batch.values.size();
+ if (num_args == 1) {
+ // Only separator, no values
+ ARROW_ASSIGN_OR_RAISE(output->value, ctx->Allocate(0));
+ output->is_valid = batch.values[0].scalar()->is_valid;
+ return Status::OK();
+ }
+
+ int64_t final_size = CalculateRowSize(options, batch, 0);
+ if (final_size < 0) {
+ ARROW_ASSIGN_OR_RAISE(output->value, ctx->Allocate(0));
+ output->is_valid = false;
+ return Status::OK();
+ }
+ ARROW_ASSIGN_OR_RAISE(output->value, ctx->Allocate(final_size));
+ const auto separator = UnboxScalar<Type>::Unbox(*batch.values.back().scalar());
+ uint8_t* buf = output->value->mutable_data();
+ bool first = true;
+ for (size_t i = 0; i < num_args - 1; i++) {
+ const Scalar& scalar = *batch[i].scalar();
+ util::string_view s;
+ if (scalar.is_valid) {
+ s = UnboxScalar<Type>::Unbox(scalar);
+ } else {
+ switch (options.null_handling) {
+ case JoinOptions::EMIT_NULL:
+ // Handled by CalculateRowSize
+ DCHECK(false) << "unreachable";
+ break;
+ case JoinOptions::SKIP:
+ continue;
+ case JoinOptions::REPLACE:
+ s = options.null_replacement;
+ break;
+ }
+ }
+ if (!first) {
+ buf = std::copy(separator.begin(), separator.end(), buf);
+ }
+ first = false;
+ buf = std::copy(s.begin(), s.end(), buf);
+ }
+ output->is_valid = true;
+ DCHECK_EQ(final_size, buf - output->value->mutable_data());
+ return Status::OK();
+ }
+
+ static Status ExecContainingArrays(KernelContext* ctx, const JoinOptions& options,
+ const ExecBatch& batch, Datum* out) {
+ // Presize data to avoid reallocations
+ int64_t final_size = 0;
+ for (int64_t i = 0; i < batch.length; i++) {
+ auto size = CalculateRowSize(options, batch, i);
+ if (size > 0) final_size += size;
+ }
+ BuilderType builder(ctx->memory_pool());
+ RETURN_NOT_OK(builder.Reserve(batch.length));
+ RETURN_NOT_OK(builder.ReserveData(final_size));
+
+ std::vector<util::string_view> valid_cols(batch.values.size());
+ for (size_t row = 0; row < static_cast<size_t>(batch.length); row++) {
+ size_t num_valid = 0; // Not counting separator
+ for (size_t col = 0; col < batch.values.size(); col++) {
+ if (batch[col].is_scalar()) {
+ const auto& scalar = *batch[col].scalar();
+ if (scalar.is_valid) {
+ valid_cols[col] = UnboxScalar<Type>::Unbox(scalar);
+ if (col < batch.values.size() - 1) num_valid++;
+ } else {
+ valid_cols[col] = util::string_view();
+ }
+ } else {
+ const ArrayData& array = *batch[col].array();
+ if (!array.MayHaveNulls() ||
+ BitUtil::GetBit(array.buffers[0]->data(), array.offset + row)) {
+ const offset_type* offsets = array.GetValues<offset_type>(1);
+ const uint8_t* data = array.GetValues<uint8_t>(2, /*absolute_offset=*/0);
+ const int64_t length = offsets[row + 1] - offsets[row];
+ valid_cols[col] = util::string_view(
+ reinterpret_cast<const char*>(data + offsets[row]), length);
+ if (col < batch.values.size() - 1) num_valid++;
+ } else {
+ valid_cols[col] = util::string_view();
+ }
+ }
+ }
+
+ if (!valid_cols.back().data()) {
+ // Separator is null
+ builder.UnsafeAppendNull();
+ continue;
+ } else if (batch.values.size() == 1) {
+ // Only given separator
+ builder.UnsafeAppendEmptyValue();
+ continue;
+ } else if (num_valid < batch.values.size() - 1) {
+ // We had some nulls
+ if (options.null_handling == JoinOptions::EMIT_NULL) {
+ builder.UnsafeAppendNull();
+ continue;
+ }
+ }
+ const auto separator = valid_cols.back();
+ bool first = true;
+ for (size_t col = 0; col < batch.values.size() - 1; col++) {
+ util::string_view value = valid_cols[col];
+ if (!value.data()) {
+ switch (options.null_handling) {
+ case JoinOptions::EMIT_NULL:
+ DCHECK(false) << "unreachable";
+ break;
+ case JoinOptions::SKIP:
+ continue;
+ case JoinOptions::REPLACE:
+ value = options.null_replacement;
+ break;
+ }
+ }
+ if (first) {
+ builder.UnsafeAppend(value);
+ first = false;
+ continue;
+ }
+ builder.UnsafeExtendCurrent(separator);
+ builder.UnsafeExtendCurrent(value);
+ }
+ }
+
+ std::shared_ptr<Array> string_array;
+ RETURN_NOT_OK(builder.Finish(&string_array));
+ *out = *string_array->data();
+ out->mutable_array()->type = batch[0].type();
+ DCHECK_EQ(batch.length, out->array()->length);
+ DCHECK_EQ(final_size,
+ checked_cast<const ArrayType&>(*string_array).total_values_length());
+ return Status::OK();
+ }
+
+ // Compute the length of the output for the given position, or -1 if it would be null.
+ static int64_t CalculateRowSize(const JoinOptions& options, const ExecBatch& batch,
+ const int64_t index) {
+ const auto num_args = batch.values.size();
+ int64_t final_size = 0;
+ int64_t num_non_null_args = 0;
+ for (size_t i = 0; i < num_args; i++) {
+ int64_t element_size = 0;
+ bool valid = true;
+ if (batch[i].is_scalar()) {
+ const Scalar& scalar = *batch[i].scalar();
+ valid = scalar.is_valid;
+ element_size = UnboxScalar<Type>::Unbox(scalar).size();
+ } else {
+ const ArrayData& array = *batch[i].array();
+ valid = !array.MayHaveNulls() ||
+ BitUtil::GetBit(array.buffers[0]->data(), array.offset + index);
+ const offset_type* offsets = array.GetValues<offset_type>(1);
+ element_size = offsets[index + 1] - offsets[index];
+ }
+ if (i == num_args - 1) {
+ if (!valid) return -1;
+ if (num_non_null_args > 1) {
+ // Add separator size (only if there were values to join)
+ final_size += (num_non_null_args - 1) * element_size;
+ }
+ break;
+ }
+ if (!valid) {
+ switch (options.null_handling) {
+ case JoinOptions::EMIT_NULL:
+ return -1;
+ case JoinOptions::SKIP:
+ continue;
+ case JoinOptions::REPLACE:
+ element_size = options.null_replacement.size();
+ break;
+ }
+ }
+ num_non_null_args++;
+ final_size += element_size;
+ }
+ return final_size;
+ }
+};
+
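+// Illustrative usage sketch (an assumption for exposition): the last argument
+// is the separator and JoinOptions controls how nulls are handled, e.g.
+//
+//   arrow::compute::JoinOptions options(
+//       arrow::compute::JoinOptions::REPLACE, /*null_replacement=*/"?");
+//   ARROW_ASSIGN_OR_RAISE(
+//       arrow::Datum joined,
+//       arrow::compute::CallFunction("binary_join_element_wise",
+//                                    {a, b, separator}, &options));
+//   // a = ["x", null], b = ["y", "z"], separator = "-"  ->  ["x-y", "?-z"]
+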
+const FunctionDoc binary_join_doc(
+ "Join a list of strings together with a `separator` to form a single string",
+ ("Insert `separator` between `list` elements, and concatenate them.\n"
+ "Any null input and any null `list` element emits a null output.\n"),
+ {"list", "separator"});
+
+const FunctionDoc binary_join_element_wise_doc(
+ "Join string arguments into one, using the last argument as the separator",
+ ("Insert the last argument of `strings` between the rest of the elements, "
+ "and concatenate them.\n"
+ "Any null separator element emits a null output. Null elements either "
+ "emit a null (the default), are skipped, or replaced with a given string.\n"),
+ {"*strings"}, "JoinOptions");
+
+const auto kDefaultJoinOptions = JoinOptions::Defaults();
+
+template <typename ListType>
+void AddBinaryJoinForListType(ScalarFunction* func) {
+ for (const std::shared_ptr<DataType>& ty : BaseBinaryTypes()) {
+ auto exec = GenerateTypeAgnosticVarBinaryBase<BinaryJoin, ListType>(*ty);
+ auto list_ty = std::make_shared<ListType>(ty);
+ DCHECK_OK(func->AddKernel({InputType(list_ty), InputType(ty)}, ty, exec));
+ }
+}
+
+void AddBinaryJoin(FunctionRegistry* registry) {
+ {
+ auto func = std::make_shared<ScalarFunction>("binary_join", Arity::Binary(),
+ &binary_join_doc);
+ AddBinaryJoinForListType<ListType>(func.get());
+ AddBinaryJoinForListType<LargeListType>(func.get());
+ DCHECK_OK(registry->AddFunction(std::move(func)));
+ }
+ {
+ auto func = std::make_shared<ScalarFunction>(
+ "binary_join_element_wise", Arity::VarArgs(/*min_args=*/1),
+ &binary_join_element_wise_doc, &kDefaultJoinOptions);
+ for (const auto& ty : BaseBinaryTypes()) {
+ ScalarKernel kernel{KernelSignature::Make({InputType(ty)}, ty, /*is_varargs=*/true),
+ GenerateTypeAgnosticVarBinaryBase<BinaryJoinElementWise>(ty),
+ BinaryJoinElementWiseState::Init};
+ kernel.null_handling = NullHandling::COMPUTED_NO_PREALLOCATE;
+ kernel.mem_allocation = MemAllocation::NO_PREALLOCATE;
+ DCHECK_OK(func->AddKernel(std::move(kernel)));
+ }
+ DCHECK_OK(registry->AddFunction(std::move(func)));
+ }
+}
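// ---- Editorial sketch (not part of this diff) ------------------------------
// The matching call for the list-based "binary_join" registered above: a
// single separator scalar is broadcast across all rows. Illustrative only.
arrow::Result<arrow::Datum> JoinLists(const arrow::Datum& list_of_strings) {
  // e.g. [["a", "b"], ["c"], null] -> ["a, b", "c", null]
  arrow::Datum sep(arrow::MakeScalar(std::string(", ")));
  return arrow::compute::CallFunction("binary_join", {list_of_strings, sep});
}
// ----------------------------------------------------------------------------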
+
+template <template <typename> class ExecFunctor>
+void MakeUnaryStringBatchKernel(
+ std::string name, FunctionRegistry* registry, const FunctionDoc* doc,
+ MemAllocation::type mem_allocation = MemAllocation::PREALLOCATE) {
+ auto func = std::make_shared<ScalarFunction>(name, Arity::Unary(), doc);
+ {
+ auto exec_32 = ExecFunctor<StringType>::Exec;
+ ScalarKernel kernel{{utf8()}, utf8(), exec_32};
+ kernel.mem_allocation = mem_allocation;
+ DCHECK_OK(func->AddKernel(std::move(kernel)));
+ }
+ {
+ auto exec_64 = ExecFunctor<LargeStringType>::Exec;
+ ScalarKernel kernel{{large_utf8()}, large_utf8(), exec_64};
+ kernel.mem_allocation = mem_allocation;
+ DCHECK_OK(func->AddKernel(std::move(kernel)));
+ }
+ DCHECK_OK(registry->AddFunction(std::move(func)));
+}
+
template <template <typename> class ExecFunctor>
-void MakeUnaryStringBatchKernel(
- std::string name, FunctionRegistry* registry, const FunctionDoc* doc,
- MemAllocation::type mem_allocation = MemAllocation::PREALLOCATE) {
- auto func = std::make_shared<ScalarFunction>(name, Arity::Unary(), doc);
- {
- auto exec_32 = ExecFunctor<StringType>::Exec;
- ScalarKernel kernel{{utf8()}, utf8(), exec_32};
- kernel.mem_allocation = mem_allocation;
- DCHECK_OK(func->AddKernel(std::move(kernel)));
- }
- {
- auto exec_64 = ExecFunctor<LargeStringType>::Exec;
- ScalarKernel kernel{{large_utf8()}, large_utf8(), exec_64};
- kernel.mem_allocation = mem_allocation;
- DCHECK_OK(func->AddKernel(std::move(kernel)));
- }
+void MakeUnaryStringBatchKernelWithState(
+ std::string name, FunctionRegistry* registry, const FunctionDoc* doc,
+ MemAllocation::type mem_allocation = MemAllocation::PREALLOCATE) {
+ auto func = std::make_shared<ScalarFunction>(name, Arity::Unary(), doc);
+ {
+ using t32 = ExecFunctor<StringType>;
+ ScalarKernel kernel{{utf8()}, utf8(), t32::Exec, t32::State::Init};
+ kernel.mem_allocation = mem_allocation;
+ DCHECK_OK(func->AddKernel(std::move(kernel)));
+ }
+ {
+ using t64 = ExecFunctor<LargeStringType>;
+ ScalarKernel kernel{{large_utf8()}, large_utf8(), t64::Exec, t64::State::Init};
+ kernel.mem_allocation = mem_allocation;
+ DCHECK_OK(func->AddKernel(std::move(kernel)));
+ }
DCHECK_OK(registry->AddFunction(std::move(func)));
}
-template <template <typename> class ExecFunctor>
-void MakeUnaryStringBatchKernelWithState(
- std::string name, FunctionRegistry* registry, const FunctionDoc* doc,
- MemAllocation::type mem_allocation = MemAllocation::PREALLOCATE) {
- auto func = std::make_shared<ScalarFunction>(name, Arity::Unary(), doc);
- {
- using t32 = ExecFunctor<StringType>;
- ScalarKernel kernel{{utf8()}, utf8(), t32::Exec, t32::State::Init};
- kernel.mem_allocation = mem_allocation;
- DCHECK_OK(func->AddKernel(std::move(kernel)));
- }
- {
- using t64 = ExecFunctor<LargeStringType>;
- ScalarKernel kernel{{large_utf8()}, large_utf8(), t64::Exec, t64::State::Init};
- kernel.mem_allocation = mem_allocation;
- DCHECK_OK(func->AddKernel(std::move(kernel)));
- }
- DCHECK_OK(registry->AddFunction(std::move(func)));
-}
-
#ifdef ARROW_WITH_UTF8PROC
template <template <typename> class Transformer>
-void MakeUnaryStringUTF8TransformKernel(std::string name, FunctionRegistry* registry,
- const FunctionDoc* doc) {
- auto func = std::make_shared<ScalarFunction>(name, Arity::Unary(), doc);
+void MakeUnaryStringUTF8TransformKernel(std::string name, FunctionRegistry* registry,
+ const FunctionDoc* doc) {
+ auto func = std::make_shared<ScalarFunction>(name, Arity::Unary(), doc);
ArrayKernelExec exec_32 = Transformer<StringType>::Exec;
ArrayKernelExec exec_64 = Transformer<LargeStringType>::Exec;
DCHECK_OK(func->AddKernel({utf8()}, utf8(), exec_32));
@@ -3876,15 +3876,15 @@ void MakeUnaryStringUTF8TransformKernel(std::string name, FunctionRegistry* regi
#endif
-// NOTE: Predicate should only populate 'status' with errors,
-// leave it unmodified to indicate Status::OK()
-using StringPredicate =
- std::function<bool(KernelContext*, const uint8_t*, size_t, Status*)>;
+// NOTE: Predicate should only populate 'status' with errors,
+// leave it unmodified to indicate Status::OK()
+using StringPredicate =
+ std::function<bool(KernelContext*, const uint8_t*, size_t, Status*)>;
template <typename Type>
-Status ApplyPredicate(KernelContext* ctx, const ExecBatch& batch,
- StringPredicate predicate, Datum* out) {
- Status st = Status::OK();
+Status ApplyPredicate(KernelContext* ctx, const ExecBatch& batch,
+ StringPredicate predicate, Datum* out) {
+ Status st = Status::OK();
EnsureLookupTablesFilled();
if (batch[0].kind() == Datum::ARRAY) {
const ArrayData& input = *batch[0].array();
@@ -3894,250 +3894,250 @@ Status ApplyPredicate(KernelContext* ctx, const ExecBatch& batch,
out_arr->buffers[1]->mutable_data(), out_arr->offset, input.length,
[&]() -> bool {
util::string_view val = input_it();
- return predicate(ctx, reinterpret_cast<const uint8_t*>(val.data()), val.size(),
- &st);
+ return predicate(ctx, reinterpret_cast<const uint8_t*>(val.data()), val.size(),
+ &st);
});
} else {
const auto& input = checked_cast<const BaseBinaryScalar&>(*batch[0].scalar());
if (input.is_valid) {
- bool boolean_result = predicate(ctx, input.value->data(),
- static_cast<size_t>(input.value->size()), &st);
- // UTF decoding can lead to issues
- if (st.ok()) {
- out->value = std::make_shared<BooleanScalar>(boolean_result);
+ bool boolean_result = predicate(ctx, input.value->data(),
+ static_cast<size_t>(input.value->size()), &st);
+ // UTF decoding can lead to issues
+ if (st.ok()) {
+ out->value = std::make_shared<BooleanScalar>(boolean_result);
}
}
}
- return st;
+ return st;
}
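// ---- Editorial sketch (not part of this diff) ------------------------------
// A callable compatible with the StringPredicate signature above; it returns
// false on any non-ASCII byte and leaves *st untouched, which ApplyPredicate
// reads as Status::OK().
bool IsAsciiPredicateSketch(arrow::compute::KernelContext*, const uint8_t* data,
                            size_t length, arrow::Status* /*st*/) {
  for (size_t i = 0; i < length; ++i) {
    if (data[i] & 0x80) return false;  // high bit set -> not ASCII
  }
  return true;
}
// ----------------------------------------------------------------------------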
template <typename Predicate>
-void AddUnaryStringPredicate(std::string name, FunctionRegistry* registry,
- const FunctionDoc* doc) {
- auto func = std::make_shared<ScalarFunction>(name, Arity::Unary(), doc);
+void AddUnaryStringPredicate(std::string name, FunctionRegistry* registry,
+ const FunctionDoc* doc) {
+ auto func = std::make_shared<ScalarFunction>(name, Arity::Unary(), doc);
auto exec_32 = [](KernelContext* ctx, const ExecBatch& batch, Datum* out) {
- return ApplyPredicate<StringType>(ctx, batch, Predicate::Call, out);
+ return ApplyPredicate<StringType>(ctx, batch, Predicate::Call, out);
};
auto exec_64 = [](KernelContext* ctx, const ExecBatch& batch, Datum* out) {
- return ApplyPredicate<LargeStringType>(ctx, batch, Predicate::Call, out);
+ return ApplyPredicate<LargeStringType>(ctx, batch, Predicate::Call, out);
};
DCHECK_OK(func->AddKernel({utf8()}, boolean(), std::move(exec_32)));
DCHECK_OK(func->AddKernel({large_utf8()}, boolean(), std::move(exec_64)));
DCHECK_OK(registry->AddFunction(std::move(func)));
}
-FunctionDoc StringPredicateDoc(std::string summary, std::string description) {
- return FunctionDoc{std::move(summary), std::move(description), {"strings"}};
-}
-
-FunctionDoc StringClassifyDoc(std::string class_summary, std::string class_desc,
- bool non_empty) {
- std::string summary, description;
- {
- std::stringstream ss;
- ss << "Classify strings as " << class_summary;
- summary = ss.str();
- }
- {
- std::stringstream ss;
- if (non_empty) {
- ss
- << ("For each string in `strings`, emit true iff the string is non-empty\n"
- "and consists only of ");
- } else {
- ss
- << ("For each string in `strings`, emit true iff the string consists only\n"
- "of ");
- }
- ss << class_desc << ". Null strings emit null.";
- description = ss.str();
- }
- return StringPredicateDoc(std::move(summary), std::move(description));
-}
-
-const auto string_is_ascii_doc = StringClassifyDoc("ASCII", "ASCII characters", false);
-
-const auto ascii_is_alnum_doc =
- StringClassifyDoc("ASCII alphanumeric", "alphanumeric ASCII characters", true);
-const auto ascii_is_alpha_doc =
- StringClassifyDoc("ASCII alphabetic", "alphabetic ASCII characters", true);
-const auto ascii_is_decimal_doc =
- StringClassifyDoc("ASCII decimal", "decimal ASCII characters", true);
-const auto ascii_is_lower_doc =
- StringClassifyDoc("ASCII lowercase", "lowercase ASCII characters", true);
-const auto ascii_is_printable_doc =
- StringClassifyDoc("ASCII printable", "printable ASCII characters", true);
-const auto ascii_is_space_doc =
- StringClassifyDoc("ASCII whitespace", "whitespace ASCII characters", true);
-const auto ascii_is_upper_doc =
- StringClassifyDoc("ASCII uppercase", "uppercase ASCII characters", true);
-
-const auto ascii_is_title_doc = StringPredicateDoc(
- "Classify strings as ASCII titlecase",
- ("For each string in `strings`, emit true iff the string is title-cased,\n"
- "i.e. it has at least one cased character, each uppercase character\n"
- "follows a non-cased character, and each lowercase character follows\n"
- "an uppercase character.\n"));
-
-const auto utf8_is_alnum_doc =
- StringClassifyDoc("alphanumeric", "alphanumeric Unicode characters", true);
-const auto utf8_is_alpha_doc =
- StringClassifyDoc("alphabetic", "alphabetic Unicode characters", true);
-const auto utf8_is_decimal_doc =
- StringClassifyDoc("decimal", "decimal Unicode characters", true);
-const auto utf8_is_digit_doc = StringClassifyDoc("digits", "Unicode digits", true);
-const auto utf8_is_lower_doc =
- StringClassifyDoc("lowercase", "lowercase Unicode characters", true);
-const auto utf8_is_numeric_doc =
- StringClassifyDoc("numeric", "numeric Unicode characters", true);
-const auto utf8_is_printable_doc =
- StringClassifyDoc("printable", "printable Unicode characters", true);
-const auto utf8_is_space_doc =
- StringClassifyDoc("whitespace", "whitespace Unicode characters", true);
-const auto utf8_is_upper_doc =
- StringClassifyDoc("uppercase", "uppercase Unicode characters", true);
-
-const auto utf8_is_title_doc = StringPredicateDoc(
- "Classify strings as titlecase",
- ("For each string in `strings`, emit true iff the string is title-cased,\n"
- "i.e. it has at least one cased character, each uppercase character\n"
- "follows a non-cased character, and each lowercase character follows\n"
- "an uppercase character.\n"));
-
-const FunctionDoc ascii_upper_doc(
- "Transform ASCII input to uppercase",
- ("For each string in `strings`, return an uppercase version.\n\n"
- "This function assumes the input is fully ASCII. It it may contain\n"
- "non-ASCII characters, use \"utf8_upper\" instead."),
- {"strings"});
-
-const FunctionDoc ascii_lower_doc(
- "Transform ASCII input to lowercase",
- ("For each string in `strings`, return a lowercase version.\n\n"
- "This function assumes the input is fully ASCII. If it may contain\n"
- "non-ASCII characters, use \"utf8_lower\" instead."),
- {"strings"});
-
-const FunctionDoc utf8_upper_doc(
- "Transform input to uppercase",
- ("For each string in `strings`, return an uppercase version."), {"strings"});
-
-const FunctionDoc utf8_lower_doc(
- "Transform input to lowercase",
- ("For each string in `strings`, return a lowercase version."), {"strings"});
-
-const FunctionDoc ascii_reverse_doc(
- "Reverse ASCII input",
- ("For each ASCII string in `strings`, return a reversed version.\n\n"
- "This function assumes the input is fully ASCII. If it may contain\n"
- "non-ASCII characters, use \"utf8_reverse\" instead."),
- {"strings"});
-
-const FunctionDoc utf8_reverse_doc(
- "Reverse utf8 input",
- ("For each utf8 string in `strings`, return a reversed version.\n\n"
- "This function operates on codepoints/UTF-8 code units, not grapheme\n"
- "clusters. Hence, it will not correctly reverse grapheme clusters\n"
- "composed of multiple codepoints."),
- {"strings"});
-
+FunctionDoc StringPredicateDoc(std::string summary, std::string description) {
+ return FunctionDoc{std::move(summary), std::move(description), {"strings"}};
+}
+
+FunctionDoc StringClassifyDoc(std::string class_summary, std::string class_desc,
+ bool non_empty) {
+ std::string summary, description;
+ {
+ std::stringstream ss;
+ ss << "Classify strings as " << class_summary;
+ summary = ss.str();
+ }
+ {
+ std::stringstream ss;
+ if (non_empty) {
+ ss
+ << ("For each string in `strings`, emit true iff the string is non-empty\n"
+ "and consists only of ");
+ } else {
+ ss
+ << ("For each string in `strings`, emit true iff the string consists only\n"
+ "of ");
+ }
+ ss << class_desc << ". Null strings emit null.";
+ description = ss.str();
+ }
+ return StringPredicateDoc(std::move(summary), std::move(description));
+}
+
+const auto string_is_ascii_doc = StringClassifyDoc("ASCII", "ASCII characters", false);
+
+const auto ascii_is_alnum_doc =
+ StringClassifyDoc("ASCII alphanumeric", "alphanumeric ASCII characters", true);
+const auto ascii_is_alpha_doc =
+ StringClassifyDoc("ASCII alphabetic", "alphabetic ASCII characters", true);
+const auto ascii_is_decimal_doc =
+ StringClassifyDoc("ASCII decimal", "decimal ASCII characters", true);
+const auto ascii_is_lower_doc =
+ StringClassifyDoc("ASCII lowercase", "lowercase ASCII characters", true);
+const auto ascii_is_printable_doc =
+ StringClassifyDoc("ASCII printable", "printable ASCII characters", true);
+const auto ascii_is_space_doc =
+ StringClassifyDoc("ASCII whitespace", "whitespace ASCII characters", true);
+const auto ascii_is_upper_doc =
+ StringClassifyDoc("ASCII uppercase", "uppercase ASCII characters", true);
+
+const auto ascii_is_title_doc = StringPredicateDoc(
+ "Classify strings as ASCII titlecase",
+ ("For each string in `strings`, emit true iff the string is title-cased,\n"
+ "i.e. it has at least one cased character, each uppercase character\n"
+ "follows a non-cased character, and each lowercase character follows\n"
+ "an uppercase character.\n"));
+
+const auto utf8_is_alnum_doc =
+ StringClassifyDoc("alphanumeric", "alphanumeric Unicode characters", true);
+const auto utf8_is_alpha_doc =
+ StringClassifyDoc("alphabetic", "alphabetic Unicode characters", true);
+const auto utf8_is_decimal_doc =
+ StringClassifyDoc("decimal", "decimal Unicode characters", true);
+const auto utf8_is_digit_doc = StringClassifyDoc("digits", "Unicode digits", true);
+const auto utf8_is_lower_doc =
+ StringClassifyDoc("lowercase", "lowercase Unicode characters", true);
+const auto utf8_is_numeric_doc =
+ StringClassifyDoc("numeric", "numeric Unicode characters", true);
+const auto utf8_is_printable_doc =
+ StringClassifyDoc("printable", "printable Unicode characters", true);
+const auto utf8_is_space_doc =
+ StringClassifyDoc("whitespace", "whitespace Unicode characters", true);
+const auto utf8_is_upper_doc =
+ StringClassifyDoc("uppercase", "uppercase Unicode characters", true);
+
+const auto utf8_is_title_doc = StringPredicateDoc(
+ "Classify strings as titlecase",
+ ("For each string in `strings`, emit true iff the string is title-cased,\n"
+ "i.e. it has at least one cased character, each uppercase character\n"
+ "follows a non-cased character, and each lowercase character follows\n"
+ "an uppercase character.\n"));
+
+const FunctionDoc ascii_upper_doc(
+ "Transform ASCII input to uppercase",
+ ("For each string in `strings`, return an uppercase version.\n\n"
+ "This function assumes the input is fully ASCII. It it may contain\n"
+ "non-ASCII characters, use \"utf8_upper\" instead."),
+ {"strings"});
+
+const FunctionDoc ascii_lower_doc(
+ "Transform ASCII input to lowercase",
+ ("For each string in `strings`, return a lowercase version.\n\n"
+ "This function assumes the input is fully ASCII. If it may contain\n"
+ "non-ASCII characters, use \"utf8_lower\" instead."),
+ {"strings"});
+
+const FunctionDoc utf8_upper_doc(
+ "Transform input to uppercase",
+ ("For each string in `strings`, return an uppercase version."), {"strings"});
+
+const FunctionDoc utf8_lower_doc(
+ "Transform input to lowercase",
+ ("For each string in `strings`, return a lowercase version."), {"strings"});
+
+const FunctionDoc ascii_reverse_doc(
+ "Reverse ASCII input",
+ ("For each ASCII string in `strings`, return a reversed version.\n\n"
+ "This function assumes the input is fully ASCII. If it may contain\n"
+ "non-ASCII characters, use \"utf8_reverse\" instead."),
+ {"strings"});
+
+const FunctionDoc utf8_reverse_doc(
+ "Reverse utf8 input",
+ ("For each utf8 string in `strings`, return a reversed version.\n\n"
+ "This function operates on codepoints/UTF-8 code units, not grapheme\n"
+ "clusters. Hence, it will not correctly reverse grapheme clusters\n"
+ "composed of multiple codepoints."),
+ {"strings"});
+
} // namespace
void RegisterScalarStringAscii(FunctionRegistry* registry) {
- // ascii_upper and ascii_lower are able to reuse the original offsets buffer,
- // so don't preallocate them in the output.
- MakeUnaryStringBatchKernel<AsciiUpper>("ascii_upper", registry, &ascii_upper_doc,
- MemAllocation::NO_PREALLOCATE);
- MakeUnaryStringBatchKernel<AsciiLower>("ascii_lower", registry, &ascii_lower_doc,
- MemAllocation::NO_PREALLOCATE);
- MakeUnaryStringBatchKernel<AsciiTrimWhitespace>("ascii_trim_whitespace", registry,
- &ascii_trim_whitespace_doc);
- MakeUnaryStringBatchKernel<AsciiLTrimWhitespace>("ascii_ltrim_whitespace", registry,
- &ascii_ltrim_whitespace_doc);
- MakeUnaryStringBatchKernel<AsciiRTrimWhitespace>("ascii_rtrim_whitespace", registry,
- &ascii_rtrim_whitespace_doc);
- MakeUnaryStringBatchKernel<AsciiReverse>("ascii_reverse", registry, &ascii_reverse_doc);
- MakeUnaryStringBatchKernel<Utf8Reverse>("utf8_reverse", registry, &utf8_reverse_doc);
-
- MakeUnaryStringBatchKernelWithState<AsciiCenter>("ascii_center", registry,
- &ascii_center_doc);
- MakeUnaryStringBatchKernelWithState<AsciiLPad>("ascii_lpad", registry, &ascii_lpad_doc);
- MakeUnaryStringBatchKernelWithState<AsciiRPad>("ascii_rpad", registry, &ascii_rpad_doc);
- MakeUnaryStringBatchKernelWithState<Utf8Center>("utf8_center", registry,
- &utf8_center_doc);
- MakeUnaryStringBatchKernelWithState<Utf8LPad>("utf8_lpad", registry, &utf8_lpad_doc);
- MakeUnaryStringBatchKernelWithState<Utf8RPad>("utf8_rpad", registry, &utf8_rpad_doc);
-
- MakeUnaryStringBatchKernelWithState<AsciiTrim>("ascii_trim", registry, &ascii_trim_doc);
- MakeUnaryStringBatchKernelWithState<AsciiLTrim>("ascii_ltrim", registry,
- &ascii_ltrim_doc);
- MakeUnaryStringBatchKernelWithState<AsciiRTrim>("ascii_rtrim", registry,
- &ascii_rtrim_doc);
-
- AddUnaryStringPredicate<IsAscii>("string_is_ascii", registry, &string_is_ascii_doc);
-
- AddUnaryStringPredicate<IsAlphaNumericAscii>("ascii_is_alnum", registry,
- &ascii_is_alnum_doc);
- AddUnaryStringPredicate<IsAlphaAscii>("ascii_is_alpha", registry, &ascii_is_alpha_doc);
- AddUnaryStringPredicate<IsDecimalAscii>("ascii_is_decimal", registry,
- &ascii_is_decimal_doc);
+ // ascii_upper and ascii_lower are able to reuse the original offsets buffer,
+ // so don't preallocate them in the output.
+ MakeUnaryStringBatchKernel<AsciiUpper>("ascii_upper", registry, &ascii_upper_doc,
+ MemAllocation::NO_PREALLOCATE);
+ MakeUnaryStringBatchKernel<AsciiLower>("ascii_lower", registry, &ascii_lower_doc,
+ MemAllocation::NO_PREALLOCATE);
+ MakeUnaryStringBatchKernel<AsciiTrimWhitespace>("ascii_trim_whitespace", registry,
+ &ascii_trim_whitespace_doc);
+ MakeUnaryStringBatchKernel<AsciiLTrimWhitespace>("ascii_ltrim_whitespace", registry,
+ &ascii_ltrim_whitespace_doc);
+ MakeUnaryStringBatchKernel<AsciiRTrimWhitespace>("ascii_rtrim_whitespace", registry,
+ &ascii_rtrim_whitespace_doc);
+ MakeUnaryStringBatchKernel<AsciiReverse>("ascii_reverse", registry, &ascii_reverse_doc);
+ MakeUnaryStringBatchKernel<Utf8Reverse>("utf8_reverse", registry, &utf8_reverse_doc);
+
+ MakeUnaryStringBatchKernelWithState<AsciiCenter>("ascii_center", registry,
+ &ascii_center_doc);
+ MakeUnaryStringBatchKernelWithState<AsciiLPad>("ascii_lpad", registry, &ascii_lpad_doc);
+ MakeUnaryStringBatchKernelWithState<AsciiRPad>("ascii_rpad", registry, &ascii_rpad_doc);
+ MakeUnaryStringBatchKernelWithState<Utf8Center>("utf8_center", registry,
+ &utf8_center_doc);
+ MakeUnaryStringBatchKernelWithState<Utf8LPad>("utf8_lpad", registry, &utf8_lpad_doc);
+ MakeUnaryStringBatchKernelWithState<Utf8RPad>("utf8_rpad", registry, &utf8_rpad_doc);
+
+ MakeUnaryStringBatchKernelWithState<AsciiTrim>("ascii_trim", registry, &ascii_trim_doc);
+ MakeUnaryStringBatchKernelWithState<AsciiLTrim>("ascii_ltrim", registry,
+ &ascii_ltrim_doc);
+ MakeUnaryStringBatchKernelWithState<AsciiRTrim>("ascii_rtrim", registry,
+ &ascii_rtrim_doc);
+
+ AddUnaryStringPredicate<IsAscii>("string_is_ascii", registry, &string_is_ascii_doc);
+
+ AddUnaryStringPredicate<IsAlphaNumericAscii>("ascii_is_alnum", registry,
+ &ascii_is_alnum_doc);
+ AddUnaryStringPredicate<IsAlphaAscii>("ascii_is_alpha", registry, &ascii_is_alpha_doc);
+ AddUnaryStringPredicate<IsDecimalAscii>("ascii_is_decimal", registry,
+ &ascii_is_decimal_doc);
// no is_digit for ascii, since it is the same as is_decimal
- AddUnaryStringPredicate<IsLowerAscii>("ascii_is_lower", registry, &ascii_is_lower_doc);
+ AddUnaryStringPredicate<IsLowerAscii>("ascii_is_lower", registry, &ascii_is_lower_doc);
// no is_numeric for ascii, since it is the same as is_decimal
- AddUnaryStringPredicate<IsPrintableAscii>("ascii_is_printable", registry,
- &ascii_is_printable_doc);
- AddUnaryStringPredicate<IsSpaceAscii>("ascii_is_space", registry, &ascii_is_space_doc);
- AddUnaryStringPredicate<IsTitleAscii>("ascii_is_title", registry, &ascii_is_title_doc);
- AddUnaryStringPredicate<IsUpperAscii>("ascii_is_upper", registry, &ascii_is_upper_doc);
+ AddUnaryStringPredicate<IsPrintableAscii>("ascii_is_printable", registry,
+ &ascii_is_printable_doc);
+ AddUnaryStringPredicate<IsSpaceAscii>("ascii_is_space", registry, &ascii_is_space_doc);
+ AddUnaryStringPredicate<IsTitleAscii>("ascii_is_title", registry, &ascii_is_title_doc);
+ AddUnaryStringPredicate<IsUpperAscii>("ascii_is_upper", registry, &ascii_is_upper_doc);
#ifdef ARROW_WITH_UTF8PROC
- MakeUnaryStringUTF8TransformKernel<UTF8Upper>("utf8_upper", registry, &utf8_upper_doc);
- MakeUnaryStringUTF8TransformKernel<UTF8Lower>("utf8_lower", registry, &utf8_lower_doc);
- MakeUnaryStringBatchKernel<UTF8TrimWhitespace>("utf8_trim_whitespace", registry,
- &utf8_trim_whitespace_doc);
- MakeUnaryStringBatchKernel<UTF8LTrimWhitespace>("utf8_ltrim_whitespace", registry,
- &utf8_ltrim_whitespace_doc);
- MakeUnaryStringBatchKernel<UTF8RTrimWhitespace>("utf8_rtrim_whitespace", registry,
- &utf8_rtrim_whitespace_doc);
- MakeUnaryStringBatchKernelWithState<UTF8Trim>("utf8_trim", registry, &utf8_trim_doc);
- MakeUnaryStringBatchKernelWithState<UTF8LTrim>("utf8_ltrim", registry, &utf8_ltrim_doc);
- MakeUnaryStringBatchKernelWithState<UTF8RTrim>("utf8_rtrim", registry, &utf8_rtrim_doc);
-
- AddUnaryStringPredicate<IsAlphaNumericUnicode>("utf8_is_alnum", registry,
- &utf8_is_alnum_doc);
- AddUnaryStringPredicate<IsAlphaUnicode>("utf8_is_alpha", registry, &utf8_is_alpha_doc);
- AddUnaryStringPredicate<IsDecimalUnicode>("utf8_is_decimal", registry,
- &utf8_is_decimal_doc);
- AddUnaryStringPredicate<IsDigitUnicode>("utf8_is_digit", registry, &utf8_is_digit_doc);
- AddUnaryStringPredicate<IsLowerUnicode>("utf8_is_lower", registry, &utf8_is_lower_doc);
- AddUnaryStringPredicate<IsNumericUnicode>("utf8_is_numeric", registry,
- &utf8_is_numeric_doc);
- AddUnaryStringPredicate<IsPrintableUnicode>("utf8_is_printable", registry,
- &utf8_is_printable_doc);
- AddUnaryStringPredicate<IsSpaceUnicode>("utf8_is_space", registry, &utf8_is_space_doc);
- AddUnaryStringPredicate<IsTitleUnicode>("utf8_is_title", registry, &utf8_is_title_doc);
- AddUnaryStringPredicate<IsUpperUnicode>("utf8_is_upper", registry, &utf8_is_upper_doc);
+ MakeUnaryStringUTF8TransformKernel<UTF8Upper>("utf8_upper", registry, &utf8_upper_doc);
+ MakeUnaryStringUTF8TransformKernel<UTF8Lower>("utf8_lower", registry, &utf8_lower_doc);
+ MakeUnaryStringBatchKernel<UTF8TrimWhitespace>("utf8_trim_whitespace", registry,
+ &utf8_trim_whitespace_doc);
+ MakeUnaryStringBatchKernel<UTF8LTrimWhitespace>("utf8_ltrim_whitespace", registry,
+ &utf8_ltrim_whitespace_doc);
+ MakeUnaryStringBatchKernel<UTF8RTrimWhitespace>("utf8_rtrim_whitespace", registry,
+ &utf8_rtrim_whitespace_doc);
+ MakeUnaryStringBatchKernelWithState<UTF8Trim>("utf8_trim", registry, &utf8_trim_doc);
+ MakeUnaryStringBatchKernelWithState<UTF8LTrim>("utf8_ltrim", registry, &utf8_ltrim_doc);
+ MakeUnaryStringBatchKernelWithState<UTF8RTrim>("utf8_rtrim", registry, &utf8_rtrim_doc);
+
+ AddUnaryStringPredicate<IsAlphaNumericUnicode>("utf8_is_alnum", registry,
+ &utf8_is_alnum_doc);
+ AddUnaryStringPredicate<IsAlphaUnicode>("utf8_is_alpha", registry, &utf8_is_alpha_doc);
+ AddUnaryStringPredicate<IsDecimalUnicode>("utf8_is_decimal", registry,
+ &utf8_is_decimal_doc);
+ AddUnaryStringPredicate<IsDigitUnicode>("utf8_is_digit", registry, &utf8_is_digit_doc);
+ AddUnaryStringPredicate<IsLowerUnicode>("utf8_is_lower", registry, &utf8_is_lower_doc);
+ AddUnaryStringPredicate<IsNumericUnicode>("utf8_is_numeric", registry,
+ &utf8_is_numeric_doc);
+ AddUnaryStringPredicate<IsPrintableUnicode>("utf8_is_printable", registry,
+ &utf8_is_printable_doc);
+ AddUnaryStringPredicate<IsSpaceUnicode>("utf8_is_space", registry, &utf8_is_space_doc);
+ AddUnaryStringPredicate<IsTitleUnicode>("utf8_is_title", registry, &utf8_is_title_doc);
+ AddUnaryStringPredicate<IsUpperUnicode>("utf8_is_upper", registry, &utf8_is_upper_doc);
#endif
AddBinaryLength(registry);
- AddUtf8Length(registry);
+ AddUtf8Length(registry);
AddMatchSubstring(registry);
- AddFindSubstring(registry);
- AddCountSubstring(registry);
- MakeUnaryStringBatchKernelWithState<ReplaceSubStringPlain>(
- "replace_substring", registry, &replace_substring_doc,
- MemAllocation::NO_PREALLOCATE);
-#ifdef ARROW_WITH_RE2
- MakeUnaryStringBatchKernelWithState<ReplaceSubStringRegex>(
- "replace_substring_regex", registry, &replace_substring_regex_doc,
- MemAllocation::NO_PREALLOCATE);
- AddExtractRegex(registry);
-#endif
- AddReplaceSlice(registry);
- AddSlice(registry);
- AddSplit(registry);
+ AddFindSubstring(registry);
+ AddCountSubstring(registry);
+ MakeUnaryStringBatchKernelWithState<ReplaceSubStringPlain>(
+ "replace_substring", registry, &replace_substring_doc,
+ MemAllocation::NO_PREALLOCATE);
+#ifdef ARROW_WITH_RE2
+ MakeUnaryStringBatchKernelWithState<ReplaceSubStringRegex>(
+ "replace_substring_regex", registry, &replace_substring_regex_doc,
+ MemAllocation::NO_PREALLOCATE);
+ AddExtractRegex(registry);
+#endif
+ AddReplaceSlice(registry);
+ AddSlice(registry);
+ AddSplit(registry);
AddStrptime(registry);
- AddBinaryJoin(registry);
+ AddBinaryJoin(registry);
}
} // namespace internal
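// ---- Editorial sketch (not part of this diff) ------------------------------
// End-to-end use of two kernels registered above, assuming the default
// registry and the public compute API. Dispatch picks the utf8 or large_utf8
// kernel from the input type automatically.
arrow::Result<arrow::Datum> UpperThenClassify(const arrow::Datum& strings) {
  ARROW_ASSIGN_OR_RAISE(auto upper,
                        arrow::compute::CallFunction("ascii_upper", {strings}));
  return arrow::compute::CallFunction("string_is_ascii", {upper});
}
// ----------------------------------------------------------------------------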
diff --git a/contrib/libs/apache/arrow/cpp/src/arrow/compute/kernels/scalar_temporal.cc b/contrib/libs/apache/arrow/cpp/src/arrow/compute/kernels/scalar_temporal.cc
index e9375664a90..f0257772d4a 100644
--- a/contrib/libs/apache/arrow/cpp/src/arrow/compute/kernels/scalar_temporal.cc
+++ b/contrib/libs/apache/arrow/cpp/src/arrow/compute/kernels/scalar_temporal.cc
@@ -1,663 +1,663 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-#include "arrow/builder.h"
-#include "arrow/compute/api_scalar.h"
-#include "arrow/compute/kernels/common.h"
-#include "arrow/util/checked_cast.h"
-#include "arrow/util/time.h"
-#include "arrow/vendored/datetime.h"
-
-namespace arrow {
-
-using internal::checked_cast;
-using internal::checked_pointer_cast;
-
-namespace compute {
-namespace internal {
-
-namespace {
-
-using arrow_vendored::date::days;
-using arrow_vendored::date::floor;
-using arrow_vendored::date::hh_mm_ss;
-using arrow_vendored::date::sys_time;
-using arrow_vendored::date::trunc;
-using arrow_vendored::date::weekday;
-using arrow_vendored::date::weeks;
-using arrow_vendored::date::year_month_day;
-using arrow_vendored::date::years;
-using arrow_vendored::date::literals::dec;
-using arrow_vendored::date::literals::jan;
-using arrow_vendored::date::literals::last;
-using arrow_vendored::date::literals::mon;
-using arrow_vendored::date::literals::thu;
-using internal::applicator::ScalarUnaryNotNull;
-using internal::applicator::SimpleUnary;
-
-using DayOfWeekState = OptionsWrapper<DayOfWeekOptions>;
-
-const std::string& GetInputTimezone(const Datum& datum) {
- return checked_cast<const TimestampType&>(*datum.type()).timezone();
-}
-
-const std::string& GetInputTimezone(const Scalar& scalar) {
- return checked_cast<const TimestampType&>(*scalar.type).timezone();
-}
-
-const std::string& GetInputTimezone(const ArrayData& array) {
- return checked_cast<const TimestampType&>(*array.type).timezone();
-}
-
-template <typename T>
-Status TemporalComponentExtractCheckTimezone(const T& input) {
- const auto& timezone = GetInputTimezone(input);
- if (!timezone.empty()) {
- return Status::NotImplemented(
- "Cannot extract components from timestamp with specific timezone: ", timezone);
- }
- return Status::OK();
-}
-
-template <typename Op, typename OutType>
-struct TemporalComponentExtract {
- using OutValue = typename internal::GetOutputType<OutType>::T;
-
- static Status Exec(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
- RETURN_NOT_OK(TemporalComponentExtractCheckTimezone(batch.values[0]));
- return ScalarUnaryNotNull<OutType, TimestampType, Op>::Exec(ctx, batch, out);
- }
-};
-
-template <typename Op, typename OutType>
-struct DayOfWeekExec {
- using OutValue = typename internal::GetOutputType<OutType>::T;
-
- static Status Exec(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
- const DayOfWeekOptions& options = DayOfWeekState::Get(ctx);
- if (options.week_start < 1 || 7 < options.week_start) {
- return Status::Invalid(
- "week_start must follow ISO convention (Monday=1, Sunday=7). Got week_start=",
- options.week_start);
- }
-
- RETURN_NOT_OK(TemporalComponentExtractCheckTimezone(batch.values[0]));
- applicator::ScalarUnaryNotNullStateful<OutType, TimestampType, Op> kernel{
- Op(options)};
- return kernel.Exec(ctx, batch, out);
- }
-};
-
-// ----------------------------------------------------------------------
-// Extract year from timestamp
-
-template <typename Duration>
-struct Year {
- template <typename T, typename Arg0>
- static T Call(KernelContext*, Arg0 arg, Status*) {
- return static_cast<T>(static_cast<const int32_t>(
- year_month_day(floor<days>(sys_time<Duration>(Duration{arg}))).year()));
- }
-};
-
-// ----------------------------------------------------------------------
-// Extract month from timestamp
-
-template <typename Duration>
-struct Month {
- template <typename T, typename Arg0>
- static T Call(KernelContext*, Arg0 arg, Status*) {
- return static_cast<T>(static_cast<const uint32_t>(
- year_month_day(floor<days>(sys_time<Duration>(Duration{arg}))).month()));
- }
-};
-
-// ----------------------------------------------------------------------
-// Extract day from timestamp
-
-template <typename Duration>
-struct Day {
- template <typename T, typename Arg0>
- static T Call(KernelContext*, Arg0 arg, Status*) {
- return static_cast<T>(static_cast<const uint32_t>(
- year_month_day(floor<days>(sys_time<Duration>(Duration{arg}))).day()));
- }
-};
-
-// ----------------------------------------------------------------------
-// Extract day of week from timestamp
-//
-// By default week starts on Monday represented by 0 and ends on Sunday represented
-// by 6. Start day of the week (Monday=1, Sunday=7) and numbering start (0 or 1) can be
-// set using DayOfWeekOptions
-
-template <typename Duration>
-struct DayOfWeek {
- explicit DayOfWeek(const DayOfWeekOptions& options) {
- for (int i = 0; i < 7; i++) {
- lookup_table[i] = i + 8 - options.week_start;
- lookup_table[i] = (lookup_table[i] > 6) ? lookup_table[i] - 7 : lookup_table[i];
- lookup_table[i] += options.one_based_numbering;
- }
- }
-
- template <typename T, typename Arg0>
- T Call(KernelContext*, Arg0 arg, Status*) const {
- const auto wd = arrow_vendored::date::year_month_weekday(
- floor<days>(sys_time<Duration>(Duration{arg})))
- .weekday()
- .iso_encoding();
- return lookup_table[wd - 1];
- }
- std::array<int64_t, 7> lookup_table;
-};
-
-// ----------------------------------------------------------------------
-// Extract day of year from timestamp
-
-template <typename Duration>
-struct DayOfYear {
- template <typename T, typename Arg0>
- static T Call(KernelContext*, Arg0 arg, Status*) {
- const auto t = floor<days>(sys_time<Duration>(Duration{arg}));
- return static_cast<T>(
- (t - sys_time<days>(year_month_day(t).year() / jan / 0)).count());
- }
-};
-
-// ----------------------------------------------------------------------
-// Extract ISO Year values from timestamp
-//
-// First week of an ISO year has the majority (4 or more) of its days in January.
-// Last week of an ISO year has the year's last Thursday in it.
-
-template <typename Duration>
-struct ISOYear {
- template <typename T, typename Arg0>
- static T Call(KernelContext*, Arg0 arg, Status*) {
- const auto t = floor<days>(sys_time<Duration>(Duration{arg}));
- auto y = year_month_day{t + days{3}}.year();
- auto start = sys_time<days>((y - years{1}) / dec / thu[last]) + (mon - thu);
- if (t < start) {
- --y;
- }
- return static_cast<T>(static_cast<int32_t>(y));
- }
-};
-
-// ----------------------------------------------------------------------
-// Extract ISO week from timestamp
-//
-// First week of an ISO year has the majority (4 or more) of its days in January.
-// Last week of an ISO year has the year's last Thursday in it.
-// Based on
-// https://github.com/HowardHinnant/date/blob/6e921e1b1d21e84a5c82416ba7ecd98e33a436d0/include/date/iso_week.h#L1503
-template <typename Duration>
-struct ISOWeek {
- template <typename T, typename Arg0>
- static T Call(KernelContext*, Arg0 arg, Status*) {
- const auto t = floor<days>(sys_time<Duration>(Duration{arg}));
- auto y = year_month_day{t + days{3}}.year();
- auto start = sys_time<days>((y - years{1}) / dec / thu[last]) + (mon - thu);
- if (t < start) {
- --y;
- start = sys_time<days>((y - years{1}) / dec / thu[last]) + (mon - thu);
- }
- return static_cast<T>(trunc<weeks>(t - start).count() + 1);
- }
-};
-
-// ----------------------------------------------------------------------
-// Extract quarter from timestamp
-
-template <typename Duration>
-struct Quarter {
- template <typename T, typename Arg0>
- static T Call(KernelContext*, Arg0 arg, Status*) {
- const auto ymd = year_month_day(floor<days>(sys_time<Duration>(Duration{arg})));
- return static_cast<T>((static_cast<const uint32_t>(ymd.month()) - 1) / 3 + 1);
- }
-};
-
-// ----------------------------------------------------------------------
-// Extract hour from timestamp
-
-template <typename Duration>
-struct Hour {
- template <typename T, typename Arg0>
- static T Call(KernelContext*, Arg0 arg, Status*) {
- Duration t = Duration{arg};
- return static_cast<T>((t - floor<days>(t)) / std::chrono::hours(1));
- }
-};
-
-// ----------------------------------------------------------------------
-// Extract minute from timestamp
-
-template <typename Duration>
-struct Minute {
- template <typename T, typename Arg0>
- static T Call(KernelContext*, Arg0 arg, Status*) {
- Duration t = Duration{arg};
- return static_cast<T>((t - floor<std::chrono::hours>(t)) / std::chrono::minutes(1));
- }
-};
-
-// ----------------------------------------------------------------------
-// Extract second from timestamp
-
-template <typename Duration>
-struct Second {
- template <typename T, typename Arg0>
- static T Call(KernelContext*, Arg0 arg, Status*) {
- Duration t = Duration{arg};
- return static_cast<T>((t - floor<std::chrono::minutes>(t)) / std::chrono::seconds(1));
- }
-};
-
-// ----------------------------------------------------------------------
-// Extract subsecond from timestamp
-
-template <typename Duration>
-struct Subsecond {
- template <typename T, typename Arg0>
- static T Call(KernelContext*, Arg0 arg, Status*) {
- Duration t = Duration{arg};
- return static_cast<T>(
- (std::chrono::duration<double>(t - floor<std::chrono::seconds>(t)).count()));
- }
-};
-
-// ----------------------------------------------------------------------
-// Extract milliseconds from timestamp
-
-template <typename Duration>
-struct Millisecond {
- template <typename T, typename Arg0>
- static T Call(KernelContext*, Arg0 arg, Status*) {
- Duration t = Duration{arg};
- return static_cast<T>(
- ((t - floor<std::chrono::seconds>(t)) / std::chrono::milliseconds(1)) % 1000);
- }
-};
-
-// ----------------------------------------------------------------------
-// Extract microseconds from timestamp
-
-template <typename Duration>
-struct Microsecond {
- template <typename T, typename Arg0>
- static T Call(KernelContext*, Arg0 arg, Status*) {
- Duration t = Duration{arg};
- return static_cast<T>(
- ((t - floor<std::chrono::seconds>(t)) / std::chrono::microseconds(1)) % 1000);
- }
-};
-
-// ----------------------------------------------------------------------
-// Extract nanoseconds from timestamp
-
-template <typename Duration>
-struct Nanosecond {
- template <typename T, typename Arg0>
- static T Call(KernelContext*, Arg0 arg, Status*) {
- Duration t = Duration{arg};
- return static_cast<T>(
- ((t - floor<std::chrono::seconds>(t)) / std::chrono::nanoseconds(1)) % 1000);
- }
-};
-
-template <typename Duration>
-inline std::vector<int64_t> get_iso_calendar(int64_t arg) {
- const auto t = floor<days>(sys_time<Duration>(Duration{arg}));
- const auto ymd = year_month_day(t);
- auto y = year_month_day{t + days{3}}.year();
- auto start = sys_time<days>((y - years{1}) / dec / thu[last]) + (mon - thu);
- if (t < start) {
- --y;
- start = sys_time<days>((y - years{1}) / dec / thu[last]) + (mon - thu);
- }
- return {static_cast<int64_t>(static_cast<int32_t>(y)),
- static_cast<int64_t>(trunc<weeks>(t - start).count() + 1),
- static_cast<int64_t>(weekday(ymd).iso_encoding())};
-}
-
-// ----------------------------------------------------------------------
-// Extract ISO calendar values from timestamp
-
-template <typename Duration>
-struct ISOCalendar {
- static Status Call(KernelContext* ctx, const Scalar& in, Scalar* out) {
- RETURN_NOT_OK(TemporalComponentExtractCheckTimezone(in));
- if (in.is_valid) {
- const std::shared_ptr<DataType> iso_calendar_type =
- struct_({field("iso_year", int64()), field("iso_week", int64()),
- field("iso_day_of_week", int64())});
- const auto& in_val = internal::UnboxScalar<const TimestampType>::Unbox(in);
- const auto iso_calendar = get_iso_calendar<Duration>(in_val);
-
- std::vector<std::shared_ptr<Scalar>> values = {
- std::make_shared<Int64Scalar>(iso_calendar[0]),
- std::make_shared<Int64Scalar>(iso_calendar[1]),
- std::make_shared<Int64Scalar>(iso_calendar[2])};
- *checked_cast<StructScalar*>(out) = StructScalar(values, iso_calendar_type);
- } else {
- out->is_valid = false;
- }
- return Status::OK();
- }
-
- static Status Call(KernelContext* ctx, const ArrayData& in, ArrayData* out) {
- using BuilderType = typename TypeTraits<Int64Type>::BuilderType;
-
- RETURN_NOT_OK(TemporalComponentExtractCheckTimezone(in));
- const std::shared_ptr<DataType> iso_calendar_type =
- struct_({field("iso_year", int64()), field("iso_week", int64()),
- field("iso_day_of_week", int64())});
-
- std::unique_ptr<ArrayBuilder> array_builder;
- RETURN_NOT_OK(MakeBuilder(ctx->memory_pool(), iso_calendar_type, &array_builder));
- StructBuilder* struct_builder = checked_cast<StructBuilder*>(array_builder.get());
- RETURN_NOT_OK(struct_builder->Reserve(in.length));
-
- std::vector<BuilderType*> field_builders;
- field_builders.reserve(3);
- for (int i = 0; i < 3; i++) {
- field_builders.push_back(
- checked_cast<BuilderType*>(struct_builder->field_builder(i)));
- RETURN_NOT_OK(field_builders[i]->Reserve(1));
- }
- auto visit_null = [&]() { return struct_builder->AppendNull(); };
- auto visit_value = [&](int64_t arg) {
- const auto iso_calendar = get_iso_calendar<Duration>(arg);
- field_builders[0]->UnsafeAppend(iso_calendar[0]);
- field_builders[1]->UnsafeAppend(iso_calendar[1]);
- field_builders[2]->UnsafeAppend(iso_calendar[2]);
- return struct_builder->Append();
- };
- RETURN_NOT_OK(VisitArrayDataInline<Int64Type>(in, visit_value, visit_null));
-
- std::shared_ptr<Array> out_array;
- RETURN_NOT_OK(struct_builder->Finish(&out_array));
- *out = *std::move(out_array->data());
-
- return Status::OK();
- }
-};
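// ---- Editorial sketch (not part of this diff) ------------------------------
// Invoking the struct-returning kernel above; `timestamps` stands for any
// timestamp array without a timezone. The result is a
// struct<iso_year: int64, iso_week: int64, iso_day_of_week: int64>.
arrow::Result<arrow::Datum> IsoCalendarOf(const arrow::Datum& timestamps) {
  return arrow::compute::CallFunction("iso_calendar", {timestamps});
}
// ----------------------------------------------------------------------------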
-
-template <template <typename...> class Op, typename OutType>
-std::shared_ptr<ScalarFunction> MakeTemporal(std::string name, const FunctionDoc* doc) {
- const auto& out_type = TypeTraits<OutType>::type_singleton();
- auto func = std::make_shared<ScalarFunction>(name, Arity::Unary(), doc);
-
- for (auto unit : internal::AllTimeUnits()) {
- InputType in_type{match::TimestampTypeUnit(unit)};
- switch (unit) {
- case TimeUnit::SECOND: {
- auto exec = TemporalComponentExtract<Op<std::chrono::seconds>, OutType>::Exec;
- DCHECK_OK(func->AddKernel({in_type}, out_type, std::move(exec)));
- break;
- }
- case TimeUnit::MILLI: {
- auto exec =
- TemporalComponentExtract<Op<std::chrono::milliseconds>, OutType>::Exec;
- DCHECK_OK(func->AddKernel({in_type}, out_type, std::move(exec)));
- break;
- }
- case TimeUnit::MICRO: {
- auto exec =
- TemporalComponentExtract<Op<std::chrono::microseconds>, OutType>::Exec;
- DCHECK_OK(func->AddKernel({in_type}, out_type, std::move(exec)));
- break;
- }
- case TimeUnit::NANO: {
- auto exec = TemporalComponentExtract<Op<std::chrono::nanoseconds>, OutType>::Exec;
- DCHECK_OK(func->AddKernel({in_type}, out_type, std::move(exec)));
- break;
- }
- }
- }
- return func;
-}
-
-template <template <typename...> class Op, typename OutType>
-std::shared_ptr<ScalarFunction> MakeTemporalWithOptions(
- std::string name, const FunctionDoc* doc, const DayOfWeekOptions& default_options,
- KernelInit init) {
- const auto& out_type = TypeTraits<OutType>::type_singleton();
- auto func =
- std::make_shared<ScalarFunction>(name, Arity::Unary(), doc, &default_options);
-
- for (auto unit : internal::AllTimeUnits()) {
- InputType in_type{match::TimestampTypeUnit(unit)};
- switch (unit) {
- case TimeUnit::SECOND: {
- auto exec = DayOfWeekExec<Op<std::chrono::seconds>, OutType>::Exec;
- DCHECK_OK(func->AddKernel({in_type}, out_type, std::move(exec), init));
- break;
- }
- case TimeUnit::MILLI: {
- auto exec = DayOfWeekExec<Op<std::chrono::milliseconds>, OutType>::Exec;
- DCHECK_OK(func->AddKernel({in_type}, out_type, std::move(exec), init));
- break;
- }
- case TimeUnit::MICRO: {
- auto exec = DayOfWeekExec<Op<std::chrono::microseconds>, OutType>::Exec;
- DCHECK_OK(func->AddKernel({in_type}, out_type, std::move(exec), init));
- break;
- }
- case TimeUnit::NANO: {
- auto exec = DayOfWeekExec<Op<std::chrono::nanoseconds>, OutType>::Exec;
- DCHECK_OK(func->AddKernel({in_type}, out_type, std::move(exec), init));
- break;
- }
- }
- }
- return func;
-}
-
-template <template <typename...> class Op>
-std::shared_ptr<ScalarFunction> MakeStructTemporal(std::string name,
- const FunctionDoc* doc) {
- const auto& out_type = struct_({field("iso_year", int64()), field("iso_week", int64()),
- field("iso_day_of_week", int64())});
- auto func = std::make_shared<ScalarFunction>(name, Arity::Unary(), doc);
-
- for (auto unit : internal::AllTimeUnits()) {
- InputType in_type{match::TimestampTypeUnit(unit)};
- switch (unit) {
- case TimeUnit::SECOND: {
- auto exec = SimpleUnary<Op<std::chrono::seconds>>;
- DCHECK_OK(func->AddKernel({in_type}, out_type, std::move(exec)));
- break;
- }
- case TimeUnit::MILLI: {
- auto exec = SimpleUnary<Op<std::chrono::milliseconds>>;
- DCHECK_OK(func->AddKernel({in_type}, out_type, std::move(exec)));
- break;
- }
- case TimeUnit::MICRO: {
- auto exec = SimpleUnary<Op<std::chrono::microseconds>>;
- DCHECK_OK(func->AddKernel({in_type}, out_type, std::move(exec)));
- break;
- }
- case TimeUnit::NANO: {
- auto exec = SimpleUnary<Op<std::chrono::nanoseconds>>;
- DCHECK_OK(func->AddKernel({in_type}, out_type, std::move(exec)));
- break;
- }
- }
- }
- return func;
-}
-
-const FunctionDoc year_doc{
- "Extract year from timestamp",
- "Returns an error if timestamp has a defined timezone. Null values return null.",
- {"values"}};
-
-const FunctionDoc month_doc{
- "Extract month number",
- ("Month is encoded as January=1, December=12.\n"
- "Returns an error if timestamp has a defined timezone. Null values return null."),
- {"values"}};
-
-const FunctionDoc day_doc{
- "Extract day number",
- "Returns an error if timestamp has a defined timezone. Null values return null.",
- {"values"}};
-
-const FunctionDoc day_of_week_doc{
- "Extract day of the week number",
- ("By default, the week starts on Monday represented by 0 and ends on Sunday "
- "represented by 6.\n"
- "DayOfWeekOptions.week_start can be used to set another starting day using ISO "
- "convention (Monday=1, Sunday=7). Day numbering can start with 0 or 1 using "
- "DayOfWeekOptions.one_based_numbering parameter.\n"
- "Returns an error if timestamp has a defined timezone. Null values return null."),
- {"values"},
- "DayOfWeekOptions"};
-
-const FunctionDoc day_of_year_doc{
- "Extract number of day of year",
- ("January 1st maps to day number 1, February 1st to 32, etc.\n"
- "Returns an error if timestamp has a defined timezone. Null values return null."),
- {"values"}};
-
-const FunctionDoc iso_year_doc{
- "Extract ISO year number",
- ("First week of an ISO year has the majority (4 or more) of its days in January."
- "Returns an error if timestamp has a defined timezone. Null values return null."),
- {"values"}};
-
-const FunctionDoc iso_week_doc{
- "Extract ISO week of year number",
- ("First ISO week has the majority (4 or more) of its days in January.\n"
- "Week of the year starts with 1 and can run up to 53.\n"
- "Returns an error if timestamp has a defined timezone. Null values return null."),
- {"values"}};
-
-const FunctionDoc iso_calendar_doc{
- "Extract (ISO year, ISO week, ISO day of week) struct",
- ("ISO week starts on Monday denoted by 1 and ends on Sunday denoted by 7.\n"
- "Returns an error if timestamp has a defined timezone. Null values return null."),
- {"values"}};
-
-const FunctionDoc quarter_doc{
- "Extract quarter of year number",
- ("First quarter maps to 1 and forth quarter maps to 4.\n"
- "Returns an error if timestamp has a defined timezone. Null values return null."),
- {"values"}};
-
-const FunctionDoc hour_doc{
- "Extract hour value",
- "Returns an error if timestamp has a defined timezone. Null values return null.",
- {"values"}};
-
-const FunctionDoc minute_doc{
- "Extract minute values",
- "Returns an error if timestamp has a defined timezone. Null values return null.",
- {"values"}};
-
-const FunctionDoc second_doc{
- "Extract second values",
- "Returns an error if timestamp has a defined timezone. Null values return null.",
- {"values"}};
-
-const FunctionDoc millisecond_doc{
- "Extract millisecond values",
- ("Millisecond returns number of milliseconds since the last full second.\n"
- "Returns an error if timestamp has a defined timezone. Null values return null."),
- {"values"}};
-
-const FunctionDoc microsecond_doc{
- "Extract microsecond values",
- ("Millisecond returns number of microseconds since the last full millisecond.\n"
- "Returns an error if timestamp has a defined timezone. Null values return null."),
- {"values"}};
-
-const FunctionDoc nanosecond_doc{
- "Extract nanosecond values",
- ("Nanosecond returns number of nanoseconds since the last full microsecond.\n"
- "Returns an error if timestamp has a defined timezone. Null values return null."),
- {"values"}};
-
-const FunctionDoc subsecond_doc{
- "Extract subsecond values",
- ("Subsecond returns the fraction of a second since the last full second.\n"
- "Returns an error if timestamp has a defined timezone. Null values return null."),
- {"values"}};
-
-} // namespace
-
-void RegisterScalarTemporal(FunctionRegistry* registry) {
- auto year = MakeTemporal<Year, Int64Type>("year", &year_doc);
- DCHECK_OK(registry->AddFunction(std::move(year)));
-
-  auto month = MakeTemporal<Month, Int64Type>("month", &month_doc);
- DCHECK_OK(registry->AddFunction(std::move(month)));
-
-  auto day = MakeTemporal<Day, Int64Type>("day", &day_doc);
- DCHECK_OK(registry->AddFunction(std::move(day)));
-
- static auto default_day_of_week_options = DayOfWeekOptions::Defaults();
- auto day_of_week = MakeTemporalWithOptions<DayOfWeek, Int64Type>(
- "day_of_week", &day_of_week_doc, default_day_of_week_options, DayOfWeekState::Init);
- DCHECK_OK(registry->AddFunction(std::move(day_of_week)));
-
- auto day_of_year = MakeTemporal<DayOfYear, Int64Type>("day_of_year", &day_of_year_doc);
- DCHECK_OK(registry->AddFunction(std::move(day_of_year)));
-
- auto iso_year = MakeTemporal<ISOYear, Int64Type>("iso_year", &iso_year_doc);
- DCHECK_OK(registry->AddFunction(std::move(iso_year)));
-
- auto iso_week = MakeTemporal<ISOWeek, Int64Type>("iso_week", &iso_week_doc);
- DCHECK_OK(registry->AddFunction(std::move(iso_week)));
-
- auto iso_calendar = MakeStructTemporal<ISOCalendar>("iso_calendar", &iso_calendar_doc);
- DCHECK_OK(registry->AddFunction(std::move(iso_calendar)));
-
- auto quarter = MakeTemporal<Quarter, Int64Type>("quarter", &quarter_doc);
- DCHECK_OK(registry->AddFunction(std::move(quarter)));
-
- auto hour = MakeTemporal<Hour, Int64Type>("hour", &hour_doc);
- DCHECK_OK(registry->AddFunction(std::move(hour)));
-
- auto minute = MakeTemporal<Minute, Int64Type>("minute", &minute_doc);
- DCHECK_OK(registry->AddFunction(std::move(minute)));
-
- auto second = MakeTemporal<Second, Int64Type>("second", &second_doc);
- DCHECK_OK(registry->AddFunction(std::move(second)));
-
- auto millisecond =
- MakeTemporal<Millisecond, Int64Type>("millisecond", &millisecond_doc);
- DCHECK_OK(registry->AddFunction(std::move(millisecond)));
-
- auto microsecond =
- MakeTemporal<Microsecond, Int64Type>("microsecond", &microsecond_doc);
- DCHECK_OK(registry->AddFunction(std::move(microsecond)));
-
- auto nanosecond = MakeTemporal<Nanosecond, Int64Type>("nanosecond", &nanosecond_doc);
- DCHECK_OK(registry->AddFunction(std::move(nanosecond)));
-
- auto subsecond = MakeTemporal<Subsecond, DoubleType>("subsecond", &subsecond_doc);
- DCHECK_OK(registry->AddFunction(std::move(subsecond)));
-}
-
-} // namespace internal
-} // namespace compute
-} // namespace arrow
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "arrow/builder.h"
+#include "arrow/compute/api_scalar.h"
+#include "arrow/compute/kernels/common.h"
+#include "arrow/util/checked_cast.h"
+#include "arrow/util/time.h"
+#include "arrow/vendored/datetime.h"
+
+namespace arrow {
+
+using internal::checked_cast;
+using internal::checked_pointer_cast;
+
+namespace compute {
+namespace internal {
+
+namespace {
+
+using arrow_vendored::date::days;
+using arrow_vendored::date::floor;
+using arrow_vendored::date::hh_mm_ss;
+using arrow_vendored::date::sys_time;
+using arrow_vendored::date::trunc;
+using arrow_vendored::date::weekday;
+using arrow_vendored::date::weeks;
+using arrow_vendored::date::year_month_day;
+using arrow_vendored::date::years;
+using arrow_vendored::date::literals::dec;
+using arrow_vendored::date::literals::jan;
+using arrow_vendored::date::literals::last;
+using arrow_vendored::date::literals::mon;
+using arrow_vendored::date::literals::thu;
+using internal::applicator::ScalarUnaryNotNull;
+using internal::applicator::SimpleUnary;
+
+using DayOfWeekState = OptionsWrapper<DayOfWeekOptions>;
+
+const std::string& GetInputTimezone(const Datum& datum) {
+ return checked_cast<const TimestampType&>(*datum.type()).timezone();
+}
+
+const std::string& GetInputTimezone(const Scalar& scalar) {
+ return checked_cast<const TimestampType&>(*scalar.type).timezone();
+}
+
+const std::string& GetInputTimezone(const ArrayData& array) {
+ return checked_cast<const TimestampType&>(*array.type).timezone();
+}
+
+template <typename T>
+Status TemporalComponentExtractCheckTimezone(const T& input) {
+ const auto& timezone = GetInputTimezone(input);
+ if (!timezone.empty()) {
+ return Status::NotImplemented(
+ "Cannot extract components from timestamp with specific timezone: ", timezone);
+ }
+ return Status::OK();
+}
+
+template <typename Op, typename OutType>
+struct TemporalComponentExtract {
+ using OutValue = typename internal::GetOutputType<OutType>::T;
+
+ static Status Exec(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
+ RETURN_NOT_OK(TemporalComponentExtractCheckTimezone(batch.values[0]));
+ return ScalarUnaryNotNull<OutType, TimestampType, Op>::Exec(ctx, batch, out);
+ }
+};
+
+template <typename Op, typename OutType>
+struct DayOfWeekExec {
+ using OutValue = typename internal::GetOutputType<OutType>::T;
+
+ static Status Exec(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
+ const DayOfWeekOptions& options = DayOfWeekState::Get(ctx);
+ if (options.week_start < 1 || 7 < options.week_start) {
+ return Status::Invalid(
+ "week_start must follow ISO convention (Monday=1, Sunday=7). Got week_start=",
+ options.week_start);
+ }
+
+ RETURN_NOT_OK(TemporalComponentExtractCheckTimezone(batch.values[0]));
+ applicator::ScalarUnaryNotNullStateful<OutType, TimestampType, Op> kernel{
+ Op(options)};
+ return kernel.Exec(ctx, batch, out);
+ }
+};
+
+// ----------------------------------------------------------------------
+// Extract year from timestamp
+
+template <typename Duration>
+struct Year {
+ template <typename T, typename Arg0>
+ static T Call(KernelContext*, Arg0 arg, Status*) {
+ return static_cast<T>(static_cast<const int32_t>(
+ year_month_day(floor<days>(sys_time<Duration>(Duration{arg}))).year()));
+ }
+};
+
+// ----------------------------------------------------------------------
+// Extract month from timestamp
+
+template <typename Duration>
+struct Month {
+ template <typename T, typename Arg0>
+ static T Call(KernelContext*, Arg0 arg, Status*) {
+ return static_cast<T>(static_cast<const uint32_t>(
+ year_month_day(floor<days>(sys_time<Duration>(Duration{arg}))).month()));
+ }
+};
+
+// ----------------------------------------------------------------------
+// Extract day from timestamp
+
+template <typename Duration>
+struct Day {
+ template <typename T, typename Arg0>
+ static T Call(KernelContext*, Arg0 arg, Status*) {
+ return static_cast<T>(static_cast<const uint32_t>(
+ year_month_day(floor<days>(sys_time<Duration>(Duration{arg}))).day()));
+ }
+};
+
+// ----------------------------------------------------------------------
+// Extract day of week from timestamp
+//
+// By default week starts on Monday represented by 0 and ends on Sunday represented
+// by 6. Start day of the week (Monday=1, Sunday=7) and numbering start (0 or 1) can be
+// set using DayOfWeekOptions
+
+template <typename Duration>
+struct DayOfWeek {
+ explicit DayOfWeek(const DayOfWeekOptions& options) {
+ for (int i = 0; i < 7; i++) {
+ lookup_table[i] = i + 8 - options.week_start;
+ lookup_table[i] = (lookup_table[i] > 6) ? lookup_table[i] - 7 : lookup_table[i];
+ lookup_table[i] += options.one_based_numbering;
+ }
+ }
+
+ template <typename T, typename Arg0>
+ T Call(KernelContext*, Arg0 arg, Status*) const {
+ const auto wd = arrow_vendored::date::year_month_weekday(
+ floor<days>(sys_time<Duration>(Duration{arg})))
+ .weekday()
+ .iso_encoding();
+ return lookup_table[wd - 1];
+ }
+ std::array<int64_t, 7> lookup_table;
+};
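+
+// For illustration (values follow from the constructor above): with the
+// defaults week_start=1 (Monday) and one_based_numbering=false the table is
+// {0, 1, 2, 3, 4, 5, 6}, i.e. Monday=0 .. Sunday=6; with week_start=7
+// (Sunday) and one_based_numbering=true it becomes {2, 3, 4, 5, 6, 7, 1},
+// i.e. Sunday=1, Monday=2 .. Saturday=7.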
+
+// ----------------------------------------------------------------------
+// Extract day of year from timestamp
+
+template <typename Duration>
+struct DayOfYear {
+ template <typename T, typename Arg0>
+ static T Call(KernelContext*, Arg0 arg, Status*) {
+ const auto t = floor<days>(sys_time<Duration>(Duration{arg}));
+ return static_cast<T>(
+ (t - sys_time<days>(year_month_day(t).year() / jan / 0)).count());
+ }
+};
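+
+// Note that `year / jan / 0` above denotes the day before January 1st, so
+// January 1st yields 1, February 1st yields 32, and December 31st yields 365
+// (366 in leap years).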
+
+// ----------------------------------------------------------------------
+// Extract ISO year values from timestamp
+//
+// The first week of an ISO year has the majority (4 or more) of its days in
+// January. The last week of an ISO year contains the year's last Thursday.
+
+template <typename Duration>
+struct ISOYear {
+ template <typename T, typename Arg0>
+ static T Call(KernelContext*, Arg0 arg, Status*) {
+ const auto t = floor<days>(sys_time<Duration>(Duration{arg}));
+ auto y = year_month_day{t + days{3}}.year();
+ auto start = sys_time<days>((y - years{1}) / dec / thu[last]) + (mon - thu);
+ if (t < start) {
+ --y;
+ }
+ return static_cast<T>(static_cast<int32_t>(y));
+ }
+};
+
+// ----------------------------------------------------------------------
+// Extract ISO week from timestamp
+//
+// The first week of an ISO year has the majority (4 or more) of its days in
+// January. The last week of an ISO year contains the year's last Thursday.
+// Based on
+// https://github.com/HowardHinnant/date/blob/6e921e1b1d21e84a5c82416ba7ecd98e33a436d0/include/date/iso_week.h#L1503
+template <typename Duration>
+struct ISOWeek {
+ template <typename T, typename Arg0>
+ static T Call(KernelContext*, Arg0 arg, Status*) {
+ const auto t = floor<days>(sys_time<Duration>(Duration{arg}));
+ auto y = year_month_day{t + days{3}}.year();
+ auto start = sys_time<days>((y - years{1}) / dec / thu[last]) + (mon - thu);
+ if (t < start) {
+ --y;
+ start = sys_time<days>((y - years{1}) / dec / thu[last]) + (mon - thu);
+ }
+ return static_cast<T>(trunc<weeks>(t - start).count() + 1);
+ }
+};
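+
+// Worked example (a sketch, not exercised here): for 2021-01-01, the Monday
+// derived from the last Thursday of December 2020 is 2021-01-04, so the date
+// falls before the start of ISO year 2021. ISOYear therefore yields 2020 and
+// ISOWeek yields 53 (ISO week 53 of 2020 runs 2020-12-28 .. 2021-01-03).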
+
+// ----------------------------------------------------------------------
+// Extract quarter from timestamp
+
+template <typename Duration>
+struct Quarter {
+ template <typename T, typename Arg0>
+ static T Call(KernelContext*, Arg0 arg, Status*) {
+ const auto ymd = year_month_day(floor<days>(sys_time<Duration>(Duration{arg})));
+ return static_cast<T>((static_cast<const uint32_t>(ymd.month()) - 1) / 3 + 1);
+ }
+};
+
+// ----------------------------------------------------------------------
+// Extract hour from timestamp
+
+template <typename Duration>
+struct Hour {
+ template <typename T, typename Arg0>
+ static T Call(KernelContext*, Arg0 arg, Status*) {
+ Duration t = Duration{arg};
+ return static_cast<T>((t - floor<days>(t)) / std::chrono::hours(1));
+ }
+};
+
+// ----------------------------------------------------------------------
+// Extract minute from timestamp
+
+template <typename Duration>
+struct Minute {
+ template <typename T, typename Arg0>
+ static T Call(KernelContext*, Arg0 arg, Status*) {
+ Duration t = Duration{arg};
+ return static_cast<T>((t - floor<std::chrono::hours>(t)) / std::chrono::minutes(1));
+ }
+};
+
+// ----------------------------------------------------------------------
+// Extract second from timestamp
+
+template <typename Duration>
+struct Second {
+ template <typename T, typename Arg0>
+ static T Call(KernelContext*, Arg0 arg, Status*) {
+ Duration t = Duration{arg};
+ return static_cast<T>((t - floor<std::chrono::minutes>(t)) / std::chrono::seconds(1));
+ }
+};
+
+// ----------------------------------------------------------------------
+// Extract subsecond from timestamp
+
+template <typename Duration>
+struct Subsecond {
+ template <typename T, typename Arg0>
+ static T Call(KernelContext*, Arg0 arg, Status*) {
+ Duration t = Duration{arg};
+ return static_cast<T>(
+ (std::chrono::duration<double>(t - floor<std::chrono::seconds>(t)).count()));
+ }
+};
+
+// ----------------------------------------------------------------------
+// Extract milliseconds from timestamp
+
+template <typename Duration>
+struct Millisecond {
+ template <typename T, typename Arg0>
+ static T Call(KernelContext*, Arg0 arg, Status*) {
+ Duration t = Duration{arg};
+ return static_cast<T>(
+ ((t - floor<std::chrono::seconds>(t)) / std::chrono::milliseconds(1)) % 1000);
+ }
+};
+
+// ----------------------------------------------------------------------
+// Extract microseconds from timestamp
+
+template <typename Duration>
+struct Microsecond {
+ template <typename T, typename Arg0>
+ static T Call(KernelContext*, Arg0 arg, Status*) {
+ Duration t = Duration{arg};
+ return static_cast<T>(
+ ((t - floor<std::chrono::seconds>(t)) / std::chrono::microseconds(1)) % 1000);
+ }
+};
+
+// ----------------------------------------------------------------------
+// Extract nanoseconds from timestamp
+
+template <typename Duration>
+struct Nanosecond {
+ template <typename T, typename Arg0>
+ static T Call(KernelContext*, Arg0 arg, Status*) {
+ Duration t = Duration{arg};
+ return static_cast<T>(
+ ((t - floor<std::chrono::seconds>(t)) / std::chrono::nanoseconds(1)) % 1000);
+ }
+};
+
+template <typename Duration>
+inline std::vector<int64_t> get_iso_calendar(int64_t arg) {
+ const auto t = floor<days>(sys_time<Duration>(Duration{arg}));
+ const auto ymd = year_month_day(t);
+ auto y = year_month_day{t + days{3}}.year();
+ auto start = sys_time<days>((y - years{1}) / dec / thu[last]) + (mon - thu);
+ if (t < start) {
+ --y;
+ start = sys_time<days>((y - years{1}) / dec / thu[last]) + (mon - thu);
+ }
+ return {static_cast<int64_t>(static_cast<int32_t>(y)),
+ static_cast<int64_t>(trunc<weeks>(t - start).count() + 1),
+ static_cast<int64_t>(weekday(ymd).iso_encoding())};
+}
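+
+// For example (illustrative only):
+//   get_iso_calendar<std::chrono::seconds>(1609459200)  // 2021-01-01T00:00:00Z
+// returns {2020, 53, 5}: ISO year 2020, ISO week 53, ISO weekday 5 (Friday).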
+
+// ----------------------------------------------------------------------
+// Extract ISO calendar values from timestamp
+
+template <typename Duration>
+struct ISOCalendar {
+ static Status Call(KernelContext* ctx, const Scalar& in, Scalar* out) {
+ RETURN_NOT_OK(TemporalComponentExtractCheckTimezone(in));
+ if (in.is_valid) {
+ const std::shared_ptr<DataType> iso_calendar_type =
+ struct_({field("iso_year", int64()), field("iso_week", int64()),
+ field("iso_day_of_week", int64())});
+ const auto& in_val = internal::UnboxScalar<const TimestampType>::Unbox(in);
+ const auto iso_calendar = get_iso_calendar<Duration>(in_val);
+
+ std::vector<std::shared_ptr<Scalar>> values = {
+ std::make_shared<Int64Scalar>(iso_calendar[0]),
+ std::make_shared<Int64Scalar>(iso_calendar[1]),
+ std::make_shared<Int64Scalar>(iso_calendar[2])};
+ *checked_cast<StructScalar*>(out) = StructScalar(values, iso_calendar_type);
+ } else {
+ out->is_valid = false;
+ }
+ return Status::OK();
+ }
+
+ static Status Call(KernelContext* ctx, const ArrayData& in, ArrayData* out) {
+ using BuilderType = typename TypeTraits<Int64Type>::BuilderType;
+
+ RETURN_NOT_OK(TemporalComponentExtractCheckTimezone(in));
+ const std::shared_ptr<DataType> iso_calendar_type =
+ struct_({field("iso_year", int64()), field("iso_week", int64()),
+ field("iso_day_of_week", int64())});
+
+ std::unique_ptr<ArrayBuilder> array_builder;
+ RETURN_NOT_OK(MakeBuilder(ctx->memory_pool(), iso_calendar_type, &array_builder));
+ StructBuilder* struct_builder = checked_cast<StructBuilder*>(array_builder.get());
+ RETURN_NOT_OK(struct_builder->Reserve(in.length));
+
+ std::vector<BuilderType*> field_builders;
+ field_builders.reserve(3);
+ for (int i = 0; i < 3; i++) {
+ field_builders.push_back(
+ checked_cast<BuilderType*>(struct_builder->field_builder(i)));
+ RETURN_NOT_OK(field_builders[i]->Reserve(1));
+ }
+ auto visit_null = [&]() { return struct_builder->AppendNull(); };
+ auto visit_value = [&](int64_t arg) {
+ const auto iso_calendar = get_iso_calendar<Duration>(arg);
+ field_builders[0]->UnsafeAppend(iso_calendar[0]);
+ field_builders[1]->UnsafeAppend(iso_calendar[1]);
+ field_builders[2]->UnsafeAppend(iso_calendar[2]);
+ return struct_builder->Append();
+ };
+ RETURN_NOT_OK(VisitArrayDataInline<Int64Type>(in, visit_value, visit_null));
+
+ std::shared_ptr<Array> out_array;
+ RETURN_NOT_OK(struct_builder->Finish(&out_array));
+ *out = *std::move(out_array->data());
+
+ return Status::OK();
+ }
+};
+
+template <template <typename...> class Op, typename OutType>
+std::shared_ptr<ScalarFunction> MakeTemporal(std::string name, const FunctionDoc* doc) {
+ const auto& out_type = TypeTraits<OutType>::type_singleton();
+ auto func = std::make_shared<ScalarFunction>(name, Arity::Unary(), doc);
+
+ for (auto unit : internal::AllTimeUnits()) {
+ InputType in_type{match::TimestampTypeUnit(unit)};
+ switch (unit) {
+ case TimeUnit::SECOND: {
+ auto exec = TemporalComponentExtract<Op<std::chrono::seconds>, OutType>::Exec;
+ DCHECK_OK(func->AddKernel({in_type}, out_type, std::move(exec)));
+ break;
+ }
+ case TimeUnit::MILLI: {
+ auto exec =
+ TemporalComponentExtract<Op<std::chrono::milliseconds>, OutType>::Exec;
+ DCHECK_OK(func->AddKernel({in_type}, out_type, std::move(exec)));
+ break;
+ }
+ case TimeUnit::MICRO: {
+ auto exec =
+ TemporalComponentExtract<Op<std::chrono::microseconds>, OutType>::Exec;
+ DCHECK_OK(func->AddKernel({in_type}, out_type, std::move(exec)));
+ break;
+ }
+ case TimeUnit::NANO: {
+ auto exec = TemporalComponentExtract<Op<std::chrono::nanoseconds>, OutType>::Exec;
+ DCHECK_OK(func->AddKernel({in_type}, out_type, std::move(exec)));
+ break;
+ }
+ }
+ }
+ return func;
+}
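+
+// In effect, a call such as MakeTemporal<Year, Int64Type>("year", &year_doc)
+// registers one function with four kernels, one per time unit: for instance,
+// timestamp(s) -> int64 backed by Year<std::chrono::seconds> and
+// timestamp(ms) -> int64 backed by Year<std::chrono::milliseconds>.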
+
+template <template <typename...> class Op, typename OutType>
+std::shared_ptr<ScalarFunction> MakeTemporalWithOptions(
+ std::string name, const FunctionDoc* doc, const DayOfWeekOptions& default_options,
+ KernelInit init) {
+ const auto& out_type = TypeTraits<OutType>::type_singleton();
+ auto func =
+ std::make_shared<ScalarFunction>(name, Arity::Unary(), doc, &default_options);
+
+ for (auto unit : internal::AllTimeUnits()) {
+ InputType in_type{match::TimestampTypeUnit(unit)};
+ switch (unit) {
+ case TimeUnit::SECOND: {
+ auto exec = DayOfWeekExec<Op<std::chrono::seconds>, OutType>::Exec;
+ DCHECK_OK(func->AddKernel({in_type}, out_type, std::move(exec), init));
+ break;
+ }
+ case TimeUnit::MILLI: {
+ auto exec = DayOfWeekExec<Op<std::chrono::milliseconds>, OutType>::Exec;
+ DCHECK_OK(func->AddKernel({in_type}, out_type, std::move(exec), init));
+ break;
+ }
+ case TimeUnit::MICRO: {
+ auto exec = DayOfWeekExec<Op<std::chrono::microseconds>, OutType>::Exec;
+ DCHECK_OK(func->AddKernel({in_type}, out_type, std::move(exec), init));
+ break;
+ }
+ case TimeUnit::NANO: {
+ auto exec = DayOfWeekExec<Op<std::chrono::nanoseconds>, OutType>::Exec;
+ DCHECK_OK(func->AddKernel({in_type}, out_type, std::move(exec), init));
+ break;
+ }
+ }
+ }
+ return func;
+}
+
+template <template <typename...> class Op>
+std::shared_ptr<ScalarFunction> MakeStructTemporal(std::string name,
+ const FunctionDoc* doc) {
+ const auto& out_type = struct_({field("iso_year", int64()), field("iso_week", int64()),
+ field("iso_day_of_week", int64())});
+ auto func = std::make_shared<ScalarFunction>(name, Arity::Unary(), doc);
+
+ for (auto unit : internal::AllTimeUnits()) {
+ InputType in_type{match::TimestampTypeUnit(unit)};
+ switch (unit) {
+ case TimeUnit::SECOND: {
+ auto exec = SimpleUnary<Op<std::chrono::seconds>>;
+ DCHECK_OK(func->AddKernel({in_type}, out_type, std::move(exec)));
+ break;
+ }
+ case TimeUnit::MILLI: {
+ auto exec = SimpleUnary<Op<std::chrono::milliseconds>>;
+ DCHECK_OK(func->AddKernel({in_type}, out_type, std::move(exec)));
+ break;
+ }
+ case TimeUnit::MICRO: {
+ auto exec = SimpleUnary<Op<std::chrono::microseconds>>;
+ DCHECK_OK(func->AddKernel({in_type}, out_type, std::move(exec)));
+ break;
+ }
+ case TimeUnit::NANO: {
+ auto exec = SimpleUnary<Op<std::chrono::nanoseconds>>;
+ DCHECK_OK(func->AddKernel({in_type}, out_type, std::move(exec)));
+ break;
+ }
+ }
+ }
+ return func;
+}
+
+const FunctionDoc year_doc{
+ "Extract year from timestamp",
+ "Returns an error if timestamp has a defined timezone. Null values return null.",
+ {"values"}};
+
+const FunctionDoc month_doc{
+ "Extract month number",
+ ("Month is encoded as January=1, December=12.\n"
+ "Returns an error if timestamp has a defined timezone. Null values return null."),
+ {"values"}};
+
+const FunctionDoc day_doc{
+ "Extract day number",
+ "Returns an error if timestamp has a defined timezone. Null values return null.",
+ {"values"}};
+
+const FunctionDoc day_of_week_doc{
+ "Extract day of the week number",
+ ("By default, the week starts on Monday represented by 0 and ends on Sunday "
+ "represented by 6.\n"
+ "DayOfWeekOptions.week_start can be used to set another starting day using the ISO "
+ "convention (Monday=1, Sunday=7). Day numbering can start with 0 or 1 using "
+ "the DayOfWeekOptions.one_based_numbering parameter.\n"
+ "Returns an error if timestamp has a defined timezone. Null values return null."),
+ {"values"},
+ "DayOfWeekOptions"};
+
+const FunctionDoc day_of_year_doc{
+ "Extract day of year number",
+ ("January 1st maps to day number 1, February 1st to 32, etc.\n"
+ "Returns an error if timestamp has a defined timezone. Null values return null."),
+ {"values"}};
+
+const FunctionDoc iso_year_doc{
+ "Extract ISO year number",
+ ("First week of an ISO year has the majority (4 or more) of its days in January.\n"
+ "Returns an error if timestamp has a defined timezone. Null values return null."),
+ {"values"}};
+
+const FunctionDoc iso_week_doc{
+ "Extract ISO week of year number",
+ ("First ISO week has the majority (4 or more) of its days in January.\n"
+ "Week of the year starts with 1 and can run up to 53.\n"
+ "Returns an error if timestamp has a defined timezone. Null values return null."),
+ {"values"}};
+
+const FunctionDoc iso_calendar_doc{
+ "Extract (ISO year, ISO week, ISO day of week) struct",
+ ("ISO week starts on Monday denoted by 1 and ends on Sunday denoted by 7.\n"
+ "Returns an error if timestamp has a defined timezone. Null values return null."),
+ {"values"}};
+
+const FunctionDoc quarter_doc{
+ "Extract quarter of year number",
+ ("The first quarter maps to 1 and the fourth quarter maps to 4.\n"
+ "Returns an error if timestamp has a defined timezone. Null values return null."),
+ {"values"}};
+
+const FunctionDoc hour_doc{
+ "Extract hour value",
+ "Returns an error if timestamp has a defined timezone. Null values return null.",
+ {"values"}};
+
+const FunctionDoc minute_doc{
+ "Extract minute values",
+ "Returns an error if timestamp has a defined timezone. Null values return null.",
+ {"values"}};
+
+const FunctionDoc second_doc{
+ "Extract second values",
+ "Returns an error if timestamp has a defined timezone. Null values return null.",
+ {"values"}};
+
+const FunctionDoc millisecond_doc{
+ "Extract millisecond values",
+ ("Millisecond returns the number of milliseconds since the last full second.\n"
+ "Returns an error if timestamp has a defined timezone. Null values return null."),
+ {"values"}};
+
+const FunctionDoc microsecond_doc{
+ "Extract microsecond values",
+ ("Microsecond returns the number of microseconds since the last full millisecond.\n"
+ "Returns an error if timestamp has a defined timezone. Null values return null."),
+ {"values"}};
+
+const FunctionDoc nanosecond_doc{
+ "Extract nanosecond values",
+ ("Nanosecond returns the number of nanoseconds since the last full microsecond.\n"
+ "Returns an error if timestamp has a defined timezone. Null values return null."),
+ {"values"}};
+
+const FunctionDoc subsecond_doc{
+ "Extract subsecond values",
+ ("Subsecond returns the fraction of a second since the last full second.\n"
+ "Returns an error if timestamp has a defined timezone. Null values return null."),
+ {"values"}};
+
+} // namespace
+
+void RegisterScalarTemporal(FunctionRegistry* registry) {
+ auto year = MakeTemporal<Year, Int64Type>("year", &year_doc);
+ DCHECK_OK(registry->AddFunction(std::move(year)));
+
+ auto month = MakeTemporal<Month, Int64Type>("month", &month_doc);
+ DCHECK_OK(registry->AddFunction(std::move(month)));
+
+ auto day = MakeTemporal<Day, Int64Type>("day", &day_doc);
+ DCHECK_OK(registry->AddFunction(std::move(day)));
+
+ static auto default_day_of_week_options = DayOfWeekOptions::Defaults();
+ auto day_of_week = MakeTemporalWithOptions<DayOfWeek, Int64Type>(
+ "day_of_week", &day_of_week_doc, default_day_of_week_options, DayOfWeekState::Init);
+ DCHECK_OK(registry->AddFunction(std::move(day_of_week)));
+
+ auto day_of_year = MakeTemporal<DayOfYear, Int64Type>("day_of_year", &day_of_year_doc);
+ DCHECK_OK(registry->AddFunction(std::move(day_of_year)));
+
+ auto iso_year = MakeTemporal<ISOYear, Int64Type>("iso_year", &iso_year_doc);
+ DCHECK_OK(registry->AddFunction(std::move(iso_year)));
+
+ auto iso_week = MakeTemporal<ISOWeek, Int64Type>("iso_week", &iso_week_doc);
+ DCHECK_OK(registry->AddFunction(std::move(iso_week)));
+
+ auto iso_calendar = MakeStructTemporal<ISOCalendar>("iso_calendar", &iso_calendar_doc);
+ DCHECK_OK(registry->AddFunction(std::move(iso_calendar)));
+
+ auto quarter = MakeTemporal<Quarter, Int64Type>("quarter", &quarter_doc);
+ DCHECK_OK(registry->AddFunction(std::move(quarter)));
+
+ auto hour = MakeTemporal<Hour, Int64Type>("hour", &hour_doc);
+ DCHECK_OK(registry->AddFunction(std::move(hour)));
+
+ auto minute = MakeTemporal<Minute, Int64Type>("minute", &minute_doc);
+ DCHECK_OK(registry->AddFunction(std::move(minute)));
+
+ auto second = MakeTemporal<Second, Int64Type>("second", &second_doc);
+ DCHECK_OK(registry->AddFunction(std::move(second)));
+
+ auto millisecond =
+ MakeTemporal<Millisecond, Int64Type>("millisecond", &millisecond_doc);
+ DCHECK_OK(registry->AddFunction(std::move(millisecond)));
+
+ auto microsecond =
+ MakeTemporal<Microsecond, Int64Type>("microsecond", &microsecond_doc);
+ DCHECK_OK(registry->AddFunction(std::move(microsecond)));
+
+ auto nanosecond = MakeTemporal<Nanosecond, Int64Type>("nanosecond", &nanosecond_doc);
+ DCHECK_OK(registry->AddFunction(std::move(nanosecond)));
+
+ auto subsecond = MakeTemporal<Subsecond, DoubleType>("subsecond", &subsecond_doc);
+ DCHECK_OK(registry->AddFunction(std::move(subsecond)));
+}
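+
+// A hedged usage sketch (via the generic compute entry point, which is not
+// part of this file): once registered, the kernels are reachable as
+//
+//   ARROW_ASSIGN_OR_RAISE(
+//       Datum out, arrow::compute::CallFunction("iso_calendar", {timestamps}));
+//
+// where `timestamps` is a Datum wrapping a timezone-less timestamp array.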
+
+} // namespace internal
+} // namespace compute
+} // namespace arrow
diff --git a/contrib/libs/apache/arrow/cpp/src/arrow/compute/kernels/scalar_validity.cc b/contrib/libs/apache/arrow/cpp/src/arrow/compute/kernels/scalar_validity.cc
index dc63edab12c..ead88abc0f2 100644
--- a/contrib/libs/apache/arrow/cpp/src/arrow/compute/kernels/scalar_validity.cc
+++ b/contrib/libs/apache/arrow/cpp/src/arrow/compute/kernels/scalar_validity.cc
@@ -15,8 +15,8 @@
// specific language governing permissions and limitations
// under the License.
-#include <cmath>
-
+#include <cmath>
+
#include "arrow/compute/kernels/common.h"
#include "arrow/util/bit_util.h"
@@ -32,12 +32,12 @@ namespace internal {
namespace {
struct IsValidOperator {
- static Status Call(KernelContext* ctx, const Scalar& in, Scalar* out) {
+ static Status Call(KernelContext* ctx, const Scalar& in, Scalar* out) {
checked_cast<BooleanScalar*>(out)->value = in.is_valid;
- return Status::OK();
+ return Status::OK();
}
- static Status Call(KernelContext* ctx, const ArrayData& arr, ArrayData* out) {
+ static Status Call(KernelContext* ctx, const ArrayData& arr, ArrayData* out) {
DCHECK_EQ(out->offset, 0);
DCHECK_LE(out->length, arr.length);
if (arr.MayHaveNulls()) {
@@ -49,64 +49,64 @@ struct IsValidOperator {
arr.offset == 0 ? arr.buffers[0]
: SliceBuffer(arr.buffers[0], arr.offset / 8,
BitUtil::BytesForBits(out->length + out->offset));
- return Status::OK();
+ return Status::OK();
}
// Input has no nulls => output is entirely true.
- ARROW_ASSIGN_OR_RAISE(out->buffers[1],
- ctx->AllocateBitmap(out->length + out->offset));
+ ARROW_ASSIGN_OR_RAISE(out->buffers[1],
+ ctx->AllocateBitmap(out->length + out->offset));
BitUtil::SetBitsTo(out->buffers[1]->mutable_data(), out->offset, out->length, true);
- return Status::OK();
+ return Status::OK();
+ }
+};
+
+struct IsFiniteOperator {
+ template <typename OutType, typename InType>
+ static constexpr OutType Call(KernelContext*, const InType& value, Status*) {
+ return std::isfinite(value);
+ }
+};
+
+struct IsInfOperator {
+ template <typename OutType, typename InType>
+ static constexpr OutType Call(KernelContext*, const InType& value, Status*) {
+ return std::isinf(value);
}
};
-struct IsFiniteOperator {
- template <typename OutType, typename InType>
- static constexpr OutType Call(KernelContext*, const InType& value, Status*) {
- return std::isfinite(value);
- }
-};
-
-struct IsInfOperator {
- template <typename OutType, typename InType>
- static constexpr OutType Call(KernelContext*, const InType& value, Status*) {
- return std::isinf(value);
- }
-};
-
struct IsNullOperator {
- static Status Call(KernelContext* ctx, const Scalar& in, Scalar* out) {
+ static Status Call(KernelContext* ctx, const Scalar& in, Scalar* out) {
checked_cast<BooleanScalar*>(out)->value = !in.is_valid;
- return Status::OK();
+ return Status::OK();
}
- static Status Call(KernelContext* ctx, const ArrayData& arr, ArrayData* out) {
+ static Status Call(KernelContext* ctx, const ArrayData& arr, ArrayData* out) {
if (arr.MayHaveNulls()) {
// Input has nulls => output is the inverted null (validity) bitmap.
InvertBitmap(arr.buffers[0]->data(), arr.offset, arr.length,
out->buffers[1]->mutable_data(), out->offset);
- } else {
- // Input has no nulls => output is entirely false.
- BitUtil::SetBitsTo(out->buffers[1]->mutable_data(), out->offset, out->length,
- false);
+ } else {
+ // Input has no nulls => output is entirely false.
+ BitUtil::SetBitsTo(out->buffers[1]->mutable_data(), out->offset, out->length,
+ false);
}
- return Status::OK();
- }
-};
-
-struct IsNanOperator {
- template <typename OutType, typename InType>
- static constexpr OutType Call(KernelContext*, const InType& value, Status*) {
- return std::isnan(value);
+ return Status::OK();
}
};
-void MakeFunction(std::string name, const FunctionDoc* doc,
- std::vector<InputType> in_types, OutputType out_type,
+struct IsNanOperator {
+ template <typename OutType, typename InType>
+ static constexpr OutType Call(KernelContext*, const InType& value, Status*) {
+ return std::isnan(value);
+ }
+};
+
+void MakeFunction(std::string name, const FunctionDoc* doc,
+ std::vector<InputType> in_types, OutputType out_type,
ArrayKernelExec exec, FunctionRegistry* registry,
MemAllocation::type mem_allocation, bool can_write_into_slices) {
Arity arity{static_cast<int>(in_types.size())};
- auto func = std::make_shared<ScalarFunction>(name, arity, doc);
+ auto func = std::make_shared<ScalarFunction>(name, arity, doc);
ScalarKernel kernel(std::move(in_types), out_type, exec);
kernel.null_handling = NullHandling::OUTPUT_NOT_NULL;
@@ -117,112 +117,112 @@ void MakeFunction(std::string name, const FunctionDoc* doc,
DCHECK_OK(registry->AddFunction(std::move(func)));
}
-template <typename InType, typename Op>
-void AddFloatValidityKernel(const std::shared_ptr<DataType>& ty, ScalarFunction* func) {
- DCHECK_OK(func->AddKernel({ty}, boolean(),
- applicator::ScalarUnary<BooleanType, InType, Op>::Exec));
-}
-
-std::shared_ptr<ScalarFunction> MakeIsFiniteFunction(std::string name,
- const FunctionDoc* doc) {
- auto func = std::make_shared<ScalarFunction>(name, Arity::Unary(), doc);
-
- AddFloatValidityKernel<FloatType, IsFiniteOperator>(float32(), func.get());
- AddFloatValidityKernel<DoubleType, IsFiniteOperator>(float64(), func.get());
-
- return func;
-}
-
-std::shared_ptr<ScalarFunction> MakeIsInfFunction(std::string name,
- const FunctionDoc* doc) {
- auto func = std::make_shared<ScalarFunction>(name, Arity::Unary(), doc);
-
- AddFloatValidityKernel<FloatType, IsInfOperator>(float32(), func.get());
- AddFloatValidityKernel<DoubleType, IsInfOperator>(float64(), func.get());
-
- return func;
-}
-
-std::shared_ptr<ScalarFunction> MakeIsNanFunction(std::string name,
- const FunctionDoc* doc) {
- auto func = std::make_shared<ScalarFunction>(name, Arity::Unary(), doc);
-
- AddFloatValidityKernel<FloatType, IsNanOperator>(float32(), func.get());
- AddFloatValidityKernel<DoubleType, IsNanOperator>(float64(), func.get());
-
- return func;
-}
-
-Status IsValidExec(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
+template <typename InType, typename Op>
+void AddFloatValidityKernel(const std::shared_ptr<DataType>& ty, ScalarFunction* func) {
+ DCHECK_OK(func->AddKernel({ty}, boolean(),
+ applicator::ScalarUnary<BooleanType, InType, Op>::Exec));
+}
+
+std::shared_ptr<ScalarFunction> MakeIsFiniteFunction(std::string name,
+ const FunctionDoc* doc) {
+ auto func = std::make_shared<ScalarFunction>(name, Arity::Unary(), doc);
+
+ AddFloatValidityKernel<FloatType, IsFiniteOperator>(float32(), func.get());
+ AddFloatValidityKernel<DoubleType, IsFiniteOperator>(float64(), func.get());
+
+ return func;
+}
+
+std::shared_ptr<ScalarFunction> MakeIsInfFunction(std::string name,
+ const FunctionDoc* doc) {
+ auto func = std::make_shared<ScalarFunction>(name, Arity::Unary(), doc);
+
+ AddFloatValidityKernel<FloatType, IsInfOperator>(float32(), func.get());
+ AddFloatValidityKernel<DoubleType, IsInfOperator>(float64(), func.get());
+
+ return func;
+}
+
+std::shared_ptr<ScalarFunction> MakeIsNanFunction(std::string name,
+ const FunctionDoc* doc) {
+ auto func = std::make_shared<ScalarFunction>(name, Arity::Unary(), doc);
+
+ AddFloatValidityKernel<FloatType, IsNanOperator>(float32(), func.get());
+ AddFloatValidityKernel<DoubleType, IsNanOperator>(float64(), func.get());
+
+ return func;
+}
+
+Status IsValidExec(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
const Datum& arg0 = batch[0];
if (arg0.type()->id() == Type::NA) {
auto false_value = std::make_shared<BooleanScalar>(false);
if (arg0.kind() == Datum::SCALAR) {
- out->value = false_value;
+ out->value = false_value;
} else {
std::shared_ptr<Array> false_values;
- RETURN_NOT_OK(MakeArrayFromScalar(*false_value, out->length(), ctx->memory_pool())
- .Value(&false_values));
+ RETURN_NOT_OK(MakeArrayFromScalar(*false_value, out->length(), ctx->memory_pool())
+ .Value(&false_values));
out->value = false_values->data();
}
- return Status::OK();
+ return Status::OK();
} else {
- return applicator::SimpleUnary<IsValidOperator>(ctx, batch, out);
+ return applicator::SimpleUnary<IsValidOperator>(ctx, batch, out);
}
}
-Status IsNullExec(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
+Status IsNullExec(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
const Datum& arg0 = batch[0];
if (arg0.type()->id() == Type::NA) {
if (arg0.kind() == Datum::SCALAR) {
- out->value = std::make_shared<BooleanScalar>(true);
+ out->value = std::make_shared<BooleanScalar>(true);
} else {
// Data is preallocated
ArrayData* out_arr = out->mutable_array();
BitUtil::SetBitsTo(out_arr->buffers[1]->mutable_data(), out_arr->offset,
out_arr->length, true);
}
- return Status::OK();
+ return Status::OK();
} else {
- return applicator::SimpleUnary<IsNullOperator>(ctx, batch, out);
+ return applicator::SimpleUnary<IsNullOperator>(ctx, batch, out);
}
}
-const FunctionDoc is_valid_doc(
- "Return true if non-null",
- ("For each input value, emit true iff the value is valid (non-null)."), {"values"});
-
-const FunctionDoc is_finite_doc(
- "Return true if value is finite",
- ("For each input value, emit true iff the value is finite (not NaN, inf, or -inf)."),
- {"values"});
-
-const FunctionDoc is_inf_doc(
- "Return true if infinity",
- ("For each input value, emit true iff the value is infinite (inf or -inf)."),
- {"values"});
-
-const FunctionDoc is_null_doc("Return true if null",
- ("For each input value, emit true iff the value is null."),
- {"values"});
-
-const FunctionDoc is_nan_doc("Return true if NaN",
- ("For each input value, emit true iff the value is NaN."),
- {"values"});
-
+const FunctionDoc is_valid_doc(
+ "Return true if non-null",
+ ("For each input value, emit true iff the value is valid (non-null)."), {"values"});
+
+const FunctionDoc is_finite_doc(
+ "Return true if value is finite",
+ ("For each input value, emit true iff the value is finite (not NaN, inf, or -inf)."),
+ {"values"});
+
+const FunctionDoc is_inf_doc(
+ "Return true if infinity",
+ ("For each input value, emit true iff the value is infinite (inf or -inf)."),
+ {"values"});
+
+const FunctionDoc is_null_doc("Return true if null",
+ ("For each input value, emit true iff the value is null."),
+ {"values"});
+
+const FunctionDoc is_nan_doc("Return true if NaN",
+ ("For each input value, emit true iff the value is NaN."),
+ {"values"});
+
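+// A hedged usage sketch (assuming the standard compute API): once registered,
+// these functions are reachable as, e.g.,
+//   arrow::compute::CallFunction("is_nan", {float64_array})
+// which returns a boolean Datum of the same length as the input.
+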
} // namespace
void RegisterScalarValidity(FunctionRegistry* registry) {
- MakeFunction("is_valid", &is_valid_doc, {ValueDescr::ANY}, boolean(), IsValidExec,
- registry, MemAllocation::NO_PREALLOCATE, /*can_write_into_slices=*/false);
+ MakeFunction("is_valid", &is_valid_doc, {ValueDescr::ANY}, boolean(), IsValidExec,
+ registry, MemAllocation::NO_PREALLOCATE, /*can_write_into_slices=*/false);
- MakeFunction("is_null", &is_null_doc, {ValueDescr::ANY}, boolean(), IsNullExec,
- registry, MemAllocation::PREALLOCATE,
+ MakeFunction("is_null", &is_null_doc, {ValueDescr::ANY}, boolean(), IsNullExec,
+ registry, MemAllocation::PREALLOCATE,
/*can_write_into_slices=*/true);
-
- DCHECK_OK(registry->AddFunction(MakeIsFiniteFunction("is_finite", &is_finite_doc)));
- DCHECK_OK(registry->AddFunction(MakeIsInfFunction("is_inf", &is_inf_doc)));
- DCHECK_OK(registry->AddFunction(MakeIsNanFunction("is_nan", &is_nan_doc)));
+
+ DCHECK_OK(registry->AddFunction(MakeIsFiniteFunction("is_finite", &is_finite_doc)));
+ DCHECK_OK(registry->AddFunction(MakeIsInfFunction("is_inf", &is_inf_doc)));
+ DCHECK_OK(registry->AddFunction(MakeIsNanFunction("is_nan", &is_nan_doc)));
}
} // namespace internal
diff --git a/contrib/libs/apache/arrow/cpp/src/arrow/compute/kernels/util_internal.cc b/contrib/libs/apache/arrow/cpp/src/arrow/compute/kernels/util_internal.cc
index 0ef0ea6c753..846fa26baf2 100644
--- a/contrib/libs/apache/arrow/cpp/src/arrow/compute/kernels/util_internal.cc
+++ b/contrib/libs/apache/arrow/cpp/src/arrow/compute/kernels/util_internal.cc
@@ -53,30 +53,30 @@ PrimitiveArg GetPrimitiveArg(const ArrayData& arr) {
arg.data += arr.offset * arg.bit_width / 8;
}
// This may be kUnknownNullCount
- arg.null_count = (arg.is_valid != nullptr) ? arr.null_count.load() : 0;
+ arg.null_count = (arg.is_valid != nullptr) ? arr.null_count.load() : 0;
return arg;
}
-ArrayKernelExec TrivialScalarUnaryAsArraysExec(ArrayKernelExec exec,
- NullHandling::type null_handling) {
- return [=](KernelContext* ctx, const ExecBatch& batch, Datum* out) -> Status {
- if (out->is_array()) {
- return exec(ctx, batch, out);
- }
-
- if (null_handling == NullHandling::INTERSECTION && !batch[0].scalar()->is_valid) {
- out->scalar()->is_valid = false;
- return Status::OK();
- }
-
- ARROW_ASSIGN_OR_RAISE(Datum array_in, MakeArrayFromScalar(*batch[0].scalar(), 1));
- ARROW_ASSIGN_OR_RAISE(Datum array_out, MakeArrayFromScalar(*out->scalar(), 1));
- RETURN_NOT_OK(exec(ctx, ExecBatch{{std::move(array_in)}, 1}, &array_out));
- ARROW_ASSIGN_OR_RAISE(*out, array_out.make_array()->GetScalar(0));
- return Status::OK();
- };
-}
-
+ArrayKernelExec TrivialScalarUnaryAsArraysExec(ArrayKernelExec exec,
+ NullHandling::type null_handling) {
+ return [=](KernelContext* ctx, const ExecBatch& batch, Datum* out) -> Status {
+ if (out->is_array()) {
+ return exec(ctx, batch, out);
+ }
+
+ if (null_handling == NullHandling::INTERSECTION && !batch[0].scalar()->is_valid) {
+ out->scalar()->is_valid = false;
+ return Status::OK();
+ }
+
+ ARROW_ASSIGN_OR_RAISE(Datum array_in, MakeArrayFromScalar(*batch[0].scalar(), 1));
+ ARROW_ASSIGN_OR_RAISE(Datum array_out, MakeArrayFromScalar(*out->scalar(), 1));
+ RETURN_NOT_OK(exec(ctx, ExecBatch{{std::move(array_in)}, 1}, &array_out));
+ ARROW_ASSIGN_OR_RAISE(*out, array_out.make_array()->GetScalar(0));
+ return Status::OK();
+ };
+}
+
} // namespace internal
} // namespace compute
} // namespace arrow
diff --git a/contrib/libs/apache/arrow/cpp/src/arrow/compute/kernels/util_internal.h b/contrib/libs/apache/arrow/cpp/src/arrow/compute/kernels/util_internal.h
index 8ce321f6b4f..394e08da581 100644
--- a/contrib/libs/apache/arrow/cpp/src/arrow/compute/kernels/util_internal.h
+++ b/contrib/libs/apache/arrow/cpp/src/arrow/compute/kernels/util_internal.h
@@ -18,30 +18,30 @@
#pragma once
#include <cstdint>
-#include <utility>
+#include <utility>
-#include "arrow/array/util.h"
+#include "arrow/array/util.h"
#include "arrow/buffer.h"
-#include "arrow/compute/kernels/codegen_internal.h"
-#include "arrow/compute/type_fwd.h"
-#include "arrow/util/bit_run_reader.h"
+#include "arrow/compute/kernels/codegen_internal.h"
+#include "arrow/compute/type_fwd.h"
+#include "arrow/util/bit_run_reader.h"
namespace arrow {
namespace compute {
namespace internal {
-// Used in some kernels and testing - not provided by default in MSVC
-// and _USE_MATH_DEFINES is not reliable with unity builds
-#ifndef M_PI
-#define M_PI 3.14159265358979323846
-#endif
-#ifndef M_PI_2
-#define M_PI_2 1.57079632679489661923
-#endif
-#ifndef M_PI_4
-#define M_PI_4 0.785398163397448309616
-#endif
-
+// Used in some kernels and testing - not provided by default in MSVC
+// and _USE_MATH_DEFINES is not reliable with unity builds
+#ifndef M_PI
+#define M_PI 3.14159265358979323846
+#endif
+#ifndef M_PI_2
+#define M_PI_2 1.57079632679489661923
+#endif
+#ifndef M_PI_4
+#define M_PI_4 0.785398163397448309616
+#endif
+
// An internal data structure for unpacking a primitive argument to pass to a
// kernel implementation
struct PrimitiveArg {
@@ -67,100 +67,100 @@ int GetBitWidth(const DataType& type);
// rather than duplicating compiled code to do all these in each kernel.
PrimitiveArg GetPrimitiveArg(const ArrayData& arr);
-// Augment a unary ArrayKernelExec which supports only array-like inputs with support for
-// scalar inputs. Scalars will be transformed to 1-long arrays with the scalar's value (or
-// null if the scalar is null) as its only element. This 1-long array will be passed to
-// the original exec, then the only element of the resulting array will be extracted as
-// the output scalar. This could be far more efficient, but instead of optimizing this
-// it'd be better to support scalar inputs "upstream" in original exec.
-ArrayKernelExec TrivialScalarUnaryAsArraysExec(
- ArrayKernelExec exec, NullHandling::type null_handling = NullHandling::INTERSECTION);
-
-// Return (min, max) of a numerical array, ignore nulls.
-// For empty array, return the maximal number limit as 'min', and minimal limit as 'max'.
-template <typename T>
-ARROW_NOINLINE std::pair<T, T> GetMinMax(const ArrayData& data) {
- T min = std::numeric_limits<T>::max();
- T max = std::numeric_limits<T>::lowest();
-
- const T* values = data.GetValues<T>(1);
- arrow::internal::VisitSetBitRunsVoid(data.buffers[0], data.offset, data.length,
- [&](int64_t pos, int64_t len) {
- for (int64_t i = 0; i < len; ++i) {
- min = std::min(min, values[pos + i]);
- max = std::max(max, values[pos + i]);
- }
- });
-
- return std::make_pair(min, max);
-}
-
-template <typename T>
-std::pair<T, T> GetMinMax(const Datum& datum) {
- T min = std::numeric_limits<T>::max();
- T max = std::numeric_limits<T>::lowest();
-
- for (const auto& array : datum.chunks()) {
- T local_min, local_max;
- std::tie(local_min, local_max) = GetMinMax<T>(*array->data());
- min = std::min(min, local_min);
- max = std::max(max, local_max);
- }
-
- return std::make_pair(min, max);
-}
-
-// Count value occurrences of an array, ignore nulls.
-// 'counts' must be zeroed and with enough size.
-template <typename T>
-ARROW_NOINLINE int64_t CountValues(uint64_t* counts, const ArrayData& data, T min) {
- const int64_t n = data.length - data.GetNullCount();
- if (n > 0) {
- const T* values = data.GetValues<T>(1);
- arrow::internal::VisitSetBitRunsVoid(data.buffers[0], data.offset, data.length,
- [&](int64_t pos, int64_t len) {
- for (int64_t i = 0; i < len; ++i) {
- ++counts[values[pos + i] - min];
- }
- });
- }
- return n;
-}
-
-template <typename T>
-int64_t CountValues(uint64_t* counts, const Datum& datum, T min) {
- int64_t n = 0;
- for (const auto& array : datum.chunks()) {
- n += CountValues<T>(counts, *array->data(), min);
- }
- return n;
-}
-
-// Copy numerical array values to a buffer, ignore nulls.
-template <typename T>
-ARROW_NOINLINE int64_t CopyNonNullValues(const ArrayData& data, T* out) {
- const int64_t n = data.length - data.GetNullCount();
- if (n > 0) {
- int64_t index = 0;
- const T* values = data.GetValues<T>(1);
- arrow::internal::VisitSetBitRunsVoid(
- data.buffers[0], data.offset, data.length, [&](int64_t pos, int64_t len) {
- memcpy(out + index, values + pos, len * sizeof(T));
- index += len;
- });
- }
- return n;
-}
-
-template <typename T>
-int64_t CopyNonNullValues(const Datum& datum, T* out) {
- int64_t n = 0;
- for (const auto& array : datum.chunks()) {
- n += CopyNonNullValues(*array->data(), out + n);
- }
- return n;
-}
-
+// Augment a unary ArrayKernelExec which supports only array-like inputs with support for
+// scalar inputs. A scalar is transformed into a 1-element array holding the scalar's
+// value (or null if the scalar is null). That array is passed to the original exec, and
+// the single element of the resulting array is extracted as the output scalar. This
+// could be far more efficient, but rather than optimizing this wrapper, it would be
+// better to support scalar inputs "upstream" in the original exec.
+ArrayKernelExec TrivialScalarUnaryAsArraysExec(
+ ArrayKernelExec exec, NullHandling::type null_handling = NullHandling::INTERSECTION);
+
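+// A sketch of the intended use (names as in this header): given an exec that
+// only handles array inputs,
+//   ArrayKernelExec wrapped = TrivialScalarUnaryAsArraysExec(array_only_exec);
+// the wrapped exec additionally accepts scalar inputs by round-tripping them
+// through 1-element arrays.
+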
+// Return (min, max) of a numerical array, ignoring nulls.
+// For an empty array, return the maximum numeric limit as 'min' and the minimum limit
+// as 'max'.
+template <typename T>
+ARROW_NOINLINE std::pair<T, T> GetMinMax(const ArrayData& data) {
+ T min = std::numeric_limits<T>::max();
+ T max = std::numeric_limits<T>::lowest();
+
+ const T* values = data.GetValues<T>(1);
+ arrow::internal::VisitSetBitRunsVoid(data.buffers[0], data.offset, data.length,
+ [&](int64_t pos, int64_t len) {
+ for (int64_t i = 0; i < len; ++i) {
+ min = std::min(min, values[pos + i]);
+ max = std::max(max, values[pos + i]);
+ }
+ });
+
+ return std::make_pair(min, max);
+}
+
+template <typename T>
+std::pair<T, T> GetMinMax(const Datum& datum) {
+ T min = std::numeric_limits<T>::max();
+ T max = std::numeric_limits<T>::lowest();
+
+ for (const auto& array : datum.chunks()) {
+ T local_min, local_max;
+ std::tie(local_min, local_max) = GetMinMax<T>(*array->data());
+ min = std::min(min, local_min);
+ max = std::max(max, local_max);
+ }
+
+ return std::make_pair(min, max);
+}
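+
+// For example (illustrative only): for an int32 array holding {3, null, 7},
+// GetMinMax<int32_t>(*array->data()) returns {3, 7}; for an empty or all-null
+// array it returns {INT32_MAX, INT32_MIN}.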
+
+// Count value occurrences in an array, ignoring nulls.
+// 'counts' must be zeroed and large enough to hold all observed values.
+template <typename T>
+ARROW_NOINLINE int64_t CountValues(uint64_t* counts, const ArrayData& data, T min) {
+ const int64_t n = data.length - data.GetNullCount();
+ if (n > 0) {
+ const T* values = data.GetValues<T>(1);
+ arrow::internal::VisitSetBitRunsVoid(data.buffers[0], data.offset, data.length,
+ [&](int64_t pos, int64_t len) {
+ for (int64_t i = 0; i < len; ++i) {
+ ++counts[values[pos + i] - min];
+ }
+ });
+ }
+ return n;
+}
+
+template <typename T>
+int64_t CountValues(uint64_t* counts, const Datum& datum, T min) {
+ int64_t n = 0;
+ for (const auto& array : datum.chunks()) {
+ n += CountValues<T>(counts, *array->data(), min);
+ }
+ return n;
+}
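+
+// For example (illustrative only): for non-null values {2, 3, 3, 5} with
+// min=2 and a zeroed `counts` buffer of size 4, CountValues leaves counts at
+// {1, 2, 0, 1} and returns 4 (the number of non-null values).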
+
+// Copy numerical array values to a buffer, ignoring nulls.
+template <typename T>
+ARROW_NOINLINE int64_t CopyNonNullValues(const ArrayData& data, T* out) {
+ const int64_t n = data.length - data.GetNullCount();
+ if (n > 0) {
+ int64_t index = 0;
+ const T* values = data.GetValues<T>(1);
+ arrow::internal::VisitSetBitRunsVoid(
+ data.buffers[0], data.offset, data.length, [&](int64_t pos, int64_t len) {
+ memcpy(out + index, values + pos, len * sizeof(T));
+ index += len;
+ });
+ }
+ return n;
+}
+
+template <typename T>
+int64_t CopyNonNullValues(const Datum& datum, T* out) {
+ int64_t n = 0;
+ for (const auto& array : datum.chunks()) {
+ n += CopyNonNullValues(*array->data(), out + n);
+ }
+ return n;
+}
+
} // namespace internal
} // namespace compute
} // namespace arrow
diff --git a/contrib/libs/apache/arrow/cpp/src/arrow/compute/kernels/vector_hash.cc b/contrib/libs/apache/arrow/cpp/src/arrow/compute/kernels/vector_hash.cc
index 224916f5980..a68e78130f2 100644
--- a/contrib/libs/apache/arrow/cpp/src/arrow/compute/kernels/vector_hash.cc
+++ b/contrib/libs/apache/arrow/cpp/src/arrow/compute/kernels/vector_hash.cc
@@ -22,7 +22,7 @@
#include "arrow/array/array_dict.h"
#include "arrow/array/array_nested.h"
#include "arrow/array/builder_primitive.h"
-#include "arrow/array/concatenate.h"
+#include "arrow/array/concatenate.h"
#include "arrow/array/dict_internal.h"
#include "arrow/array/util.h"
#include "arrow/compute/api_vector.h"
@@ -60,10 +60,10 @@ class UniqueAction final : public ActionBase {
static constexpr bool with_error_status = false;
- UniqueAction(const std::shared_ptr<DataType>& type, const FunctionOptions* options,
- MemoryPool* pool)
- : ActionBase(type, pool) {}
-
+ UniqueAction(const std::shared_ptr<DataType>& type, const FunctionOptions* options,
+ MemoryPool* pool)
+ : ActionBase(type, pool) {}
+
Status Reset() { return Status::OK(); }
Status Reserve(const int64_t length) { return Status::OK(); }
@@ -80,8 +80,8 @@ class UniqueAction final : public ActionBase {
template <class Index>
void ObserveNotFound(Index index) {}
- bool ShouldEncodeNulls() { return true; }
-
+ bool ShouldEncodeNulls() { return true; }
+
Status Flush(Datum* out) { return Status::OK(); }
Status FlushFinal(Datum* out) { return Status::OK(); }
@@ -96,8 +96,8 @@ class ValueCountsAction final : ActionBase {
static constexpr bool with_error_status = true;
- ValueCountsAction(const std::shared_ptr<DataType>& type, const FunctionOptions* options,
- MemoryPool* pool)
+ ValueCountsAction(const std::shared_ptr<DataType>& type, const FunctionOptions* options,
+ MemoryPool* pool)
: ActionBase(type, pool), count_builder_(pool) {}
Status Reserve(const int64_t length) {
@@ -153,8 +153,8 @@ class ValueCountsAction final : ActionBase {
}
}
- bool ShouldEncodeNulls() const { return true; }
-
+ bool ShouldEncodeNulls() const { return true; }
+
private:
Int64Builder count_builder_;
};
@@ -168,13 +168,13 @@ class DictEncodeAction final : public ActionBase {
static constexpr bool with_error_status = false;
- DictEncodeAction(const std::shared_ptr<DataType>& type, const FunctionOptions* options,
- MemoryPool* pool)
- : ActionBase(type, pool), indices_builder_(pool) {
- if (auto options_ptr = static_cast<const DictionaryEncodeOptions*>(options)) {
- encode_options_ = *options_ptr;
- }
- }
+ DictEncodeAction(const std::shared_ptr<DataType>& type, const FunctionOptions* options,
+ MemoryPool* pool)
+ : ActionBase(type, pool), indices_builder_(pool) {
+ if (auto options_ptr = static_cast<const DictionaryEncodeOptions*>(options)) {
+ encode_options_ = *options_ptr;
+ }
+ }
Status Reset() {
indices_builder_.Reset();
@@ -185,16 +185,16 @@ class DictEncodeAction final : public ActionBase {
template <class Index>
void ObserveNullFound(Index index) {
- if (encode_options_.null_encoding_behavior == DictionaryEncodeOptions::MASK) {
- indices_builder_.UnsafeAppendNull();
- } else {
- indices_builder_.UnsafeAppend(index);
- }
+ if (encode_options_.null_encoding_behavior == DictionaryEncodeOptions::MASK) {
+ indices_builder_.UnsafeAppendNull();
+ } else {
+ indices_builder_.UnsafeAppend(index);
+ }
}
template <class Index>
void ObserveNullNotFound(Index index) {
- ObserveNullFound(index);
+ ObserveNullFound(index);
}
template <class Index>
@@ -207,10 +207,10 @@ class DictEncodeAction final : public ActionBase {
ObserveFound(index);
}
- bool ShouldEncodeNulls() {
- return encode_options_.null_encoding_behavior == DictionaryEncodeOptions::ENCODE;
- }
-
+ bool ShouldEncodeNulls() {
+ return encode_options_.null_encoding_behavior == DictionaryEncodeOptions::ENCODE;
+ }
+
Status Flush(Datum* out) {
std::shared_ptr<ArrayData> result;
RETURN_NOT_OK(indices_builder_.FinishInternal(&result));
@@ -222,14 +222,14 @@ class DictEncodeAction final : public ActionBase {
private:
Int32Builder indices_builder_;
- DictionaryEncodeOptions encode_options_;
+ DictionaryEncodeOptions encode_options_;
};
class HashKernel : public KernelState {
public:
- HashKernel() : options_(nullptr) {}
- explicit HashKernel(const FunctionOptions* options) : options_(options) {}
-
+ HashKernel() : options_(nullptr) {}
+ explicit HashKernel(const FunctionOptions* options) : options_(options) {}
+
// Reset for another run.
virtual Status Reset() = 0;
@@ -253,7 +253,7 @@ class HashKernel : public KernelState {
virtual Status Append(const ArrayData& arr) = 0;
protected:
- const FunctionOptions* options_;
+ const FunctionOptions* options_;
std::mutex lock_;
};
@@ -262,12 +262,12 @@ class HashKernel : public KernelState {
// (NullType has a separate implementation)
template <typename Type, typename Scalar, typename Action,
- bool with_error_status = Action::with_error_status>
+ bool with_error_status = Action::with_error_status>
class RegularHashKernel : public HashKernel {
public:
- RegularHashKernel(const std::shared_ptr<DataType>& type, const FunctionOptions* options,
- MemoryPool* pool)
- : HashKernel(options), pool_(pool), type_(type), action_(type, options, pool) {}
+ RegularHashKernel(const std::shared_ptr<DataType>& type, const FunctionOptions* options,
+ MemoryPool* pool)
+ : HashKernel(options), pool_(pool), type_(type), action_(type, options, pool) {}
Status Reset() override {
memo_table_.reset(new MemoTable(pool_, 0));
@@ -307,7 +307,7 @@ class RegularHashKernel : public HashKernel {
&unused_memo_index);
},
[this]() {
- if (action_.ShouldEncodeNulls()) {
+ if (action_.ShouldEncodeNulls()) {
auto on_found = [this](int32_t memo_index) {
action_.ObserveNullFound(memo_index);
};
@@ -343,13 +343,13 @@ class RegularHashKernel : public HashKernel {
[this]() {
// Null
Status s = Status::OK();
- auto on_found = [this](int32_t memo_index) {
- action_.ObserveNullFound(memo_index);
- };
- auto on_not_found = [this, &s](int32_t memo_index) {
- action_.ObserveNullNotFound(memo_index, &s);
- };
- if (action_.ShouldEncodeNulls()) {
+ auto on_found = [this](int32_t memo_index) {
+ action_.ObserveNullFound(memo_index);
+ };
+ auto on_not_found = [this, &s](int32_t memo_index) {
+ action_.ObserveNullNotFound(memo_index, &s);
+ };
+ if (action_.ShouldEncodeNulls()) {
memo_table_->GetOrInsertNull(std::move(on_found), std::move(on_not_found));
}
return s;
@@ -368,23 +368,23 @@ class RegularHashKernel : public HashKernel {
// ----------------------------------------------------------------------
// Hash kernel implementation for nulls
-template <typename Action, bool with_error_status = Action::with_error_status>
+template <typename Action, bool with_error_status = Action::with_error_status>
class NullHashKernel : public HashKernel {
public:
- NullHashKernel(const std::shared_ptr<DataType>& type, const FunctionOptions* options,
- MemoryPool* pool)
- : pool_(pool), type_(type), action_(type, options, pool) {}
+ NullHashKernel(const std::shared_ptr<DataType>& type, const FunctionOptions* options,
+ MemoryPool* pool)
+ : pool_(pool), type_(type), action_(type, options, pool) {}
Status Reset() override { return action_.Reset(); }
- Status Append(const ArrayData& arr) override { return DoAppend(arr); }
-
- template <bool HasError = with_error_status>
- enable_if_t<!HasError, Status> DoAppend(const ArrayData& arr) {
+ Status Append(const ArrayData& arr) override { return DoAppend(arr); }
+
+ template <bool HasError = with_error_status>
+ enable_if_t<!HasError, Status> DoAppend(const ArrayData& arr) {
RETURN_NOT_OK(action_.Reserve(arr.length));
for (int64_t i = 0; i < arr.length; ++i) {
if (i == 0) {
- seen_null_ = true;
+ seen_null_ = true;
action_.ObserveNullNotFound(0);
} else {
action_.ObserveNullFound(0);
@@ -393,31 +393,31 @@ class NullHashKernel : public HashKernel {
return Status::OK();
}
- template <bool HasError = with_error_status>
- enable_if_t<HasError, Status> DoAppend(const ArrayData& arr) {
- Status s = Status::OK();
- RETURN_NOT_OK(action_.Reserve(arr.length));
- for (int64_t i = 0; i < arr.length; ++i) {
- if (seen_null_ == false && i == 0) {
- seen_null_ = true;
- action_.ObserveNullNotFound(0, &s);
- } else {
- action_.ObserveNullFound(0);
- }
- }
- return s;
- }
-
+ template <bool HasError = with_error_status>
+ enable_if_t<HasError, Status> DoAppend(const ArrayData& arr) {
+ Status s = Status::OK();
+ RETURN_NOT_OK(action_.Reserve(arr.length));
+ for (int64_t i = 0; i < arr.length; ++i) {
+ if (seen_null_ == false && i == 0) {
+ seen_null_ = true;
+ action_.ObserveNullNotFound(0, &s);
+ } else {
+ action_.ObserveNullFound(0);
+ }
+ }
+ return s;
+ }
+
Status Flush(Datum* out) override { return action_.Flush(out); }
Status FlushFinal(Datum* out) override { return action_.FlushFinal(out); }
Status GetDictionary(std::shared_ptr<ArrayData>* out) override {
- std::shared_ptr<NullArray> null_array;
- if (seen_null_) {
- null_array = std::make_shared<NullArray>(1);
- } else {
- null_array = std::make_shared<NullArray>(0);
- }
+ std::shared_ptr<NullArray> null_array;
+ if (seen_null_) {
+ null_array = std::make_shared<NullArray>(1);
+ } else {
+ null_array = std::make_shared<NullArray>(0);
+ }
*out = null_array->data();
return Status::OK();
}
@@ -427,7 +427,7 @@ class NullHashKernel : public HashKernel {
protected:
MemoryPool* pool_;
std::shared_ptr<DataType> type_;
- bool seen_null_ = false;
+ bool seen_null_ = false;
Action action_;
};
@@ -441,33 +441,33 @@ class DictionaryHashKernel : public HashKernel {
Status Reset() override { return indices_kernel_->Reset(); }
- Status Append(const ArrayData& arr) override {
+ Status Append(const ArrayData& arr) override {
if (!dictionary_) {
- dictionary_ = arr.dictionary;
- } else if (!MakeArray(dictionary_)->Equals(*MakeArray(arr.dictionary))) {
- // NOTE: This approach computes a new dictionary unification per chunk.
- // This is in effect O(n*k) where n is the total chunked array length and
- // k is the number of chunks (therefore O(n**2) if chunks have a fixed size).
- //
- // A better approach may be to run the kernel over each individual chunk,
- // and then hash-aggregate all results (for example sum-group-by for
- // the "value_counts" kernel).
- auto out_dict_type = dictionary_->type;
- std::shared_ptr<Buffer> transpose_map;
- std::shared_ptr<Array> out_dict;
- ARROW_ASSIGN_OR_RAISE(auto unifier, DictionaryUnifier::Make(out_dict_type));
-
- ARROW_CHECK_OK(unifier->Unify(*MakeArray(dictionary_)));
- ARROW_CHECK_OK(unifier->Unify(*MakeArray(arr.dictionary), &transpose_map));
- ARROW_CHECK_OK(unifier->GetResult(&out_dict_type, &out_dict));
-
- this->dictionary_ = out_dict->data();
- auto transpose = reinterpret_cast<const int32_t*>(transpose_map->data());
- auto in_dict_array = MakeArray(std::make_shared<ArrayData>(arr));
- ARROW_ASSIGN_OR_RAISE(
- auto tmp, arrow::internal::checked_cast<const DictionaryArray&>(*in_dict_array)
- .Transpose(arr.type, out_dict, transpose));
- return indices_kernel_->Append(*tmp->data());
+ dictionary_ = arr.dictionary;
+ } else if (!MakeArray(dictionary_)->Equals(*MakeArray(arr.dictionary))) {
+ // NOTE: This approach computes a new dictionary unification per chunk.
+ // This is in effect O(n*k) where n is the total chunked array length and
+ // k is the number of chunks (therefore O(n**2) if chunks have a fixed size).
+ //
+ // A better approach may be to run the kernel over each individual chunk,
+ // and then hash-aggregate all results (for example sum-group-by for
+ // the "value_counts" kernel).
+ auto out_dict_type = dictionary_->type;
+ std::shared_ptr<Buffer> transpose_map;
+ std::shared_ptr<Array> out_dict;
+ ARROW_ASSIGN_OR_RAISE(auto unifier, DictionaryUnifier::Make(out_dict_type));
+
+ ARROW_CHECK_OK(unifier->Unify(*MakeArray(dictionary_)));
+ ARROW_CHECK_OK(unifier->Unify(*MakeArray(arr.dictionary), &transpose_map));
+ ARROW_CHECK_OK(unifier->GetResult(&out_dict_type, &out_dict));
+
+ this->dictionary_ = out_dict->data();
+ auto transpose = reinterpret_cast<const int32_t*>(transpose_map->data());
+ auto in_dict_array = MakeArray(std::make_shared<ArrayData>(arr));
+ ARROW_ASSIGN_OR_RAISE(
+ auto tmp, arrow::internal::checked_cast<const DictionaryArray&>(*in_dict_array)
+ .Transpose(arr.type, out_dict, transpose));
+ return indices_kernel_->Append(*tmp->data());
}
return indices_kernel_->Append(arr);
@@ -513,19 +513,19 @@ struct HashKernelTraits<Type, Action, enable_if_has_string_view<Type>> {
};
template <typename Type, typename Action>
-Result<std::unique_ptr<HashKernel>> HashInitImpl(KernelContext* ctx,
- const KernelInitArgs& args) {
+Result<std::unique_ptr<HashKernel>> HashInitImpl(KernelContext* ctx,
+ const KernelInitArgs& args) {
using HashKernelType = typename HashKernelTraits<Type, Action>::HashKernel;
- auto result = ::arrow::internal::make_unique<HashKernelType>(
- args.inputs[0].type, args.options, ctx->memory_pool());
- RETURN_NOT_OK(result->Reset());
+ auto result = ::arrow::internal::make_unique<HashKernelType>(
+ args.inputs[0].type, args.options, ctx->memory_pool());
+ RETURN_NOT_OK(result->Reset());
return std::move(result);
}
template <typename Type, typename Action>
-Result<std::unique_ptr<KernelState>> HashInit(KernelContext* ctx,
- const KernelInitArgs& args) {
- return HashInitImpl<Type, Action>(ctx, args);
+Result<std::unique_ptr<KernelState>> HashInit(KernelContext* ctx,
+ const KernelInitArgs& args) {
+ return HashInitImpl<Type, Action>(ctx, args);
}
template <typename Action>
@@ -564,8 +564,8 @@ KernelInit GetHashInit(Type::type type_id) {
case Type::LARGE_STRING:
return HashInit<LargeBinaryType, Action>;
case Type::FIXED_SIZE_BINARY:
- case Type::DECIMAL128:
- case Type::DECIMAL256:
+ case Type::DECIMAL128:
+ case Type::DECIMAL256:
return HashInit<FixedSizeBinaryType, Action>;
default:
DCHECK(false);
@@ -573,13 +573,13 @@ KernelInit GetHashInit(Type::type type_id) {
}
}
-using DictionaryEncodeState = OptionsWrapper<DictionaryEncodeOptions>;
-
+using DictionaryEncodeState = OptionsWrapper<DictionaryEncodeOptions>;
+
template <typename Action>
-Result<std::unique_ptr<KernelState>> DictionaryHashInit(KernelContext* ctx,
- const KernelInitArgs& args) {
+Result<std::unique_ptr<KernelState>> DictionaryHashInit(KernelContext* ctx,
+ const KernelInitArgs& args) {
const auto& dict_type = checked_cast<const DictionaryType&>(*args.inputs[0].type);
- Result<std::unique_ptr<HashKernel>> indices_hasher;
+ Result<std::unique_ptr<HashKernel>> indices_hasher;
switch (dict_type.index_type()->id()) {
case Type::INT8:
indices_hasher = HashInitImpl<UInt8Type, Action>(ctx, args);
@@ -597,37 +597,37 @@ Result<std::unique_ptr<KernelState>> DictionaryHashInit(KernelContext* ctx,
DCHECK(false) << "Unsupported dictionary index type";
break;
}
- RETURN_NOT_OK(indices_hasher);
- return ::arrow::internal::make_unique<DictionaryHashKernel>(
- std::move(indices_hasher.ValueOrDie()));
+ RETURN_NOT_OK(indices_hasher);
+ return ::arrow::internal::make_unique<DictionaryHashKernel>(
+ std::move(indices_hasher.ValueOrDie()));
}
-Status HashExec(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
+Status HashExec(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
auto hash_impl = checked_cast<HashKernel*>(ctx->state());
- RETURN_NOT_OK(hash_impl->Append(ctx, *batch[0].array()));
- RETURN_NOT_OK(hash_impl->Flush(out));
- return Status::OK();
+ RETURN_NOT_OK(hash_impl->Append(ctx, *batch[0].array()));
+ RETURN_NOT_OK(hash_impl->Flush(out));
+ return Status::OK();
}
-Status UniqueFinalize(KernelContext* ctx, std::vector<Datum>* out) {
+Status UniqueFinalize(KernelContext* ctx, std::vector<Datum>* out) {
auto hash_impl = checked_cast<HashKernel*>(ctx->state());
std::shared_ptr<ArrayData> uniques;
- RETURN_NOT_OK(hash_impl->GetDictionary(&uniques));
+ RETURN_NOT_OK(hash_impl->GetDictionary(&uniques));
*out = {Datum(uniques)};
- return Status::OK();
+ return Status::OK();
}
-Status DictEncodeFinalize(KernelContext* ctx, std::vector<Datum>* out) {
+Status DictEncodeFinalize(KernelContext* ctx, std::vector<Datum>* out) {
auto hash_impl = checked_cast<HashKernel*>(ctx->state());
std::shared_ptr<ArrayData> uniques;
- RETURN_NOT_OK(hash_impl->GetDictionary(&uniques));
+ RETURN_NOT_OK(hash_impl->GetDictionary(&uniques));
auto dict_type = dictionary(int32(), uniques->type);
auto dict = MakeArray(uniques);
for (size_t i = 0; i < out->size(); ++i) {
(*out)[i] =
std::make_shared<DictionaryArray>(dict_type, (*out)[i].make_array(), dict);
}
- return Status::OK();
+ return Status::OK();
}
std::shared_ptr<ArrayData> BoxValueCounts(const std::shared_ptr<ArrayData>& uniques,
@@ -638,33 +638,33 @@ std::shared_ptr<ArrayData> BoxValueCounts(const std::shared_ptr<ArrayData>& uniq
return std::make_shared<StructArray>(data_type, uniques->length, children)->data();
}
-Status ValueCountsFinalize(KernelContext* ctx, std::vector<Datum>* out) {
+Status ValueCountsFinalize(KernelContext* ctx, std::vector<Datum>* out) {
auto hash_impl = checked_cast<HashKernel*>(ctx->state());
std::shared_ptr<ArrayData> uniques;
Datum value_counts;
- RETURN_NOT_OK(hash_impl->GetDictionary(&uniques));
- RETURN_NOT_OK(hash_impl->FlushFinal(&value_counts));
+ RETURN_NOT_OK(hash_impl->GetDictionary(&uniques));
+ RETURN_NOT_OK(hash_impl->FlushFinal(&value_counts));
*out = {Datum(BoxValueCounts(uniques, value_counts.array()))};
- return Status::OK();
+ return Status::OK();
}
-Status UniqueFinalizeDictionary(KernelContext* ctx, std::vector<Datum>* out) {
- RETURN_NOT_OK(UniqueFinalize(ctx, out));
+Status UniqueFinalizeDictionary(KernelContext* ctx, std::vector<Datum>* out) {
+ RETURN_NOT_OK(UniqueFinalize(ctx, out));
auto hash = checked_cast<DictionaryHashKernel*>(ctx->state());
(*out)[0].mutable_array()->dictionary = hash->dictionary();
- return Status::OK();
+ return Status::OK();
}
-Status ValueCountsFinalizeDictionary(KernelContext* ctx, std::vector<Datum>* out) {
+Status ValueCountsFinalizeDictionary(KernelContext* ctx, std::vector<Datum>* out) {
auto hash = checked_cast<DictionaryHashKernel*>(ctx->state());
std::shared_ptr<ArrayData> uniques;
Datum value_counts;
- RETURN_NOT_OK(hash->GetDictionary(&uniques));
- RETURN_NOT_OK(hash->FlushFinal(&value_counts));
+ RETURN_NOT_OK(hash->GetDictionary(&uniques));
+ RETURN_NOT_OK(hash->FlushFinal(&value_counts));
uniques->dictionary = hash->dictionary();
*out = {Datum(BoxValueCounts(uniques, value_counts.array()))};
- return Status::OK();
+ return Status::OK();
}
ValueDescr DictEncodeOutput(KernelContext*, const std::vector<ValueDescr>& descrs) {
@@ -693,31 +693,31 @@ void AddHashKernels(VectorFunction* func, VectorKernel base, OutputType out_ty)
DCHECK_OK(func->AddKernel(base));
}
- for (auto t : {Type::DECIMAL128, Type::DECIMAL256}) {
- base.init = GetHashInit<Action>(t);
- base.signature = KernelSignature::Make({InputType::Array(t)}, out_ty);
- DCHECK_OK(func->AddKernel(base));
- }
+ for (auto t : {Type::DECIMAL128, Type::DECIMAL256}) {
+ base.init = GetHashInit<Action>(t);
+ base.signature = KernelSignature::Make({InputType::Array(t)}, out_ty);
+ DCHECK_OK(func->AddKernel(base));
+ }
}
-const FunctionDoc unique_doc(
- "Compute unique elements",
- ("Return an array with distinct values. Nulls in the input are ignored."),
- {"array"});
-
-const FunctionDoc value_counts_doc(
- "Compute counts of unique elements",
- ("For each distinct value, compute the number of times it occurs in the array.\n"
- "The result is returned as an array of `struct<input type, int64>`.\n"
- "Nulls in the input are ignored."),
- {"array"});
-
-const auto kDefaultDictionaryEncodeOptions = DictionaryEncodeOptions::Defaults();
-const FunctionDoc dictionary_encode_doc(
- "Dictionary-encode array",
- ("Return a dictionary-encoded version of the input array."), {"array"},
- "DictionaryEncodeOptions");
-
+const FunctionDoc unique_doc(
+ "Compute unique elements",
+ ("Return an array with distinct values. Nulls in the input are ignored."),
+ {"array"});
+
+const FunctionDoc value_counts_doc(
+ "Compute counts of unique elements",
+ ("For each distinct value, compute the number of times it occurs in the array.\n"
+ "The result is returned as an array of `struct<input type, int64>`.\n"
+ "Nulls in the input are ignored."),
+ {"array"});
+
+const auto kDefaultDictionaryEncodeOptions = DictionaryEncodeOptions::Defaults();
+const FunctionDoc dictionary_encode_doc(
+ "Dictionary-encode array",
+ ("Return a dictionary-encoded version of the input array."), {"array"},
+ "DictionaryEncodeOptions");
+
} // namespace
void RegisterVectorHash(FunctionRegistry* registry) {
@@ -729,7 +729,7 @@ void RegisterVectorHash(FunctionRegistry* registry) {
base.finalize = UniqueFinalize;
base.output_chunked = false;
- auto unique = std::make_shared<VectorFunction>("unique", Arity::Unary(), &unique_doc);
+ auto unique = std::make_shared<VectorFunction>("unique", Arity::Unary(), &unique_doc);
AddHashKernels<UniqueAction>(unique.get(), base, OutputType(FirstType));
// Dictionary unique
@@ -745,8 +745,8 @@ void RegisterVectorHash(FunctionRegistry* registry) {
// value_counts
base.finalize = ValueCountsFinalize;
- auto value_counts =
- std::make_shared<VectorFunction>("value_counts", Arity::Unary(), &value_counts_doc);
+ auto value_counts =
+ std::make_shared<VectorFunction>("value_counts", Arity::Unary(), &value_counts_doc);
AddHashKernels<ValueCountsAction>(value_counts.get(), base,
OutputType(ValueCountsOutput));
@@ -765,9 +765,9 @@ void RegisterVectorHash(FunctionRegistry* registry) {
base.finalize = DictEncodeFinalize;
  // Unlike Unique and ValueCounts, dictionary_encode outputs chunked arrays
base.output_chunked = true;
- auto dict_encode = std::make_shared<VectorFunction>("dictionary_encode", Arity::Unary(),
- &dictionary_encode_doc,
- &kDefaultDictionaryEncodeOptions);
+ auto dict_encode = std::make_shared<VectorFunction>("dictionary_encode", Arity::Unary(),
+ &dictionary_encode_doc,
+ &kDefaultDictionaryEncodeOptions);
AddHashKernels<DictEncodeAction>(dict_encode.get(), base, OutputType(DictEncodeOutput));
// Calling dictionary_encode on dictionary input not supported, but if it
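For reference, the three kernels registered above ("unique", "value_counts", "dictionary_encode") are reachable through the generic compute entry point. A minimal caller-side sketch, assuming the Arrow 4.x C++ API vendored in this tree; the Demo function name and the sample input are illustrative, not part of this diff:

#include <arrow/api.h>
#include <arrow/compute/api.h>

arrow::Status Demo() {
  // Sample input: [1, 2, 2, 3, null]
  arrow::Int64Builder builder;
  ARROW_RETURN_NOT_OK(builder.AppendValues({1, 2, 2, 3}));
  ARROW_RETURN_NOT_OK(builder.AppendNull());
  std::shared_ptr<arrow::Array> values;
  ARROW_RETURN_NOT_OK(builder.Finish(&values));

  // "unique" ignores the null and yields the distinct values [1, 2, 3].
  ARROW_ASSIGN_OR_RAISE(arrow::Datum uniques,
                        arrow::compute::CallFunction("unique", {values}));

  // "value_counts" yields a struct<values, counts> array; here
  // 1 -> 1, 2 -> 2, 3 -> 1, with the null ignored.
  ARROW_ASSIGN_OR_RAISE(arrow::Datum counts,
                        arrow::compute::CallFunction("value_counts", {values}));

  // "dictionary_encode" yields a dictionary-encoded copy of the input:
  // indices into the array of distinct values.
  ARROW_ASSIGN_OR_RAISE(arrow::Datum encoded,
                        arrow::compute::CallFunction("dictionary_encode", {values}));
  return arrow::Status::OK();
}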
diff --git a/contrib/libs/apache/arrow/cpp/src/arrow/compute/kernels/vector_nested.cc b/contrib/libs/apache/arrow/cpp/src/arrow/compute/kernels/vector_nested.cc
index 68db6ae04cc..b84640854ed 100644
--- a/contrib/libs/apache/arrow/cpp/src/arrow/compute/kernels/vector_nested.cc
+++ b/contrib/libs/apache/arrow/cpp/src/arrow/compute/kernels/vector_nested.cc
@@ -27,15 +27,15 @@ namespace internal {
namespace {
template <typename Type>
-Status ListFlatten(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
+Status ListFlatten(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
typename TypeTraits<Type>::ArrayType list_array(batch[0].array());
- ARROW_ASSIGN_OR_RAISE(auto result, list_array.Flatten(ctx->memory_pool()));
- out->value = result->data();
- return Status::OK();
+ ARROW_ASSIGN_OR_RAISE(auto result, list_array.Flatten(ctx->memory_pool()));
+ out->value = result->data();
+ return Status::OK();
}
template <typename Type, typename offset_type = typename Type::offset_type>
-Status ListParentIndices(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
+Status ListParentIndices(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
typename TypeTraits<Type>::ArrayType list(batch[0].array());
ArrayData* out_arr = out->mutable_array();
@@ -44,8 +44,8 @@ Status ListParentIndices(KernelContext* ctx, const ExecBatch& batch, Datum* out)
out_arr->length = values_length;
out_arr->null_count = 0;
- ARROW_ASSIGN_OR_RAISE(out_arr->buffers[1],
- ctx->Allocate(values_length * sizeof(offset_type)));
+ ARROW_ASSIGN_OR_RAISE(out_arr->buffers[1],
+ ctx->Allocate(values_length * sizeof(offset_type)));
auto out_indices = reinterpret_cast<offset_type*>(out_arr->buffers[1]->mutable_data());
for (int64_t i = 0; i < list.length(); ++i) {
// Note: In most cases, null slots are empty, but when they are non-empty
@@ -55,7 +55,7 @@ Status ListParentIndices(KernelContext* ctx, const ExecBatch& batch, Datum* out)
*out_indices++ = static_cast<offset_type>(i);
}
}
- return Status::OK();
+ return Status::OK();
}
Result<ValueDescr> ValuesType(KernelContext*, const std::vector<ValueDescr>& args) {
@@ -63,33 +63,33 @@ Result<ValueDescr> ValuesType(KernelContext*, const std::vector<ValueDescr>& arg
return ValueDescr::Array(list_type.value_type());
}
-const FunctionDoc list_flatten_doc(
- "Flatten list values",
- ("`lists` must have a list-like type.\n"
- "Return an array with the top list level flattened.\n"
- "Top-level null values in `lists` do not emit anything in the input."),
- {"lists"});
-
-const FunctionDoc list_parent_indices_doc(
- "Compute parent indices of nested list values",
- ("`lists` must have a list-like type.\n"
- "For each value in each list of `lists`, the top-level list index\n"
- "is emitted."),
- {"lists"});
-
+const FunctionDoc list_flatten_doc(
+ "Flatten list values",
+ ("`lists` must have a list-like type.\n"
+ "Return an array with the top list level flattened.\n"
+ "Top-level null values in `lists` do not emit anything in the input."),
+ {"lists"});
+
+const FunctionDoc list_parent_indices_doc(
+ "Compute parent indices of nested list values",
+ ("`lists` must have a list-like type.\n"
+ "For each value in each list of `lists`, the top-level list index\n"
+ "is emitted."),
+ {"lists"});
+
} // namespace
void RegisterVectorNested(FunctionRegistry* registry) {
- auto flatten =
- std::make_shared<VectorFunction>("list_flatten", Arity::Unary(), &list_flatten_doc);
+ auto flatten =
+ std::make_shared<VectorFunction>("list_flatten", Arity::Unary(), &list_flatten_doc);
DCHECK_OK(flatten->AddKernel({InputType::Array(Type::LIST)}, OutputType(ValuesType),
ListFlatten<ListType>));
DCHECK_OK(flatten->AddKernel({InputType::Array(Type::LARGE_LIST)},
OutputType(ValuesType), ListFlatten<LargeListType>));
DCHECK_OK(registry->AddFunction(std::move(flatten)));
- auto list_parent_indices = std::make_shared<VectorFunction>(
- "list_parent_indices", Arity::Unary(), &list_parent_indices_doc);
+ auto list_parent_indices = std::make_shared<VectorFunction>(
+ "list_parent_indices", Arity::Unary(), &list_parent_indices_doc);
DCHECK_OK(list_parent_indices->AddKernel({InputType::Array(Type::LIST)}, int32(),
ListParentIndices<ListType>));
DCHECK_OK(list_parent_indices->AddKernel({InputType::Array(Type::LARGE_LIST)}, int64(),
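The same caller-side view for the two list kernels registered above, under the same assumptions (vendored Arrow 4.x C++ API; Demo and the sample list are illustrative):

#include <arrow/api.h>
#include <arrow/compute/api.h>

arrow::Status Demo() {
  // Build the list array [[1, 2], null, [3]].
  auto pool = arrow::default_memory_pool();
  auto value_builder = std::make_shared<arrow::Int64Builder>(pool);
  arrow::ListBuilder list_builder(pool, value_builder);
  ARROW_RETURN_NOT_OK(list_builder.Append());
  ARROW_RETURN_NOT_OK(value_builder->AppendValues({1, 2}));
  ARROW_RETURN_NOT_OK(list_builder.AppendNull());
  ARROW_RETURN_NOT_OK(list_builder.Append());
  ARROW_RETURN_NOT_OK(value_builder->Append(3));
  std::shared_ptr<arrow::Array> lists;
  ARROW_RETURN_NOT_OK(list_builder.Finish(&lists));

  // "list_flatten" drops the null slot and yields [1, 2, 3].
  ARROW_ASSIGN_OR_RAISE(arrow::Datum flat,
                        arrow::compute::CallFunction("list_flatten", {lists}));

  // "list_parent_indices" yields [0, 0, 2]: one entry per child value,
  // holding the index of the list it came from.
  ARROW_ASSIGN_OR_RAISE(arrow::Datum parents,
                        arrow::compute::CallFunction("list_parent_indices", {lists}));
  return arrow::Status::OK();
}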
diff --git a/contrib/libs/apache/arrow/cpp/src/arrow/compute/kernels/vector_replace.cc b/contrib/libs/apache/arrow/cpp/src/arrow/compute/kernels/vector_replace.cc
index d89f7a6bb40..644aec2a4e9 100644
--- a/contrib/libs/apache/arrow/cpp/src/arrow/compute/kernels/vector_replace.cc
+++ b/contrib/libs/apache/arrow/cpp/src/arrow/compute/kernels/vector_replace.cc
@@ -1,540 +1,540 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-#include "arrow/compute/api_scalar.h"
-#include "arrow/compute/kernels/common.h"
-#include "arrow/util/bitmap_ops.h"
-
-namespace arrow {
-namespace compute {
-namespace internal {
-
-namespace {
-
-Status ReplacementArrayTooShort(int64_t expected, int64_t actual) {
- return Status::Invalid("Replacement array must be of appropriate length (expected ",
- expected, " items but got ", actual, " items)");
-}
-
-// Helper to implement replace_with kernel with scalar mask for fixed-width types,
-// using callbacks to handle both bool and byte-sized types
-template <typename Functor>
-Status ReplaceWithScalarMask(KernelContext* ctx, const ArrayData& array,
- const BooleanScalar& mask, const Datum& replacements,
- ArrayData* output) {
- Datum source = array;
- if (!mask.is_valid) {
- // Output = null
- source = MakeNullScalar(output->type);
- } else if (mask.value) {
- // Output = replacement
- source = replacements;
- }
- uint8_t* out_bitmap = output->buffers[0]->mutable_data();
- uint8_t* out_values = output->buffers[1]->mutable_data();
- const int64_t out_offset = output->offset;
- if (source.is_array()) {
- const ArrayData& in_data = *source.array();
- if (in_data.length < array.length) {
- return ReplacementArrayTooShort(array.length, in_data.length);
- }
- Functor::CopyData(*array.type, out_values, out_offset, in_data, /*in_offset=*/0,
- array.length);
- if (in_data.MayHaveNulls()) {
- arrow::internal::CopyBitmap(in_data.buffers[0]->data(), in_data.offset,
- array.length, out_bitmap, out_offset);
- } else {
- BitUtil::SetBitsTo(out_bitmap, out_offset, array.length, true);
- }
- } else {
- const Scalar& in_data = *source.scalar();
- Functor::CopyData(*array.type, out_values, out_offset, in_data, /*in_offset=*/0,
- array.length);
- BitUtil::SetBitsTo(out_bitmap, out_offset, array.length, in_data.is_valid);
- }
- return Status::OK();
-}
-
-struct CopyArrayBitmap {
- const uint8_t* in_bitmap;
- int64_t in_offset;
-
- void CopyBitmap(uint8_t* out_bitmap, int64_t out_offset, int64_t offset,
- int64_t length) const {
- arrow::internal::CopyBitmap(in_bitmap, in_offset + offset, length, out_bitmap,
- out_offset);
- }
-
- void SetBit(uint8_t* out_bitmap, int64_t out_offset, int64_t offset) const {
- BitUtil::SetBitTo(out_bitmap, out_offset,
- BitUtil::GetBit(in_bitmap, in_offset + offset));
- }
-};
-
-struct CopyScalarBitmap {
- const bool is_valid;
-
- void CopyBitmap(uint8_t* out_bitmap, int64_t out_offset, int64_t offset,
- int64_t length) const {
- BitUtil::SetBitsTo(out_bitmap, out_offset, length, is_valid);
- }
-
- void SetBit(uint8_t* out_bitmap, int64_t out_offset, int64_t offset) const {
- BitUtil::SetBitTo(out_bitmap, out_offset, is_valid);
- }
-};
-
-// Helper to implement replace_with kernel with array mask for fixed-width types,
-// using callbacks to handle both bool and byte-sized types and to handle
-// scalar and array replacements
-template <typename Functor, typename Data, typename CopyBitmap>
-void ReplaceWithArrayMaskImpl(const ArrayData& array, const ArrayData& mask,
- const Data& replacements, bool replacements_bitmap,
- const CopyBitmap& copy_bitmap, const uint8_t* mask_bitmap,
- const uint8_t* mask_values, uint8_t* out_bitmap,
- uint8_t* out_values, const int64_t out_offset) {
- Functor::CopyData(*array.type, out_values, /*out_offset=*/0, array, /*in_offset=*/0,
- array.length);
- arrow::internal::OptionalBinaryBitBlockCounter counter(
- mask_values, mask.offset, mask_bitmap, mask.offset, mask.length);
- int64_t write_offset = 0;
- int64_t replacements_offset = 0;
- while (write_offset < array.length) {
- BitBlockCount block = counter.NextAndBlock();
- if (block.AllSet()) {
- // Copy from replacement array
- Functor::CopyData(*array.type, out_values, out_offset + write_offset, replacements,
- replacements_offset, block.length);
- if (replacements_bitmap) {
- copy_bitmap.CopyBitmap(out_bitmap, out_offset + write_offset, replacements_offset,
- block.length);
- } else if (!replacements_bitmap && out_bitmap) {
- BitUtil::SetBitsTo(out_bitmap, out_offset + write_offset, block.length, true);
- }
- replacements_offset += block.length;
- } else if (block.popcount) {
- for (int64_t i = 0; i < block.length; ++i) {
- if (BitUtil::GetBit(mask_values, write_offset + mask.offset + i) &&
- (!mask_bitmap ||
- BitUtil::GetBit(mask_bitmap, write_offset + mask.offset + i))) {
- Functor::CopyData(*array.type, out_values, out_offset + write_offset + i,
- replacements, replacements_offset, /*length=*/1);
- if (replacements_bitmap) {
- copy_bitmap.SetBit(out_bitmap, out_offset + write_offset + i,
- replacements_offset);
- }
- replacements_offset++;
- }
- }
- }
- write_offset += block.length;
- }
-}
-
-template <typename Functor>
-Status ReplaceWithArrayMask(KernelContext* ctx, const ArrayData& array,
- const ArrayData& mask, const Datum& replacements,
- ArrayData* output) {
- const int64_t out_offset = output->offset;
- uint8_t* out_bitmap = nullptr;
- uint8_t* out_values = output->buffers[1]->mutable_data();
- const uint8_t* mask_bitmap = mask.MayHaveNulls() ? mask.buffers[0]->data() : nullptr;
- const uint8_t* mask_values = mask.buffers[1]->data();
- const bool replacements_bitmap = replacements.is_array()
- ? replacements.array()->MayHaveNulls()
- : !replacements.scalar()->is_valid;
- if (replacements.is_array()) {
- // Check that we have enough replacement values
- const int64_t replacements_length = replacements.array()->length;
-
- BooleanArray mask_arr(mask.length, mask.buffers[1], mask.buffers[0], mask.null_count,
- mask.offset);
- const int64_t count = mask_arr.true_count();
- if (count > replacements_length) {
- return ReplacementArrayTooShort(count, replacements_length);
- }
- }
- if (array.MayHaveNulls() || mask.MayHaveNulls() || replacements_bitmap) {
- out_bitmap = output->buffers[0]->mutable_data();
- output->null_count = -1;
- if (array.MayHaveNulls()) {
- // Copy array's bitmap
- arrow::internal::CopyBitmap(array.buffers[0]->data(), array.offset, array.length,
- out_bitmap, out_offset);
- } else {
- // Array has no bitmap but mask/replacements do, generate an all-valid bitmap
- BitUtil::SetBitsTo(out_bitmap, out_offset, array.length, true);
- }
- } else {
- BitUtil::SetBitsTo(output->buffers[0]->mutable_data(), out_offset, array.length,
- true);
- output->null_count = 0;
- }
-
- if (replacements.is_array()) {
- const ArrayData& array_repl = *replacements.array();
- ReplaceWithArrayMaskImpl<Functor>(
- array, mask, array_repl, replacements_bitmap,
- CopyArrayBitmap{replacements_bitmap ? array_repl.buffers[0]->data() : nullptr,
- array_repl.offset},
- mask_bitmap, mask_values, out_bitmap, out_values, out_offset);
- } else {
- const Scalar& scalar_repl = *replacements.scalar();
- ReplaceWithArrayMaskImpl<Functor>(array, mask, scalar_repl, replacements_bitmap,
- CopyScalarBitmap{scalar_repl.is_valid}, mask_bitmap,
- mask_values, out_bitmap, out_values, out_offset);
- }
-
- if (mask.MayHaveNulls()) {
- arrow::internal::BitmapAnd(out_bitmap, out_offset, mask.buffers[0]->data(),
- mask.offset, array.length, out_offset, out_bitmap);
- }
- return Status::OK();
-}
-
-template <typename Type, typename Enable = void>
-struct ReplaceWithMask {};
-
-template <typename Type>
-struct ReplaceWithMask<Type, enable_if_number<Type>> {
- using T = typename TypeTraits<Type>::CType;
-
- static void CopyData(const DataType&, uint8_t* out, const int64_t out_offset,
- const ArrayData& in, const int64_t in_offset,
- const int64_t length) {
- const auto in_arr = in.GetValues<uint8_t>(1, (in_offset + in.offset) * sizeof(T));
- std::memcpy(out + (out_offset * sizeof(T)), in_arr, length * sizeof(T));
- }
-
- static void CopyData(const DataType&, uint8_t* out, const int64_t out_offset,
- const Scalar& in, const int64_t in_offset, const int64_t length) {
- T* begin = reinterpret_cast<T*>(out + (out_offset * sizeof(T)));
- T* end = begin + length;
- std::fill(begin, end, UnboxScalar<Type>::Unbox(in));
- }
-
- static Status ExecScalarMask(KernelContext* ctx, const ArrayData& array,
- const BooleanScalar& mask, const Datum& replacements,
- ArrayData* output) {
- return ReplaceWithScalarMask<ReplaceWithMask<Type>>(ctx, array, mask, replacements,
- output);
- }
-
- static Status ExecArrayMask(KernelContext* ctx, const ArrayData& array,
- const ArrayData& mask, const Datum& replacements,
- ArrayData* output) {
- return ReplaceWithArrayMask<ReplaceWithMask<Type>>(ctx, array, mask, replacements,
- output);
- }
-};
-
-template <typename Type>
-struct ReplaceWithMask<Type, enable_if_boolean<Type>> {
- static void CopyData(const DataType&, uint8_t* out, const int64_t out_offset,
- const ArrayData& in, const int64_t in_offset,
- const int64_t length) {
- const auto in_arr = in.GetValues<uint8_t>(1, /*absolute_offset=*/0);
- arrow::internal::CopyBitmap(in_arr, in_offset + in.offset, length, out, out_offset);
- }
- static void CopyData(const DataType&, uint8_t* out, const int64_t out_offset,
- const Scalar& in, const int64_t in_offset, const int64_t length) {
- BitUtil::SetBitsTo(out, out_offset, length, in.is_valid);
- }
-
- static Status ExecScalarMask(KernelContext* ctx, const ArrayData& array,
- const BooleanScalar& mask, const Datum& replacements,
- ArrayData* output) {
- return ReplaceWithScalarMask<ReplaceWithMask<Type>>(ctx, array, mask, replacements,
- output);
- }
- static Status ExecArrayMask(KernelContext* ctx, const ArrayData& array,
- const ArrayData& mask, const Datum& replacements,
- ArrayData* output) {
- return ReplaceWithArrayMask<ReplaceWithMask<Type>>(ctx, array, mask, replacements,
- output);
- }
-};
-
-template <typename Type>
-struct ReplaceWithMask<Type, enable_if_same<Type, FixedSizeBinaryType>> {
- static void CopyData(const DataType& ty, uint8_t* out, const int64_t out_offset,
- const ArrayData& in, const int64_t in_offset,
- const int64_t length) {
- const int32_t width = checked_cast<const FixedSizeBinaryType&>(ty).byte_width();
- uint8_t* begin = out + (out_offset * width);
- const auto in_arr = in.GetValues<uint8_t>(1, (in_offset + in.offset) * width);
- std::memcpy(begin, in_arr, length * width);
- }
- static void CopyData(const DataType& ty, uint8_t* out, const int64_t out_offset,
- const Scalar& in, const int64_t in_offset, const int64_t length) {
- const int32_t width = checked_cast<const FixedSizeBinaryType&>(ty).byte_width();
- uint8_t* begin = out + (out_offset * width);
- const auto& scalar = checked_cast<const FixedSizeBinaryScalar&>(in);
- // Null scalar may have null value buffer
- if (!scalar.value) return;
- const Buffer& buffer = *scalar.value;
- const uint8_t* value = buffer.data();
- DCHECK_GE(buffer.size(), width);
- for (int i = 0; i < length; i++) {
- std::memcpy(begin, value, width);
- begin += width;
- }
- }
-
- static Status ExecScalarMask(KernelContext* ctx, const ArrayData& array,
- const BooleanScalar& mask, const Datum& replacements,
- ArrayData* output) {
- return ReplaceWithScalarMask<ReplaceWithMask<Type>>(ctx, array, mask, replacements,
- output);
- }
-
- static Status ExecArrayMask(KernelContext* ctx, const ArrayData& array,
- const ArrayData& mask, const Datum& replacements,
- ArrayData* output) {
- return ReplaceWithArrayMask<ReplaceWithMask<Type>>(ctx, array, mask, replacements,
- output);
- }
-};
-
-template <typename Type>
-struct ReplaceWithMask<Type, enable_if_decimal<Type>> {
- using ScalarType = typename TypeTraits<Type>::ScalarType;
- static void CopyData(const DataType& ty, uint8_t* out, const int64_t out_offset,
- const ArrayData& in, const int64_t in_offset,
- const int64_t length) {
- const int32_t width = checked_cast<const FixedSizeBinaryType&>(ty).byte_width();
- uint8_t* begin = out + (out_offset * width);
- const auto in_arr = in.GetValues<uint8_t>(1, (in_offset + in.offset) * width);
- std::memcpy(begin, in_arr, length * width);
- }
- static void CopyData(const DataType& ty, uint8_t* out, const int64_t out_offset,
- const Scalar& in, const int64_t in_offset, const int64_t length) {
- const int32_t width = checked_cast<const FixedSizeBinaryType&>(ty).byte_width();
- uint8_t* begin = out + (out_offset * width);
- const auto& scalar = checked_cast<const ScalarType&>(in);
- const auto value = scalar.value.ToBytes();
- for (int i = 0; i < length; i++) {
- std::memcpy(begin, value.data(), width);
- begin += width;
- }
- }
-
- static Status ExecScalarMask(KernelContext* ctx, const ArrayData& array,
- const BooleanScalar& mask, const Datum& replacements,
- ArrayData* output) {
- return ReplaceWithScalarMask<ReplaceWithMask<Type>>(ctx, array, mask, replacements,
- output);
- }
-
- static Status ExecArrayMask(KernelContext* ctx, const ArrayData& array,
- const ArrayData& mask, const Datum& replacements,
- ArrayData* output) {
- return ReplaceWithArrayMask<ReplaceWithMask<Type>>(ctx, array, mask, replacements,
- output);
- }
-};
-
-template <typename Type>
-struct ReplaceWithMask<Type, enable_if_null<Type>> {
- static Status ExecScalarMask(KernelContext* ctx, const ArrayData& array,
- const BooleanScalar& mask, const Datum& replacements,
- ArrayData* output) {
- *output = array;
- return Status::OK();
- }
- static Status ExecArrayMask(KernelContext* ctx, const ArrayData& array,
- const ArrayData& mask, const Datum& replacements,
- ArrayData* output) {
- *output = array;
- return Status::OK();
- }
-};
-
-template <typename Type>
-struct ReplaceWithMask<Type, enable_if_base_binary<Type>> {
- using offset_type = typename Type::offset_type;
- using BuilderType = typename TypeTraits<Type>::BuilderType;
-
- static Status ExecScalarMask(KernelContext* ctx, const ArrayData& array,
- const BooleanScalar& mask, const Datum& replacements,
- ArrayData* output) {
- if (!mask.is_valid) {
- // Output = null
- ARROW_ASSIGN_OR_RAISE(
- auto replacement_array,
- MakeArrayOfNull(array.type, array.length, ctx->memory_pool()));
- *output = *replacement_array->data();
- } else if (mask.value) {
- // Output = replacement
- if (replacements.is_scalar()) {
- ARROW_ASSIGN_OR_RAISE(auto replacement_array,
- MakeArrayFromScalar(*replacements.scalar(), array.length,
- ctx->memory_pool()));
- *output = *replacement_array->data();
- } else {
- const ArrayData& replacement_array = *replacements.array();
- if (replacement_array.length < array.length) {
- return ReplacementArrayTooShort(array.length, replacement_array.length);
- }
- *output = replacement_array;
- output->length = array.length;
- }
- } else {
- // Output = input
- *output = array;
- }
- return Status::OK();
- }
- static Status ExecArrayMask(KernelContext* ctx, const ArrayData& array,
- const ArrayData& mask, const Datum& replacements,
- ArrayData* output) {
- BuilderType builder(array.type, ctx->memory_pool());
- RETURN_NOT_OK(builder.Reserve(array.length));
- RETURN_NOT_OK(builder.ReserveData(array.buffers[2]->size()));
- int64_t source_offset = 0;
- int64_t replacements_offset = 0;
- RETURN_NOT_OK(VisitArrayDataInline<BooleanType>(
- mask,
- [&](bool replace) {
- if (replace && replacements.is_scalar()) {
- const Scalar& scalar = *replacements.scalar();
- if (scalar.is_valid) {
- RETURN_NOT_OK(builder.Append(UnboxScalar<Type>::Unbox(scalar)));
- } else {
- RETURN_NOT_OK(builder.AppendNull());
- }
- } else {
- const ArrayData& source = replace ? *replacements.array() : array;
- const int64_t offset = replace ? replacements_offset++ : source_offset;
- if (!source.MayHaveNulls() ||
- BitUtil::GetBit(source.buffers[0]->data(), source.offset + offset)) {
- const uint8_t* data = source.buffers[2]->data();
- const offset_type* offsets = source.GetValues<offset_type>(1);
- const offset_type offset0 = offsets[offset];
- const offset_type offset1 = offsets[offset + 1];
- RETURN_NOT_OK(builder.Append(data + offset0, offset1 - offset0));
- } else {
- RETURN_NOT_OK(builder.AppendNull());
- }
- }
- source_offset++;
- return Status::OK();
- },
- [&]() {
- RETURN_NOT_OK(builder.AppendNull());
- source_offset++;
- return Status::OK();
- }));
- std::shared_ptr<Array> temp_output;
- RETURN_NOT_OK(builder.Finish(&temp_output));
- *output = *temp_output->data();
- // Builder type != logical type due to GenerateTypeAgnosticVarBinaryBase
- output->type = array.type;
- return Status::OK();
- }
-};
-
-template <typename Type>
-struct ReplaceWithMaskFunctor {
- static Status Exec(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
- const ArrayData& array = *batch[0].array();
- const Datum& replacements = batch[2];
- ArrayData* output = out->array().get();
- output->length = array.length;
-
- // Needed for FixedSizeBinary/parameterized types
- if (!array.type->Equals(*replacements.type(), /*check_metadata=*/false)) {
- return Status::Invalid("Replacements must be of same type (expected ",
- array.type->ToString(), " but got ",
- replacements.type()->ToString(), ")");
- }
-
- if (!replacements.is_array() && !replacements.is_scalar()) {
- return Status::Invalid("Replacements must be array or scalar");
- }
-
- if (batch[1].is_scalar()) {
- return ReplaceWithMask<Type>::ExecScalarMask(
- ctx, array, batch[1].scalar_as<BooleanScalar>(), replacements, output);
- }
- const ArrayData& mask = *batch[1].array();
- if (array.length != mask.length) {
- return Status::Invalid("Mask must be of same length as array (expected ",
- array.length, " items but got ", mask.length, " items)");
- }
- return ReplaceWithMask<Type>::ExecArrayMask(ctx, array, mask, replacements, output);
- }
-};
-
-} // namespace
-
-const FunctionDoc replace_with_mask_doc(
- "Replace items using a mask and replacement values",
- ("Given an array and a Boolean mask (either scalar or of equal length), "
- "along with replacement values (either scalar or array), "
- "each element of the array for which the corresponding mask element is "
- "true will be replaced by the next value from the replacements, "
- "or with null if the mask is null. "
- "Hence, for replacement arrays, len(replacements) == sum(mask == true)."),
- {"values", "mask", "replacements"});
-
-void RegisterVectorReplace(FunctionRegistry* registry) {
- auto func = std::make_shared<VectorFunction>("replace_with_mask", Arity::Ternary(),
- &replace_with_mask_doc);
- auto add_kernel = [&](detail::GetTypeId get_id, ArrayKernelExec exec) {
- VectorKernel kernel;
- kernel.can_execute_chunkwise = false;
- if (is_fixed_width(get_id.id)) {
- kernel.null_handling = NullHandling::type::COMPUTED_PREALLOCATE;
- } else {
- kernel.can_write_into_slices = false;
- kernel.null_handling = NullHandling::type::COMPUTED_NO_PREALLOCATE;
- }
- kernel.mem_allocation = MemAllocation::type::PREALLOCATE;
- kernel.signature = KernelSignature::Make(
- {InputType::Array(get_id.id), InputType(boolean()), InputType(get_id.id)},
- OutputType(FirstType));
- kernel.exec = std::move(exec);
- DCHECK_OK(func->AddKernel(std::move(kernel)));
- };
- auto add_primitive_kernel = [&](detail::GetTypeId get_id) {
- add_kernel(get_id, GenerateTypeAgnosticPrimitive<ReplaceWithMaskFunctor>(get_id));
- };
- for (const auto& ty : NumericTypes()) {
- add_primitive_kernel(ty);
- }
- for (const auto& ty : TemporalTypes()) {
- add_primitive_kernel(ty);
- }
- add_primitive_kernel(null());
- add_primitive_kernel(boolean());
- add_primitive_kernel(day_time_interval());
- add_primitive_kernel(month_interval());
- add_kernel(Type::FIXED_SIZE_BINARY, ReplaceWithMaskFunctor<FixedSizeBinaryType>::Exec);
- add_kernel(Type::DECIMAL128, ReplaceWithMaskFunctor<Decimal128Type>::Exec);
- add_kernel(Type::DECIMAL256, ReplaceWithMaskFunctor<Decimal256Type>::Exec);
- for (const auto& ty : BaseBinaryTypes()) {
- add_kernel(ty->id(), GenerateTypeAgnosticVarBinaryBase<ReplaceWithMaskFunctor>(*ty));
- }
- // TODO: list types
- DCHECK_OK(registry->AddFunction(std::move(func)));
-
- // TODO(ARROW-9431): "replace_with_indices"
-}
-} // namespace internal
-} // namespace compute
-} // namespace arrow
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "arrow/compute/api_scalar.h"
+#include "arrow/compute/kernels/common.h"
+#include "arrow/util/bitmap_ops.h"
+
+namespace arrow {
+namespace compute {
+namespace internal {
+
+namespace {
+
+Status ReplacementArrayTooShort(int64_t expected, int64_t actual) {
+ return Status::Invalid("Replacement array must be of appropriate length (expected ",
+ expected, " items but got ", actual, " items)");
+}
+
+// Helper to implement replace_with kernel with scalar mask for fixed-width types,
+// using callbacks to handle both bool and byte-sized types
+template <typename Functor>
+Status ReplaceWithScalarMask(KernelContext* ctx, const ArrayData& array,
+ const BooleanScalar& mask, const Datum& replacements,
+ ArrayData* output) {
+ Datum source = array;
+ if (!mask.is_valid) {
+ // Output = null
+ source = MakeNullScalar(output->type);
+ } else if (mask.value) {
+ // Output = replacement
+ source = replacements;
+ }
+ uint8_t* out_bitmap = output->buffers[0]->mutable_data();
+ uint8_t* out_values = output->buffers[1]->mutable_data();
+ const int64_t out_offset = output->offset;
+ if (source.is_array()) {
+ const ArrayData& in_data = *source.array();
+ if (in_data.length < array.length) {
+ return ReplacementArrayTooShort(array.length, in_data.length);
+ }
+ Functor::CopyData(*array.type, out_values, out_offset, in_data, /*in_offset=*/0,
+ array.length);
+ if (in_data.MayHaveNulls()) {
+ arrow::internal::CopyBitmap(in_data.buffers[0]->data(), in_data.offset,
+ array.length, out_bitmap, out_offset);
+ } else {
+ BitUtil::SetBitsTo(out_bitmap, out_offset, array.length, true);
+ }
+ } else {
+ const Scalar& in_data = *source.scalar();
+ Functor::CopyData(*array.type, out_values, out_offset, in_data, /*in_offset=*/0,
+ array.length);
+ BitUtil::SetBitsTo(out_bitmap, out_offset, array.length, in_data.is_valid);
+ }
+ return Status::OK();
+}
+
+struct CopyArrayBitmap {
+ const uint8_t* in_bitmap;
+ int64_t in_offset;
+
+ void CopyBitmap(uint8_t* out_bitmap, int64_t out_offset, int64_t offset,
+ int64_t length) const {
+ arrow::internal::CopyBitmap(in_bitmap, in_offset + offset, length, out_bitmap,
+ out_offset);
+ }
+
+ void SetBit(uint8_t* out_bitmap, int64_t out_offset, int64_t offset) const {
+ BitUtil::SetBitTo(out_bitmap, out_offset,
+ BitUtil::GetBit(in_bitmap, in_offset + offset));
+ }
+};
+
+struct CopyScalarBitmap {
+ const bool is_valid;
+
+ void CopyBitmap(uint8_t* out_bitmap, int64_t out_offset, int64_t offset,
+ int64_t length) const {
+ BitUtil::SetBitsTo(out_bitmap, out_offset, length, is_valid);
+ }
+
+ void SetBit(uint8_t* out_bitmap, int64_t out_offset, int64_t offset) const {
+ BitUtil::SetBitTo(out_bitmap, out_offset, is_valid);
+ }
+};
+
+// Helper to implement replace_with kernel with array mask for fixed-width types,
+// using callbacks to handle both bool and byte-sized types and to handle
+// scalar and array replacements
+template <typename Functor, typename Data, typename CopyBitmap>
+void ReplaceWithArrayMaskImpl(const ArrayData& array, const ArrayData& mask,
+ const Data& replacements, bool replacements_bitmap,
+ const CopyBitmap& copy_bitmap, const uint8_t* mask_bitmap,
+ const uint8_t* mask_values, uint8_t* out_bitmap,
+ uint8_t* out_values, const int64_t out_offset) {
+ Functor::CopyData(*array.type, out_values, /*out_offset=*/0, array, /*in_offset=*/0,
+ array.length);
+ arrow::internal::OptionalBinaryBitBlockCounter counter(
+ mask_values, mask.offset, mask_bitmap, mask.offset, mask.length);
+ int64_t write_offset = 0;
+ int64_t replacements_offset = 0;
+ while (write_offset < array.length) {
+ BitBlockCount block = counter.NextAndBlock();
+ if (block.AllSet()) {
+ // Copy from replacement array
+ Functor::CopyData(*array.type, out_values, out_offset + write_offset, replacements,
+ replacements_offset, block.length);
+ if (replacements_bitmap) {
+ copy_bitmap.CopyBitmap(out_bitmap, out_offset + write_offset, replacements_offset,
+ block.length);
+ } else if (!replacements_bitmap && out_bitmap) {
+ BitUtil::SetBitsTo(out_bitmap, out_offset + write_offset, block.length, true);
+ }
+ replacements_offset += block.length;
+ } else if (block.popcount) {
+ for (int64_t i = 0; i < block.length; ++i) {
+ if (BitUtil::GetBit(mask_values, write_offset + mask.offset + i) &&
+ (!mask_bitmap ||
+ BitUtil::GetBit(mask_bitmap, write_offset + mask.offset + i))) {
+ Functor::CopyData(*array.type, out_values, out_offset + write_offset + i,
+ replacements, replacements_offset, /*length=*/1);
+ if (replacements_bitmap) {
+ copy_bitmap.SetBit(out_bitmap, out_offset + write_offset + i,
+ replacements_offset);
+ }
+ replacements_offset++;
+ }
+ }
+ }
+ write_offset += block.length;
+ }
+}
+
+template <typename Functor>
+Status ReplaceWithArrayMask(KernelContext* ctx, const ArrayData& array,
+ const ArrayData& mask, const Datum& replacements,
+ ArrayData* output) {
+ const int64_t out_offset = output->offset;
+ uint8_t* out_bitmap = nullptr;
+ uint8_t* out_values = output->buffers[1]->mutable_data();
+ const uint8_t* mask_bitmap = mask.MayHaveNulls() ? mask.buffers[0]->data() : nullptr;
+ const uint8_t* mask_values = mask.buffers[1]->data();
+ const bool replacements_bitmap = replacements.is_array()
+ ? replacements.array()->MayHaveNulls()
+ : !replacements.scalar()->is_valid;
+ if (replacements.is_array()) {
+ // Check that we have enough replacement values
+ const int64_t replacements_length = replacements.array()->length;
+
+ BooleanArray mask_arr(mask.length, mask.buffers[1], mask.buffers[0], mask.null_count,
+ mask.offset);
+ const int64_t count = mask_arr.true_count();
+ if (count > replacements_length) {
+ return ReplacementArrayTooShort(count, replacements_length);
+ }
+ }
+ if (array.MayHaveNulls() || mask.MayHaveNulls() || replacements_bitmap) {
+ out_bitmap = output->buffers[0]->mutable_data();
+ output->null_count = -1;
+ if (array.MayHaveNulls()) {
+ // Copy array's bitmap
+ arrow::internal::CopyBitmap(array.buffers[0]->data(), array.offset, array.length,
+ out_bitmap, out_offset);
+ } else {
+ // Array has no bitmap but mask/replacements do, generate an all-valid bitmap
+ BitUtil::SetBitsTo(out_bitmap, out_offset, array.length, true);
+ }
+ } else {
+ BitUtil::SetBitsTo(output->buffers[0]->mutable_data(), out_offset, array.length,
+ true);
+ output->null_count = 0;
+ }
+
+ if (replacements.is_array()) {
+ const ArrayData& array_repl = *replacements.array();
+ ReplaceWithArrayMaskImpl<Functor>(
+ array, mask, array_repl, replacements_bitmap,
+ CopyArrayBitmap{replacements_bitmap ? array_repl.buffers[0]->data() : nullptr,
+ array_repl.offset},
+ mask_bitmap, mask_values, out_bitmap, out_values, out_offset);
+ } else {
+ const Scalar& scalar_repl = *replacements.scalar();
+ ReplaceWithArrayMaskImpl<Functor>(array, mask, scalar_repl, replacements_bitmap,
+ CopyScalarBitmap{scalar_repl.is_valid}, mask_bitmap,
+ mask_values, out_bitmap, out_values, out_offset);
+ }
+
+ if (mask.MayHaveNulls()) {
+ arrow::internal::BitmapAnd(out_bitmap, out_offset, mask.buffers[0]->data(),
+ mask.offset, array.length, out_offset, out_bitmap);
+ }
+ return Status::OK();
+}
+
+template <typename Type, typename Enable = void>
+struct ReplaceWithMask {};
+
+template <typename Type>
+struct ReplaceWithMask<Type, enable_if_number<Type>> {
+ using T = typename TypeTraits<Type>::CType;
+
+ static void CopyData(const DataType&, uint8_t* out, const int64_t out_offset,
+ const ArrayData& in, const int64_t in_offset,
+ const int64_t length) {
+ const auto in_arr = in.GetValues<uint8_t>(1, (in_offset + in.offset) * sizeof(T));
+ std::memcpy(out + (out_offset * sizeof(T)), in_arr, length * sizeof(T));
+ }
+
+ static void CopyData(const DataType&, uint8_t* out, const int64_t out_offset,
+ const Scalar& in, const int64_t in_offset, const int64_t length) {
+ T* begin = reinterpret_cast<T*>(out + (out_offset * sizeof(T)));
+ T* end = begin + length;
+ std::fill(begin, end, UnboxScalar<Type>::Unbox(in));
+ }
+
+ static Status ExecScalarMask(KernelContext* ctx, const ArrayData& array,
+ const BooleanScalar& mask, const Datum& replacements,
+ ArrayData* output) {
+ return ReplaceWithScalarMask<ReplaceWithMask<Type>>(ctx, array, mask, replacements,
+ output);
+ }
+
+ static Status ExecArrayMask(KernelContext* ctx, const ArrayData& array,
+ const ArrayData& mask, const Datum& replacements,
+ ArrayData* output) {
+ return ReplaceWithArrayMask<ReplaceWithMask<Type>>(ctx, array, mask, replacements,
+ output);
+ }
+};
+
+template <typename Type>
+struct ReplaceWithMask<Type, enable_if_boolean<Type>> {
+ static void CopyData(const DataType&, uint8_t* out, const int64_t out_offset,
+ const ArrayData& in, const int64_t in_offset,
+ const int64_t length) {
+ const auto in_arr = in.GetValues<uint8_t>(1, /*absolute_offset=*/0);
+ arrow::internal::CopyBitmap(in_arr, in_offset + in.offset, length, out, out_offset);
+ }
+ static void CopyData(const DataType&, uint8_t* out, const int64_t out_offset,
+ const Scalar& in, const int64_t in_offset, const int64_t length) {
+ BitUtil::SetBitsTo(out, out_offset, length, in.is_valid);
+ }
+
+ static Status ExecScalarMask(KernelContext* ctx, const ArrayData& array,
+ const BooleanScalar& mask, const Datum& replacements,
+ ArrayData* output) {
+ return ReplaceWithScalarMask<ReplaceWithMask<Type>>(ctx, array, mask, replacements,
+ output);
+ }
+ static Status ExecArrayMask(KernelContext* ctx, const ArrayData& array,
+ const ArrayData& mask, const Datum& replacements,
+ ArrayData* output) {
+ return ReplaceWithArrayMask<ReplaceWithMask<Type>>(ctx, array, mask, replacements,
+ output);
+ }
+};
+
+template <typename Type>
+struct ReplaceWithMask<Type, enable_if_same<Type, FixedSizeBinaryType>> {
+ static void CopyData(const DataType& ty, uint8_t* out, const int64_t out_offset,
+ const ArrayData& in, const int64_t in_offset,
+ const int64_t length) {
+ const int32_t width = checked_cast<const FixedSizeBinaryType&>(ty).byte_width();
+ uint8_t* begin = out + (out_offset * width);
+ const auto in_arr = in.GetValues<uint8_t>(1, (in_offset + in.offset) * width);
+ std::memcpy(begin, in_arr, length * width);
+ }
+ static void CopyData(const DataType& ty, uint8_t* out, const int64_t out_offset,
+ const Scalar& in, const int64_t in_offset, const int64_t length) {
+ const int32_t width = checked_cast<const FixedSizeBinaryType&>(ty).byte_width();
+ uint8_t* begin = out + (out_offset * width);
+ const auto& scalar = checked_cast<const FixedSizeBinaryScalar&>(in);
+ // Null scalar may have null value buffer
+ if (!scalar.value) return;
+ const Buffer& buffer = *scalar.value;
+ const uint8_t* value = buffer.data();
+ DCHECK_GE(buffer.size(), width);
+ for (int i = 0; i < length; i++) {
+ std::memcpy(begin, value, width);
+ begin += width;
+ }
+ }
+
+ static Status ExecScalarMask(KernelContext* ctx, const ArrayData& array,
+ const BooleanScalar& mask, const Datum& replacements,
+ ArrayData* output) {
+ return ReplaceWithScalarMask<ReplaceWithMask<Type>>(ctx, array, mask, replacements,
+ output);
+ }
+
+ static Status ExecArrayMask(KernelContext* ctx, const ArrayData& array,
+ const ArrayData& mask, const Datum& replacements,
+ ArrayData* output) {
+ return ReplaceWithArrayMask<ReplaceWithMask<Type>>(ctx, array, mask, replacements,
+ output);
+ }
+};
+
+template <typename Type>
+struct ReplaceWithMask<Type, enable_if_decimal<Type>> {
+ using ScalarType = typename TypeTraits<Type>::ScalarType;
+ static void CopyData(const DataType& ty, uint8_t* out, const int64_t out_offset,
+ const ArrayData& in, const int64_t in_offset,
+ const int64_t length) {
+ const int32_t width = checked_cast<const FixedSizeBinaryType&>(ty).byte_width();
+ uint8_t* begin = out + (out_offset * width);
+ const auto in_arr = in.GetValues<uint8_t>(1, (in_offset + in.offset) * width);
+ std::memcpy(begin, in_arr, length * width);
+ }
+ static void CopyData(const DataType& ty, uint8_t* out, const int64_t out_offset,
+ const Scalar& in, const int64_t in_offset, const int64_t length) {
+ const int32_t width = checked_cast<const FixedSizeBinaryType&>(ty).byte_width();
+ uint8_t* begin = out + (out_offset * width);
+ const auto& scalar = checked_cast<const ScalarType&>(in);
+ const auto value = scalar.value.ToBytes();
+ for (int i = 0; i < length; i++) {
+ std::memcpy(begin, value.data(), width);
+ begin += width;
+ }
+ }
+
+ static Status ExecScalarMask(KernelContext* ctx, const ArrayData& array,
+ const BooleanScalar& mask, const Datum& replacements,
+ ArrayData* output) {
+ return ReplaceWithScalarMask<ReplaceWithMask<Type>>(ctx, array, mask, replacements,
+ output);
+ }
+
+ static Status ExecArrayMask(KernelContext* ctx, const ArrayData& array,
+ const ArrayData& mask, const Datum& replacements,
+ ArrayData* output) {
+ return ReplaceWithArrayMask<ReplaceWithMask<Type>>(ctx, array, mask, replacements,
+ output);
+ }
+};
+
+template <typename Type>
+struct ReplaceWithMask<Type, enable_if_null<Type>> {
+ static Status ExecScalarMask(KernelContext* ctx, const ArrayData& array,
+ const BooleanScalar& mask, const Datum& replacements,
+ ArrayData* output) {
+ *output = array;
+ return Status::OK();
+ }
+ static Status ExecArrayMask(KernelContext* ctx, const ArrayData& array,
+ const ArrayData& mask, const Datum& replacements,
+ ArrayData* output) {
+ *output = array;
+ return Status::OK();
+ }
+};
+
+template <typename Type>
+struct ReplaceWithMask<Type, enable_if_base_binary<Type>> {
+ using offset_type = typename Type::offset_type;
+ using BuilderType = typename TypeTraits<Type>::BuilderType;
+
+ static Status ExecScalarMask(KernelContext* ctx, const ArrayData& array,
+ const BooleanScalar& mask, const Datum& replacements,
+ ArrayData* output) {
+ if (!mask.is_valid) {
+ // Output = null
+ ARROW_ASSIGN_OR_RAISE(
+ auto replacement_array,
+ MakeArrayOfNull(array.type, array.length, ctx->memory_pool()));
+ *output = *replacement_array->data();
+ } else if (mask.value) {
+ // Output = replacement
+ if (replacements.is_scalar()) {
+ ARROW_ASSIGN_OR_RAISE(auto replacement_array,
+ MakeArrayFromScalar(*replacements.scalar(), array.length,
+ ctx->memory_pool()));
+ *output = *replacement_array->data();
+ } else {
+ const ArrayData& replacement_array = *replacements.array();
+ if (replacement_array.length < array.length) {
+ return ReplacementArrayTooShort(array.length, replacement_array.length);
+ }
+ *output = replacement_array;
+ output->length = array.length;
+ }
+ } else {
+ // Output = input
+ *output = array;
+ }
+ return Status::OK();
+ }
+ static Status ExecArrayMask(KernelContext* ctx, const ArrayData& array,
+ const ArrayData& mask, const Datum& replacements,
+ ArrayData* output) {
+ BuilderType builder(array.type, ctx->memory_pool());
+ RETURN_NOT_OK(builder.Reserve(array.length));
+ RETURN_NOT_OK(builder.ReserveData(array.buffers[2]->size()));
+ int64_t source_offset = 0;
+ int64_t replacements_offset = 0;
+ RETURN_NOT_OK(VisitArrayDataInline<BooleanType>(
+ mask,
+ [&](bool replace) {
+ if (replace && replacements.is_scalar()) {
+ const Scalar& scalar = *replacements.scalar();
+ if (scalar.is_valid) {
+ RETURN_NOT_OK(builder.Append(UnboxScalar<Type>::Unbox(scalar)));
+ } else {
+ RETURN_NOT_OK(builder.AppendNull());
+ }
+ } else {
+ const ArrayData& source = replace ? *replacements.array() : array;
+ const int64_t offset = replace ? replacements_offset++ : source_offset;
+ if (!source.MayHaveNulls() ||
+ BitUtil::GetBit(source.buffers[0]->data(), source.offset + offset)) {
+ const uint8_t* data = source.buffers[2]->data();
+ const offset_type* offsets = source.GetValues<offset_type>(1);
+ const offset_type offset0 = offsets[offset];
+ const offset_type offset1 = offsets[offset + 1];
+ RETURN_NOT_OK(builder.Append(data + offset0, offset1 - offset0));
+ } else {
+ RETURN_NOT_OK(builder.AppendNull());
+ }
+ }
+ source_offset++;
+ return Status::OK();
+ },
+ [&]() {
+ RETURN_NOT_OK(builder.AppendNull());
+ source_offset++;
+ return Status::OK();
+ }));
+ std::shared_ptr<Array> temp_output;
+ RETURN_NOT_OK(builder.Finish(&temp_output));
+ *output = *temp_output->data();
+ // Builder type != logical type due to GenerateTypeAgnosticVarBinaryBase
+ output->type = array.type;
+ return Status::OK();
+ }
+};
+
+template <typename Type>
+struct ReplaceWithMaskFunctor {
+ static Status Exec(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
+ const ArrayData& array = *batch[0].array();
+ const Datum& replacements = batch[2];
+ ArrayData* output = out->array().get();
+ output->length = array.length;
+
+ // Needed for FixedSizeBinary/parameterized types
+ if (!array.type->Equals(*replacements.type(), /*check_metadata=*/false)) {
+ return Status::Invalid("Replacements must be of same type (expected ",
+ array.type->ToString(), " but got ",
+ replacements.type()->ToString(), ")");
+ }
+
+ if (!replacements.is_array() && !replacements.is_scalar()) {
+ return Status::Invalid("Replacements must be array or scalar");
+ }
+
+ if (batch[1].is_scalar()) {
+ return ReplaceWithMask<Type>::ExecScalarMask(
+ ctx, array, batch[1].scalar_as<BooleanScalar>(), replacements, output);
+ }
+ const ArrayData& mask = *batch[1].array();
+ if (array.length != mask.length) {
+ return Status::Invalid("Mask must be of same length as array (expected ",
+ array.length, " items but got ", mask.length, " items)");
+ }
+ return ReplaceWithMask<Type>::ExecArrayMask(ctx, array, mask, replacements, output);
+ }
+};
+
+} // namespace
+
+const FunctionDoc replace_with_mask_doc(
+ "Replace items using a mask and replacement values",
+ ("Given an array and a Boolean mask (either scalar or of equal length), "
+ "along with replacement values (either scalar or array), "
+ "each element of the array for which the corresponding mask element is "
+ "true will be replaced by the next value from the replacements, "
+ "or with null if the mask is null. "
+ "Hence, for replacement arrays, len(replacements) == sum(mask == true)."),
+ {"values", "mask", "replacements"});
+
+void RegisterVectorReplace(FunctionRegistry* registry) {
+ auto func = std::make_shared<VectorFunction>("replace_with_mask", Arity::Ternary(),
+ &replace_with_mask_doc);
+ auto add_kernel = [&](detail::GetTypeId get_id, ArrayKernelExec exec) {
+ VectorKernel kernel;
+ kernel.can_execute_chunkwise = false;
+ if (is_fixed_width(get_id.id)) {
+ kernel.null_handling = NullHandling::type::COMPUTED_PREALLOCATE;
+ } else {
+ kernel.can_write_into_slices = false;
+ kernel.null_handling = NullHandling::type::COMPUTED_NO_PREALLOCATE;
+ }
+ kernel.mem_allocation = MemAllocation::type::PREALLOCATE;
+ kernel.signature = KernelSignature::Make(
+ {InputType::Array(get_id.id), InputType(boolean()), InputType(get_id.id)},
+ OutputType(FirstType));
+ kernel.exec = std::move(exec);
+ DCHECK_OK(func->AddKernel(std::move(kernel)));
+ };
+ auto add_primitive_kernel = [&](detail::GetTypeId get_id) {
+ add_kernel(get_id, GenerateTypeAgnosticPrimitive<ReplaceWithMaskFunctor>(get_id));
+ };
+ for (const auto& ty : NumericTypes()) {
+ add_primitive_kernel(ty);
+ }
+ for (const auto& ty : TemporalTypes()) {
+ add_primitive_kernel(ty);
+ }
+ add_primitive_kernel(null());
+ add_primitive_kernel(boolean());
+ add_primitive_kernel(day_time_interval());
+ add_primitive_kernel(month_interval());
+ add_kernel(Type::FIXED_SIZE_BINARY, ReplaceWithMaskFunctor<FixedSizeBinaryType>::Exec);
+ add_kernel(Type::DECIMAL128, ReplaceWithMaskFunctor<Decimal128Type>::Exec);
+ add_kernel(Type::DECIMAL256, ReplaceWithMaskFunctor<Decimal256Type>::Exec);
+ for (const auto& ty : BaseBinaryTypes()) {
+ add_kernel(ty->id(), GenerateTypeAgnosticVarBinaryBase<ReplaceWithMaskFunctor>(*ty));
+ }
+ // TODO: list types
+ DCHECK_OK(registry->AddFunction(std::move(func)));
+
+ // TODO(ARROW-9431): "replace_with_indices"
+}
+} // namespace internal
+} // namespace compute
+} // namespace arrow
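And a usage sketch for replace_with_mask itself, matching the contract spelled out in replace_with_mask_doc above (same assumptions: vendored Arrow 4.x C++ API, illustrative names and inputs):

#include <arrow/api.h>
#include <arrow/compute/api.h>

arrow::Status Demo() {
  // values = [1, 2, 3, 4]
  arrow::Int64Builder values_builder;
  ARROW_RETURN_NOT_OK(values_builder.AppendValues({1, 2, 3, 4}));
  std::shared_ptr<arrow::Array> values;
  ARROW_RETURN_NOT_OK(values_builder.Finish(&values));

  // mask = [true, false, null, true]; a null mask slot nulls out the output.
  arrow::BooleanBuilder mask_builder;
  ARROW_RETURN_NOT_OK(mask_builder.AppendValues(std::vector<bool>{true, false}));
  ARROW_RETURN_NOT_OK(mask_builder.AppendNull());
  ARROW_RETURN_NOT_OK(mask_builder.Append(true));
  std::shared_ptr<arrow::Array> mask;
  ARROW_RETURN_NOT_OK(mask_builder.Finish(&mask));

  // Two true slots in the mask consume the two replacement values, in order.
  arrow::Int64Builder repl_builder;
  ARROW_RETURN_NOT_OK(repl_builder.AppendValues({10, 11}));
  std::shared_ptr<arrow::Array> replacements;
  ARROW_RETURN_NOT_OK(repl_builder.Finish(&replacements));

  // Result: [10, 2, null, 11].
  ARROW_ASSIGN_OR_RAISE(
      arrow::Datum out,
      arrow::compute::CallFunction("replace_with_mask",
                                   {values, mask, replacements}));
  return arrow::Status::OK();
}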
diff --git a/contrib/libs/apache/arrow/cpp/src/arrow/compute/kernels/vector_selection.cc b/contrib/libs/apache/arrow/cpp/src/arrow/compute/kernels/vector_selection.cc
index b70dadbd146..5845a7ee2d0 100644
--- a/contrib/libs/apache/arrow/cpp/src/arrow/compute/kernels/vector_selection.cc
+++ b/contrib/libs/apache/arrow/cpp/src/arrow/compute/kernels/vector_selection.cc
@@ -36,7 +36,7 @@
#include "arrow/table.h"
#include "arrow/type.h"
#include "arrow/util/bit_block_counter.h"
-#include "arrow/util/bit_run_reader.h"
+#include "arrow/util/bit_run_reader.h"
#include "arrow/util/bit_util.h"
#include "arrow/util/bitmap_ops.h"
#include "arrow/util/bitmap_reader.h"
@@ -87,8 +87,8 @@ int64_t GetFilterOutputSize(const ArrayData& filter,
return output_size;
}
-namespace {
-
+namespace {
+
template <typename IndexType>
Result<std::shared_ptr<ArrayData>> GetTakeIndicesImpl(
const ArrayData& filter, FilterOptions::NullSelectionBehavior null_selection,
@@ -96,130 +96,130 @@ Result<std::shared_ptr<ArrayData>> GetTakeIndicesImpl(
using T = typename IndexType::c_type;
const uint8_t* filter_data = filter.buffers[1]->data();
- const bool have_filter_nulls = filter.MayHaveNulls();
- const uint8_t* filter_is_valid =
- have_filter_nulls ? filter.buffers[0]->data() : nullptr;
-
- if (have_filter_nulls && null_selection == FilterOptions::EMIT_NULL) {
- // Most complex case: the filter may have nulls and we don't drop them.
- // The logic is ternary:
- // - filter is null: emit null
- // - filter is valid and true: emit index
- // - filter is valid and false: don't emit anything
-
- typename TypeTraits<IndexType>::BuilderType builder(memory_pool);
-
- // The position relative to the start of the filter
- T position = 0;
- // The current position taking the filter offset into account
- int64_t position_with_offset = filter.offset;
-
- // To count blocks where filter_data[i] || !filter_is_valid[i]
+ const bool have_filter_nulls = filter.MayHaveNulls();
+ const uint8_t* filter_is_valid =
+ have_filter_nulls ? filter.buffers[0]->data() : nullptr;
+
+ if (have_filter_nulls && null_selection == FilterOptions::EMIT_NULL) {
+ // Most complex case: the filter may have nulls and we don't drop them.
+ // The logic is ternary:
+ // - filter is null: emit null
+ // - filter is valid and true: emit index
+ // - filter is valid and false: don't emit anything
+
+ typename TypeTraits<IndexType>::BuilderType builder(memory_pool);
+
+ // The position relative to the start of the filter
+ T position = 0;
+ // The current position taking the filter offset into account
+ int64_t position_with_offset = filter.offset;
+
+ // To count blocks where filter_data[i] || !filter_is_valid[i]
BinaryBitBlockCounter filter_counter(filter_data, filter.offset, filter_is_valid,
filter.offset, filter.length);
- BitBlockCounter is_valid_counter(filter_is_valid, filter.offset, filter.length);
- while (position < filter.length) {
- // true OR NOT valid
- BitBlockCount selected_or_null_block = filter_counter.NextOrNotWord();
- if (selected_or_null_block.NoneSet()) {
- position += selected_or_null_block.length;
- position_with_offset += selected_or_null_block.length;
- continue;
+ BitBlockCounter is_valid_counter(filter_is_valid, filter.offset, filter.length);
+ while (position < filter.length) {
+ // true OR NOT valid
+ BitBlockCount selected_or_null_block = filter_counter.NextOrNotWord();
+ if (selected_or_null_block.NoneSet()) {
+ position += selected_or_null_block.length;
+ position_with_offset += selected_or_null_block.length;
+ continue;
}
- RETURN_NOT_OK(builder.Reserve(selected_or_null_block.popcount));
-
- // If the values are all valid and the selected_or_null_block is full,
- // then we can infer that all the values are true and skip the bit checking
- BitBlockCount is_valid_block = is_valid_counter.NextWord();
-
- if (selected_or_null_block.AllSet() && is_valid_block.AllSet()) {
- // All the values are selected and non-null
- for (int64_t i = 0; i < selected_or_null_block.length; ++i) {
- builder.UnsafeAppend(position++);
- }
- position_with_offset += selected_or_null_block.length;
- } else {
- // Some of the values are false or null
- for (int64_t i = 0; i < selected_or_null_block.length; ++i) {
- if (BitUtil::GetBit(filter_is_valid, position_with_offset)) {
- if (BitUtil::GetBit(filter_data, position_with_offset)) {
- builder.UnsafeAppend(position);
+ RETURN_NOT_OK(builder.Reserve(selected_or_null_block.popcount));
+
+ // If the values are all valid and the selected_or_null_block is full,
+ // then we can infer that all the values are true and skip the bit checking
+ BitBlockCount is_valid_block = is_valid_counter.NextWord();
+
+ if (selected_or_null_block.AllSet() && is_valid_block.AllSet()) {
+ // All the values are selected and non-null
+ for (int64_t i = 0; i < selected_or_null_block.length; ++i) {
+ builder.UnsafeAppend(position++);
+ }
+ position_with_offset += selected_or_null_block.length;
+ } else {
+ // Some of the values are false or null
+ for (int64_t i = 0; i < selected_or_null_block.length; ++i) {
+ if (BitUtil::GetBit(filter_is_valid, position_with_offset)) {
+ if (BitUtil::GetBit(filter_data, position_with_offset)) {
+ builder.UnsafeAppend(position);
}
- } else {
- // Null slot, so append a null
- builder.UnsafeAppendNull();
+ } else {
+ // Null slot, so append a null
+ builder.UnsafeAppendNull();
}
- ++position;
- ++position_with_offset;
+ ++position;
+ ++position_with_offset;
}
}
}
- std::shared_ptr<ArrayData> result;
- RETURN_NOT_OK(builder.FinishInternal(&result));
- return result;
- }
-
- // Other cases don't emit nulls and are therefore simpler.
- TypedBufferBuilder<T> builder(memory_pool);
-
- if (have_filter_nulls) {
- // The filter may have nulls, so we scan the validity bitmap and the filter
- // data bitmap together.
- DCHECK_EQ(null_selection, FilterOptions::DROP);
-
- // The position relative to the start of the filter
- T position = 0;
- // The current position taking the filter offset into account
- int64_t position_with_offset = filter.offset;
-
- BinaryBitBlockCounter filter_counter(filter_data, filter.offset, filter_is_valid,
- filter.offset, filter.length);
- while (position < filter.length) {
- BitBlockCount and_block = filter_counter.NextAndWord();
- RETURN_NOT_OK(builder.Reserve(and_block.popcount));
- if (and_block.AllSet()) {
- // All the values are selected and non-null
- for (int64_t i = 0; i < and_block.length; ++i) {
+ std::shared_ptr<ArrayData> result;
+ RETURN_NOT_OK(builder.FinishInternal(&result));
+ return result;
+ }
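A minimal scalar sketch of the EMIT_NULL rule above, with the block-wise fast paths stripped away (illustrative standalone code, not the Arrow internals):

#include <cstdint>
#include <iostream>
#include <optional>
#include <vector>

// For each filter slot: null filter -> emit null; valid && true -> emit the
// index; valid && false -> emit nothing. Mirrors the EMIT_NULL branch above.
std::vector<std::optional<int64_t>> EmitNullIndices(
    const std::vector<bool>& is_valid, const std::vector<bool>& data) {
  std::vector<std::optional<int64_t>> out;
  for (int64_t i = 0; i < static_cast<int64_t>(data.size()); ++i) {
    if (!is_valid[i]) {
      out.push_back(std::nullopt);  // null filter slot -> null index
    } else if (data[i]) {
      out.push_back(i);             // selected -> emit position
    }                               // valid && false -> skip
  }
  return out;
}

int main() {
  auto out = EmitNullIndices({true, false, true, true}, {true, true, false, true});
  for (const auto& v : out) {
    if (v) std::cout << *v << ' '; else std::cout << "null ";
  }
  std::cout << '\n';  // prints: 0 null 3
}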
+
+ // Other cases don't emit nulls and are therefore simpler.
+ TypedBufferBuilder<T> builder(memory_pool);
+
+ if (have_filter_nulls) {
+ // The filter may have nulls, so we scan the validity bitmap and the filter
+ // data bitmap together.
+ DCHECK_EQ(null_selection, FilterOptions::DROP);
+
+ // The position relative to the start of the filter
+ T position = 0;
+ // The current position taking the filter offset into account
+ int64_t position_with_offset = filter.offset;
+
+ BinaryBitBlockCounter filter_counter(filter_data, filter.offset, filter_is_valid,
+ filter.offset, filter.length);
+ while (position < filter.length) {
+ BitBlockCount and_block = filter_counter.NextAndWord();
+ RETURN_NOT_OK(builder.Reserve(and_block.popcount));
+ if (and_block.AllSet()) {
+ // All the values are selected and non-null
+ for (int64_t i = 0; i < and_block.length; ++i) {
builder.UnsafeAppend(position++);
}
- position_with_offset += and_block.length;
- } else if (!and_block.NoneSet()) {
- // Some of the values are false or null
- for (int64_t i = 0; i < and_block.length; ++i) {
- if (BitUtil::GetBit(filter_is_valid, position_with_offset) &&
- BitUtil::GetBit(filter_data, position_with_offset)) {
+ position_with_offset += and_block.length;
+ } else if (!and_block.NoneSet()) {
+ // Some of the values are false or null
+ for (int64_t i = 0; i < and_block.length; ++i) {
+ if (BitUtil::GetBit(filter_is_valid, position_with_offset) &&
+ BitUtil::GetBit(filter_data, position_with_offset)) {
builder.UnsafeAppend(position);
}
++position;
++position_with_offset;
}
} else {
- position += and_block.length;
- position_with_offset += and_block.length;
+ position += and_block.length;
+ position_with_offset += and_block.length;
}
}
- } else {
- // The filter has no nulls, so we need only look for true values
- RETURN_NOT_OK(::arrow::internal::VisitSetBitRuns(
- filter_data, filter.offset, filter.length, [&](int64_t offset, int64_t length) {
- // Append the consecutive run of indices
- RETURN_NOT_OK(builder.Reserve(length));
- for (int64_t i = 0; i < length; ++i) {
- builder.UnsafeAppend(static_cast<T>(offset + i));
- }
- return Status::OK();
- }));
+ } else {
+ // The filter has no nulls, so we need only look for true values
+ RETURN_NOT_OK(::arrow::internal::VisitSetBitRuns(
+ filter_data, filter.offset, filter.length, [&](int64_t offset, int64_t length) {
+ // Append the consecutive run of indices
+ RETURN_NOT_OK(builder.Reserve(length));
+ for (int64_t i = 0; i < length; ++i) {
+ builder.UnsafeAppend(static_cast<T>(offset + i));
+ }
+ return Status::OK();
+ }));
}
-
- const int64_t length = builder.length();
- std::shared_ptr<Buffer> out_buffer;
- RETURN_NOT_OK(builder.Finish(&out_buffer));
- return std::make_shared<ArrayData>(TypeTraits<IndexType>::type_singleton(), length,
- BufferVector{nullptr, out_buffer}, /*null_count=*/0);
+
+ const int64_t length = builder.length();
+ std::shared_ptr<Buffer> out_buffer;
+ RETURN_NOT_OK(builder.Finish(&out_buffer));
+ return std::make_shared<ArrayData>(TypeTraits<IndexType>::type_singleton(), length,
+ BufferVector{nullptr, out_buffer}, /*null_count=*/0);
}
-} // namespace
-
+} // namespace
+
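For contrast, the DROP path above reduces to a plain conjunction of the filter's validity and data bits; a standalone sketch under that assumption (illustrative names, not the Arrow API):

#include <cstdint>
#include <iostream>
#include <vector>

// DROP semantics: an index is emitted only when the filter slot is both
// valid and true, i.e. filter_is_valid[i] && filter_data[i].
std::vector<int64_t> DropNullIndices(const std::vector<bool>& is_valid,
                                     const std::vector<bool>& data) {
  std::vector<int64_t> out;
  for (int64_t i = 0; i < static_cast<int64_t>(data.size()); ++i) {
    if (is_valid[i] && data[i]) out.push_back(i);
  }
  return out;
}

int main() {
  for (int64_t i : DropNullIndices({true, false, true, true}, {true, true, false, true}))
    std::cout << i << ' ';  // prints: 0 3
  std::cout << '\n';
}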
Result<std::shared_ptr<ArrayData>> GetTakeIndices(
const ArrayData& filter, FilterOptions::NullSelectionBehavior null_selection,
MemoryPool* memory_pool) {
@@ -490,9 +490,9 @@ void TakeIndexDispatch(const PrimitiveArg& values, const PrimitiveArg& indices,
}
}
-Status PrimitiveTake(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
+Status PrimitiveTake(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
if (TakeState::Get(ctx).boundscheck) {
- RETURN_NOT_OK(CheckIndexBounds(*batch[1].array(), batch[0].length()));
+ RETURN_NOT_OK(CheckIndexBounds(*batch[1].array(), batch[0].length()));
}
PrimitiveArg values = GetPrimitiveArg(*batch[0].array());
@@ -504,29 +504,29 @@ Status PrimitiveTake(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
// allocating the validity bitmap altogether and save time and space. A
// streamlined PrimitiveTakeImpl would need to be written that skips all
// interactions with the output validity bitmap, though.
- RETURN_NOT_OK(PreallocateData(ctx, indices.length, values.bit_width,
- /*allocate_validity=*/true, out_arr));
+ RETURN_NOT_OK(PreallocateData(ctx, indices.length, values.bit_width,
+ /*allocate_validity=*/true, out_arr));
switch (values.bit_width) {
case 1:
- TakeIndexDispatch<BooleanTakeImpl>(values, indices, out_arr);
- break;
+ TakeIndexDispatch<BooleanTakeImpl>(values, indices, out_arr);
+ break;
case 8:
- TakeIndexDispatch<PrimitiveTakeImpl, int8_t>(values, indices, out_arr);
- break;
+ TakeIndexDispatch<PrimitiveTakeImpl, int8_t>(values, indices, out_arr);
+ break;
case 16:
- TakeIndexDispatch<PrimitiveTakeImpl, int16_t>(values, indices, out_arr);
- break;
+ TakeIndexDispatch<PrimitiveTakeImpl, int16_t>(values, indices, out_arr);
+ break;
case 32:
- TakeIndexDispatch<PrimitiveTakeImpl, int32_t>(values, indices, out_arr);
- break;
+ TakeIndexDispatch<PrimitiveTakeImpl, int32_t>(values, indices, out_arr);
+ break;
case 64:
- TakeIndexDispatch<PrimitiveTakeImpl, int64_t>(values, indices, out_arr);
- break;
+ TakeIndexDispatch<PrimitiveTakeImpl, int64_t>(values, indices, out_arr);
+ break;
default:
DCHECK(false) << "Invalid values byte width";
break;
}
- return Status::OK();
+ return Status::OK();
}
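The switch above dispatches on physical bit width rather than logical type, so a single instantiation serves every 32-bit type. A standalone sketch of the same trick (hypothetical helper names, not the Arrow code):

#include <cstdint>
#include <iostream>
#include <vector>

// Copy values[indices[i]] into out, treating the data as opaque CType words.
// One instantiation per width serves all logical types of that width.
template <typename CType>
void TakeImpl(const void* values, const std::vector<int64_t>& indices, void* out) {
  const CType* in = static_cast<const CType*>(values);
  CType* dst = static_cast<CType*>(out);
  for (size_t i = 0; i < indices.size(); ++i) dst[i] = in[indices[i]];
}

void TakeDispatch(int bit_width, const void* values,
                  const std::vector<int64_t>& indices, void* out) {
  switch (bit_width) {
    case 8:  TakeImpl<uint8_t>(values, indices, out); break;
    case 16: TakeImpl<uint16_t>(values, indices, out); break;
    case 32: TakeImpl<uint32_t>(values, indices, out); break;
    case 64: TakeImpl<uint64_t>(values, indices, out); break;
    default: break;  // bit width 1 (boolean) needs bit-level handling
  }
}

int main() {
  std::vector<int32_t> values{10, 20, 30, 40};
  std::vector<int64_t> indices{3, 0, 2};
  std::vector<int32_t> out(indices.size());
  TakeDispatch(32, values.data(), indices, out.data());
  for (int32_t v : out) std::cout << v << ' ';  // prints: 40 10 30
  std::cout << '\n';
}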
// ----------------------------------------------------------------------
@@ -597,9 +597,9 @@ class PrimitiveFilterImpl {
void ExecNonNull() {
// Fast filter when values and filter are not null
- ::arrow::internal::VisitSetBitRunsVoid(
- filter_data_, filter_offset_, values_length_,
- [&](int64_t position, int64_t length) { WriteValueSegment(position, length); });
+ ::arrow::internal::VisitSetBitRunsVoid(
+ filter_data_, filter_offset_, values_length_,
+ [&](int64_t position, int64_t length) { WriteValueSegment(position, length); });
}
void Exec() {
@@ -783,7 +783,7 @@ inline void PrimitiveFilterImpl<BooleanType>::WriteNull() {
BitUtil::ClearBit(out_data_, out_offset_ + out_position_++);
}
-Status PrimitiveFilter(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
+Status PrimitiveFilter(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
PrimitiveArg values = GetPrimitiveArg(*batch[0].array());
PrimitiveArg filter = GetPrimitiveArg(*batch[1].array());
FilterOptions::NullSelectionBehavior null_selection =
@@ -808,30 +808,30 @@ Status PrimitiveFilter(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
// validity bitmap.
bool allocate_validity = values.null_count != 0 || filter.null_count != 0;
- RETURN_NOT_OK(
- PreallocateData(ctx, output_length, values.bit_width, allocate_validity, out_arr));
+ RETURN_NOT_OK(
+ PreallocateData(ctx, output_length, values.bit_width, allocate_validity, out_arr));
switch (values.bit_width) {
case 1:
- PrimitiveFilterImpl<BooleanType>(values, filter, null_selection, out_arr).Exec();
- break;
+ PrimitiveFilterImpl<BooleanType>(values, filter, null_selection, out_arr).Exec();
+ break;
case 8:
- PrimitiveFilterImpl<UInt8Type>(values, filter, null_selection, out_arr).Exec();
- break;
+ PrimitiveFilterImpl<UInt8Type>(values, filter, null_selection, out_arr).Exec();
+ break;
case 16:
- PrimitiveFilterImpl<UInt16Type>(values, filter, null_selection, out_arr).Exec();
- break;
+ PrimitiveFilterImpl<UInt16Type>(values, filter, null_selection, out_arr).Exec();
+ break;
case 32:
- PrimitiveFilterImpl<UInt32Type>(values, filter, null_selection, out_arr).Exec();
- break;
+ PrimitiveFilterImpl<UInt32Type>(values, filter, null_selection, out_arr).Exec();
+ break;
case 64:
- PrimitiveFilterImpl<UInt64Type>(values, filter, null_selection, out_arr).Exec();
- break;
+ PrimitiveFilterImpl<UInt64Type>(values, filter, null_selection, out_arr).Exec();
+ break;
default:
DCHECK(false) << "Invalid values bit width";
break;
}
- return Status::OK();
+ return Status::OK();
}
// ----------------------------------------------------------------------
@@ -880,25 +880,25 @@ Status BinaryFilterNonNullImpl(KernelContext* ctx, const ArrayData& values,
ArrayData* out) {
using offset_type = typename Type::offset_type;
const auto filter_data = filter.buffers[1]->data();
-
+
BINARY_FILTER_SETUP_COMMON();
- RETURN_NOT_OK(arrow::internal::VisitSetBitRuns(
- filter_data, filter.offset, filter.length, [&](int64_t position, int64_t length) {
+ RETURN_NOT_OK(arrow::internal::VisitSetBitRuns(
+ filter_data, filter.offset, filter.length, [&](int64_t position, int64_t length) {
// Bulk-append raw data
- const offset_type run_data_bytes =
- (raw_offsets[position + length] - raw_offsets[position]);
- APPEND_RAW_DATA(raw_data + raw_offsets[position], run_data_bytes);
+ const offset_type run_data_bytes =
+ (raw_offsets[position + length] - raw_offsets[position]);
+ APPEND_RAW_DATA(raw_data + raw_offsets[position], run_data_bytes);
// Append offsets
- offset_type cur_offset = raw_offsets[position];
- for (int64_t i = 0; i < length; ++i) {
+ offset_type cur_offset = raw_offsets[position];
+ for (int64_t i = 0; i < length; ++i) {
offset_builder.UnsafeAppend(offset);
- offset += raw_offsets[i + position + 1] - cur_offset;
- cur_offset = raw_offsets[i + position + 1];
+ offset += raw_offsets[i + position + 1] - cur_offset;
+ cur_offset = raw_offsets[i + position + 1];
}
- return Status::OK();
- }));
-
+ return Status::OK();
+ }));
+
offset_builder.UnsafeAppend(offset);
out->length = output_length;
RETURN_NOT_OK(offset_builder.Finish(&out->buffers[1]));
@@ -936,8 +936,8 @@ Status BinaryFilterImpl(KernelContext* ctx, const ArrayData& values,
BINARY_FILTER_SETUP_COMMON();
- int64_t in_position = 0;
- int64_t out_position = 0;
+ int64_t in_position = 0;
+ int64_t out_position = 0;
while (in_position < filter.length) {
BitBlockCount filter_valid_block = filter_valid_counter.NextWord();
BitBlockCount values_valid_block = values_valid_counter.NextWord();
@@ -1079,7 +1079,7 @@ Status BinaryFilterImpl(KernelContext* ctx, const ArrayData& values,
#undef APPEND_RAW_DATA
#undef APPEND_SINGLE_VALUE
-Status BinaryFilter(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
+Status BinaryFilter(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
FilterOptions::NullSelectionBehavior null_selection =
FilterState::Get(ctx).null_selection_behavior;
@@ -1101,100 +1101,100 @@ Status BinaryFilter(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
if (values.null_count == 0 && filter.null_count == 0) {
// Faster no-nulls case
if (is_binary_like(type_id)) {
- RETURN_NOT_OK(BinaryFilterNonNullImpl<BinaryType>(
- ctx, values, filter, output_length, null_selection, out_arr));
+ RETURN_NOT_OK(BinaryFilterNonNullImpl<BinaryType>(
+ ctx, values, filter, output_length, null_selection, out_arr));
} else if (is_large_binary_like(type_id)) {
- RETURN_NOT_OK(BinaryFilterNonNullImpl<LargeBinaryType>(
- ctx, values, filter, output_length, null_selection, out_arr));
+ RETURN_NOT_OK(BinaryFilterNonNullImpl<LargeBinaryType>(
+ ctx, values, filter, output_length, null_selection, out_arr));
} else {
DCHECK(false);
}
} else {
// Output may have nulls
- RETURN_NOT_OK(ctx->AllocateBitmap(output_length).Value(&out_arr->buffers[0]));
+ RETURN_NOT_OK(ctx->AllocateBitmap(output_length).Value(&out_arr->buffers[0]));
if (is_binary_like(type_id)) {
- RETURN_NOT_OK(BinaryFilterImpl<BinaryType>(ctx, values, filter, output_length,
- null_selection, out_arr));
+ RETURN_NOT_OK(BinaryFilterImpl<BinaryType>(ctx, values, filter, output_length,
+ null_selection, out_arr));
} else if (is_large_binary_like(type_id)) {
- RETURN_NOT_OK(BinaryFilterImpl<LargeBinaryType>(ctx, values, filter, output_length,
- null_selection, out_arr));
+ RETURN_NOT_OK(BinaryFilterImpl<LargeBinaryType>(ctx, values, filter, output_length,
+ null_selection, out_arr));
} else {
DCHECK(false);
}
}
-
- return Status::OK();
+
+ return Status::OK();
}
// ----------------------------------------------------------------------
// Null take and filter
-Status NullTake(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
+Status NullTake(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
if (TakeState::Get(ctx).boundscheck) {
- RETURN_NOT_OK(CheckIndexBounds(*batch[1].array(), batch[0].length()));
+ RETURN_NOT_OK(CheckIndexBounds(*batch[1].array(), batch[0].length()));
}
// batch.length doesn't take into account the take indices
auto new_length = batch[1].array()->length;
out->value = std::make_shared<NullArray>(new_length)->data();
- return Status::OK();
+ return Status::OK();
}
-Status NullFilter(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
+Status NullFilter(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
int64_t output_length = GetFilterOutputSize(
*batch[1].array(), FilterState::Get(ctx).null_selection_behavior);
out->value = std::make_shared<NullArray>(output_length)->data();
- return Status::OK();
+ return Status::OK();
}
// ----------------------------------------------------------------------
// Dictionary take and filter
-Status DictionaryTake(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
+Status DictionaryTake(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
DictionaryArray values(batch[0].array());
Datum result;
- RETURN_NOT_OK(
- Take(Datum(values.indices()), batch[1], TakeState::Get(ctx), ctx->exec_context())
- .Value(&result));
+ RETURN_NOT_OK(
+ Take(Datum(values.indices()), batch[1], TakeState::Get(ctx), ctx->exec_context())
+ .Value(&result));
DictionaryArray taken_values(values.type(), result.make_array(), values.dictionary());
out->value = taken_values.data();
- return Status::OK();
+ return Status::OK();
}
-Status DictionaryFilter(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
+Status DictionaryFilter(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
DictionaryArray dict_values(batch[0].array());
Datum result;
- RETURN_NOT_OK(Filter(Datum(dict_values.indices()), batch[1].array(),
- FilterState::Get(ctx), ctx->exec_context())
- .Value(&result));
+ RETURN_NOT_OK(Filter(Datum(dict_values.indices()), batch[1].array(),
+ FilterState::Get(ctx), ctx->exec_context())
+ .Value(&result));
DictionaryArray filtered_values(dict_values.type(), result.make_array(),
dict_values.dictionary());
out->value = filtered_values.data();
- return Status::OK();
+ return Status::OK();
}
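Both dictionary kernels rely on the same invariant: selection touches only the index buffer, while the dictionary itself is shared unchanged. A toy sketch of that idea with plain vectors (not the Arrow types):

#include <cstdint>
#include <iostream>
#include <string>
#include <vector>

struct ToyDictArray {
  std::vector<std::string> dictionary;  // shared, never copied by selection
  std::vector<int32_t> indices;         // the only buffer a filter touches
};

// Filtering keeps the dictionary and filters just the index buffer.
ToyDictArray FilterDict(const ToyDictArray& arr, const std::vector<bool>& mask) {
  ToyDictArray out{arr.dictionary, {}};
  for (size_t i = 0; i < mask.size(); ++i)
    if (mask[i]) out.indices.push_back(arr.indices[i]);
  return out;
}

int main() {
  ToyDictArray arr{{"a", "b", "c"}, {0, 2, 1, 2}};
  ToyDictArray out = FilterDict(arr, {true, false, true, true});
  for (int32_t i : out.indices) std::cout << out.dictionary[i] << ' ';  // a b c
  std::cout << '\n';
}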
// ----------------------------------------------------------------------
// Extension take and filter
-Status ExtensionTake(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
+Status ExtensionTake(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
ExtensionArray values(batch[0].array());
Datum result;
- RETURN_NOT_OK(
- Take(Datum(values.storage()), batch[1], TakeState::Get(ctx), ctx->exec_context())
- .Value(&result));
+ RETURN_NOT_OK(
+ Take(Datum(values.storage()), batch[1], TakeState::Get(ctx), ctx->exec_context())
+ .Value(&result));
ExtensionArray taken_values(values.type(), result.make_array());
out->value = taken_values.data();
- return Status::OK();
+ return Status::OK();
}
-Status ExtensionFilter(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
+Status ExtensionFilter(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
ExtensionArray ext_values(batch[0].array());
Datum result;
- RETURN_NOT_OK(Filter(Datum(ext_values.storage()), batch[1].array(),
- FilterState::Get(ctx), ctx->exec_context())
- .Value(&result));
+ RETURN_NOT_OK(Filter(Datum(ext_values.storage()), batch[1].array(),
+ FilterState::Get(ctx), ctx->exec_context())
+ .Value(&result));
ExtensionArray filtered_values(ext_values.type(), result.make_array());
out->value = filtered_values.data();
- return Status::OK();
+ return Status::OK();
}
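The extension kernels follow the same unwrap/select/rewrap shape, but over the storage array rather than dictionary indices. A toy sketch under that reading (illustrative types only):

#include <iostream>
#include <string>
#include <vector>

// An extension array is a logical wrapper over a physical storage array.
struct ToyExtensionArray {
  std::string type_name;        // carried through unchanged
  std::vector<double> storage;  // selection operates on this
};

ToyExtensionArray FilterExtension(const ToyExtensionArray& arr,
                                  const std::vector<bool>& mask) {
  ToyExtensionArray out{arr.type_name, {}};
  for (size_t i = 0; i < mask.size(); ++i)
    if (mask[i]) out.storage.push_back(arr.storage[i]);  // select storage
  return out;                                            // rewrap with same type
}

int main() {
  ToyExtensionArray arr{"uuid", {1.5, 2.5, 3.5}};
  auto out = FilterExtension(arr, {true, false, true});
  for (double v : out.storage) std::cout << v << ' ';  // prints: 1.5 3.5
  std::cout << '\n';
}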
// ----------------------------------------------------------------------
@@ -1668,81 +1668,81 @@ struct ListImpl : public Selection<ListImpl<Type>, Type> {
}
};
-struct DenseUnionImpl : public Selection<DenseUnionImpl, DenseUnionType> {
- using Base = Selection<DenseUnionImpl, DenseUnionType>;
- LIFT_BASE_MEMBERS();
-
- TypedBufferBuilder<int32_t> value_offset_buffer_builder_;
- TypedBufferBuilder<int8_t> child_id_buffer_builder_;
- std::vector<int8_t> type_codes_;
- std::vector<Int32Builder> child_indices_builders_;
-
- DenseUnionImpl(KernelContext* ctx, const ExecBatch& batch, int64_t output_length,
- Datum* out)
- : Base(ctx, batch, output_length, out),
- value_offset_buffer_builder_(ctx->memory_pool()),
- child_id_buffer_builder_(ctx->memory_pool()),
- type_codes_(checked_cast<const UnionType&>(*this->values->type).type_codes()),
- child_indices_builders_(type_codes_.size()) {
- for (auto& child_indices_builder : child_indices_builders_) {
- child_indices_builder = Int32Builder(ctx->memory_pool());
- }
- }
-
- template <typename Adapter>
- Status GenerateOutput() {
- DenseUnionArray typed_values(this->values);
- Adapter adapter(this);
- RETURN_NOT_OK(adapter.Generate(
- [&](int64_t index) {
- int8_t child_id = typed_values.child_id(index);
- child_id_buffer_builder_.UnsafeAppend(type_codes_[child_id]);
- int32_t value_offset = typed_values.value_offset(index);
- value_offset_buffer_builder_.UnsafeAppend(
- static_cast<int32_t>(child_indices_builders_[child_id].length()));
- RETURN_NOT_OK(child_indices_builders_[child_id].Reserve(1));
- child_indices_builders_[child_id].UnsafeAppend(value_offset);
- return Status::OK();
- },
- [&]() {
- int8_t child_id = 0;
- child_id_buffer_builder_.UnsafeAppend(type_codes_[child_id]);
- value_offset_buffer_builder_.UnsafeAppend(
- static_cast<int32_t>(child_indices_builders_[child_id].length()));
- RETURN_NOT_OK(child_indices_builders_[child_id].Reserve(1));
- child_indices_builders_[child_id].UnsafeAppendNull();
- return Status::OK();
- }));
- return Status::OK();
- }
-
- Status Init() override {
- RETURN_NOT_OK(child_id_buffer_builder_.Reserve(output_length));
- RETURN_NOT_OK(value_offset_buffer_builder_.Reserve(output_length));
- return Status::OK();
- }
-
- Status Finish() override {
- ARROW_ASSIGN_OR_RAISE(auto child_ids_buffer, child_id_buffer_builder_.Finish());
- ARROW_ASSIGN_OR_RAISE(auto value_offsets_buffer,
- value_offset_buffer_builder_.Finish());
- DenseUnionArray typed_values(this->values);
- auto num_fields = typed_values.num_fields();
- auto num_rows = child_ids_buffer->size();
- BufferVector buffers{nullptr, std::move(child_ids_buffer),
- std::move(value_offsets_buffer)};
- *out = ArrayData(typed_values.type(), num_rows, std::move(buffers), 0);
- for (auto i = 0; i < num_fields; i++) {
- ARROW_ASSIGN_OR_RAISE(auto child_indices_array,
- child_indices_builders_[i].Finish());
- ARROW_ASSIGN_OR_RAISE(std::shared_ptr<Array> child_array,
- Take(*typed_values.field(i), *child_indices_array));
- out->child_data.push_back(child_array->data());
- }
- return Status::OK();
- }
-};
-
+struct DenseUnionImpl : public Selection<DenseUnionImpl, DenseUnionType> {
+ using Base = Selection<DenseUnionImpl, DenseUnionType>;
+ LIFT_BASE_MEMBERS();
+
+ TypedBufferBuilder<int32_t> value_offset_buffer_builder_;
+ TypedBufferBuilder<int8_t> child_id_buffer_builder_;
+ std::vector<int8_t> type_codes_;
+ std::vector<Int32Builder> child_indices_builders_;
+
+ DenseUnionImpl(KernelContext* ctx, const ExecBatch& batch, int64_t output_length,
+ Datum* out)
+ : Base(ctx, batch, output_length, out),
+ value_offset_buffer_builder_(ctx->memory_pool()),
+ child_id_buffer_builder_(ctx->memory_pool()),
+ type_codes_(checked_cast<const UnionType&>(*this->values->type).type_codes()),
+ child_indices_builders_(type_codes_.size()) {
+ for (auto& child_indices_builder : child_indices_builders_) {
+ child_indices_builder = Int32Builder(ctx->memory_pool());
+ }
+ }
+
+ template <typename Adapter>
+ Status GenerateOutput() {
+ DenseUnionArray typed_values(this->values);
+ Adapter adapter(this);
+ RETURN_NOT_OK(adapter.Generate(
+ [&](int64_t index) {
+ int8_t child_id = typed_values.child_id(index);
+ child_id_buffer_builder_.UnsafeAppend(type_codes_[child_id]);
+ int32_t value_offset = typed_values.value_offset(index);
+ value_offset_buffer_builder_.UnsafeAppend(
+ static_cast<int32_t>(child_indices_builders_[child_id].length()));
+ RETURN_NOT_OK(child_indices_builders_[child_id].Reserve(1));
+ child_indices_builders_[child_id].UnsafeAppend(value_offset);
+ return Status::OK();
+ },
+ [&]() {
+ int8_t child_id = 0;
+ child_id_buffer_builder_.UnsafeAppend(type_codes_[child_id]);
+ value_offset_buffer_builder_.UnsafeAppend(
+ static_cast<int32_t>(child_indices_builders_[child_id].length()));
+ RETURN_NOT_OK(child_indices_builders_[child_id].Reserve(1));
+ child_indices_builders_[child_id].UnsafeAppendNull();
+ return Status::OK();
+ }));
+ return Status::OK();
+ }
+
+ Status Init() override {
+ RETURN_NOT_OK(child_id_buffer_builder_.Reserve(output_length));
+ RETURN_NOT_OK(value_offset_buffer_builder_.Reserve(output_length));
+ return Status::OK();
+ }
+
+ Status Finish() override {
+ ARROW_ASSIGN_OR_RAISE(auto child_ids_buffer, child_id_buffer_builder_.Finish());
+ ARROW_ASSIGN_OR_RAISE(auto value_offsets_buffer,
+ value_offset_buffer_builder_.Finish());
+ DenseUnionArray typed_values(this->values);
+ auto num_fields = typed_values.num_fields();
+ auto num_rows = child_ids_buffer->size();
+ BufferVector buffers{nullptr, std::move(child_ids_buffer),
+ std::move(value_offsets_buffer)};
+ *out = ArrayData(typed_values.type(), num_rows, std::move(buffers), 0);
+ for (auto i = 0; i < num_fields; i++) {
+ ARROW_ASSIGN_OR_RAISE(auto child_indices_array,
+ child_indices_builders_[i].Finish());
+ ARROW_ASSIGN_OR_RAISE(std::shared_ptr<Array> child_array,
+ Take(*typed_values.field(i), *child_indices_array));
+ out->child_data.push_back(child_array->data());
+ }
+ return Status::OK();
+ }
+};
+
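A dense union row is a (type code, offset-into-child) pair, so taking rows means routing each row's old child offset into a per-child gather list and renumbering offsets densely, as GenerateOutput does above. A compact standalone sketch of the regrouping (toy types; the real kernel also handles nulls and builder reservation):

#include <cstdint>
#include <iostream>
#include <vector>

struct ToyDenseUnion {
  std::vector<int8_t> child_ids;       // which child each row lives in
  std::vector<int32_t> value_offsets;  // row's offset inside that child
};

// Selecting rows: keep the child id, but the new value_offset is the number
// of rows already routed to that child; the old offset goes into a per-child
// gather list so each child array can be Take()-n afterwards.
ToyDenseUnion TakeUnion(const ToyDenseUnion& u, const std::vector<int64_t>& rows,
                        std::vector<std::vector<int32_t>>* child_take_indices) {
  ToyDenseUnion out;
  for (int64_t row : rows) {
    int8_t child = u.child_ids[row];
    out.child_ids.push_back(child);
    auto& per_child = (*child_take_indices)[child];
    out.value_offsets.push_back(static_cast<int32_t>(per_child.size()));
    per_child.push_back(u.value_offsets[row]);
  }
  return out;
}

int main() {
  ToyDenseUnion u{{0, 1, 0, 1}, {0, 0, 1, 1}};
  std::vector<std::vector<int32_t>> gather(2);
  ToyDenseUnion out = TakeUnion(u, {3, 0}, &gather);
  for (size_t i = 0; i < out.child_ids.size(); ++i)
    std::cout << static_cast<int>(out.child_ids[i]) << ':' << out.value_offsets[i] << ' ';
  std::cout << '\n';  // prints: 1:0 0:0
}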
struct FSLImpl : public Selection<FSLImpl, FixedSizeListType> {
Int64Builder child_index_builder;
@@ -1827,20 +1827,20 @@ struct StructImpl : public Selection<StructImpl, StructType> {
}
};
-Status StructFilter(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
+Status StructFilter(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
// Transform filter to selection indices and then use Take.
std::shared_ptr<ArrayData> indices;
- RETURN_NOT_OK(GetTakeIndices(*batch[1].array(),
- FilterState::Get(ctx).null_selection_behavior,
- ctx->memory_pool())
- .Value(&indices));
+ RETURN_NOT_OK(GetTakeIndices(*batch[1].array(),
+ FilterState::Get(ctx).null_selection_behavior,
+ ctx->memory_pool())
+ .Value(&indices));
Datum result;
- RETURN_NOT_OK(
- Take(batch[0], Datum(indices), TakeOptions::NoBoundsCheck(), ctx->exec_context())
- .Value(&result));
+ RETURN_NOT_OK(
+ Take(batch[0], Datum(indices), TakeOptions::NoBoundsCheck(), ctx->exec_context())
+ .Value(&result));
out->value = result.array();
- return Status::OK();
+ return Status::OK();
}
#undef LIFT_BASE_MEMBERS
@@ -1860,15 +1860,15 @@ Result<std::shared_ptr<RecordBatch>> FilterRecordBatch(const RecordBatch& batch,
const auto& filter_opts = *static_cast<const FilterOptions*>(options);
ARROW_ASSIGN_OR_RAISE(
std::shared_ptr<ArrayData> indices,
- GetTakeIndices(*filter.array(), filter_opts.null_selection_behavior,
- ctx->memory_pool()));
+ GetTakeIndices(*filter.array(), filter_opts.null_selection_behavior,
+ ctx->memory_pool()));
std::vector<std::shared_ptr<Array>> columns(batch.num_columns());
for (int i = 0; i < batch.num_columns(); ++i) {
ARROW_ASSIGN_OR_RAISE(Datum out, Take(batch.column(i)->data(), Datum(indices),
TakeOptions::NoBoundsCheck(), ctx));
columns[i] = out.make_array();
}
- return RecordBatch::Make(batch.schema(), indices->length, std::move(columns));
+ return RecordBatch::Make(batch.schema(), indices->length, std::move(columns));
}
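FilterRecordBatch converts the boolean filter into take indices once and reuses them for every column, so the filter bitmap is scanned a single time regardless of column count. The same pattern in miniature (plain vectors standing in for columns):

#include <cstdint>
#include <iostream>
#include <vector>

int main() {
  std::vector<std::vector<int>> columns{{1, 2, 3, 4}, {10, 20, 30, 40}};
  std::vector<bool> filter{true, false, false, true};

  // Step 1: materialize the selection once, as indices.
  std::vector<int64_t> indices;
  for (int64_t i = 0; i < static_cast<int64_t>(filter.size()); ++i)
    if (filter[i]) indices.push_back(i);

  // Step 2: gather every column through the same indices (the cheap part).
  for (auto& col : columns) {
    std::vector<int> taken;
    for (int64_t i : indices) taken.push_back(col[i]);
    col = std::move(taken);
  }
  std::cout << columns[0][1] << ' ' << columns[1][1] << '\n';  // prints: 4 40
}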
Result<std::shared_ptr<Table>> FilterTable(const Table& table, const Datum& filter,
@@ -1877,82 +1877,82 @@ Result<std::shared_ptr<Table>> FilterTable(const Table& table, const Datum& filt
if (table.num_rows() != filter.length()) {
return Status::Invalid("Filter inputs must all be the same length");
}
- if (table.num_rows() == 0) {
- return Table::Make(table.schema(), table.columns(), 0);
- }
-
- // Last input element will be the filter array
- const int num_columns = table.num_columns();
- std::vector<ArrayVector> inputs(num_columns + 1);
-
- // Fetch table columns
- for (int i = 0; i < num_columns; ++i) {
- inputs[i] = table.column(i)->chunks();
- }
- // Fetch filter
- const auto& filter_opts = *static_cast<const FilterOptions*>(options);
- switch (filter.kind()) {
- case Datum::ARRAY:
- inputs.back().push_back(filter.make_array());
- break;
- case Datum::CHUNKED_ARRAY:
- inputs.back() = filter.chunked_array()->chunks();
- break;
- default:
- return Status::NotImplemented("Filter should be array-like");
- }
-
- // Rechunk inputs to allow consistent iteration over their respective chunks
- inputs = arrow::internal::RechunkArraysConsistently(inputs);
-
- // Instead of filtering each column with the boolean filter
- // (which would be slow if the table has a large number of columns: ARROW-10569),
- // convert each filter chunk to indices, and take() the column.
- const int64_t num_chunks = static_cast<int64_t>(inputs.back().size());
- std::vector<ArrayVector> out_columns(num_columns);
- int64_t out_num_rows = 0;
-
- for (int64_t i = 0; i < num_chunks; ++i) {
- const ArrayData& filter_chunk = *inputs.back()[i]->data();
+ if (table.num_rows() == 0) {
+ return Table::Make(table.schema(), table.columns(), 0);
+ }
+
+ // Last input element will be the filter array
+ const int num_columns = table.num_columns();
+ std::vector<ArrayVector> inputs(num_columns + 1);
+
+ // Fetch table columns
+ for (int i = 0; i < num_columns; ++i) {
+ inputs[i] = table.column(i)->chunks();
+ }
+ // Fetch filter
+ const auto& filter_opts = *static_cast<const FilterOptions*>(options);
+ switch (filter.kind()) {
+ case Datum::ARRAY:
+ inputs.back().push_back(filter.make_array());
+ break;
+ case Datum::CHUNKED_ARRAY:
+ inputs.back() = filter.chunked_array()->chunks();
+ break;
+ default:
+ return Status::NotImplemented("Filter should be array-like");
+ }
+
+ // Rechunk inputs to allow consistent iteration over their respective chunks
+ inputs = arrow::internal::RechunkArraysConsistently(inputs);
+
+ // Instead of filtering each column with the boolean filter
+ // (which would be slow if the table has a large number of columns: ARROW-10569),
+ // convert each filter chunk to indices, and take() the column.
+ const int64_t num_chunks = static_cast<int64_t>(inputs.back().size());
+ std::vector<ArrayVector> out_columns(num_columns);
+ int64_t out_num_rows = 0;
+
+ for (int64_t i = 0; i < num_chunks; ++i) {
+ const ArrayData& filter_chunk = *inputs.back()[i]->data();
ARROW_ASSIGN_OR_RAISE(
- const auto indices,
- GetTakeIndices(filter_chunk, filter_opts.null_selection_behavior,
- ctx->memory_pool()));
-
- if (indices->length > 0) {
- // Take from all input columns
- Datum indices_datum{std::move(indices)};
- for (int col = 0; col < num_columns; ++col) {
- const auto& column_chunk = inputs[col][i];
- ARROW_ASSIGN_OR_RAISE(Datum out, Take(column_chunk, indices_datum,
- TakeOptions::NoBoundsCheck(), ctx));
- out_columns[col].push_back(std::move(out).make_array());
- }
- out_num_rows += indices->length;
- }
+ const auto indices,
+ GetTakeIndices(filter_chunk, filter_opts.null_selection_behavior,
+ ctx->memory_pool()));
+
+ if (indices->length > 0) {
+ // Take from all input columns
+ Datum indices_datum{std::move(indices)};
+ for (int col = 0; col < num_columns; ++col) {
+ const auto& column_chunk = inputs[col][i];
+ ARROW_ASSIGN_OR_RAISE(Datum out, Take(column_chunk, indices_datum,
+ TakeOptions::NoBoundsCheck(), ctx));
+ out_columns[col].push_back(std::move(out).make_array());
+ }
+ out_num_rows += indices->length;
+ }
}
-
- ChunkedArrayVector out_chunks(num_columns);
- for (int i = 0; i < num_columns; ++i) {
- out_chunks[i] = std::make_shared<ChunkedArray>(std::move(out_columns[i]),
- table.column(i)->type());
- }
- return Table::Make(table.schema(), std::move(out_chunks), out_num_rows);
+
+ ChunkedArrayVector out_chunks(num_columns);
+ for (int i = 0; i < num_columns; ++i) {
+ out_chunks[i] = std::make_shared<ChunkedArray>(std::move(out_columns[i]),
+ table.column(i)->type());
+ }
+ return Table::Make(table.schema(), std::move(out_chunks), out_num_rows);
}
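FilterTable first rechunks the columns and the filter to share chunk boundaries, reducing the problem to the record-batch case applied chunk by chunk. Assuming the chunks are already aligned, the loop is essentially (toy sketch):

#include <cstdint>
#include <iostream>
#include <vector>

using Chunk = std::vector<int>;

int main() {
  // Two columns, each split into the same two chunks; filter chunked identically.
  std::vector<std::vector<Chunk>> columns{{{1, 2}, {3, 4}}, {{5, 6}, {7, 8}}};
  std::vector<std::vector<bool>> filter_chunks{{true, false}, {false, true}};

  int64_t out_rows = 0;
  for (size_t c = 0; c < filter_chunks.size(); ++c) {
    // Per-chunk indices, computed once and shared by all columns (ARROW-10569).
    std::vector<int64_t> indices;
    for (int64_t i = 0; i < static_cast<int64_t>(filter_chunks[c].size()); ++i)
      if (filter_chunks[c][i]) indices.push_back(i);
    for (auto& column : columns) {
      Chunk taken;
      for (int64_t i : indices) taken.push_back(column[c][i]);
      column[c] = std::move(taken);
    }
    out_rows += static_cast<int64_t>(indices.size());
  }
  std::cout << out_rows << '\n';  // prints: 2
}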
static auto kDefaultFilterOptions = FilterOptions::Defaults();
-const FunctionDoc filter_doc(
- "Filter with a boolean selection filter",
- ("The output is populated with values from the input at positions\n"
- "where the selection filter is non-zero. Nulls in the selection filter\n"
- "are handled based on FilterOptions."),
- {"input", "selection_filter"}, "FilterOptions");
-
+const FunctionDoc filter_doc(
+ "Filter with a boolean selection filter",
+ ("The output is populated with values from the input at positions\n"
+ "where the selection filter is non-zero. Nulls in the selection filter\n"
+ "are handled based on FilterOptions."),
+ {"input", "selection_filter"}, "FilterOptions");
+
class FilterMetaFunction : public MetaFunction {
public:
FilterMetaFunction()
- : MetaFunction("filter", Arity::Binary(), &filter_doc, &kDefaultFilterOptions) {}
+ : MetaFunction("filter", Arity::Binary(), &filter_doc, &kDefaultFilterOptions) {}
Result<Datum> ExecuteImpl(const std::vector<Datum>& args,
const FunctionOptions* options,
@@ -2061,7 +2061,7 @@ Result<std::shared_ptr<RecordBatch>> TakeRA(const RecordBatch& batch,
for (int j = 0; j < ncols; j++) {
ARROW_ASSIGN_OR_RAISE(columns[j], TakeAA(*batch.column(j), indices, options, ctx));
}
- return RecordBatch::Make(batch.schema(), nrows, std::move(columns));
+ return RecordBatch::Make(batch.schema(), nrows, std::move(columns));
}
Result<std::shared_ptr<Table>> TakeTA(const Table& table, const Array& indices,
@@ -2072,7 +2072,7 @@ Result<std::shared_ptr<Table>> TakeTA(const Table& table, const Array& indices,
for (int j = 0; j < ncols; j++) {
ARROW_ASSIGN_OR_RAISE(columns[j], TakeCA(*table.column(j), indices, options, ctx));
}
- return Table::Make(table.schema(), std::move(columns));
+ return Table::Make(table.schema(), std::move(columns));
}
Result<std::shared_ptr<Table>> TakeTC(const Table& table, const ChunkedArray& indices,
@@ -2082,17 +2082,17 @@ Result<std::shared_ptr<Table>> TakeTC(const Table& table, const ChunkedArray& in
for (int j = 0; j < ncols; j++) {
ARROW_ASSIGN_OR_RAISE(columns[j], TakeCC(*table.column(j), indices, options, ctx));
}
- return Table::Make(table.schema(), std::move(columns));
+ return Table::Make(table.schema(), std::move(columns));
}
static auto kDefaultTakeOptions = TakeOptions::Defaults();
-const FunctionDoc take_doc(
- "Select values from an input based on indices from another array",
- ("The output is populated with values from the input at positions\n"
- "given by `indices`. Nulls in `indices` emit null in the output."),
- {"input", "indices"}, "TakeOptions");
-
+const FunctionDoc take_doc(
+ "Select values from an input based on indices from another array",
+ ("The output is populated with values from the input at positions\n"
+ "given by `indices`. Nulls in `indices` emit null in the output."),
+ {"input", "indices"}, "TakeOptions");
+
// Metafunction for dispatching to different Take implementations other than
// Array-Array.
//
@@ -2100,8 +2100,8 @@ const FunctionDoc take_doc(
// overly complex dispatching, there is no parallelization.
class TakeMetaFunction : public MetaFunction {
public:
- TakeMetaFunction()
- : MetaFunction("take", Arity::Binary(), &take_doc, &kDefaultTakeOptions) {}
+ TakeMetaFunction()
+ : MetaFunction("take", Arity::Binary(), &take_doc, &kDefaultTakeOptions) {}
Result<Datum> ExecuteImpl(const std::vector<Datum>& args,
const FunctionOptions* options,
@@ -2149,21 +2149,21 @@ class TakeMetaFunction : public MetaFunction {
// ----------------------------------------------------------------------
template <typename Impl>
-Status FilterExec(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
+Status FilterExec(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
  // TODO: where is the values/filter length equality checked?
int64_t output_length = GetFilterOutputSize(
*batch[1].array(), FilterState::Get(ctx).null_selection_behavior);
Impl kernel(ctx, batch, output_length, out);
- return kernel.ExecFilter();
+ return kernel.ExecFilter();
}
template <typename Impl>
-Status TakeExec(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
+Status TakeExec(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
if (TakeState::Get(ctx).boundscheck) {
- RETURN_NOT_OK(CheckIndexBounds(*batch[1].array(), batch[0].length()));
+ RETURN_NOT_OK(CheckIndexBounds(*batch[1].array(), batch[0].length()));
}
Impl kernel(ctx, batch, /*output_length=*/batch[1].length(), out);
- return kernel.ExecTake();
+ return kernel.ExecTake();
}
struct SelectionKernelDescr {
@@ -2171,13 +2171,13 @@ struct SelectionKernelDescr {
ArrayKernelExec exec;
};
-void RegisterSelectionFunction(const std::string& name, const FunctionDoc* doc,
- VectorKernel base_kernel, InputType selection_type,
+void RegisterSelectionFunction(const std::string& name, const FunctionDoc* doc,
+ VectorKernel base_kernel, InputType selection_type,
const std::vector<SelectionKernelDescr>& descrs,
const FunctionOptions* default_options,
FunctionRegistry* registry) {
- auto func =
- std::make_shared<VectorFunction>(name, Arity::Binary(), doc, default_options);
+ auto func =
+ std::make_shared<VectorFunction>(name, Arity::Binary(), doc, default_options);
for (auto& descr : descrs) {
base_kernel.signature = KernelSignature::Make(
{std::move(descr.input), selection_type}, OutputType(FirstType));
@@ -2187,19 +2187,19 @@ void RegisterSelectionFunction(const std::string& name, const FunctionDoc* doc,
DCHECK_OK(registry->AddFunction(std::move(func)));
}
-const FunctionDoc array_filter_doc(
- "Filter with a boolean selection filter",
- ("The output is populated with values from the input `array` at positions\n"
- "where the selection filter is non-zero. Nulls in the selection filter\n"
- "are handled based on FilterOptions."),
- {"array", "selection_filter"}, "FilterOptions");
-
-const FunctionDoc array_take_doc(
- "Select values from an array based on indices from another array",
- ("The output is populated with values from the input array at positions\n"
- "given by `indices`. Nulls in `indices` emit null in the output."),
- {"array", "indices"}, "TakeOptions");
-
+const FunctionDoc array_filter_doc(
+ "Filter with a boolean selection filter",
+ ("The output is populated with values from the input `array` at positions\n"
+ "where the selection filter is non-zero. Nulls in the selection filter\n"
+ "are handled based on FilterOptions."),
+ {"array", "selection_filter"}, "FilterOptions");
+
+const FunctionDoc array_take_doc(
+ "Select values from an array based on indices from another array",
+ ("The output is populated with values from the input array at positions\n"
+ "given by `indices`. Nulls in `indices` emit null in the output."),
+ {"array", "indices"}, "TakeOptions");
+
} // namespace
void RegisterVectorSelection(FunctionRegistry* registry) {
@@ -2216,7 +2216,7 @@ void RegisterVectorSelection(FunctionRegistry* registry) {
{InputType::Array(Type::LIST), FilterExec<ListImpl<ListType>>},
{InputType::Array(Type::LARGE_LIST), FilterExec<ListImpl<LargeListType>>},
{InputType::Array(Type::FIXED_SIZE_LIST), FilterExec<FSLImpl>},
- {InputType::Array(Type::DENSE_UNION), FilterExec<DenseUnionImpl>},
+ {InputType::Array(Type::DENSE_UNION), FilterExec<DenseUnionImpl>},
{InputType::Array(Type::STRUCT), StructFilter},
// TODO: Reuse ListType kernel for MAP
{InputType::Array(Type::MAP), FilterExec<ListImpl<MapType>>},
@@ -2224,7 +2224,7 @@ void RegisterVectorSelection(FunctionRegistry* registry) {
VectorKernel filter_base;
filter_base.init = FilterState::Init;
- RegisterSelectionFunction("array_filter", &array_filter_doc, filter_base,
+ RegisterSelectionFunction("array_filter", &array_filter_doc, filter_base,
/*selection_type=*/InputType::Array(boolean()),
filter_kernel_descrs, &kDefaultFilterOptions, registry);
@@ -2239,14 +2239,14 @@ void RegisterVectorSelection(FunctionRegistry* registry) {
TakeExec<VarBinaryImpl<LargeBinaryType>>},
{InputType::Array(Type::FIXED_SIZE_BINARY), TakeExec<FSBImpl>},
{InputType::Array(null()), NullTake},
- {InputType::Array(Type::DECIMAL128), TakeExec<FSBImpl>},
- {InputType::Array(Type::DECIMAL256), TakeExec<FSBImpl>},
+ {InputType::Array(Type::DECIMAL128), TakeExec<FSBImpl>},
+ {InputType::Array(Type::DECIMAL256), TakeExec<FSBImpl>},
{InputType::Array(Type::DICTIONARY), DictionaryTake},
{InputType::Array(Type::EXTENSION), ExtensionTake},
{InputType::Array(Type::LIST), TakeExec<ListImpl<ListType>>},
{InputType::Array(Type::LARGE_LIST), TakeExec<ListImpl<LargeListType>>},
{InputType::Array(Type::FIXED_SIZE_LIST), TakeExec<FSLImpl>},
- {InputType::Array(Type::DENSE_UNION), TakeExec<DenseUnionImpl>},
+ {InputType::Array(Type::DENSE_UNION), TakeExec<DenseUnionImpl>},
{InputType::Array(Type::STRUCT), TakeExec<StructImpl>},
// TODO: Reuse ListType kernel for MAP
{InputType::Array(Type::MAP), TakeExec<ListImpl<MapType>>},
@@ -2256,7 +2256,7 @@ void RegisterVectorSelection(FunctionRegistry* registry) {
take_base.init = TakeState::Init;
take_base.can_execute_chunkwise = false;
RegisterSelectionFunction(
- "array_take", &array_take_doc, take_base,
+ "array_take", &array_take_doc, take_base,
/*selection_type=*/InputType(match::Integer(), ValueDescr::ARRAY),
take_kernel_descrs, &kDefaultTakeOptions, registry);
diff --git a/contrib/libs/apache/arrow/cpp/src/arrow/compute/kernels/vector_sort.cc b/contrib/libs/apache/arrow/cpp/src/arrow/compute/kernels/vector_sort.cc
index b42e9d536f1..7fa43e715d8 100644
--- a/contrib/libs/apache/arrow/cpp/src/arrow/compute/kernels/vector_sort.cc
+++ b/contrib/libs/apache/arrow/cpp/src/arrow/compute/kernels/vector_sort.cc
@@ -16,305 +16,305 @@
// under the License.
#include <algorithm>
-#include <cmath>
+#include <cmath>
#include <limits>
#include <numeric>
-#include <type_traits>
-#include <utility>
+#include <type_traits>
+#include <utility>
#include "arrow/array/data.h"
#include "arrow/compute/api_vector.h"
#include "arrow/compute/kernels/common.h"
-#include "arrow/compute/kernels/util_internal.h"
-#include "arrow/table.h"
-#include "arrow/type_traits.h"
-#include "arrow/util/bit_block_counter.h"
-#include "arrow/util/bitmap.h"
-#include "arrow/util/bitmap_ops.h"
-#include "arrow/util/checked_cast.h"
+#include "arrow/compute/kernels/util_internal.h"
+#include "arrow/table.h"
+#include "arrow/type_traits.h"
+#include "arrow/util/bit_block_counter.h"
+#include "arrow/util/bitmap.h"
+#include "arrow/util/bitmap_ops.h"
+#include "arrow/util/checked_cast.h"
#include "arrow/util/optional.h"
-#include "arrow/visitor_inline.h"
+#include "arrow/visitor_inline.h"
namespace arrow {
-
-using internal::checked_cast;
-
+
+using internal::checked_cast;
+
namespace compute {
-namespace internal {
-
-// Visit all physical types for which sorting is implemented.
-#define VISIT_PHYSICAL_TYPES(VISIT) \
- VISIT(BooleanType) \
- VISIT(Int8Type) \
- VISIT(Int16Type) \
- VISIT(Int32Type) \
- VISIT(Int64Type) \
- VISIT(UInt8Type) \
- VISIT(UInt16Type) \
- VISIT(UInt32Type) \
- VISIT(UInt64Type) \
- VISIT(FloatType) \
- VISIT(DoubleType) \
- VISIT(BinaryType) \
- VISIT(LargeBinaryType) \
- VISIT(FixedSizeBinaryType) \
- VISIT(Decimal128Type) \
- VISIT(Decimal256Type)
-
+namespace internal {
+
+// Visit all physical types for which sorting is implemented.
+#define VISIT_PHYSICAL_TYPES(VISIT) \
+ VISIT(BooleanType) \
+ VISIT(Int8Type) \
+ VISIT(Int16Type) \
+ VISIT(Int32Type) \
+ VISIT(Int64Type) \
+ VISIT(UInt8Type) \
+ VISIT(UInt16Type) \
+ VISIT(UInt32Type) \
+ VISIT(UInt64Type) \
+ VISIT(FloatType) \
+ VISIT(DoubleType) \
+ VISIT(BinaryType) \
+ VISIT(LargeBinaryType) \
+ VISIT(FixedSizeBinaryType) \
+ VISIT(Decimal128Type) \
+ VISIT(Decimal256Type)
+
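VISIT_PHYSICAL_TYPES is an X-macro: each call site supplies the VISIT action, which is expanded once per supported physical type. A tiny self-contained illustration of the pattern (not the Arrow macro itself):

#include <iostream>

#define VISIT_WIDTHS(VISIT) \
  VISIT(8)                  \
  VISIT(16)                 \
  VISIT(32)

int main() {
  // Expand a print statement once per listed width.
#define PRINT_WIDTH(W) std::cout << "width " << W << '\n';
  VISIT_WIDTHS(PRINT_WIDTH)
#undef PRINT_WIDTH
}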
namespace {
-// The target chunk in a chunked array.
-template <typename ArrayType>
-struct ResolvedChunk {
- using V = GetViewType<typename ArrayType::TypeClass>;
- using LogicalValueType = typename V::T;
-
-  // The target array in a chunked array.
- const ArrayType* array;
- // The index in the target array.
- const int64_t index;
-
- ResolvedChunk(const ArrayType* array, int64_t index) : array(array), index(index) {}
-
- bool IsNull() const { return array->IsNull(index); }
-
- LogicalValueType Value() const { return V::LogicalValue(array->GetView(index)); }
-};
-
-// ResolvedChunk specialization for untyped arrays when all that is needed is null lookup
-template <>
-struct ResolvedChunk<Array> {
-  // The target array in a chunked array.
- const Array* array;
- // The index in the target array.
- const int64_t index;
-
- ResolvedChunk(const Array* array, int64_t index) : array(array), index(index) {}
-
- bool IsNull() const { return array->IsNull(index); }
-};
-
-// An object that resolves an array chunk depending on the index.
-struct ChunkedArrayResolver {
- explicit ChunkedArrayResolver(const std::vector<const Array*>& chunks)
- : num_chunks_(static_cast<int64_t>(chunks.size())),
- chunks_(chunks.data()),
- offsets_(MakeEndOffsets(chunks)),
- cached_chunk_(0) {}
-
- template <typename ArrayType>
- ResolvedChunk<ArrayType> Resolve(int64_t index) const {
- // It is common for the algorithms below to make consecutive accesses at
- // a relatively small distance from each other, hence often falling in
- // the same chunk.
- // This is trivial when merging (assuming each side of the merge uses
- // its own resolver), but also in the inner recursive invocations of
- // partitioning.
- const bool cache_hit =
- (index >= offsets_[cached_chunk_] && index < offsets_[cached_chunk_ + 1]);
- if (ARROW_PREDICT_TRUE(cache_hit)) {
- return ResolvedChunk<ArrayType>(
- checked_cast<const ArrayType*>(chunks_[cached_chunk_]),
- index - offsets_[cached_chunk_]);
- } else {
- return ResolveMissBisect<ArrayType>(index);
- }
- }
-
- private:
- template <typename ArrayType>
- ResolvedChunk<ArrayType> ResolveMissBisect(int64_t index) const {
- // Like std::upper_bound(), but hand-written as it can help the compiler.
- const int64_t* raw_offsets = offsets_.data();
- // Search [lo, lo + n)
- int64_t lo = 0, n = num_chunks_;
- while (n > 1) {
- int64_t m = n >> 1;
- int64_t mid = lo + m;
- if (index >= raw_offsets[mid]) {
- lo = mid;
- n -= m;
- } else {
- n = m;
- }
- }
- cached_chunk_ = lo;
- return ResolvedChunk<ArrayType>(checked_cast<const ArrayType*>(chunks_[lo]),
- index - offsets_[lo]);
- }
-
- static std::vector<int64_t> MakeEndOffsets(const std::vector<const Array*>& chunks) {
- std::vector<int64_t> end_offsets(chunks.size() + 1);
- int64_t offset = 0;
- end_offsets[0] = 0;
- std::transform(chunks.begin(), chunks.end(), end_offsets.begin() + 1,
- [&](const Array* chunk) {
- offset += chunk->length();
- return offset;
- });
- return end_offsets;
- }
-
- int64_t num_chunks_;
- const Array* const* chunks_;
- std::vector<int64_t> offsets_;
-
- mutable int64_t cached_chunk_;
-};
-
-// We could try to reproduce the concrete Array classes' facilities
-// (such as cached raw values pointer) in a separate hierarchy of
-// physical accessors, but doing so ends up too cumbersome.
-// Instead, we simply create the desired concrete Array objects.
-std::shared_ptr<Array> GetPhysicalArray(const Array& array,
- const std::shared_ptr<DataType>& physical_type) {
- auto new_data = array.data()->Copy();
- new_data->type = physical_type;
- return MakeArray(std::move(new_data));
-}
-
-ArrayVector GetPhysicalChunks(const ChunkedArray& chunked_array,
- const std::shared_ptr<DataType>& physical_type) {
- const auto& chunks = chunked_array.chunks();
- ArrayVector physical(chunks.size());
- std::transform(chunks.begin(), chunks.end(), physical.begin(),
- [&](const std::shared_ptr<Array>& array) {
- return GetPhysicalArray(*array, physical_type);
- });
- return physical;
-}
-
-std::vector<const Array*> GetArrayPointers(const ArrayVector& arrays) {
- std::vector<const Array*> pointers(arrays.size());
- std::transform(arrays.begin(), arrays.end(), pointers.begin(),
- [&](const std::shared_ptr<Array>& array) { return array.get(); });
- return pointers;
-}
-
-// NOTE: std::partition is usually faster than std::stable_partition.
-
-struct NonStablePartitioner {
- template <typename Predicate>
- uint64_t* operator()(uint64_t* indices_begin, uint64_t* indices_end, Predicate&& pred) {
- return std::partition(indices_begin, indices_end, std::forward<Predicate>(pred));
- }
-};
-
-struct StablePartitioner {
- template <typename Predicate>
- uint64_t* operator()(uint64_t* indices_begin, uint64_t* indices_end, Predicate&& pred) {
- return std::stable_partition(indices_begin, indices_end,
- std::forward<Predicate>(pred));
- }
-};
-
-// TODO factor out value comparison and NaN checking?
-
-template <typename TypeClass, typename Enable = void>
-struct NullTraits {
- static constexpr bool has_null_like_values = false;
-};
-
-template <typename TypeClass>
-struct NullTraits<TypeClass, enable_if_floating_point<TypeClass>> {
- static constexpr bool has_null_like_values = true;
-};
-
-// Move nulls (not null-like values) to end of array. Return where null starts.
-//
-// `offset` is used when this is called on a chunk of a chunked array
-template <typename Partitioner>
-uint64_t* PartitionNullsOnly(uint64_t* indices_begin, uint64_t* indices_end,
- const Array& values, int64_t offset) {
- if (values.null_count() == 0) {
- return indices_end;
- }
- Partitioner partitioner;
- return partitioner(indices_begin, indices_end, [&values, &offset](uint64_t ind) {
- return !values.IsNull(ind - offset);
- });
-}
-
-// For chunked array.
-template <typename Partitioner>
-uint64_t* PartitionNullsOnly(uint64_t* indices_begin, uint64_t* indices_end,
- const std::vector<const Array*>& arrays,
- int64_t null_count) {
- if (null_count == 0) {
- return indices_end;
- }
- ChunkedArrayResolver resolver(arrays);
- Partitioner partitioner;
- return partitioner(indices_begin, indices_end, [&](uint64_t ind) {
- const auto chunk = resolver.Resolve<Array>(ind);
- return !chunk.IsNull();
- });
-}
-
-// Move non-null null-like values to end of array. Return where null-like starts.
-//
-// `offset` is used when this is called on a chunk of a chunked array
-template <typename ArrayType, typename Partitioner>
-enable_if_t<!is_floating_type<typename ArrayType::TypeClass>::value, uint64_t*>
-PartitionNullLikes(uint64_t* indices_begin, uint64_t* indices_end,
- const ArrayType& values, int64_t offset) {
- return indices_end;
-}
-
-// For chunked array.
-template <typename ArrayType, typename Partitioner>
-enable_if_t<!is_floating_type<typename ArrayType::TypeClass>::value, uint64_t*>
-PartitionNullLikes(uint64_t* indices_begin, uint64_t* indices_end,
- const std::vector<const Array*>& arrays, int64_t null_count) {
- return indices_end;
-}
-
-template <typename ArrayType, typename Partitioner>
-enable_if_t<is_floating_type<typename ArrayType::TypeClass>::value, uint64_t*>
-PartitionNullLikes(uint64_t* indices_begin, uint64_t* indices_end,
- const ArrayType& values, int64_t offset) {
- Partitioner partitioner;
- return partitioner(indices_begin, indices_end, [&values, &offset](uint64_t ind) {
- return !std::isnan(values.GetView(ind - offset));
- });
-}
-
-template <typename ArrayType, typename Partitioner>
-enable_if_t<is_floating_type<typename ArrayType::TypeClass>::value, uint64_t*>
-PartitionNullLikes(uint64_t* indices_begin, uint64_t* indices_end,
- const std::vector<const Array*>& arrays, int64_t null_count) {
- Partitioner partitioner;
- ChunkedArrayResolver resolver(arrays);
- return partitioner(indices_begin, indices_end, [&](uint64_t ind) {
- const auto chunk = resolver.Resolve<ArrayType>(ind);
- return !std::isnan(chunk.Value());
- });
-}
-
-// Move nulls to end of array. Return where null starts.
-//
-// `offset` is used when this is called on a chunk of a chunked array
-template <typename ArrayType, typename Partitioner>
-uint64_t* PartitionNulls(uint64_t* indices_begin, uint64_t* indices_end,
- const ArrayType& values, int64_t offset) {
- // Partition nulls at end, and null-like values just before
- uint64_t* nulls_begin =
- PartitionNullsOnly<Partitioner>(indices_begin, indices_end, values, offset);
- return PartitionNullLikes<ArrayType, Partitioner>(indices_begin, nulls_begin, values,
- offset);
-}
-
-// For chunked array.
-template <typename ArrayType, typename Partitioner>
-uint64_t* PartitionNulls(uint64_t* indices_begin, uint64_t* indices_end,
- const std::vector<const Array*>& arrays, int64_t null_count) {
- // Partition nulls at end, and null-like values just before
- uint64_t* nulls_begin =
- PartitionNullsOnly<Partitioner>(indices_begin, indices_end, arrays, null_count);
- return PartitionNullLikes<ArrayType, Partitioner>(indices_begin, nulls_begin, arrays,
- null_count);
-}
-
+// The target chunk in a chunked array.
+template <typename ArrayType>
+struct ResolvedChunk {
+ using V = GetViewType<typename ArrayType::TypeClass>;
+ using LogicalValueType = typename V::T;
+
+  // The target array in a chunked array.
+ const ArrayType* array;
+ // The index in the target array.
+ const int64_t index;
+
+ ResolvedChunk(const ArrayType* array, int64_t index) : array(array), index(index) {}
+
+ bool IsNull() const { return array->IsNull(index); }
+
+ LogicalValueType Value() const { return V::LogicalValue(array->GetView(index)); }
+};
+
+// ResolvedChunk specialization for untyped arrays when all that is needed is null lookup
+template <>
+struct ResolvedChunk<Array> {
+  // The target array in a chunked array.
+ const Array* array;
+ // The index in the target array.
+ const int64_t index;
+
+ ResolvedChunk(const Array* array, int64_t index) : array(array), index(index) {}
+
+ bool IsNull() const { return array->IsNull(index); }
+};
+
+// An object that resolves an array chunk depending on the index.
+struct ChunkedArrayResolver {
+ explicit ChunkedArrayResolver(const std::vector<const Array*>& chunks)
+ : num_chunks_(static_cast<int64_t>(chunks.size())),
+ chunks_(chunks.data()),
+ offsets_(MakeEndOffsets(chunks)),
+ cached_chunk_(0) {}
+
+ template <typename ArrayType>
+ ResolvedChunk<ArrayType> Resolve(int64_t index) const {
+ // It is common for the algorithms below to make consecutive accesses at
+ // a relatively small distance from each other, hence often falling in
+ // the same chunk.
+ // This is trivial when merging (assuming each side of the merge uses
+ // its own resolver), but also in the inner recursive invocations of
+ // partitioning.
+ const bool cache_hit =
+ (index >= offsets_[cached_chunk_] && index < offsets_[cached_chunk_ + 1]);
+ if (ARROW_PREDICT_TRUE(cache_hit)) {
+ return ResolvedChunk<ArrayType>(
+ checked_cast<const ArrayType*>(chunks_[cached_chunk_]),
+ index - offsets_[cached_chunk_]);
+ } else {
+ return ResolveMissBisect<ArrayType>(index);
+ }
+ }
+
+ private:
+ template <typename ArrayType>
+ ResolvedChunk<ArrayType> ResolveMissBisect(int64_t index) const {
+ // Like std::upper_bound(), but hand-written as it can help the compiler.
+ const int64_t* raw_offsets = offsets_.data();
+ // Search [lo, lo + n)
+ int64_t lo = 0, n = num_chunks_;
+ while (n > 1) {
+ int64_t m = n >> 1;
+ int64_t mid = lo + m;
+ if (index >= raw_offsets[mid]) {
+ lo = mid;
+ n -= m;
+ } else {
+ n = m;
+ }
+ }
+ cached_chunk_ = lo;
+ return ResolvedChunk<ArrayType>(checked_cast<const ArrayType*>(chunks_[lo]),
+ index - offsets_[lo]);
+ }
+
+ static std::vector<int64_t> MakeEndOffsets(const std::vector<const Array*>& chunks) {
+ std::vector<int64_t> end_offsets(chunks.size() + 1);
+ int64_t offset = 0;
+ end_offsets[0] = 0;
+ std::transform(chunks.begin(), chunks.end(), end_offsets.begin() + 1,
+ [&](const Array* chunk) {
+ offset += chunk->length();
+ return offset;
+ });
+ return end_offsets;
+ }
+
+ int64_t num_chunks_;
+ const Array* const* chunks_;
+ std::vector<int64_t> offsets_;
+
+ mutable int64_t cached_chunk_;
+};
+
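The resolver amortizes chunk lookup: accesses that stay in the cached chunk resolve in O(1), and a miss falls back to binary search over the cumulative end offsets. A minimal standalone version of the same scheme:

#include <algorithm>
#include <cstdint>
#include <iostream>
#include <utility>
#include <vector>

class ToyResolver {
 public:
  explicit ToyResolver(std::vector<int64_t> chunk_lengths) : offsets_{0} {
    for (int64_t len : chunk_lengths) offsets_.push_back(offsets_.back() + len);
  }
  // Returns {chunk, index-in-chunk} for a global index.
  std::pair<int64_t, int64_t> Resolve(int64_t index) {
    if (index < offsets_[cached_] || index >= offsets_[cached_ + 1]) {
      // Cache miss: bisect over end offsets (the hand-written loop above
      // plays the role of std::upper_bound here).
      cached_ = std::upper_bound(offsets_.begin(), offsets_.end(), index) -
                offsets_.begin() - 1;
    }
    return {cached_, index - offsets_[cached_]};
  }

 private:
  std::vector<int64_t> offsets_;
  int64_t cached_ = 0;
};

int main() {
  ToyResolver r({3, 2, 4});                // chunks cover [0,3), [3,5), [5,9)
  auto [chunk, i] = r.Resolve(4);
  std::cout << chunk << ' ' << i << '\n';  // prints: 1 1
}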
+// We could try to reproduce the concrete Array classes' facilities
+// (such as cached raw values pointer) in a separate hierarchy of
+// physical accessors, but doing so ends up too cumbersome.
+// Instead, we simply create the desired concrete Array objects.
+std::shared_ptr<Array> GetPhysicalArray(const Array& array,
+ const std::shared_ptr<DataType>& physical_type) {
+ auto new_data = array.data()->Copy();
+ new_data->type = physical_type;
+ return MakeArray(std::move(new_data));
+}
+
+ArrayVector GetPhysicalChunks(const ChunkedArray& chunked_array,
+ const std::shared_ptr<DataType>& physical_type) {
+ const auto& chunks = chunked_array.chunks();
+ ArrayVector physical(chunks.size());
+ std::transform(chunks.begin(), chunks.end(), physical.begin(),
+ [&](const std::shared_ptr<Array>& array) {
+ return GetPhysicalArray(*array, physical_type);
+ });
+ return physical;
+}
+
+std::vector<const Array*> GetArrayPointers(const ArrayVector& arrays) {
+ std::vector<const Array*> pointers(arrays.size());
+ std::transform(arrays.begin(), arrays.end(), pointers.begin(),
+ [&](const std::shared_ptr<Array>& array) { return array.get(); });
+ return pointers;
+}
+
+// NOTE: std::partition is usually faster than std::stable_partition.
+
+struct NonStablePartitioner {
+ template <typename Predicate>
+ uint64_t* operator()(uint64_t* indices_begin, uint64_t* indices_end, Predicate&& pred) {
+ return std::partition(indices_begin, indices_end, std::forward<Predicate>(pred));
+ }
+};
+
+struct StablePartitioner {
+ template <typename Predicate>
+ uint64_t* operator()(uint64_t* indices_begin, uint64_t* indices_end, Predicate&& pred) {
+ return std::stable_partition(indices_begin, indices_end,
+ std::forward<Predicate>(pred));
+ }
+};
+
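The two wrappers let callers choose the guarantee they need: std::partition is typically faster, while std::stable_partition preserves the relative order within each group, which stable sorts require. A quick standalone demonstration:

#include <algorithm>
#include <cstdint>
#include <iostream>
#include <vector>

int main() {
  std::vector<uint64_t> a{0, 1, 2, 3, 4, 5}, b = a;
  auto is_even = [](uint64_t i) { return i % 2 == 0; };

  // Stable: evens keep their original relative order: 0 2 4 | 1 3 5.
  std::stable_partition(b.begin(), b.end(), is_even);
  // Non-stable: same partition point, but order within groups is unspecified.
  auto mid = std::partition(a.begin(), a.end(), is_even);

  std::cout << (mid - a.begin()) << '\n';  // prints: 3 (evens before, odds after)
  for (uint64_t v : b) std::cout << v << ' ';
  std::cout << '\n';                       // prints: 0 2 4 1 3 5
}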
+// TODO factor out value comparison and NaN checking?
+
+template <typename TypeClass, typename Enable = void>
+struct NullTraits {
+ static constexpr bool has_null_like_values = false;
+};
+
+template <typename TypeClass>
+struct NullTraits<TypeClass, enable_if_floating_point<TypeClass>> {
+ static constexpr bool has_null_like_values = true;
+};
+
+// Move nulls (not null-like values) to end of array. Return where null starts.
+//
+// `offset` is used when this is called on a chunk of a chunked array
+template <typename Partitioner>
+uint64_t* PartitionNullsOnly(uint64_t* indices_begin, uint64_t* indices_end,
+ const Array& values, int64_t offset) {
+ if (values.null_count() == 0) {
+ return indices_end;
+ }
+ Partitioner partitioner;
+ return partitioner(indices_begin, indices_end, [&values, &offset](uint64_t ind) {
+ return !values.IsNull(ind - offset);
+ });
+}
+
+// For chunked array.
+template <typename Partitioner>
+uint64_t* PartitionNullsOnly(uint64_t* indices_begin, uint64_t* indices_end,
+ const std::vector<const Array*>& arrays,
+ int64_t null_count) {
+ if (null_count == 0) {
+ return indices_end;
+ }
+ ChunkedArrayResolver resolver(arrays);
+ Partitioner partitioner;
+ return partitioner(indices_begin, indices_end, [&](uint64_t ind) {
+ const auto chunk = resolver.Resolve<Array>(ind);
+ return !chunk.IsNull();
+ });
+}
+
+// Move non-null null-like values to end of array. Return where null-like starts.
+//
+// `offset` is used when this is called on a chunk of a chunked array
+template <typename ArrayType, typename Partitioner>
+enable_if_t<!is_floating_type<typename ArrayType::TypeClass>::value, uint64_t*>
+PartitionNullLikes(uint64_t* indices_begin, uint64_t* indices_end,
+ const ArrayType& values, int64_t offset) {
+ return indices_end;
+}
+
+// For chunked array.
+template <typename ArrayType, typename Partitioner>
+enable_if_t<!is_floating_type<typename ArrayType::TypeClass>::value, uint64_t*>
+PartitionNullLikes(uint64_t* indices_begin, uint64_t* indices_end,
+ const std::vector<const Array*>& arrays, int64_t null_count) {
+ return indices_end;
+}
+
+template <typename ArrayType, typename Partitioner>
+enable_if_t<is_floating_type<typename ArrayType::TypeClass>::value, uint64_t*>
+PartitionNullLikes(uint64_t* indices_begin, uint64_t* indices_end,
+ const ArrayType& values, int64_t offset) {
+ Partitioner partitioner;
+ return partitioner(indices_begin, indices_end, [&values, &offset](uint64_t ind) {
+ return !std::isnan(values.GetView(ind - offset));
+ });
+}
+
+template <typename ArrayType, typename Partitioner>
+enable_if_t<is_floating_type<typename ArrayType::TypeClass>::value, uint64_t*>
+PartitionNullLikes(uint64_t* indices_begin, uint64_t* indices_end,
+ const std::vector<const Array*>& arrays, int64_t null_count) {
+ Partitioner partitioner;
+ ChunkedArrayResolver resolver(arrays);
+ return partitioner(indices_begin, indices_end, [&](uint64_t ind) {
+ const auto chunk = resolver.Resolve<ArrayType>(ind);
+ return !std::isnan(chunk.Value());
+ });
+}
+
+// Move nulls to end of array. Return where null starts.
+//
+// `offset` is used when this is called on a chunk of a chunked array
+template <typename ArrayType, typename Partitioner>
+uint64_t* PartitionNulls(uint64_t* indices_begin, uint64_t* indices_end,
+ const ArrayType& values, int64_t offset) {
+ // Partition nulls at end, and null-like values just before
+ uint64_t* nulls_begin =
+ PartitionNullsOnly<Partitioner>(indices_begin, indices_end, values, offset);
+ return PartitionNullLikes<ArrayType, Partitioner>(indices_begin, nulls_begin, values,
+ offset);
+}
+
+// For chunked array.
+template <typename ArrayType, typename Partitioner>
+uint64_t* PartitionNulls(uint64_t* indices_begin, uint64_t* indices_end,
+ const std::vector<const Array*>& arrays, int64_t null_count) {
+ // Partition nulls at end, and null-like values just before
+ uint64_t* nulls_begin =
+ PartitionNullsOnly<Partitioner>(indices_begin, indices_end, arrays, null_count);
+ return PartitionNullLikes<ArrayType, Partitioner>(indices_begin, nulls_begin, arrays,
+ null_count);
+}
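
// Illustrative sketch (plain STL, not Arrow API) of the index layout the
// helpers above produce for a floating-point column: ordinary values first,
// then NaNs ("null-like" values), then nulls at the very end.
#include <algorithm>
#include <cmath>
#include <cstdint>
#include <optional>
#include <vector>

inline void PartitionNullsExample() {
  const std::vector<std::optional<double>> values = {1.0, std::nullopt, NAN, 0.5};
  std::vector<uint64_t> indices = {0, 1, 2, 3};
  // Nulls to the end (as in PartitionNullsOnly)...
  auto nulls_begin =
      std::stable_partition(indices.begin(), indices.end(),
                            [&](uint64_t i) { return values[i].has_value(); });
  // ...then NaNs just before the nulls (as in PartitionNullLikes).
  std::stable_partition(indices.begin(), nulls_begin,
                        [&](uint64_t i) { return !std::isnan(*values[i]); });
  // indices is now {0, 3, 2, 1}: plain values, NaN, null.
}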
+
// ----------------------------------------------------------------------
// partition_nth_indices implementation
@@ -324,116 +324,116 @@ using PartitionNthToIndicesState = internal::OptionsWrapper<PartitionNthOptions>
template <typename OutType, typename InType>
struct PartitionNthToIndices {
using ArrayType = typename TypeTraits<InType>::ArrayType;
-
- static Status Exec(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
- using GetView = GetViewType<InType>;
-
+
+ static Status Exec(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
+ using GetView = GetViewType<InType>;
+
if (ctx->state() == nullptr) {
- return Status::Invalid("NthToIndices requires PartitionNthOptions");
+ return Status::Invalid("NthToIndices requires PartitionNthOptions");
}
- ArrayType arr(batch[0].array());
+ ArrayType arr(batch[0].array());
int64_t pivot = PartitionNthToIndicesState::Get(ctx).pivot;
if (pivot > arr.length()) {
- return Status::IndexError("NthToIndices index out of bound");
+ return Status::IndexError("NthToIndices index out of bound");
}
ArrayData* out_arr = out->mutable_array();
uint64_t* out_begin = out_arr->GetMutableValues<uint64_t>(1);
uint64_t* out_end = out_begin + arr.length();
std::iota(out_begin, out_end, 0);
if (pivot == arr.length()) {
- return Status::OK();
+ return Status::OK();
}
- auto nulls_begin =
- PartitionNulls<ArrayType, NonStablePartitioner>(out_begin, out_end, arr, 0);
+ auto nulls_begin =
+ PartitionNulls<ArrayType, NonStablePartitioner>(out_begin, out_end, arr, 0);
auto nth_begin = out_begin + pivot;
if (nth_begin < nulls_begin) {
std::nth_element(out_begin, nth_begin, nulls_begin,
[&arr](uint64_t left, uint64_t right) {
- const auto lval = GetView::LogicalValue(arr.GetView(left));
- const auto rval = GetView::LogicalValue(arr.GetView(right));
- return lval < rval;
+ const auto lval = GetView::LogicalValue(arr.GetView(left));
+ const auto rval = GetView::LogicalValue(arr.GetView(right));
+ return lval < rval;
});
}
- return Status::OK();
+ return Status::OK();
}
};
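
// Usage sketch for the kernel above through Arrow's public compute API
// (illustrative; assumes the arrow::compute::NthToIndices helper from
// api_vector.h and elides error handling).
#include <arrow/array.h>
#include <arrow/compute/api_vector.h>
#include <arrow/result.h>
#include <memory>

inline arrow::Result<std::shared_ptr<arrow::Array>> PivotIndices(
    const arrow::Array& values) {
  // Indices before position 5 point at values no greater than the pivot,
  // indices after it at values no smaller; neither side is fully sorted.
  return arrow::compute::NthToIndices(values, /*n=*/5);
}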
-// ----------------------------------------------------------------------
-// Array sorting implementations
-
+// ----------------------------------------------------------------------
+// Array sorting implementations
+
template <typename ArrayType, typename VisitorNotNull, typename VisitorNull>
inline void VisitRawValuesInline(const ArrayType& values,
VisitorNotNull&& visitor_not_null,
VisitorNull&& visitor_null) {
const auto data = values.raw_values();
- VisitBitBlocksVoid(
- values.null_bitmap(), values.offset(), values.length(),
- [&](int64_t i) { visitor_not_null(data[i]); }, [&]() { visitor_null(); });
-}
-
-template <typename VisitorNotNull, typename VisitorNull>
-inline void VisitRawValuesInline(const BooleanArray& values,
- VisitorNotNull&& visitor_not_null,
- VisitorNull&& visitor_null) {
- if (values.null_count() != 0) {
- const uint8_t* data = values.data()->GetValues<uint8_t>(1, 0);
- VisitBitBlocksVoid(
- values.null_bitmap(), values.offset(), values.length(),
- [&](int64_t i) { visitor_not_null(BitUtil::GetBit(data, values.offset() + i)); },
- [&]() { visitor_null(); });
+ VisitBitBlocksVoid(
+ values.null_bitmap(), values.offset(), values.length(),
+ [&](int64_t i) { visitor_not_null(data[i]); }, [&]() { visitor_null(); });
+}
+
+template <typename VisitorNotNull, typename VisitorNull>
+inline void VisitRawValuesInline(const BooleanArray& values,
+ VisitorNotNull&& visitor_not_null,
+ VisitorNull&& visitor_null) {
+ if (values.null_count() != 0) {
+ const uint8_t* data = values.data()->GetValues<uint8_t>(1, 0);
+ VisitBitBlocksVoid(
+ values.null_bitmap(), values.offset(), values.length(),
+ [&](int64_t i) { visitor_not_null(BitUtil::GetBit(data, values.offset() + i)); },
+ [&]() { visitor_null(); });
} else {
- // Can avoid GetBit() overhead in the no-nulls case
- VisitBitBlocksVoid(
- values.data()->buffers[1], values.offset(), values.length(),
- [&](int64_t i) { visitor_not_null(true); }, [&]() { visitor_not_null(false); });
+ // Can avoid GetBit() overhead in the no-nulls case
+ VisitBitBlocksVoid(
+ values.data()->buffers[1], values.offset(), values.length(),
+ [&](int64_t i) { visitor_not_null(true); }, [&]() { visitor_not_null(false); });
}
}
template <typename ArrowType>
-class ArrayCompareSorter {
+class ArrayCompareSorter {
using ArrayType = typename TypeTraits<ArrowType>::ArrayType;
- using GetView = GetViewType<ArrowType>;
+ using GetView = GetViewType<ArrowType>;
public:
- // Returns where null starts.
- //
- // `offset` is used when this is called on a chunk of a chunked array
- uint64_t* Sort(uint64_t* indices_begin, uint64_t* indices_end, const ArrayType& values,
- int64_t offset, const ArraySortOptions& options) {
- auto nulls_begin = PartitionNulls<ArrayType, StablePartitioner>(
- indices_begin, indices_end, values, offset);
- if (options.order == SortOrder::Ascending) {
- std::stable_sort(
- indices_begin, nulls_begin, [&values, &offset](uint64_t left, uint64_t right) {
- const auto lhs = GetView::LogicalValue(values.GetView(left - offset));
- const auto rhs = GetView::LogicalValue(values.GetView(right - offset));
- return lhs < rhs;
- });
- } else {
- std::stable_sort(
- indices_begin, nulls_begin, [&values, &offset](uint64_t left, uint64_t right) {
- const auto lhs = GetView::LogicalValue(values.GetView(left - offset));
- const auto rhs = GetView::LogicalValue(values.GetView(right - offset));
-            // We use 'right < left' instead of 'left > right' so that only
-            // operator '<' is required.
- return rhs < lhs;
- });
+ // Returns where null starts.
+ //
+ // `offset` is used when this is called on a chunk of a chunked array
+ uint64_t* Sort(uint64_t* indices_begin, uint64_t* indices_end, const ArrayType& values,
+ int64_t offset, const ArraySortOptions& options) {
+ auto nulls_begin = PartitionNulls<ArrayType, StablePartitioner>(
+ indices_begin, indices_end, values, offset);
+ if (options.order == SortOrder::Ascending) {
+ std::stable_sort(
+ indices_begin, nulls_begin, [&values, &offset](uint64_t left, uint64_t right) {
+ const auto lhs = GetView::LogicalValue(values.GetView(left - offset));
+ const auto rhs = GetView::LogicalValue(values.GetView(right - offset));
+ return lhs < rhs;
+ });
+ } else {
+ std::stable_sort(
+ indices_begin, nulls_begin, [&values, &offset](uint64_t left, uint64_t right) {
+ const auto lhs = GetView::LogicalValue(values.GetView(left - offset));
+ const auto rhs = GetView::LogicalValue(values.GetView(right - offset));
+            // We use 'right < left' instead of 'left > right' so that only
+            // operator '<' is required.
+ return rhs < lhs;
+ });
}
- return nulls_begin;
+ return nulls_begin;
}
};
template <typename ArrowType>
-class ArrayCountSorter {
+class ArrayCountSorter {
using ArrayType = typename TypeTraits<ArrowType>::ArrayType;
using c_type = typename ArrowType::c_type;
public:
- ArrayCountSorter() = default;
+ ArrayCountSorter() = default;
- explicit ArrayCountSorter(c_type min, c_type max) { SetMinMax(min, max); }
+ explicit ArrayCountSorter(c_type min, c_type max) { SetMinMax(min, max); }
// Assume: max >= min && (max - min) < 4Gi
void SetMinMax(c_type min, c_type max) {
@@ -441,14 +441,14 @@ class ArrayCountSorter {
value_range_ = static_cast<uint32_t>(max - min) + 1;
}
- // Returns where null starts.
- uint64_t* Sort(uint64_t* indices_begin, uint64_t* indices_end, const ArrayType& values,
- int64_t offset, const ArraySortOptions& options) {
+ // Returns where null starts.
+ uint64_t* Sort(uint64_t* indices_begin, uint64_t* indices_end, const ArrayType& values,
+ int64_t offset, const ArraySortOptions& options) {
    // A 32-bit counter performs much better than a 64-bit one
if (values.length() < (1LL << 32)) {
- return SortInternal<uint32_t>(indices_begin, indices_end, values, offset, options);
+ return SortInternal<uint32_t>(indices_begin, indices_end, values, offset, options);
} else {
- return SortInternal<uint64_t>(indices_begin, indices_end, values, offset, options);
+ return SortInternal<uint64_t>(indices_begin, indices_end, values, offset, options);
}
}
@@ -456,81 +456,81 @@ class ArrayCountSorter {
c_type min_{0};
uint32_t value_range_{0};
- // Returns where null starts.
- //
- // `offset` is used when this is called on a chunk of a chunked array
+ // Returns where null starts.
+ //
+ // `offset` is used when this is called on a chunk of a chunked array
template <typename CounterType>
- uint64_t* SortInternal(uint64_t* indices_begin, uint64_t* indices_end,
- const ArrayType& values, int64_t offset,
- const ArraySortOptions& options) {
+ uint64_t* SortInternal(uint64_t* indices_begin, uint64_t* indices_end,
+ const ArrayType& values, int64_t offset,
+ const ArraySortOptions& options) {
const uint32_t value_range = value_range_;
// first slot reserved for prefix sum
std::vector<CounterType> counts(1 + value_range);
- if (options.order == SortOrder::Ascending) {
- VisitRawValuesInline(
- values, [&](c_type v) { ++counts[v - min_ + 1]; }, []() {});
- for (uint32_t i = 1; i <= value_range; ++i) {
- counts[i] += counts[i - 1];
- }
- auto null_position = counts[value_range];
- auto nulls_begin = indices_begin + null_position;
- int64_t index = offset;
- VisitRawValuesInline(
- values, [&](c_type v) { indices_begin[counts[v - min_]++] = index++; },
- [&]() { indices_begin[null_position++] = index++; });
- return nulls_begin;
- } else {
- VisitRawValuesInline(
- values, [&](c_type v) { ++counts[v - min_]; }, []() {});
- for (uint32_t i = value_range; i >= 1; --i) {
- counts[i - 1] += counts[i];
- }
- auto null_position = counts[0];
- auto nulls_begin = indices_begin + null_position;
- int64_t index = offset;
- VisitRawValuesInline(
- values, [&](c_type v) { indices_begin[counts[v - min_ + 1]++] = index++; },
- [&]() { indices_begin[null_position++] = index++; });
- return nulls_begin;
- }
- }
-};
-
-using ::arrow::internal::Bitmap;
-
-template <>
-class ArrayCountSorter<BooleanType> {
- public:
- ArrayCountSorter() = default;
-
- // Returns where null starts.
- // `offset` is used when this is called on a chunk of a chunked array
- uint64_t* Sort(uint64_t* indices_begin, uint64_t* indices_end,
- const BooleanArray& values, int64_t offset,
- const ArraySortOptions& options) {
- std::array<int64_t, 2> counts{0, 0};
-
- const int64_t nulls = values.null_count();
- const int64_t ones = values.true_count();
- const int64_t zeros = values.length() - ones - nulls;
-
- int64_t null_position = values.length() - nulls;
- int64_t index = offset;
- const auto nulls_begin = indices_begin + null_position;
-
- if (options.order == SortOrder::Ascending) {
- // ones start after zeros
- counts[1] = zeros;
- } else {
- // zeros start after ones
- counts[0] = ones;
+ if (options.order == SortOrder::Ascending) {
+ VisitRawValuesInline(
+ values, [&](c_type v) { ++counts[v - min_ + 1]; }, []() {});
+ for (uint32_t i = 1; i <= value_range; ++i) {
+ counts[i] += counts[i - 1];
+ }
+ auto null_position = counts[value_range];
+ auto nulls_begin = indices_begin + null_position;
+ int64_t index = offset;
+ VisitRawValuesInline(
+ values, [&](c_type v) { indices_begin[counts[v - min_]++] = index++; },
+ [&]() { indices_begin[null_position++] = index++; });
+ return nulls_begin;
+ } else {
+ VisitRawValuesInline(
+ values, [&](c_type v) { ++counts[v - min_]; }, []() {});
+ for (uint32_t i = value_range; i >= 1; --i) {
+ counts[i - 1] += counts[i];
+ }
+ auto null_position = counts[0];
+ auto nulls_begin = indices_begin + null_position;
+ int64_t index = offset;
+ VisitRawValuesInline(
+ values, [&](c_type v) { indices_begin[counts[v - min_ + 1]++] = index++; },
+ [&]() { indices_begin[null_position++] = index++; });
+ return nulls_begin;
+ }
+ }
+};
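
// Minimal counting-sort-to-indices sketch (plain C++, not part of the patch),
// mirroring the ascending branch above for uint8 values; illustrative only.
#include <cstdint>
#include <vector>

inline std::vector<uint64_t> CountSortIndices(const std::vector<uint8_t>& values) {
  std::vector<uint32_t> counts(256 + 1, 0);  // first slot reserved for prefix sum
  for (uint8_t v : values) ++counts[v + 1];
  for (int i = 1; i <= 256; ++i) counts[i] += counts[i - 1];
  std::vector<uint64_t> indices(values.size());
  for (uint64_t i = 0; i < values.size(); ++i) {
    indices[counts[values[i]]++] = i;  // stable: equal values keep input order
  }
  return indices;
}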
+
+using ::arrow::internal::Bitmap;
+
+template <>
+class ArrayCountSorter<BooleanType> {
+ public:
+ ArrayCountSorter() = default;
+
+ // Returns where null starts.
+ // `offset` is used when this is called on a chunk of a chunked array
+ uint64_t* Sort(uint64_t* indices_begin, uint64_t* indices_end,
+ const BooleanArray& values, int64_t offset,
+ const ArraySortOptions& options) {
+ std::array<int64_t, 2> counts{0, 0};
+
+ const int64_t nulls = values.null_count();
+ const int64_t ones = values.true_count();
+ const int64_t zeros = values.length() - ones - nulls;
+
+ int64_t null_position = values.length() - nulls;
+ int64_t index = offset;
+ const auto nulls_begin = indices_begin + null_position;
+
+ if (options.order == SortOrder::Ascending) {
+ // ones start after zeros
+ counts[1] = zeros;
+ } else {
+ // zeros start after ones
+ counts[0] = ones;
}
VisitRawValuesInline(
- values, [&](bool v) { indices_begin[counts[v]++] = index++; },
- [&]() { indices_begin[null_position++] = index++; });
- return nulls_begin;
+ values, [&](bool v) { indices_begin[counts[v]++] = index++; },
+ [&]() { indices_begin[null_position++] = index++; });
+ return nulls_begin;
}
};
@@ -538,35 +538,35 @@ class ArrayCountSorter<BooleanType> {
// - Use O(n) counting sort if values are in a small range
// - Use O(nlogn) std::stable_sort otherwise
template <typename ArrowType>
-class ArrayCountOrCompareSorter {
+class ArrayCountOrCompareSorter {
using ArrayType = typename TypeTraits<ArrowType>::ArrayType;
using c_type = typename ArrowType::c_type;
public:
- // Returns where null starts.
- //
- // `offset` is used when this is called on a chunk of a chunked array
- uint64_t* Sort(uint64_t* indices_begin, uint64_t* indices_end, const ArrayType& values,
- int64_t offset, const ArraySortOptions& options) {
+ // Returns where null starts.
+ //
+ // `offset` is used when this is called on a chunk of a chunked array
+ uint64_t* Sort(uint64_t* indices_begin, uint64_t* indices_end, const ArrayType& values,
+ int64_t offset, const ArraySortOptions& options) {
if (values.length() >= countsort_min_len_ && values.length() > values.null_count()) {
- c_type min, max;
- std::tie(min, max) = GetMinMax<c_type>(*values.data());
+ c_type min, max;
+ std::tie(min, max) = GetMinMax<c_type>(*values.data());
// For signed int32/64, (max - min) may overflow and trigger UBSAN.
    // Cast to the largest unsigned type (uint64_t) before subtracting.
if (static_cast<uint64_t>(max) - static_cast<uint64_t>(min) <=
countsort_max_range_) {
count_sorter_.SetMinMax(min, max);
- return count_sorter_.Sort(indices_begin, indices_end, values, offset, options);
+ return count_sorter_.Sort(indices_begin, indices_end, values, offset, options);
}
}
- return compare_sorter_.Sort(indices_begin, indices_end, values, offset, options);
+ return compare_sorter_.Sort(indices_begin, indices_end, values, offset, options);
}
private:
- ArrayCompareSorter<ArrowType> compare_sorter_;
- ArrayCountSorter<ArrowType> count_sorter_;
+ ArrayCompareSorter<ArrowType> compare_sorter_;
+ ArrayCountSorter<ArrowType> count_sorter_;
  // Crossover point to prefer counting sort over std::stable_sort (merge sort)
  // - the array to be sorted is longer than "countsort_min_len_"
@@ -582,1257 +582,1257 @@ class ArrayCountOrCompareSorter {
};
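
// Sketch of the overflow-safe range check used above (illustrative, not part
// of the patch): (max - min) on signed operands can overflow, so the
// subtraction is done after casting both sides to uint64_t, where wraparound
// is well defined.
#include <cstdint>

inline bool RangeFitsCountingSort(int64_t min, int64_t max, uint64_t limit) {
  return static_cast<uint64_t>(max) - static_cast<uint64_t>(min) <= limit;
}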
template <typename Type, typename Enable = void>
-struct ArraySorter;
+struct ArraySorter;
+
+template <>
+struct ArraySorter<BooleanType> {
+ ArrayCountSorter<BooleanType> impl;
+};
template <>
-struct ArraySorter<BooleanType> {
- ArrayCountSorter<BooleanType> impl;
+struct ArraySorter<UInt8Type> {
+ ArrayCountSorter<UInt8Type> impl;
+ ArraySorter() : impl(0, 255) {}
};
template <>
-struct ArraySorter<UInt8Type> {
- ArrayCountSorter<UInt8Type> impl;
- ArraySorter() : impl(0, 255) {}
+struct ArraySorter<Int8Type> {
+ ArrayCountSorter<Int8Type> impl;
+ ArraySorter() : impl(-128, 127) {}
};
-template <>
-struct ArraySorter<Int8Type> {
- ArrayCountSorter<Int8Type> impl;
- ArraySorter() : impl(-128, 127) {}
-};
-
template <typename Type>
-struct ArraySorter<Type, enable_if_t<(is_integer_type<Type>::value &&
- (sizeof(typename Type::c_type) > 1)) ||
- is_temporal_type<Type>::value>> {
- ArrayCountOrCompareSorter<Type> impl;
+struct ArraySorter<Type, enable_if_t<(is_integer_type<Type>::value &&
+ (sizeof(typename Type::c_type) > 1)) ||
+ is_temporal_type<Type>::value>> {
+ ArrayCountOrCompareSorter<Type> impl;
};
template <typename Type>
-struct ArraySorter<
- Type, enable_if_t<is_floating_type<Type>::value || is_base_binary_type<Type>::value ||
- is_fixed_size_binary_type<Type>::value>> {
- ArrayCompareSorter<Type> impl;
+struct ArraySorter<
+ Type, enable_if_t<is_floating_type<Type>::value || is_base_binary_type<Type>::value ||
+ is_fixed_size_binary_type<Type>::value>> {
+ ArrayCompareSorter<Type> impl;
};
-using ArraySortIndicesState = internal::OptionsWrapper<ArraySortOptions>;
-
+using ArraySortIndicesState = internal::OptionsWrapper<ArraySortOptions>;
+
template <typename OutType, typename InType>
-struct ArraySortIndices {
+struct ArraySortIndices {
using ArrayType = typename TypeTraits<InType>::ArrayType;
- static Status Exec(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
- const auto& options = ArraySortIndicesState::Get(ctx);
-
- ArrayType arr(batch[0].array());
+ static Status Exec(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
+ const auto& options = ArraySortIndicesState::Get(ctx);
+
+ ArrayType arr(batch[0].array());
ArrayData* out_arr = out->mutable_array();
uint64_t* out_begin = out_arr->GetMutableValues<uint64_t>(1);
uint64_t* out_end = out_begin + arr.length();
- std::iota(out_begin, out_end, 0);
+ std::iota(out_begin, out_end, 0);
+
+ ArraySorter<InType> sorter;
+ sorter.impl.Sort(out_begin, out_end, arr, 0, options);
- ArraySorter<InType> sorter;
- sorter.impl.Sort(out_begin, out_end, arr, 0, options);
-
- return Status::OK();
+ return Status::OK();
}
};
// Sort indices kernels implemented for
//
-// * Boolean type
+// * Boolean type
// * Number types
// * Base binary types
template <template <typename...> class ExecTemplate>
void AddSortingKernels(VectorKernel base, VectorFunction* func) {
- // bool type
- base.signature = KernelSignature::Make({InputType::Array(boolean())}, uint64());
- base.exec = ExecTemplate<UInt64Type, BooleanType>::Exec;
- DCHECK_OK(func->AddKernel(base));
-
+ // bool type
+ base.signature = KernelSignature::Make({InputType::Array(boolean())}, uint64());
+ base.exec = ExecTemplate<UInt64Type, BooleanType>::Exec;
+ DCHECK_OK(func->AddKernel(base));
+
for (const auto& ty : NumericTypes()) {
- auto physical_type = GetPhysicalType(ty);
+ auto physical_type = GetPhysicalType(ty);
+ base.signature = KernelSignature::Make({InputType::Array(ty)}, uint64());
+ base.exec = GenerateNumeric<ExecTemplate, UInt64Type>(*physical_type);
+ DCHECK_OK(func->AddKernel(base));
+ }
+ for (const auto& ty : TemporalTypes()) {
+ auto physical_type = GetPhysicalType(ty);
base.signature = KernelSignature::Make({InputType::Array(ty)}, uint64());
- base.exec = GenerateNumeric<ExecTemplate, UInt64Type>(*physical_type);
+ base.exec = GenerateNumeric<ExecTemplate, UInt64Type>(*physical_type);
+ DCHECK_OK(func->AddKernel(base));
+ }
+ for (const auto id : DecimalTypeIds()) {
+ base.signature = KernelSignature::Make({InputType::Array(id)}, uint64());
+ base.exec = GenerateDecimal<ExecTemplate, UInt64Type>(id);
DCHECK_OK(func->AddKernel(base));
}
- for (const auto& ty : TemporalTypes()) {
- auto physical_type = GetPhysicalType(ty);
- base.signature = KernelSignature::Make({InputType::Array(ty)}, uint64());
- base.exec = GenerateNumeric<ExecTemplate, UInt64Type>(*physical_type);
- DCHECK_OK(func->AddKernel(base));
- }
- for (const auto id : DecimalTypeIds()) {
- base.signature = KernelSignature::Make({InputType::Array(id)}, uint64());
- base.exec = GenerateDecimal<ExecTemplate, UInt64Type>(id);
- DCHECK_OK(func->AddKernel(base));
- }
for (const auto& ty : BaseBinaryTypes()) {
- auto physical_type = GetPhysicalType(ty);
+ auto physical_type = GetPhysicalType(ty);
base.signature = KernelSignature::Make({InputType::Array(ty)}, uint64());
- base.exec = GenerateVarBinaryBase<ExecTemplate, UInt64Type>(*physical_type);
+ base.exec = GenerateVarBinaryBase<ExecTemplate, UInt64Type>(*physical_type);
DCHECK_OK(func->AddKernel(base));
}
- base.signature =
- KernelSignature::Make({InputType::Array(Type::FIXED_SIZE_BINARY)}, uint64());
- base.exec = ExecTemplate<UInt64Type, FixedSizeBinaryType>::Exec;
- DCHECK_OK(func->AddKernel(base));
+ base.signature =
+ KernelSignature::Make({InputType::Array(Type::FIXED_SIZE_BINARY)}, uint64());
+ base.exec = ExecTemplate<UInt64Type, FixedSizeBinaryType>::Exec;
+ DCHECK_OK(func->AddKernel(base));
}
-// ----------------------------------------------------------------------
-// ChunkedArray sorting implementations
-
-// Sort a chunked array directly without sorting each array in the
-// chunked array. This is used for processing the second and following
-// sort keys in TableRadixSorter.
-//
-// This uses the same algorithm as ArrayCompareSorter.
-template <typename Type>
-class ChunkedArrayCompareSorter {
- using ArrayType = typename TypeTraits<Type>::ArrayType;
-
- public:
- // Returns where null starts.
- uint64_t* Sort(uint64_t* indices_begin, uint64_t* indices_end,
- const std::vector<const Array*>& arrays, int64_t null_count,
- const ArraySortOptions& options) {
- auto nulls_begin = PartitionNulls<ArrayType, StablePartitioner>(
- indices_begin, indices_end, arrays, null_count);
- ChunkedArrayResolver resolver(arrays);
- if (options.order == SortOrder::Ascending) {
- std::stable_sort(indices_begin, nulls_begin, [&](uint64_t left, uint64_t right) {
- const auto chunk_left = resolver.Resolve<ArrayType>(left);
- const auto chunk_right = resolver.Resolve<ArrayType>(right);
- return chunk_left.Value() < chunk_right.Value();
- });
- } else {
- std::stable_sort(indices_begin, nulls_begin, [&](uint64_t left, uint64_t right) {
- const auto chunk_left = resolver.Resolve<ArrayType>(left);
- const auto chunk_right = resolver.Resolve<ArrayType>(right);
-        // We use 'right < left' instead of 'left > right' so that only
-        // operator '<' is required.
- return chunk_right.Value() < chunk_left.Value();
- });
- }
- return nulls_begin;
- }
-};
-
-// Sort a chunked array by sorting each array in the chunked array.
-//
-// TODO: This is a naive implementation. Performance can be improved,
-// for example by sorting each array on a separate thread.
-class ChunkedArraySorter : public TypeVisitor {
- public:
- ChunkedArraySorter(ExecContext* ctx, uint64_t* indices_begin, uint64_t* indices_end,
- const ChunkedArray& chunked_array, const SortOrder order,
- bool can_use_array_sorter = true)
- : TypeVisitor(),
- indices_begin_(indices_begin),
- indices_end_(indices_end),
- chunked_array_(chunked_array),
- physical_type_(GetPhysicalType(chunked_array.type())),
- physical_chunks_(GetPhysicalChunks(chunked_array_, physical_type_)),
- order_(order),
- can_use_array_sorter_(can_use_array_sorter),
- ctx_(ctx) {}
-
- Status Sort() { return physical_type_->Accept(this); }
-
-#define VISIT(TYPE) \
- Status Visit(const TYPE& type) override { return SortInternal<TYPE>(); }
-
- VISIT_PHYSICAL_TYPES(VISIT)
-
-#undef VISIT
-
- private:
- template <typename Type>
- Status SortInternal() {
- using ArrayType = typename TypeTraits<Type>::ArrayType;
- ArraySortOptions options(order_);
- const auto num_chunks = chunked_array_.num_chunks();
- if (num_chunks == 0) {
- return Status::OK();
- }
- const auto arrays = GetArrayPointers(physical_chunks_);
- if (can_use_array_sorter_) {
- // Sort each chunk independently and merge to sorted indices.
- // This is a serial implementation.
- ArraySorter<Type> sorter;
- struct SortedChunk {
- int64_t begin_offset;
- int64_t end_offset;
- int64_t nulls_offset;
- };
- std::vector<SortedChunk> sorted(num_chunks);
-
- // First sort all individual chunks
- int64_t begin_offset = 0;
- int64_t end_offset = 0;
- int64_t null_count = 0;
- for (int i = 0; i < num_chunks; ++i) {
- const auto array = checked_cast<const ArrayType*>(arrays[i]);
- end_offset += array->length();
- null_count += array->null_count();
- uint64_t* nulls_begin =
- sorter.impl.Sort(indices_begin_ + begin_offset, indices_begin_ + end_offset,
- *array, begin_offset, options);
- sorted[i] = {begin_offset, end_offset, nulls_begin - indices_begin_};
- begin_offset = end_offset;
- }
- DCHECK_EQ(end_offset, indices_end_ - indices_begin_);
-
- std::unique_ptr<Buffer> temp_buffer;
- uint64_t* temp_indices = nullptr;
- if (sorted.size() > 1) {
- ARROW_ASSIGN_OR_RAISE(
- temp_buffer,
- AllocateBuffer(sizeof(int64_t) * (indices_end_ - indices_begin_ - null_count),
- ctx_->memory_pool()));
- temp_indices = reinterpret_cast<uint64_t*>(temp_buffer->mutable_data());
- }
-
- // Then merge them by pairs, recursively
- while (sorted.size() > 1) {
- auto out_it = sorted.begin();
- auto it = sorted.begin();
- while (it < sorted.end() - 1) {
- const auto& left = *it++;
- const auto& right = *it++;
- DCHECK_EQ(left.end_offset, right.begin_offset);
- DCHECK_GE(left.nulls_offset, left.begin_offset);
- DCHECK_LE(left.nulls_offset, left.end_offset);
- DCHECK_GE(right.nulls_offset, right.begin_offset);
- DCHECK_LE(right.nulls_offset, right.end_offset);
- uint64_t* nulls_begin = Merge<ArrayType>(
- indices_begin_ + left.begin_offset, indices_begin_ + left.end_offset,
- indices_begin_ + right.end_offset, indices_begin_ + left.nulls_offset,
- indices_begin_ + right.nulls_offset, arrays, null_count, order_,
- temp_indices);
- *out_it++ = {left.begin_offset, right.end_offset, nulls_begin - indices_begin_};
- }
- if (it < sorted.end()) {
- *out_it++ = *it++;
- }
- sorted.erase(out_it, sorted.end());
- }
- DCHECK_EQ(sorted.size(), 1);
- DCHECK_EQ(sorted[0].begin_offset, 0);
- DCHECK_EQ(sorted[0].end_offset, chunked_array_.length());
- // Note that "nulls" can also include NaNs, hence the >= check
- DCHECK_GE(chunked_array_.length() - sorted[0].nulls_offset, null_count);
- } else {
-      // Sort the chunked array directly.
- ChunkedArrayCompareSorter<Type> sorter;
- sorter.Sort(indices_begin_, indices_end_, arrays, chunked_array_.null_count(),
- options);
- }
- return Status::OK();
- }
-
-  // Merges two sorted runs of indices and returns where the nulls start.
-  // That null position is used by the next merge to locate the
-  // already-sorted non-null regions.
- template <typename ArrayType>
- uint64_t* Merge(uint64_t* indices_begin, uint64_t* indices_middle,
- uint64_t* indices_end, uint64_t* left_nulls_begin,
- uint64_t* right_nulls_begin, const std::vector<const Array*>& arrays,
- int64_t null_count, const SortOrder order, uint64_t* temp_indices) {
- // Input layout:
- // [left non-nulls .... left nulls .... right non-nulls .... right nulls]
- // ^ ^ ^ ^
- // | | | |
- // indices_begin left_nulls_begin indices_middle right_nulls_begin
- auto left_num_non_nulls = left_nulls_begin - indices_begin;
- auto right_num_non_nulls = right_nulls_begin - indices_middle;
-
- // Mutate the input, stably, to obtain the following layout:
- // [left non-nulls .... right non-nulls .... left nulls .... right nulls]
- // ^ ^ ^ ^
- // | | | |
- // indices_begin indices_middle nulls_begin right_nulls_begin
- std::rotate(left_nulls_begin, indices_middle, right_nulls_begin);
- auto nulls_begin = indices_begin + left_num_non_nulls + right_num_non_nulls;
- // If the type has null-like values (such as NaN), ensure those plus regular
- // nulls are partitioned in the right order. Note this assumes that all
- // null-like values (e.g. NaN) are ordered equally.
- if (NullTraits<typename ArrayType::TypeClass>::has_null_like_values) {
- PartitionNullsOnly<StablePartitioner>(nulls_begin, indices_end, arrays, null_count);
- }
-
- // Merge the non-null values into temp area
- indices_middle = indices_begin + left_num_non_nulls;
- indices_end = indices_middle + right_num_non_nulls;
- const ChunkedArrayResolver left_resolver(arrays);
- const ChunkedArrayResolver right_resolver(arrays);
- if (order == SortOrder::Ascending) {
- std::merge(indices_begin, indices_middle, indices_middle, indices_end, temp_indices,
- [&](uint64_t left, uint64_t right) {
- const auto chunk_left = left_resolver.Resolve<ArrayType>(left);
- const auto chunk_right = right_resolver.Resolve<ArrayType>(right);
- return chunk_left.Value() < chunk_right.Value();
- });
- } else {
- std::merge(indices_begin, indices_middle, indices_middle, indices_end, temp_indices,
- [&](uint64_t left, uint64_t right) {
- const auto chunk_left = left_resolver.Resolve<ArrayType>(left);
- const auto chunk_right = right_resolver.Resolve<ArrayType>(right);
-                   // We use 'right < left' instead of 'left > right' so
-                   // that only operator '<' is required.
- return chunk_right.Value() < chunk_left.Value();
- });
- }
- // Copy back temp area into main buffer
- std::copy(temp_indices, temp_indices + (nulls_begin - indices_begin), indices_begin);
- return nulls_begin;
- }
-
- uint64_t* indices_begin_;
- uint64_t* indices_end_;
- const ChunkedArray& chunked_array_;
- const std::shared_ptr<DataType> physical_type_;
- const ArrayVector physical_chunks_;
- const SortOrder order_;
- const bool can_use_array_sorter_;
- ExecContext* ctx_;
-};
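
// Illustrative sketch (plain STL, not part of the patch) of the std::rotate
// step in Merge() above: two adjacent runs laid out as
// [left values][left nulls][right values][right nulls] become
// [left values][right values][left nulls][right nulls].
#include <algorithm>
#include <cstdint>
#include <vector>

inline void RotateMergeExample() {
  // 0..2 left non-nulls, 3 left null, 4..5 right non-nulls, 6 right null.
  std::vector<uint64_t> idx = {0, 1, 2, 3, 4, 5, 6};
  uint64_t* left_nulls_begin = idx.data() + 3;
  uint64_t* indices_middle = idx.data() + 4;  // start of the right run
  uint64_t* right_nulls_begin = idx.data() + 6;
  std::rotate(left_nulls_begin, indices_middle, right_nulls_begin);
  // idx is now {0, 1, 2, 4, 5, 3, 6}; the non-null runs are adjacent and can
  // be merged with std::merge into a temporary buffer.
}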
-
-// ----------------------------------------------------------------------
-// Record batch sorting implementation(s)
-
-// Visit contiguous ranges of equal values. All entries are assumed
-// to be non-null.
-template <typename ArrayType, typename Visitor>
-void VisitConstantRanges(const ArrayType& array, uint64_t* indices_begin,
- uint64_t* indices_end, Visitor&& visit) {
- using GetView = GetViewType<typename ArrayType::TypeClass>;
-
- if (indices_begin == indices_end) {
- return;
- }
- auto range_start = indices_begin;
- auto range_cur = range_start;
- auto last_value = GetView::LogicalValue(array.GetView(*range_cur));
- while (++range_cur != indices_end) {
- auto v = GetView::LogicalValue(array.GetView(*range_cur));
- if (v != last_value) {
- visit(range_start, range_cur);
- range_start = range_cur;
- last_value = v;
- }
- }
- if (range_start != range_cur) {
- visit(range_start, range_cur);
- }
-}
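
// Sketch of the run-visiting idea above (plain C++, illustrative only):
// invoke a callback once per maximal run of equal values.
#include <cstddef>
#include <vector>

template <typename Visitor>
inline void VisitRuns(const std::vector<int>& sorted, Visitor&& visit) {
  size_t start = 0;
  for (size_t cur = 1; cur <= sorted.size(); ++cur) {
    if (cur == sorted.size() || sorted[cur] != sorted[start]) {
      visit(start, cur);  // half-open range [start, cur) of equal values
      start = cur;
    }
  }
}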
-
-// A sorter for a single column of a RecordBatch, deferring to the next column
-// for ranges of equal values.
-class RecordBatchColumnSorter {
- public:
- explicit RecordBatchColumnSorter(RecordBatchColumnSorter* next_column = nullptr)
- : next_column_(next_column) {}
- virtual ~RecordBatchColumnSorter() {}
-
- virtual void SortRange(uint64_t* indices_begin, uint64_t* indices_end) = 0;
-
- protected:
- RecordBatchColumnSorter* next_column_;
-};
-
-template <typename Type>
-class ConcreteRecordBatchColumnSorter : public RecordBatchColumnSorter {
- public:
- using ArrayType = typename TypeTraits<Type>::ArrayType;
-
- ConcreteRecordBatchColumnSorter(std::shared_ptr<Array> array, SortOrder order,
- RecordBatchColumnSorter* next_column = nullptr)
- : RecordBatchColumnSorter(next_column),
- owned_array_(std::move(array)),
- array_(checked_cast<const ArrayType&>(*owned_array_)),
- order_(order),
- null_count_(array_.null_count()) {}
-
- void SortRange(uint64_t* indices_begin, uint64_t* indices_end) {
- using GetView = GetViewType<Type>;
-
- constexpr int64_t offset = 0;
- uint64_t* nulls_begin;
- if (null_count_ == 0) {
- nulls_begin = indices_end;
- } else {
- // NOTE that null_count_ is merely an upper bound on the number of nulls
- // in this particular range.
- nulls_begin = PartitionNullsOnly<StablePartitioner>(indices_begin, indices_end,
- array_, offset);
- DCHECK_LE(indices_end - nulls_begin, null_count_);
- }
- uint64_t* null_likes_begin = PartitionNullLikes<ArrayType, StablePartitioner>(
- indices_begin, nulls_begin, array_, offset);
-
- // TODO This is roughly the same as ArrayCompareSorter.
- // Also, we would like to use a counting sort if possible. This requires
- // a counting sort compatible with indirect indexing.
- if (order_ == SortOrder::Ascending) {
- std::stable_sort(
- indices_begin, null_likes_begin, [&](uint64_t left, uint64_t right) {
- const auto lhs = GetView::LogicalValue(array_.GetView(left - offset));
- const auto rhs = GetView::LogicalValue(array_.GetView(right - offset));
- return lhs < rhs;
- });
- } else {
- std::stable_sort(
- indices_begin, null_likes_begin, [&](uint64_t left, uint64_t right) {
-          // We use 'rhs < lhs' instead of 'lhs > rhs' so that only
-          // operator '<' is required.
-          const auto lhs = GetView::LogicalValue(array_.GetView(left - offset));
-          const auto rhs = GetView::LogicalValue(array_.GetView(right - offset));
-          return rhs < lhs;
- });
- }
-
- if (next_column_ != nullptr) {
- // Visit all ranges of equal values in this column and sort them on
- // the next column.
- SortNextColumn(null_likes_begin, nulls_begin);
- SortNextColumn(nulls_begin, indices_end);
- VisitConstantRanges(array_, indices_begin, null_likes_begin,
- [&](uint64_t* range_start, uint64_t* range_end) {
- SortNextColumn(range_start, range_end);
- });
- }
- }
-
- void SortNextColumn(uint64_t* indices_begin, uint64_t* indices_end) {
- // Avoid the cost of a virtual method call in trivial cases
- if (indices_end - indices_begin > 1) {
- next_column_->SortRange(indices_begin, indices_end);
- }
- }
-
- protected:
- const std::shared_ptr<Array> owned_array_;
- const ArrayType& array_;
- const SortOrder order_;
- const int64_t null_count_;
-};
-
-// Sort a batch using a single-pass left-to-right radix sort.
-class RadixRecordBatchSorter {
- public:
- RadixRecordBatchSorter(uint64_t* indices_begin, uint64_t* indices_end,
- const RecordBatch& batch, const SortOptions& options)
- : batch_(batch),
- options_(options),
- indices_begin_(indices_begin),
- indices_end_(indices_end) {}
-
- Status Sort() {
- ARROW_ASSIGN_OR_RAISE(const auto sort_keys,
- ResolveSortKeys(batch_, options_.sort_keys));
-
- // Create column sorters from right to left
- std::vector<std::unique_ptr<RecordBatchColumnSorter>> column_sorts(sort_keys.size());
- RecordBatchColumnSorter* next_column = nullptr;
- for (int64_t i = static_cast<int64_t>(sort_keys.size() - 1); i >= 0; --i) {
- ColumnSortFactory factory(sort_keys[i], next_column);
- ARROW_ASSIGN_OR_RAISE(column_sorts[i], factory.MakeColumnSort());
- next_column = column_sorts[i].get();
- }
-
- // Sort from left to right
- column_sorts.front()->SortRange(indices_begin_, indices_end_);
- return Status::OK();
- }
-
- protected:
- struct ResolvedSortKey {
- std::shared_ptr<Array> array;
- SortOrder order;
- };
-
- struct ColumnSortFactory {
- ColumnSortFactory(const ResolvedSortKey& sort_key,
- RecordBatchColumnSorter* next_column)
- : physical_type(GetPhysicalType(sort_key.array->type())),
- array(GetPhysicalArray(*sort_key.array, physical_type)),
- order(sort_key.order),
- next_column(next_column) {}
-
- Result<std::unique_ptr<RecordBatchColumnSorter>> MakeColumnSort() {
- RETURN_NOT_OK(VisitTypeInline(*physical_type, this));
- DCHECK_NE(result, nullptr);
- return std::move(result);
- }
-
-#define VISIT(TYPE) \
- Status Visit(const TYPE& type) { return VisitGeneric(type); }
-
- VISIT_PHYSICAL_TYPES(VISIT)
-
-#undef VISIT
-
- Status Visit(const DataType& type) {
- return Status::TypeError("Unsupported type for RecordBatch sorting: ",
- type.ToString());
- }
-
- template <typename Type>
- Status VisitGeneric(const Type&) {
- result.reset(new ConcreteRecordBatchColumnSorter<Type>(array, order, next_column));
- return Status::OK();
- }
-
- std::shared_ptr<DataType> physical_type;
- std::shared_ptr<Array> array;
- SortOrder order;
- RecordBatchColumnSorter* next_column;
- std::unique_ptr<RecordBatchColumnSorter> result;
- };
-
- static Result<std::vector<ResolvedSortKey>> ResolveSortKeys(
- const RecordBatch& batch, const std::vector<SortKey>& sort_keys) {
- std::vector<ResolvedSortKey> resolved;
- resolved.reserve(sort_keys.size());
- for (const auto& sort_key : sort_keys) {
- auto array = batch.GetColumnByName(sort_key.name);
- if (!array) {
- return Status::Invalid("Nonexistent sort key column: ", sort_key.name);
- }
- resolved.push_back({std::move(array), sort_key.order});
- }
- return resolved;
- }
-
- const RecordBatch& batch_;
- const SortOptions& options_;
- uint64_t* indices_begin_;
- uint64_t* indices_end_;
-};
-
-// Compare two records in the same RecordBatch or Table
-// (indexing is handled through ResolvedSortKey)
-template <typename ResolvedSortKey>
-class MultipleKeyComparator {
- public:
- explicit MultipleKeyComparator(const std::vector<ResolvedSortKey>& sort_keys)
- : sort_keys_(sort_keys) {}
-
- Status status() const { return status_; }
-
- // Returns true if the left-th value should be ordered before the
- // right-th value, false otherwise. The start_sort_key_index-th
- // sort key and subsequent sort keys are used for comparison.
- bool Compare(uint64_t left, uint64_t right, size_t start_sort_key_index) {
- current_left_ = left;
- current_right_ = right;
- current_compared_ = 0;
- auto num_sort_keys = sort_keys_.size();
- for (size_t i = start_sort_key_index; i < num_sort_keys; ++i) {
- current_sort_key_index_ = i;
- status_ = VisitTypeInline(*sort_keys_[i].type, this);
-      // If the left value equals the right value, continue
-      // comparing with the next sort key.
- if (current_compared_ != 0) {
- break;
- }
- }
- return current_compared_ < 0;
- }
-
-#define VISIT(TYPE) \
- Status Visit(const TYPE& type) { \
- current_compared_ = CompareType<TYPE>(); \
- return Status::OK(); \
- }
-
- VISIT_PHYSICAL_TYPES(VISIT)
-
-#undef VISIT
-
- Status Visit(const DataType& type) {
- return Status::TypeError("Unsupported type for RecordBatch sorting: ",
- type.ToString());
- }
-
- private:
- // Compares two records in the same table and returns -1, 0 or 1.
- //
- // -1: The left is less than the right.
-  //  0: The left equals the right.
- // 1: The left is greater than the right.
- //
-  // This supports null and NaN. Null is handled here and NaN is
-  // handled in CompareTypeValue().
- template <typename Type>
- int32_t CompareType() {
- using ArrayType = typename TypeTraits<Type>::ArrayType;
- const auto& sort_key = sort_keys_[current_sort_key_index_];
- auto order = sort_key.order;
- const auto chunk_left = sort_key.template GetChunk<ArrayType>(current_left_);
- const auto chunk_right = sort_key.template GetChunk<ArrayType>(current_right_);
- if (sort_key.null_count > 0) {
- auto is_null_left = chunk_left.IsNull();
- auto is_null_right = chunk_right.IsNull();
- if (is_null_left && is_null_right) {
- return 0;
- } else if (is_null_left) {
- return 1;
- } else if (is_null_right) {
- return -1;
- }
- }
- return CompareTypeValue<Type>(chunk_left, chunk_right, order);
- }
-
- // For non-float types. Value is never NaN.
- template <typename Type>
- enable_if_t<!is_floating_type<Type>::value, int32_t> CompareTypeValue(
- const ResolvedChunk<typename TypeTraits<Type>::ArrayType>& chunk_left,
- const ResolvedChunk<typename TypeTraits<Type>::ArrayType>& chunk_right,
- const SortOrder order) {
- const auto left = chunk_left.Value();
- const auto right = chunk_right.Value();
- int32_t compared;
- if (left == right) {
- compared = 0;
- } else if (left > right) {
- compared = 1;
- } else {
- compared = -1;
- }
- if (order == SortOrder::Descending) {
- compared = -compared;
- }
- return compared;
- }
-
- // For float types. Value may be NaN.
- template <typename Type>
- enable_if_t<is_floating_type<Type>::value, int32_t> CompareTypeValue(
- const ResolvedChunk<typename TypeTraits<Type>::ArrayType>& chunk_left,
- const ResolvedChunk<typename TypeTraits<Type>::ArrayType>& chunk_right,
- const SortOrder order) {
- const auto left = chunk_left.Value();
- const auto right = chunk_right.Value();
- auto is_nan_left = std::isnan(left);
- auto is_nan_right = std::isnan(right);
- if (is_nan_left && is_nan_right) {
- return 0;
- } else if (is_nan_left) {
- return 1;
- } else if (is_nan_right) {
- return -1;
- }
- int32_t compared;
- if (left == right) {
- compared = 0;
- } else if (left > right) {
- compared = 1;
- } else {
- compared = -1;
- }
- if (order == SortOrder::Descending) {
- compared = -compared;
- }
- return compared;
- }
-
- const std::vector<ResolvedSortKey>& sort_keys_;
- Status status_;
- int64_t current_left_;
- int64_t current_right_;
- size_t current_sort_key_index_;
- int32_t current_compared_;
-};
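
// Sketch of the three-way floating-point comparison CompareTypeValue()
// implements (plain C++, illustrative only): NaN orders after every other
// non-null value, and only the value comparison is flipped for descending.
#include <cmath>
#include <cstdint>

inline int32_t CompareFloat(double left, double right, bool descending) {
  const bool nan_left = std::isnan(left);
  const bool nan_right = std::isnan(right);
  if (nan_left && nan_right) return 0;
  if (nan_left) return 1;  // NaN is "greater", regardless of sort order
  if (nan_right) return -1;
  const int32_t compared = (left == right) ? 0 : (left > right ? 1 : -1);
  return descending ? -compared : compared;
}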
-
-// Sort a batch using a single sort and multiple-key comparisons.
-class MultipleKeyRecordBatchSorter : public TypeVisitor {
- private:
- // Preprocessed sort key.
- struct ResolvedSortKey {
- ResolvedSortKey(const std::shared_ptr<Array>& array, const SortOrder order)
- : type(GetPhysicalType(array->type())),
- owned_array(GetPhysicalArray(*array, type)),
- array(*owned_array),
- order(order),
- null_count(array->null_count()) {}
-
- template <typename ArrayType>
- ResolvedChunk<ArrayType> GetChunk(int64_t index) const {
- return {&checked_cast<const ArrayType&>(array), index};
- }
-
- const std::shared_ptr<DataType> type;
- std::shared_ptr<Array> owned_array;
- const Array& array;
- SortOrder order;
- int64_t null_count;
- };
-
- using Comparator = MultipleKeyComparator<ResolvedSortKey>;
-
- public:
- MultipleKeyRecordBatchSorter(uint64_t* indices_begin, uint64_t* indices_end,
- const RecordBatch& batch, const SortOptions& options)
- : indices_begin_(indices_begin),
- indices_end_(indices_end),
- sort_keys_(ResolveSortKeys(batch, options.sort_keys, &status_)),
- comparator_(sort_keys_) {}
-
-  // This is optimized for the first sort key: the first sort key is
-  // sorted by this class, while the second and following sort keys are
-  // handled by Comparator.
- Status Sort() {
- RETURN_NOT_OK(status_);
- return sort_keys_[0].type->Accept(this);
- }
-
-#define VISIT(TYPE) \
- Status Visit(const TYPE& type) override { return SortInternal<TYPE>(); }
-
- VISIT_PHYSICAL_TYPES(VISIT)
-
-#undef VISIT
-
- private:
- static std::vector<ResolvedSortKey> ResolveSortKeys(
- const RecordBatch& batch, const std::vector<SortKey>& sort_keys, Status* status) {
- std::vector<ResolvedSortKey> resolved;
- for (const auto& sort_key : sort_keys) {
- auto array = batch.GetColumnByName(sort_key.name);
- if (!array) {
- *status = Status::Invalid("Nonexistent sort key column: ", sort_key.name);
- break;
- }
- resolved.emplace_back(array, sort_key.order);
- }
- return resolved;
- }
-
- template <typename Type>
- Status SortInternal() {
- using ArrayType = typename TypeTraits<Type>::ArrayType;
-
- auto& comparator = comparator_;
- const auto& first_sort_key = sort_keys_[0];
- const ArrayType& array = checked_cast<const ArrayType&>(first_sort_key.array);
- auto nulls_begin = indices_end_;
- nulls_begin = PartitionNullsInternal<Type>(first_sort_key);
- // Sort first-key non-nulls
- std::stable_sort(indices_begin_, nulls_begin, [&](uint64_t left, uint64_t right) {
- // Both values are never null nor NaN
- // (otherwise they've been partitioned away above).
- const auto value_left = array.GetView(left);
- const auto value_right = array.GetView(right);
- if (value_left != value_right) {
- bool compared = value_left < value_right;
- if (first_sort_key.order == SortOrder::Ascending) {
- return compared;
- } else {
- return !compared;
- }
- }
-      // If the left value equals the right value,
-      // we need to compare the second and following
-      // sort keys.
- return comparator.Compare(left, right, 1);
- });
- return comparator_.status();
- }
-
-  // Behaves like PartitionNulls() but supports multiple sort keys.
- //
- // For non-float types.
- template <typename Type>
- enable_if_t<!is_floating_type<Type>::value, uint64_t*> PartitionNullsInternal(
- const ResolvedSortKey& first_sort_key) {
- using ArrayType = typename TypeTraits<Type>::ArrayType;
- if (first_sort_key.null_count == 0) {
- return indices_end_;
- }
- const ArrayType& array = checked_cast<const ArrayType&>(first_sort_key.array);
- StablePartitioner partitioner;
- auto nulls_begin = partitioner(indices_begin_, indices_end_,
- [&](uint64_t index) { return !array.IsNull(index); });
- // Sort all nulls by second and following sort keys
- // TODO: could we instead run an independent sort from the second key on
- // this slice?
- if (nulls_begin != indices_end_) {
- auto& comparator = comparator_;
- std::stable_sort(nulls_begin, indices_end_,
- [&comparator](uint64_t left, uint64_t right) {
- return comparator.Compare(left, right, 1);
- });
- }
- return nulls_begin;
- }
-
-  // Behaves like PartitionNulls() but supports multiple sort keys.
- //
- // For float types.
- template <typename Type>
- enable_if_t<is_floating_type<Type>::value, uint64_t*> PartitionNullsInternal(
- const ResolvedSortKey& first_sort_key) {
- using ArrayType = typename TypeTraits<Type>::ArrayType;
- const ArrayType& array = checked_cast<const ArrayType&>(first_sort_key.array);
- StablePartitioner partitioner;
- uint64_t* nulls_begin;
- if (first_sort_key.null_count == 0) {
- nulls_begin = indices_end_;
- } else {
- nulls_begin = partitioner(indices_begin_, indices_end_,
- [&](uint64_t index) { return !array.IsNull(index); });
- }
- uint64_t* nans_and_nulls_begin =
- partitioner(indices_begin_, nulls_begin,
- [&](uint64_t index) { return !std::isnan(array.GetView(index)); });
- auto& comparator = comparator_;
- if (nans_and_nulls_begin != nulls_begin) {
- // Sort all NaNs by the second and following sort keys.
- // TODO: could we instead run an independent sort from the second key on
- // this slice?
- std::stable_sort(nans_and_nulls_begin, nulls_begin,
- [&comparator](uint64_t left, uint64_t right) {
- return comparator.Compare(left, right, 1);
- });
- }
- if (nulls_begin != indices_end_) {
- // Sort all nulls by the second and following sort keys.
- // TODO: could we instead run an independent sort from the second key on
- // this slice?
- std::stable_sort(nulls_begin, indices_end_,
- [&comparator](uint64_t left, uint64_t right) {
- return comparator.Compare(left, right, 1);
- });
- }
- return nans_and_nulls_begin;
- }
-
- uint64_t* indices_begin_;
- uint64_t* indices_end_;
- Status status_;
- std::vector<ResolvedSortKey> sort_keys_;
- Comparator comparator_;
-};
-
-// ----------------------------------------------------------------------
-// Table sorting implementations
-
-// Sort a table using a radix sort-like algorithm.
-// A distinct stable sort is called for each sort key, from the last key to the first.
-class TableRadixSorter {
- public:
- Status Sort(ExecContext* ctx, uint64_t* indices_begin, uint64_t* indices_end,
- const Table& table, const SortOptions& options) {
- for (auto i = options.sort_keys.size(); i > 0; --i) {
- const auto& sort_key = options.sort_keys[i - 1];
- const auto& chunked_array = table.GetColumnByName(sort_key.name);
- if (!chunked_array) {
- return Status::Invalid("Nonexistent sort key column: ", sort_key.name);
- }
- // We can use ArraySorter only for the sort key that is
- // processed first because ArraySorter doesn't care about
- // existing indices.
-      const auto can_use_array_sorter = (i == options.sort_keys.size());
- ChunkedArraySorter sorter(ctx, indices_begin, indices_end, *chunked_array.get(),
- sort_key.order, can_use_array_sorter);
- ARROW_RETURN_NOT_OK(sorter.Sort());
- }
- return Status::OK();
- }
-};
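
// Sketch of the last-key-to-first-key idea above (plain STL, not part of the
// patch): one stable sort per key, applied from the least significant key to
// the most significant one, yields the full lexicographic order.
#include <algorithm>
#include <string>
#include <utility>
#include <vector>

inline void RadixLikeSortExample() {
  std::vector<std::pair<std::string, int>> rows = {
      {"b", 1}, {"a", 2}, {"b", 0}, {"a", 1}};
  // Second (last) key first...
  std::stable_sort(rows.begin(), rows.end(),
                   [](const auto& l, const auto& r) { return l.second < r.second; });
  // ...then the first key; stability keeps the second-key order within
  // equal first keys: (a,1) (a,2) (b,0) (b,1).
  std::stable_sort(rows.begin(), rows.end(),
                   [](const auto& l, const auto& r) { return l.first < r.first; });
}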
-
-// Sort a table using a single sort and multiple-key comparisons.
-class MultipleKeyTableSorter : public TypeVisitor {
- private:
- // TODO instead of resolving chunks for each column independently, we could
- // split the table into RecordBatches and pay the cost of chunked indexing
- // at the first column only.
-
- // Preprocessed sort key.
- struct ResolvedSortKey {
- ResolvedSortKey(const ChunkedArray& chunked_array, const SortOrder order)
- : order(order),
- type(GetPhysicalType(chunked_array.type())),
- chunks(GetPhysicalChunks(chunked_array, type)),
- chunk_pointers(GetArrayPointers(chunks)),
- null_count(chunked_array.null_count()),
- num_chunks(chunked_array.num_chunks()),
- resolver(chunk_pointers) {}
-
- // Finds the target chunk and index in the target chunk from an
- // index in chunked array.
- template <typename ArrayType>
- ResolvedChunk<ArrayType> GetChunk(int64_t index) const {
- return resolver.Resolve<ArrayType>(index);
- }
-
- const SortOrder order;
- const std::shared_ptr<DataType> type;
- const ArrayVector chunks;
- const std::vector<const Array*> chunk_pointers;
- const int64_t null_count;
- const int num_chunks;
- const ChunkedArrayResolver resolver;
- };
-
- using Comparator = MultipleKeyComparator<ResolvedSortKey>;
-
- public:
- MultipleKeyTableSorter(uint64_t* indices_begin, uint64_t* indices_end,
- const Table& table, const SortOptions& options)
- : indices_begin_(indices_begin),
- indices_end_(indices_end),
- sort_keys_(ResolveSortKeys(table, options.sort_keys, &status_)),
- comparator_(sort_keys_) {}
-
-  // This is optimized for the first sort key: the first sort key is
-  // sorted by this class, while the second and following sort keys are
-  // handled by Comparator.
- Status Sort() {
- ARROW_RETURN_NOT_OK(status_);
- return sort_keys_[0].type->Accept(this);
- }
-
-#define VISIT(TYPE) \
- Status Visit(const TYPE& type) override { return SortInternal<TYPE>(); }
-
- VISIT_PHYSICAL_TYPES(VISIT)
-
-#undef VISIT
-
- private:
- static std::vector<ResolvedSortKey> ResolveSortKeys(
- const Table& table, const std::vector<SortKey>& sort_keys, Status* status) {
- std::vector<ResolvedSortKey> resolved;
- resolved.reserve(sort_keys.size());
- for (const auto& sort_key : sort_keys) {
- const auto& chunked_array = table.GetColumnByName(sort_key.name);
- if (!chunked_array) {
- *status = Status::Invalid("Nonexistent sort key column: ", sort_key.name);
- break;
- }
- resolved.emplace_back(*chunked_array, sort_key.order);
- }
- return resolved;
- }
-
- template <typename Type>
- Status SortInternal() {
- using ArrayType = typename TypeTraits<Type>::ArrayType;
-
- auto& comparator = comparator_;
- const auto& first_sort_key = sort_keys_[0];
- auto nulls_begin = indices_end_;
- nulls_begin = PartitionNullsInternal<Type>(first_sort_key);
- std::stable_sort(indices_begin_, nulls_begin, [&](uint64_t left, uint64_t right) {
- // Both values are never null nor NaN.
- auto chunk_left = first_sort_key.GetChunk<ArrayType>(left);
- auto chunk_right = first_sort_key.GetChunk<ArrayType>(right);
- auto value_left = chunk_left.Value();
- auto value_right = chunk_right.Value();
- if (value_left == value_right) {
-          // If the left value equals the right value,
-          // we need to compare the second and following
-          // sort keys.
- return comparator.Compare(left, right, 1);
- } else {
- auto compared = value_left < value_right;
- if (first_sort_key.order == SortOrder::Ascending) {
- return compared;
- } else {
- return !compared;
- }
- }
- });
- return comparator_.status();
- }
-
-  // Behaves like PartitionNulls() but supports multiple sort keys.
- //
- // For non-float types.
- template <typename Type>
- enable_if_t<!is_floating_type<Type>::value, uint64_t*> PartitionNullsInternal(
- const ResolvedSortKey& first_sort_key) {
- using ArrayType = typename TypeTraits<Type>::ArrayType;
- if (first_sort_key.null_count == 0) {
- return indices_end_;
- }
- StablePartitioner partitioner;
- auto nulls_begin =
- partitioner(indices_begin_, indices_end_, [&first_sort_key](uint64_t index) {
- const auto chunk = first_sort_key.GetChunk<ArrayType>(index);
- return !chunk.IsNull();
- });
- DCHECK_EQ(indices_end_ - nulls_begin, first_sort_key.null_count);
- auto& comparator = comparator_;
- std::stable_sort(nulls_begin, indices_end_, [&](uint64_t left, uint64_t right) {
- return comparator.Compare(left, right, 1);
- });
- return nulls_begin;
- }
-
-  // Behaves like PartitionNulls() but supports multiple sort keys.
- //
- // For float types.
- template <typename Type>
- enable_if_t<is_floating_type<Type>::value, uint64_t*> PartitionNullsInternal(
- const ResolvedSortKey& first_sort_key) {
- using ArrayType = typename TypeTraits<Type>::ArrayType;
- StablePartitioner partitioner;
- uint64_t* nulls_begin;
- if (first_sort_key.null_count == 0) {
- nulls_begin = indices_end_;
- } else {
- nulls_begin = partitioner(indices_begin_, indices_end_, [&](uint64_t index) {
- const auto chunk = first_sort_key.GetChunk<ArrayType>(index);
- return !chunk.IsNull();
- });
- }
- DCHECK_EQ(indices_end_ - nulls_begin, first_sort_key.null_count);
- uint64_t* nans_begin = partitioner(indices_begin_, nulls_begin, [&](uint64_t index) {
- const auto chunk = first_sort_key.GetChunk<ArrayType>(index);
- return !std::isnan(chunk.Value());
- });
- auto& comparator = comparator_;
- // Sort all NaNs by the second and following sort keys.
- std::stable_sort(nans_begin, nulls_begin, [&](uint64_t left, uint64_t right) {
- return comparator.Compare(left, right, 1);
- });
- // Sort all nulls by the second and following sort keys.
- std::stable_sort(nulls_begin, indices_end_, [&](uint64_t left, uint64_t right) {
- return comparator.Compare(left, right, 1);
- });
- return nans_begin;
- }
-
- uint64_t* indices_begin_;
- uint64_t* indices_end_;
- Status status_;
- std::vector<ResolvedSortKey> sort_keys_;
- Comparator comparator_;
-};
-
-// ----------------------------------------------------------------------
-// Top-level sort functions
-
-const auto kDefaultSortOptions = SortOptions::Defaults();
-
-const FunctionDoc sort_indices_doc(
- "Return the indices that would sort an array, record batch or table",
- ("This function computes an array of indices that define a stable sort\n"
- "of the input array, record batch or table. Null values are considered\n"
- "greater than any other value and are therefore sorted at the end of the\n"
- "input. For floating-point types, NaNs are considered greater than any\n"
- "other non-null value, but smaller than null values."),
- {"input"}, "SortOptions");
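
// Usage sketch for the function documented above via Arrow's public compute
// API (illustrative; assumes arrow::compute::SortIndices from api_vector.h).
#include <arrow/array.h>
#include <arrow/compute/api_vector.h>
#include <arrow/result.h>
#include <memory>

inline arrow::Result<std::shared_ptr<arrow::Array>> SortIndicesExample(
    const arrow::Array& values) {
  // Nulls (and, for floating point, NaNs) come last, as described above.
  return arrow::compute::SortIndices(values, arrow::compute::SortOrder::Ascending);
}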
-
-class SortIndicesMetaFunction : public MetaFunction {
- public:
- SortIndicesMetaFunction()
- : MetaFunction("sort_indices", Arity::Unary(), &sort_indices_doc,
- &kDefaultSortOptions) {}
-
- Result<Datum> ExecuteImpl(const std::vector<Datum>& args,
- const FunctionOptions* options,
- ExecContext* ctx) const override {
- const SortOptions& sort_options = static_cast<const SortOptions&>(*options);
- switch (args[0].kind()) {
- case Datum::ARRAY:
- return SortIndices(*args[0].make_array(), sort_options, ctx);
- break;
- case Datum::CHUNKED_ARRAY:
- return SortIndices(*args[0].chunked_array(), sort_options, ctx);
- break;
- case Datum::RECORD_BATCH: {
- return SortIndices(*args[0].record_batch(), sort_options, ctx);
- } break;
- case Datum::TABLE:
- return SortIndices(*args[0].table(), sort_options, ctx);
- break;
- default:
- break;
- }
- return Status::NotImplemented(
- "Unsupported types for sort_indices operation: "
- "values=",
- args[0].ToString());
- }
-
- private:
- Result<Datum> SortIndices(const Array& values, const SortOptions& options,
- ExecContext* ctx) const {
- SortOrder order = SortOrder::Ascending;
- if (!options.sort_keys.empty()) {
- order = options.sort_keys[0].order;
- }
- ArraySortOptions array_options(order);
- return CallFunction("array_sort_indices", {values}, &array_options, ctx);
- }
-
- Result<Datum> SortIndices(const ChunkedArray& chunked_array, const SortOptions& options,
- ExecContext* ctx) const {
- SortOrder order = SortOrder::Ascending;
- if (!options.sort_keys.empty()) {
- order = options.sort_keys[0].order;
- }
-
- auto out_type = uint64();
- auto length = chunked_array.length();
- auto buffer_size = BitUtil::BytesForBits(
- length * std::static_pointer_cast<UInt64Type>(out_type)->bit_width());
- std::vector<std::shared_ptr<Buffer>> buffers(2);
- ARROW_ASSIGN_OR_RAISE(buffers[1],
- AllocateResizableBuffer(buffer_size, ctx->memory_pool()));
- auto out = std::make_shared<ArrayData>(out_type, length, buffers, 0);
- auto out_begin = out->GetMutableValues<uint64_t>(1);
- auto out_end = out_begin + length;
- std::iota(out_begin, out_end, 0);
-
- ChunkedArraySorter sorter(ctx, out_begin, out_end, chunked_array, order);
- ARROW_RETURN_NOT_OK(sorter.Sort());
- return Datum(out);
- }
-
- Result<Datum> SortIndices(const RecordBatch& batch, const SortOptions& options,
- ExecContext* ctx) const {
- auto n_sort_keys = options.sort_keys.size();
- if (n_sort_keys == 0) {
- return Status::Invalid("Must specify one or more sort keys");
- }
- if (n_sort_keys == 1) {
- auto array = batch.GetColumnByName(options.sort_keys[0].name);
- if (!array) {
- return Status::Invalid("Nonexistent sort key column: ",
- options.sort_keys[0].name);
- }
- return SortIndices(*array, options, ctx);
- }
-
- auto out_type = uint64();
- auto length = batch.num_rows();
- auto buffer_size = BitUtil::BytesForBits(
- length * std::static_pointer_cast<UInt64Type>(out_type)->bit_width());
- BufferVector buffers(2);
- ARROW_ASSIGN_OR_RAISE(buffers[1],
- AllocateResizableBuffer(buffer_size, ctx->memory_pool()));
- auto out = std::make_shared<ArrayData>(out_type, length, buffers, 0);
- auto out_begin = out->GetMutableValues<uint64_t>(1);
- auto out_end = out_begin + length;
- std::iota(out_begin, out_end, 0);
-
- // Radix sorting is consistently faster except when there is a large number
- // of sort keys, in which case it can end up degrading catastrophically.
- // Cut off above 8 sort keys.
- if (n_sort_keys <= 8) {
- RadixRecordBatchSorter sorter(out_begin, out_end, batch, options);
- ARROW_RETURN_NOT_OK(sorter.Sort());
- } else {
- MultipleKeyRecordBatchSorter sorter(out_begin, out_end, batch, options);
- ARROW_RETURN_NOT_OK(sorter.Sort());
- }
- return Datum(out);
- }
-
- Result<Datum> SortIndices(const Table& table, const SortOptions& options,
- ExecContext* ctx) const {
- auto n_sort_keys = options.sort_keys.size();
- if (n_sort_keys == 0) {
- return Status::Invalid("Must specify one or more sort keys");
- }
- if (n_sort_keys == 1) {
- auto chunked_array = table.GetColumnByName(options.sort_keys[0].name);
- if (!chunked_array) {
- return Status::Invalid("Nonexistent sort key column: ",
- options.sort_keys[0].name);
- }
- return SortIndices(*chunked_array, options, ctx);
- }
-
- auto out_type = uint64();
- auto length = table.num_rows();
- auto buffer_size = BitUtil::BytesForBits(
- length * std::static_pointer_cast<UInt64Type>(out_type)->bit_width());
- std::vector<std::shared_ptr<Buffer>> buffers(2);
- ARROW_ASSIGN_OR_RAISE(buffers[1],
- AllocateResizableBuffer(buffer_size, ctx->memory_pool()));
- auto out = std::make_shared<ArrayData>(out_type, length, buffers, 0);
- auto out_begin = out->GetMutableValues<uint64_t>(1);
- auto out_end = out_begin + length;
- std::iota(out_begin, out_end, 0);
-
- // TODO: We should choose a suitable sort implementation
- // automatically. The current TableRadixSorter implementation is
- // faster than MultipleKeyTableSorter only when the number of
- // sort keys is 2 and counting sort is used. So we always use
- // MultipleKeyTableSorter for now.
- //
- // TableRadixSorter sorter;
- // ARROW_RETURN_NOT_OK(sorter.Sort(ctx, out_begin, out_end, table, options));
- MultipleKeyTableSorter sorter(out_begin, out_end, table, options);
- ARROW_RETURN_NOT_OK(sorter.Sort());
- return Datum(out);
- }
-};
-
-const auto kDefaultArraySortOptions = ArraySortOptions::Defaults();
-
-const FunctionDoc array_sort_indices_doc(
- "Return the indices that would sort an array",
- ("This function computes an array of indices that define a stable sort\n"
- "of the input array. Null values are considered greater than any\n"
- "other value and are therefore sorted at the end of the array.\n"
- "For floating-point types, NaNs are considered greater than any\n"
- "other non-null value, but smaller than null values."),
- {"array"}, "ArraySortOptions");
-
-const FunctionDoc partition_nth_indices_doc(
- "Return the indices that would partition an array around a pivot",
- ("This functions computes an array of indices that define a non-stable\n"
- "partial sort of the input array.\n"
- "\n"
- "The output is such that the `N`'th index points to the `N`'th element\n"
- "of the input in sorted order, and all indices before the `N`'th point\n"
- "to elements in the input less or equal to elements at or after the `N`'th.\n"
- "\n"
- "Null values are considered greater than any other value and are\n"
- "therefore partitioned towards the end of the array.\n"
- "For floating-point types, NaNs are considered greater than any\n"
- "other non-null value, but smaller than null values.\n"
- "\n"
- "The pivot index `N` must be given in PartitionNthOptions."),
- {"array"}, "PartitionNthOptions");
-
-} // namespace
-
+// ----------------------------------------------------------------------
+// ChunkedArray sorting implementations
+
+// Sort a chunked array directly without sorting each array in the
+// chunked array. This is used for processing the second and following
+// sort keys in TableRadixSorter.
+//
+// This uses the same algorithm as ArrayCompareSorter.
+template <typename Type>
+class ChunkedArrayCompareSorter {
+ using ArrayType = typename TypeTraits<Type>::ArrayType;
+
+ public:
+ // Returns where nulls start.
+ uint64_t* Sort(uint64_t* indices_begin, uint64_t* indices_end,
+ const std::vector<const Array*>& arrays, int64_t null_count,
+ const ArraySortOptions& options) {
+ auto nulls_begin = PartitionNulls<ArrayType, StablePartitioner>(
+ indices_begin, indices_end, arrays, null_count);
+ ChunkedArrayResolver resolver(arrays);
+ if (options.order == SortOrder::Ascending) {
+ std::stable_sort(indices_begin, nulls_begin, [&](uint64_t left, uint64_t right) {
+ const auto chunk_left = resolver.Resolve<ArrayType>(left);
+ const auto chunk_right = resolver.Resolve<ArrayType>(right);
+ return chunk_left.Value() < chunk_right.Value();
+ });
+ } else {
+ std::stable_sort(indices_begin, nulls_begin, [&](uint64_t left, uint64_t right) {
+ const auto chunk_left = resolver.Resolve<ArrayType>(left);
+ const auto chunk_right = resolver.Resolve<ArrayType>(right);
+ // We use 'chunk_right < chunk_left' rather than 'left > right' so
+ // that the value type only needs to provide operator<.
+ return chunk_right.Value() < chunk_left.Value();
+ });
+ }
+ return nulls_begin;
+ }
+};
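
The ChunkedArrayResolver used above maps a logical row index across chunk boundaries to a (chunk, offset) pair before any value comparison. The following is a minimal, self-contained sketch of that resolution step over plain chunk lengths instead of Arrow arrays; all names here are illustrative, not the actual resolver API:

    #include <algorithm>
    #include <cstdint>
    #include <vector>

    // Illustrative stand-in for chunk resolution: binary-search the
    // cumulative chunk offsets for the last offset <= index.
    struct ChunkLocation {
      int chunk_index;
      int64_t index_in_chunk;
    };

    ChunkLocation Resolve(const std::vector<int64_t>& chunk_lengths, int64_t index) {
      std::vector<int64_t> offsets{0};  // offsets[i] = first logical index of chunk i
      for (int64_t len : chunk_lengths) offsets.push_back(offsets.back() + len);
      auto it = std::upper_bound(offsets.begin(), offsets.end(), index) - 1;
      return {static_cast<int>(it - offsets.begin()), index - *it};
    }
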
+
+// Sort a chunked array by sorting each array in the chunked array.
+//
+// TODO: This is a naive implementation. Performance can be improved,
+// for example by sorting the individual arrays on multiple threads.
+class ChunkedArraySorter : public TypeVisitor {
+ public:
+ ChunkedArraySorter(ExecContext* ctx, uint64_t* indices_begin, uint64_t* indices_end,
+ const ChunkedArray& chunked_array, const SortOrder order,
+ bool can_use_array_sorter = true)
+ : TypeVisitor(),
+ indices_begin_(indices_begin),
+ indices_end_(indices_end),
+ chunked_array_(chunked_array),
+ physical_type_(GetPhysicalType(chunked_array.type())),
+ physical_chunks_(GetPhysicalChunks(chunked_array_, physical_type_)),
+ order_(order),
+ can_use_array_sorter_(can_use_array_sorter),
+ ctx_(ctx) {}
+
+ Status Sort() { return physical_type_->Accept(this); }
+
+#define VISIT(TYPE) \
+ Status Visit(const TYPE& type) override { return SortInternal<TYPE>(); }
+
+ VISIT_PHYSICAL_TYPES(VISIT)
+
+#undef VISIT
+
+ private:
+ template <typename Type>
+ Status SortInternal() {
+ using ArrayType = typename TypeTraits<Type>::ArrayType;
+ ArraySortOptions options(order_);
+ const auto num_chunks = chunked_array_.num_chunks();
+ if (num_chunks == 0) {
+ return Status::OK();
+ }
+ const auto arrays = GetArrayPointers(physical_chunks_);
+ if (can_use_array_sorter_) {
+ // Sort each chunk independently and merge to sorted indices.
+ // This is a serial implementation.
+ ArraySorter<Type> sorter;
+ struct SortedChunk {
+ int64_t begin_offset;
+ int64_t end_offset;
+ int64_t nulls_offset;
+ };
+ std::vector<SortedChunk> sorted(num_chunks);
+
+ // First sort all individual chunks
+ int64_t begin_offset = 0;
+ int64_t end_offset = 0;
+ int64_t null_count = 0;
+ for (int i = 0; i < num_chunks; ++i) {
+ const auto array = checked_cast<const ArrayType*>(arrays[i]);
+ end_offset += array->length();
+ null_count += array->null_count();
+ uint64_t* nulls_begin =
+ sorter.impl.Sort(indices_begin_ + begin_offset, indices_begin_ + end_offset,
+ *array, begin_offset, options);
+ sorted[i] = {begin_offset, end_offset, nulls_begin - indices_begin_};
+ begin_offset = end_offset;
+ }
+ DCHECK_EQ(end_offset, indices_end_ - indices_begin_);
+
+ std::unique_ptr<Buffer> temp_buffer;
+ uint64_t* temp_indices = nullptr;
+ if (sorted.size() > 1) {
+ ARROW_ASSIGN_OR_RAISE(
+ temp_buffer,
+ AllocateBuffer(sizeof(int64_t) * (indices_end_ - indices_begin_ - null_count),
+ ctx_->memory_pool()));
+ temp_indices = reinterpret_cast<uint64_t*>(temp_buffer->mutable_data());
+ }
+
+ // Then merge them by pairs, recursively
+ while (sorted.size() > 1) {
+ auto out_it = sorted.begin();
+ auto it = sorted.begin();
+ while (it < sorted.end() - 1) {
+ const auto& left = *it++;
+ const auto& right = *it++;
+ DCHECK_EQ(left.end_offset, right.begin_offset);
+ DCHECK_GE(left.nulls_offset, left.begin_offset);
+ DCHECK_LE(left.nulls_offset, left.end_offset);
+ DCHECK_GE(right.nulls_offset, right.begin_offset);
+ DCHECK_LE(right.nulls_offset, right.end_offset);
+ uint64_t* nulls_begin = Merge<ArrayType>(
+ indices_begin_ + left.begin_offset, indices_begin_ + left.end_offset,
+ indices_begin_ + right.end_offset, indices_begin_ + left.nulls_offset,
+ indices_begin_ + right.nulls_offset, arrays, null_count, order_,
+ temp_indices);
+ *out_it++ = {left.begin_offset, right.end_offset, nulls_begin - indices_begin_};
+ }
+ if (it < sorted.end()) {
+ *out_it++ = *it++;
+ }
+ sorted.erase(out_it, sorted.end());
+ }
+ DCHECK_EQ(sorted.size(), 1);
+ DCHECK_EQ(sorted[0].begin_offset, 0);
+ DCHECK_EQ(sorted[0].end_offset, chunked_array_.length());
+ // Note that "nulls" can also include NaNs, hence the >= check
+ DCHECK_GE(chunked_array_.length() - sorted[0].nulls_offset, null_count);
+ } else {
+ // Sort the chunked array directly.
+ ChunkedArrayCompareSorter<Type> sorter;
+ sorter.Sort(indices_begin_, indices_end_, arrays, chunked_array_.null_count(),
+ options);
+ }
+ return Status::OK();
+ }
+
+ // Merges two sorted runs of indices and returns where nulls start.
+ // The returned position lets the next merge locate the already-sorted
+ // non-null and null regions.
+ template <typename ArrayType>
+ uint64_t* Merge(uint64_t* indices_begin, uint64_t* indices_middle,
+ uint64_t* indices_end, uint64_t* left_nulls_begin,
+ uint64_t* right_nulls_begin, const std::vector<const Array*>& arrays,
+ int64_t null_count, const SortOrder order, uint64_t* temp_indices) {
+ // Input layout:
+ // [left non-nulls .... left nulls .... right non-nulls .... right nulls]
+ // ^ ^ ^ ^
+ // | | | |
+ // indices_begin left_nulls_begin indices_middle right_nulls_begin
+ auto left_num_non_nulls = left_nulls_begin - indices_begin;
+ auto right_num_non_nulls = right_nulls_begin - indices_middle;
+
+ // Mutate the input, stably, to obtain the following layout:
+ // [left non-nulls .... right non-nulls .... left nulls .... right nulls]
+ // ^ ^ ^ ^
+ // | | | |
+ // indices_begin indices_middle nulls_begin right_nulls_begin
+ std::rotate(left_nulls_begin, indices_middle, right_nulls_begin);
+ auto nulls_begin = indices_begin + left_num_non_nulls + right_num_non_nulls;
+ // If the type has null-like values (such as NaN), ensure those plus regular
+ // nulls are partitioned in the right order. Note this assumes that all
+ // null-like values (e.g. NaN) are ordered equally.
+ if (NullTraits<typename ArrayType::TypeClass>::has_null_like_values) {
+ PartitionNullsOnly<StablePartitioner>(nulls_begin, indices_end, arrays, null_count);
+ }
+
+ // Merge the non-null values into temp area
+ indices_middle = indices_begin + left_num_non_nulls;
+ indices_end = indices_middle + right_num_non_nulls;
+ const ChunkedArrayResolver left_resolver(arrays);
+ const ChunkedArrayResolver right_resolver(arrays);
+ if (order == SortOrder::Ascending) {
+ std::merge(indices_begin, indices_middle, indices_middle, indices_end, temp_indices,
+ [&](uint64_t left, uint64_t right) {
+ const auto chunk_left = left_resolver.Resolve<ArrayType>(left);
+ const auto chunk_right = right_resolver.Resolve<ArrayType>(right);
+ return chunk_left.Value() < chunk_right.Value();
+ });
+ } else {
+ std::merge(indices_begin, indices_middle, indices_middle, indices_end, temp_indices,
+ [&](uint64_t left, uint64_t right) {
+ const auto chunk_left = left_resolver.Resolve<ArrayType>(left);
+ const auto chunk_right = right_resolver.Resolve<ArrayType>(right);
+ // We use 'chunk_right < chunk_left' rather than
+ // 'left > right' so that the value type only needs
+ // to provide operator<.
+ return chunk_right.Value() < chunk_left.Value();
+ });
+ }
+ // Copy back temp area into main buffer
+ std::copy(temp_indices, temp_indices + (nulls_begin - indices_begin), indices_begin);
+ return nulls_begin;
+ }
+
+ uint64_t* indices_begin_;
+ uint64_t* indices_end_;
+ const ChunkedArray& chunked_array_;
+ const std::shared_ptr<DataType> physical_type_;
+ const ArrayVector physical_chunks_;
+ const SortOrder order_;
+ const bool can_use_array_sorter_;
+ ExecContext* ctx_;
+};
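
The pairwise merge above hinges on a std::rotate/std::merge pair: rotating the left run's nulls past the right run's non-nulls restores a [non-nulls | nulls] layout, after which only the non-null prefixes need merging through a temporary buffer. A sketch on plain integers used as their own sort keys (the real Merge() compares resolved array values):

    #include <algorithm>
    #include <cstdint>
    #include <vector>

    // Merge two adjacent runs of indices, each laid out as [non-nulls... nulls...].
    // Positions are offsets into idx; temp must hold at least the merged
    // non-null count.
    void MergeRuns(std::vector<uint64_t>& idx, int64_t begin, int64_t left_nulls,
                   int64_t middle, int64_t right_nulls, std::vector<uint64_t>& temp) {
      // [L values | L nulls | R values | R nulls] -> [L values | R values | nulls]
      std::rotate(idx.begin() + left_nulls, idx.begin() + middle,
                  idx.begin() + right_nulls);
      const int64_t left_n = left_nulls - begin;
      const int64_t right_n = right_nulls - middle;
      std::merge(idx.begin() + begin, idx.begin() + begin + left_n,
                 idx.begin() + begin + left_n,
                 idx.begin() + begin + left_n + right_n, temp.begin());
      std::copy(temp.begin(), temp.begin() + left_n + right_n, idx.begin() + begin);
    }
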
+
+// ----------------------------------------------------------------------
+// Record batch sorting implementation(s)
+
+// Visit contiguous ranges of equal values. All entries are assumed
+// to be non-null.
+template <typename ArrayType, typename Visitor>
+void VisitConstantRanges(const ArrayType& array, uint64_t* indices_begin,
+ uint64_t* indices_end, Visitor&& visit) {
+ using GetView = GetViewType<typename ArrayType::TypeClass>;
+
+ if (indices_begin == indices_end) {
+ return;
+ }
+ auto range_start = indices_begin;
+ auto range_cur = range_start;
+ auto last_value = GetView::LogicalValue(array.GetView(*range_cur));
+ while (++range_cur != indices_end) {
+ auto v = GetView::LogicalValue(array.GetView(*range_cur));
+ if (v != last_value) {
+ visit(range_start, range_cur);
+ range_start = range_cur;
+ last_value = v;
+ }
+ }
+ if (range_start != range_cur) {
+ visit(range_start, range_cur);
+ }
+}
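
A self-contained analogue of VisitConstantRanges() on a plain vector makes the visiting pattern explicit: each callback receives a half-open [start, end) run of equal adjacent values.

    #include <cstdint>
    #include <cstdio>
    #include <vector>

    // Visit [start, end) runs of equal adjacent values.
    template <typename T, typename Visitor>
    void VisitRuns(const std::vector<T>& values, Visitor&& visit) {
      size_t start = 0;
      for (size_t i = 1; i <= values.size(); ++i) {
        if (i == values.size() || values[i] != values[start]) {
          visit(start, i);
          start = i;
        }
      }
    }

    int main() {
      std::vector<int> v{1, 1, 2, 3, 3, 3};
      VisitRuns(v, [](size_t b, size_t e) { std::printf("[%zu, %zu)\n", b, e); });
      return 0;  // prints [0, 2) [2, 3) [3, 6)
    }
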
+
+// A sorter for a single column of a RecordBatch, deferring to the next column
+// for ranges of equal values.
+class RecordBatchColumnSorter {
+ public:
+ explicit RecordBatchColumnSorter(RecordBatchColumnSorter* next_column = nullptr)
+ : next_column_(next_column) {}
+ virtual ~RecordBatchColumnSorter() {}
+
+ virtual void SortRange(uint64_t* indices_begin, uint64_t* indices_end) = 0;
+
+ protected:
+ RecordBatchColumnSorter* next_column_;
+};
+
+template <typename Type>
+class ConcreteRecordBatchColumnSorter : public RecordBatchColumnSorter {
+ public:
+ using ArrayType = typename TypeTraits<Type>::ArrayType;
+
+ ConcreteRecordBatchColumnSorter(std::shared_ptr<Array> array, SortOrder order,
+ RecordBatchColumnSorter* next_column = nullptr)
+ : RecordBatchColumnSorter(next_column),
+ owned_array_(std::move(array)),
+ array_(checked_cast<const ArrayType&>(*owned_array_)),
+ order_(order),
+ null_count_(array_.null_count()) {}
+
+ void SortRange(uint64_t* indices_begin, uint64_t* indices_end) {
+ using GetView = GetViewType<Type>;
+
+ constexpr int64_t offset = 0;
+ uint64_t* nulls_begin;
+ if (null_count_ == 0) {
+ nulls_begin = indices_end;
+ } else {
+ // NOTE that null_count_ is merely an upper bound on the number of nulls
+ // in this particular range.
+ nulls_begin = PartitionNullsOnly<StablePartitioner>(indices_begin, indices_end,
+ array_, offset);
+ DCHECK_LE(indices_end - nulls_begin, null_count_);
+ }
+ uint64_t* null_likes_begin = PartitionNullLikes<ArrayType, StablePartitioner>(
+ indices_begin, nulls_begin, array_, offset);
+
+ // TODO This is roughly the same as ArrayCompareSorter.
+ // Also, we would like to use a counting sort if possible. This requires
+ // a counting sort compatible with indirect indexing.
+ if (order_ == SortOrder::Ascending) {
+ std::stable_sort(
+ indices_begin, null_likes_begin, [&](uint64_t left, uint64_t right) {
+ const auto lhs = GetView::LogicalValue(array_.GetView(left - offset));
+ const auto rhs = GetView::LogicalValue(array_.GetView(right - offset));
+ return lhs < rhs;
+ });
+ } else {
+ std::stable_sort(
+ indices_begin, null_likes_begin, [&](uint64_t left, uint64_t right) {
+ // Descending order: compare in reverse (note this comparison
+ // does use '>' on the logical values).
+ const auto lhs = GetView::LogicalValue(array_.GetView(left - offset));
+ const auto rhs = GetView::LogicalValue(array_.GetView(right - offset));
+ return lhs > rhs;
+ });
+ }
+
+ if (next_column_ != nullptr) {
+ // Visit all ranges of equal values in this column and sort them on
+ // the next column.
+ SortNextColumn(null_likes_begin, nulls_begin);
+ SortNextColumn(nulls_begin, indices_end);
+ VisitConstantRanges(array_, indices_begin, null_likes_begin,
+ [&](uint64_t* range_start, uint64_t* range_end) {
+ SortNextColumn(range_start, range_end);
+ });
+ }
+ }
+
+ void SortNextColumn(uint64_t* indices_begin, uint64_t* indices_end) {
+ // Avoid the cost of a virtual method call in trivial cases
+ if (indices_end - indices_begin > 1) {
+ next_column_->SortRange(indices_begin, indices_end);
+ }
+ }
+
+ protected:
+ const std::shared_ptr<Array> owned_array_;
+ const ArrayType& array_;
+ const SortOrder order_;
+ const int64_t null_count_;
+};
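
The recursion implemented by SortRange()/SortNextColumn() above, stable-sort one column and then hand each run of equal values to the next column's sorter, reads in miniature as follows; columns are plain int vectors rather than Arrow arrays, purely for illustration:

    #include <algorithm>
    #include <cstdint>
    #include <vector>

    // Left-to-right radix scheme: sort indices by column `col`, then recurse
    // into each run of equal values for column `col + 1`.
    void RadixSort(const std::vector<std::vector<int>>& columns, size_t col,
                   uint64_t* begin, uint64_t* end) {
      if (col == columns.size() || end - begin <= 1) return;
      const auto& values = columns[col];
      std::stable_sort(begin, end,
                       [&](uint64_t l, uint64_t r) { return values[l] < values[r]; });
      uint64_t* run = begin;
      for (uint64_t* it = begin + 1; it <= end; ++it) {
        if (it == end || values[*it] != values[*run]) {
          RadixSort(columns, col + 1, run, it);
          run = it;
        }
      }
    }
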
+
+// Sort a batch using a single-pass left-to-right radix sort.
+class RadixRecordBatchSorter {
+ public:
+ RadixRecordBatchSorter(uint64_t* indices_begin, uint64_t* indices_end,
+ const RecordBatch& batch, const SortOptions& options)
+ : batch_(batch),
+ options_(options),
+ indices_begin_(indices_begin),
+ indices_end_(indices_end) {}
+
+ Status Sort() {
+ ARROW_ASSIGN_OR_RAISE(const auto sort_keys,
+ ResolveSortKeys(batch_, options_.sort_keys));
+
+ // Create column sorters from right to left
+ std::vector<std::unique_ptr<RecordBatchColumnSorter>> column_sorts(sort_keys.size());
+ RecordBatchColumnSorter* next_column = nullptr;
+ for (int64_t i = static_cast<int64_t>(sort_keys.size() - 1); i >= 0; --i) {
+ ColumnSortFactory factory(sort_keys[i], next_column);
+ ARROW_ASSIGN_OR_RAISE(column_sorts[i], factory.MakeColumnSort());
+ next_column = column_sorts[i].get();
+ }
+
+ // Sort from left to right
+ column_sorts.front()->SortRange(indices_begin_, indices_end_);
+ return Status::OK();
+ }
+
+ protected:
+ struct ResolvedSortKey {
+ std::shared_ptr<Array> array;
+ SortOrder order;
+ };
+
+ struct ColumnSortFactory {
+ ColumnSortFactory(const ResolvedSortKey& sort_key,
+ RecordBatchColumnSorter* next_column)
+ : physical_type(GetPhysicalType(sort_key.array->type())),
+ array(GetPhysicalArray(*sort_key.array, physical_type)),
+ order(sort_key.order),
+ next_column(next_column) {}
+
+ Result<std::unique_ptr<RecordBatchColumnSorter>> MakeColumnSort() {
+ RETURN_NOT_OK(VisitTypeInline(*physical_type, this));
+ DCHECK_NE(result, nullptr);
+ return std::move(result);
+ }
+
+#define VISIT(TYPE) \
+ Status Visit(const TYPE& type) { return VisitGeneric(type); }
+
+ VISIT_PHYSICAL_TYPES(VISIT)
+
+#undef VISIT
+
+ Status Visit(const DataType& type) {
+ return Status::TypeError("Unsupported type for RecordBatch sorting: ",
+ type.ToString());
+ }
+
+ template <typename Type>
+ Status VisitGeneric(const Type&) {
+ result.reset(new ConcreteRecordBatchColumnSorter<Type>(array, order, next_column));
+ return Status::OK();
+ }
+
+ std::shared_ptr<DataType> physical_type;
+ std::shared_ptr<Array> array;
+ SortOrder order;
+ RecordBatchColumnSorter* next_column;
+ std::unique_ptr<RecordBatchColumnSorter> result;
+ };
+
+ static Result<std::vector<ResolvedSortKey>> ResolveSortKeys(
+ const RecordBatch& batch, const std::vector<SortKey>& sort_keys) {
+ std::vector<ResolvedSortKey> resolved;
+ resolved.reserve(sort_keys.size());
+ for (const auto& sort_key : sort_keys) {
+ auto array = batch.GetColumnByName(sort_key.name);
+ if (!array) {
+ return Status::Invalid("Nonexistent sort key column: ", sort_key.name);
+ }
+ resolved.push_back({std::move(array), sort_key.order});
+ }
+ return resolved;
+ }
+
+ const RecordBatch& batch_;
+ const SortOptions& options_;
+ uint64_t* indices_begin_;
+ uint64_t* indices_end_;
+};
+
+// Compare two records in the same RecordBatch or Table
+// (indexing is handled through ResolvedSortKey)
+template <typename ResolvedSortKey>
+class MultipleKeyComparator {
+ public:
+ explicit MultipleKeyComparator(const std::vector<ResolvedSortKey>& sort_keys)
+ : sort_keys_(sort_keys) {}
+
+ Status status() const { return status_; }
+
+ // Returns true if the left-th value should be ordered before the
+ // right-th value, false otherwise. The start_sort_key_index-th
+ // sort key and subsequent sort keys are used for comparison.
+ bool Compare(uint64_t left, uint64_t right, size_t start_sort_key_index) {
+ current_left_ = left;
+ current_right_ = right;
+ current_compared_ = 0;
+ auto num_sort_keys = sort_keys_.size();
+ for (size_t i = start_sort_key_index; i < num_sort_keys; ++i) {
+ current_sort_key_index_ = i;
+ status_ = VisitTypeInline(*sort_keys_[i].type, this);
+ // If the left value equals the right value, continue
+ // comparing with the next sort key.
+ if (current_compared_ != 0) {
+ break;
+ }
+ }
+ return current_compared_ < 0;
+ }
+
+#define VISIT(TYPE) \
+ Status Visit(const TYPE& type) { \
+ current_compared_ = CompareType<TYPE>(); \
+ return Status::OK(); \
+ }
+
+ VISIT_PHYSICAL_TYPES(VISIT)
+
+#undef VISIT
+
+ Status Visit(const DataType& type) {
+ return Status::TypeError("Unsupported type for RecordBatch sorting: ",
+ type.ToString());
+ }
+
+ private:
+ // Compares two records in the same table and returns -1, 0 or 1.
+ //
+ // -1: The left is less than the right.
+ // 0: The left equals the right.
+ // 1: The left is greater than the right.
+ //
+ // This supports null and NaN. Null is handled here and NaN
+ // is handled in CompareTypeValue().
+ template <typename Type>
+ int32_t CompareType() {
+ using ArrayType = typename TypeTraits<Type>::ArrayType;
+ const auto& sort_key = sort_keys_[current_sort_key_index_];
+ auto order = sort_key.order;
+ const auto chunk_left = sort_key.template GetChunk<ArrayType>(current_left_);
+ const auto chunk_right = sort_key.template GetChunk<ArrayType>(current_right_);
+ if (sort_key.null_count > 0) {
+ auto is_null_left = chunk_left.IsNull();
+ auto is_null_right = chunk_right.IsNull();
+ if (is_null_left && is_null_right) {
+ return 0;
+ } else if (is_null_left) {
+ return 1;
+ } else if (is_null_right) {
+ return -1;
+ }
+ }
+ return CompareTypeValue<Type>(chunk_left, chunk_right, order);
+ }
+
+ // For non-float types. Value is never NaN.
+ template <typename Type>
+ enable_if_t<!is_floating_type<Type>::value, int32_t> CompareTypeValue(
+ const ResolvedChunk<typename TypeTraits<Type>::ArrayType>& chunk_left,
+ const ResolvedChunk<typename TypeTraits<Type>::ArrayType>& chunk_right,
+ const SortOrder order) {
+ const auto left = chunk_left.Value();
+ const auto right = chunk_right.Value();
+ int32_t compared;
+ if (left == right) {
+ compared = 0;
+ } else if (left > right) {
+ compared = 1;
+ } else {
+ compared = -1;
+ }
+ if (order == SortOrder::Descending) {
+ compared = -compared;
+ }
+ return compared;
+ }
+
+ // For float types. Value may be NaN.
+ template <typename Type>
+ enable_if_t<is_floating_type<Type>::value, int32_t> CompareTypeValue(
+ const ResolvedChunk<typename TypeTraits<Type>::ArrayType>& chunk_left,
+ const ResolvedChunk<typename TypeTraits<Type>::ArrayType>& chunk_right,
+ const SortOrder order) {
+ const auto left = chunk_left.Value();
+ const auto right = chunk_right.Value();
+ auto is_nan_left = std::isnan(left);
+ auto is_nan_right = std::isnan(right);
+ if (is_nan_left && is_nan_right) {
+ return 0;
+ } else if (is_nan_left) {
+ return 1;
+ } else if (is_nan_right) {
+ return -1;
+ }
+ int32_t compared;
+ if (left == right) {
+ compared = 0;
+ } else if (left > right) {
+ compared = 1;
+ } else {
+ compared = -1;
+ }
+ if (order == SortOrder::Descending) {
+ compared = -compared;
+ }
+ return compared;
+ }
+
+ const std::vector<ResolvedSortKey>& sort_keys_;
+ Status status_;
+ int64_t current_left_;
+ int64_t current_right_;
+ size_t current_sort_key_index_;
+ int32_t current_compared_;
+};
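
Stripped of type dispatch and null/NaN handling, Compare() above implements the standard lexicographic rule: compare key by key and fall through only on ties. A minimal sketch over plain int key columns (an assumption for brevity):

    #include <cstdint>
    #include <vector>

    // Lexicographic "less than" over multiple key columns.
    bool LexLess(const std::vector<std::vector<int>>& keys, uint64_t left,
                 uint64_t right) {
      for (const auto& key : keys) {
        if (key[left] < key[right]) return true;
        if (key[right] < key[left]) return false;
        // Equal on this key: continue with the next one.
      }
      return false;  // All keys equal: report "not less" to preserve stability.
    }
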
+
+// Sort a batch using a single sort and multiple-key comparisons.
+class MultipleKeyRecordBatchSorter : public TypeVisitor {
+ private:
+ // Preprocessed sort key.
+ struct ResolvedSortKey {
+ ResolvedSortKey(const std::shared_ptr<Array>& array, const SortOrder order)
+ : type(GetPhysicalType(array->type())),
+ owned_array(GetPhysicalArray(*array, type)),
+ array(*owned_array),
+ order(order),
+ null_count(array->null_count()) {}
+
+ template <typename ArrayType>
+ ResolvedChunk<ArrayType> GetChunk(int64_t index) const {
+ return {&checked_cast<const ArrayType&>(array), index};
+ }
+
+ const std::shared_ptr<DataType> type;
+ std::shared_ptr<Array> owned_array;
+ const Array& array;
+ SortOrder order;
+ int64_t null_count;
+ };
+
+ using Comparator = MultipleKeyComparator<ResolvedSortKey>;
+
+ public:
+ MultipleKeyRecordBatchSorter(uint64_t* indices_begin, uint64_t* indices_end,
+ const RecordBatch& batch, const SortOptions& options)
+ : indices_begin_(indices_begin),
+ indices_end_(indices_end),
+ sort_keys_(ResolveSortKeys(batch, options.sort_keys, &status_)),
+ comparator_(sort_keys_) {}
+
+ // This is optimized for the first sort key. The sort on the first
+ // key is handled by this class; the second and following sort keys
+ // are handled by Comparator.
+ Status Sort() {
+ RETURN_NOT_OK(status_);
+ return sort_keys_[0].type->Accept(this);
+ }
+
+#define VISIT(TYPE) \
+ Status Visit(const TYPE& type) override { return SortInternal<TYPE>(); }
+
+ VISIT_PHYSICAL_TYPES(VISIT)
+
+#undef VISIT
+
+ private:
+ static std::vector<ResolvedSortKey> ResolveSortKeys(
+ const RecordBatch& batch, const std::vector<SortKey>& sort_keys, Status* status) {
+ std::vector<ResolvedSortKey> resolved;
+ for (const auto& sort_key : sort_keys) {
+ auto array = batch.GetColumnByName(sort_key.name);
+ if (!array) {
+ *status = Status::Invalid("Nonexistent sort key column: ", sort_key.name);
+ break;
+ }
+ resolved.emplace_back(array, sort_key.order);
+ }
+ return resolved;
+ }
+
+ template <typename Type>
+ Status SortInternal() {
+ using ArrayType = typename TypeTraits<Type>::ArrayType;
+
+ auto& comparator = comparator_;
+ const auto& first_sort_key = sort_keys_[0];
+ const ArrayType& array = checked_cast<const ArrayType&>(first_sort_key.array);
+ auto nulls_begin = PartitionNullsInternal<Type>(first_sort_key);
+ // Sort first-key non-nulls
+ std::stable_sort(indices_begin_, nulls_begin, [&](uint64_t left, uint64_t right) {
+ // Both values are never null nor NaN
+ // (otherwise they've been partitioned away above).
+ const auto value_left = array.GetView(left);
+ const auto value_right = array.GetView(right);
+ if (value_left != value_right) {
+ bool compared = value_left < value_right;
+ if (first_sort_key.order == SortOrder::Ascending) {
+ return compared;
+ } else {
+ return !compared;
+ }
+ }
+ // If the left value equals the right value,
+ // we need to compare the second and following
+ // sort keys.
+ return comparator.Compare(left, right, 1);
+ });
+ return comparator_.status();
+ }
+
+ // Behaves like PartitionNulls() but supports multiple sort keys.
+ //
+ // For non-float types.
+ template <typename Type>
+ enable_if_t<!is_floating_type<Type>::value, uint64_t*> PartitionNullsInternal(
+ const ResolvedSortKey& first_sort_key) {
+ using ArrayType = typename TypeTraits<Type>::ArrayType;
+ if (first_sort_key.null_count == 0) {
+ return indices_end_;
+ }
+ const ArrayType& array = checked_cast<const ArrayType&>(first_sort_key.array);
+ StablePartitioner partitioner;
+ auto nulls_begin = partitioner(indices_begin_, indices_end_,
+ [&](uint64_t index) { return !array.IsNull(index); });
+ // Sort all nulls by second and following sort keys
+ // TODO: could we instead run an independent sort from the second key on
+ // this slice?
+ if (nulls_begin != indices_end_) {
+ auto& comparator = comparator_;
+ std::stable_sort(nulls_begin, indices_end_,
+ [&comparator](uint64_t left, uint64_t right) {
+ return comparator.Compare(left, right, 1);
+ });
+ }
+ return nulls_begin;
+ }
+
+ // Behaves like PartitionNulls() but supports multiple sort keys.
+ //
+ // For float types.
+ template <typename Type>
+ enable_if_t<is_floating_type<Type>::value, uint64_t*> PartitionNullsInternal(
+ const ResolvedSortKey& first_sort_key) {
+ using ArrayType = typename TypeTraits<Type>::ArrayType;
+ const ArrayType& array = checked_cast<const ArrayType&>(first_sort_key.array);
+ StablePartitioner partitioner;
+ uint64_t* nulls_begin;
+ if (first_sort_key.null_count == 0) {
+ nulls_begin = indices_end_;
+ } else {
+ nulls_begin = partitioner(indices_begin_, indices_end_,
+ [&](uint64_t index) { return !array.IsNull(index); });
+ }
+ uint64_t* nans_and_nulls_begin =
+ partitioner(indices_begin_, nulls_begin,
+ [&](uint64_t index) { return !std::isnan(array.GetView(index)); });
+ auto& comparator = comparator_;
+ if (nans_and_nulls_begin != nulls_begin) {
+ // Sort all NaNs by the second and following sort keys.
+ // TODO: could we instead run an independent sort from the second key on
+ // this slice?
+ std::stable_sort(nans_and_nulls_begin, nulls_begin,
+ [&comparator](uint64_t left, uint64_t right) {
+ return comparator.Compare(left, right, 1);
+ });
+ }
+ if (nulls_begin != indices_end_) {
+ // Sort all nulls by the second and following sort keys.
+ // TODO: could we instead run an independent sort from the second key on
+ // this slice?
+ std::stable_sort(nulls_begin, indices_end_,
+ [&comparator](uint64_t left, uint64_t right) {
+ return comparator.Compare(left, right, 1);
+ });
+ }
+ return nans_and_nulls_begin;
+ }
+
+ uint64_t* indices_begin_;
+ uint64_t* indices_end_;
+ Status status_;
+ std::vector<ResolvedSortKey> sort_keys_;
+ Comparator comparator_;
+};
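
The float-type partitioning above produces the layout [ordinary values | NaNs | nulls] with two stable partitions. A sketch on raw doubles, with nulls modeled by a separate mask since plain doubles have no notion of null; StablePartitioner corresponds to std::stable_partition here:

    #include <algorithm>
    #include <cmath>
    #include <cstdint>
    #include <vector>

    // Partition indices into [values | NaNs | nulls] and return where the
    // NaN region starts; [begin, result) then holds comparable values.
    uint64_t* PartitionNansAndNulls(const std::vector<double>& values,
                                    const std::vector<bool>& is_null,
                                    uint64_t* begin, uint64_t* end) {
      auto nulls_begin = std::stable_partition(
          begin, end, [&](uint64_t i) { return !is_null[i]; });
      auto nans_begin = std::stable_partition(
          begin, nulls_begin, [&](uint64_t i) { return !std::isnan(values[i]); });
      return nans_begin;
    }
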
+
+// ----------------------------------------------------------------------
+// Table sorting implementations
+
+// Sort a table using a radix sort-like algorithm.
+// A distinct stable sort is called for each sort key, from the last key to the first.
+class TableRadixSorter {
+ public:
+ Status Sort(ExecContext* ctx, uint64_t* indices_begin, uint64_t* indices_end,
+ const Table& table, const SortOptions& options) {
+ for (auto i = options.sort_keys.size(); i > 0; --i) {
+ const auto& sort_key = options.sort_keys[i - 1];
+ const auto& chunked_array = table.GetColumnByName(sort_key.name);
+ if (!chunked_array) {
+ return Status::Invalid("Nonexistent sort key column: ", sort_key.name);
+ }
+ // We can use ArraySorter only for the sort key that is
+ // processed first because ArraySorter doesn't care about
+ // existing indices.
+ const auto can_use_array_sorter = (i == options.sort_keys.size());
+ ChunkedArraySorter sorter(ctx, indices_begin, indices_end, *chunked_array.get(),
+ sort_key.order, can_use_array_sorter);
+ ARROW_RETURN_NOT_OK(sorter.Sort());
+ }
+ return Status::OK();
+ }
+};
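
The property TableRadixSorter depends on is that repeated stable sorts, applied from the least significant key to the most significant key, yield a lexicographic order: each later pass preserves the tie-breaking established by the earlier passes. A self-contained demonstration:

    #include <algorithm>
    #include <cstdint>
    #include <numeric>
    #include <vector>

    int main() {
      std::vector<int> key0{2, 1, 2, 1};  // primary key
      std::vector<int> key1{9, 8, 7, 6};  // secondary key
      std::vector<uint64_t> idx(4);
      std::iota(idx.begin(), idx.end(), 0);
      // Sort by the last key first, then stably by the first key.
      for (const auto* key : {&key1, &key0}) {
        std::stable_sort(idx.begin(), idx.end(), [&](uint64_t l, uint64_t r) {
          return (*key)[l] < (*key)[r];
        });
      }
      // idx is now {3, 1, 2, 0}: sorted by key0, ties broken by key1.
      return 0;
    }
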
+
+// Sort a table using a single sort and multiple-key comparisons.
+class MultipleKeyTableSorter : public TypeVisitor {
+ private:
+ // TODO instead of resolving chunks for each column independently, we could
+ // split the table into RecordBatches and pay the cost of chunked indexing
+ // at the first column only.
+
+ // Preprocessed sort key.
+ struct ResolvedSortKey {
+ ResolvedSortKey(const ChunkedArray& chunked_array, const SortOrder order)
+ : order(order),
+ type(GetPhysicalType(chunked_array.type())),
+ chunks(GetPhysicalChunks(chunked_array, type)),
+ chunk_pointers(GetArrayPointers(chunks)),
+ null_count(chunked_array.null_count()),
+ num_chunks(chunked_array.num_chunks()),
+ resolver(chunk_pointers) {}
+
+ // Finds the target chunk and the index within that chunk from an
+ // index into the chunked array.
+ template <typename ArrayType>
+ ResolvedChunk<ArrayType> GetChunk(int64_t index) const {
+ return resolver.Resolve<ArrayType>(index);
+ }
+
+ const SortOrder order;
+ const std::shared_ptr<DataType> type;
+ const ArrayVector chunks;
+ const std::vector<const Array*> chunk_pointers;
+ const int64_t null_count;
+ const int num_chunks;
+ const ChunkedArrayResolver resolver;
+ };
+
+ using Comparator = MultipleKeyComparator<ResolvedSortKey>;
+
+ public:
+ MultipleKeyTableSorter(uint64_t* indices_begin, uint64_t* indices_end,
+ const Table& table, const SortOptions& options)
+ : indices_begin_(indices_begin),
+ indices_end_(indices_end),
+ sort_keys_(ResolveSortKeys(table, options.sort_keys, &status_)),
+ comparator_(sort_keys_) {}
+
+ // This is optimized for the first sort key. The sort on the first
+ // key is handled by this class; the second and following sort keys
+ // are handled by Comparator.
+ Status Sort() {
+ ARROW_RETURN_NOT_OK(status_);
+ return sort_keys_[0].type->Accept(this);
+ }
+
+#define VISIT(TYPE) \
+ Status Visit(const TYPE& type) override { return SortInternal<TYPE>(); }
+
+ VISIT_PHYSICAL_TYPES(VISIT)
+
+#undef VISIT
+
+ private:
+ static std::vector<ResolvedSortKey> ResolveSortKeys(
+ const Table& table, const std::vector<SortKey>& sort_keys, Status* status) {
+ std::vector<ResolvedSortKey> resolved;
+ resolved.reserve(sort_keys.size());
+ for (const auto& sort_key : sort_keys) {
+ const auto& chunked_array = table.GetColumnByName(sort_key.name);
+ if (!chunked_array) {
+ *status = Status::Invalid("Nonexistent sort key column: ", sort_key.name);
+ break;
+ }
+ resolved.emplace_back(*chunked_array, sort_key.order);
+ }
+ return resolved;
+ }
+
+ template <typename Type>
+ Status SortInternal() {
+ using ArrayType = typename TypeTraits<Type>::ArrayType;
+
+ auto& comparator = comparator_;
+ const auto& first_sort_key = sort_keys_[0];
+ auto nulls_begin = PartitionNullsInternal<Type>(first_sort_key);
+ std::stable_sort(indices_begin_, nulls_begin, [&](uint64_t left, uint64_t right) {
+ // Both values are never null nor NaN.
+ auto chunk_left = first_sort_key.GetChunk<ArrayType>(left);
+ auto chunk_right = first_sort_key.GetChunk<ArrayType>(right);
+ auto value_left = chunk_left.Value();
+ auto value_right = chunk_right.Value();
+ if (value_left == value_right) {
+ // If the left value equals the right value,
+ // we need to compare the second and following
+ // sort keys.
+ return comparator.Compare(left, right, 1);
+ } else {
+ auto compared = value_left < value_right;
+ if (first_sort_key.order == SortOrder::Ascending) {
+ return compared;
+ } else {
+ return !compared;
+ }
+ }
+ });
+ return comparator_.status();
+ }
+
+ // Behaves like PartitionNulls() but supports multiple sort keys.
+ //
+ // For non-float types.
+ template <typename Type>
+ enable_if_t<!is_floating_type<Type>::value, uint64_t*> PartitionNullsInternal(
+ const ResolvedSortKey& first_sort_key) {
+ using ArrayType = typename TypeTraits<Type>::ArrayType;
+ if (first_sort_key.null_count == 0) {
+ return indices_end_;
+ }
+ StablePartitioner partitioner;
+ auto nulls_begin =
+ partitioner(indices_begin_, indices_end_, [&first_sort_key](uint64_t index) {
+ const auto chunk = first_sort_key.GetChunk<ArrayType>(index);
+ return !chunk.IsNull();
+ });
+ DCHECK_EQ(indices_end_ - nulls_begin, first_sort_key.null_count);
+ auto& comparator = comparator_;
+ std::stable_sort(nulls_begin, indices_end_, [&](uint64_t left, uint64_t right) {
+ return comparator.Compare(left, right, 1);
+ });
+ return nulls_begin;
+ }
+
+ // Behaves like PartitionNulls() but supports multiple sort keys.
+ //
+ // For float types.
+ template <typename Type>
+ enable_if_t<is_floating_type<Type>::value, uint64_t*> PartitionNullsInternal(
+ const ResolvedSortKey& first_sort_key) {
+ using ArrayType = typename TypeTraits<Type>::ArrayType;
+ StablePartitioner partitioner;
+ uint64_t* nulls_begin;
+ if (first_sort_key.null_count == 0) {
+ nulls_begin = indices_end_;
+ } else {
+ nulls_begin = partitioner(indices_begin_, indices_end_, [&](uint64_t index) {
+ const auto chunk = first_sort_key.GetChunk<ArrayType>(index);
+ return !chunk.IsNull();
+ });
+ }
+ DCHECK_EQ(indices_end_ - nulls_begin, first_sort_key.null_count);
+ uint64_t* nans_begin = partitioner(indices_begin_, nulls_begin, [&](uint64_t index) {
+ const auto chunk = first_sort_key.GetChunk<ArrayType>(index);
+ return !std::isnan(chunk.Value());
+ });
+ auto& comparator = comparator_;
+ // Sort all NaNs by the second and following sort keys.
+ std::stable_sort(nans_begin, nulls_begin, [&](uint64_t left, uint64_t right) {
+ return comparator.Compare(left, right, 1);
+ });
+ // Sort all nulls by the second and following sort keys.
+ std::stable_sort(nulls_begin, indices_end_, [&](uint64_t left, uint64_t right) {
+ return comparator.Compare(left, right, 1);
+ });
+ return nans_begin;
+ }
+
+ uint64_t* indices_begin_;
+ uint64_t* indices_end_;
+ Status status_;
+ std::vector<ResolvedSortKey> sort_keys_;
+ Comparator comparator_;
+};
+
+// ----------------------------------------------------------------------
+// Top-level sort functions
+
+const auto kDefaultSortOptions = SortOptions::Defaults();
+
+const FunctionDoc sort_indices_doc(
+ "Return the indices that would sort an array, record batch or table",
+ ("This function computes an array of indices that define a stable sort\n"
+ "of the input array, record batch or table. Null values are considered\n"
+ "greater than any other value and are therefore sorted at the end of the\n"
+ "input. For floating-point types, NaNs are considered greater than any\n"
+ "other non-null value, but smaller than null values."),
+ {"input"}, "SortOptions");
+
+class SortIndicesMetaFunction : public MetaFunction {
+ public:
+ SortIndicesMetaFunction()
+ : MetaFunction("sort_indices", Arity::Unary(), &sort_indices_doc,
+ &kDefaultSortOptions) {}
+
+ Result<Datum> ExecuteImpl(const std::vector<Datum>& args,
+ const FunctionOptions* options,
+ ExecContext* ctx) const override {
+ const SortOptions& sort_options = static_cast<const SortOptions&>(*options);
+ switch (args[0].kind()) {
+ case Datum::ARRAY:
+ return SortIndices(*args[0].make_array(), sort_options, ctx);
+ case Datum::CHUNKED_ARRAY:
+ return SortIndices(*args[0].chunked_array(), sort_options, ctx);
+ case Datum::RECORD_BATCH:
+ return SortIndices(*args[0].record_batch(), sort_options, ctx);
+ case Datum::TABLE:
+ return SortIndices(*args[0].table(), sort_options, ctx);
+ default:
+ break;
+ }
+ return Status::NotImplemented(
+ "Unsupported types for sort_indices operation: "
+ "values=",
+ args[0].ToString());
+ }
+
+ private:
+ Result<Datum> SortIndices(const Array& values, const SortOptions& options,
+ ExecContext* ctx) const {
+ SortOrder order = SortOrder::Ascending;
+ if (!options.sort_keys.empty()) {
+ order = options.sort_keys[0].order;
+ }
+ ArraySortOptions array_options(order);
+ return CallFunction("array_sort_indices", {values}, &array_options, ctx);
+ }
+
+ Result<Datum> SortIndices(const ChunkedArray& chunked_array, const SortOptions& options,
+ ExecContext* ctx) const {
+ SortOrder order = SortOrder::Ascending;
+ if (!options.sort_keys.empty()) {
+ order = options.sort_keys[0].order;
+ }
+
+ auto out_type = uint64();
+ auto length = chunked_array.length();
+ auto buffer_size = BitUtil::BytesForBits(
+ length * std::static_pointer_cast<UInt64Type>(out_type)->bit_width());
+ std::vector<std::shared_ptr<Buffer>> buffers(2);
+ ARROW_ASSIGN_OR_RAISE(buffers[1],
+ AllocateResizableBuffer(buffer_size, ctx->memory_pool()));
+ auto out = std::make_shared<ArrayData>(out_type, length, buffers, 0);
+ auto out_begin = out->GetMutableValues<uint64_t>(1);
+ auto out_end = out_begin + length;
+ std::iota(out_begin, out_end, 0);
+
+ ChunkedArraySorter sorter(ctx, out_begin, out_end, chunked_array, order);
+ ARROW_RETURN_NOT_OK(sorter.Sort());
+ return Datum(out);
+ }
+
+ Result<Datum> SortIndices(const RecordBatch& batch, const SortOptions& options,
+ ExecContext* ctx) const {
+ auto n_sort_keys = options.sort_keys.size();
+ if (n_sort_keys == 0) {
+ return Status::Invalid("Must specify one or more sort keys");
+ }
+ if (n_sort_keys == 1) {
+ auto array = batch.GetColumnByName(options.sort_keys[0].name);
+ if (!array) {
+ return Status::Invalid("Nonexistent sort key column: ",
+ options.sort_keys[0].name);
+ }
+ return SortIndices(*array, options, ctx);
+ }
+
+ auto out_type = uint64();
+ auto length = batch.num_rows();
+ auto buffer_size = BitUtil::BytesForBits(
+ length * std::static_pointer_cast<UInt64Type>(out_type)->bit_width());
+ BufferVector buffers(2);
+ ARROW_ASSIGN_OR_RAISE(buffers[1],
+ AllocateResizableBuffer(buffer_size, ctx->memory_pool()));
+ auto out = std::make_shared<ArrayData>(out_type, length, buffers, 0);
+ auto out_begin = out->GetMutableValues<uint64_t>(1);
+ auto out_end = out_begin + length;
+ std::iota(out_begin, out_end, 0);
+
+ // Radix sorting is consistently faster except when there is a large number
+ // of sort keys, in which case it can end up degrading catastrophically.
+ // Cut off above 8 sort keys.
+ if (n_sort_keys <= 8) {
+ RadixRecordBatchSorter sorter(out_begin, out_end, batch, options);
+ ARROW_RETURN_NOT_OK(sorter.Sort());
+ } else {
+ MultipleKeyRecordBatchSorter sorter(out_begin, out_end, batch, options);
+ ARROW_RETURN_NOT_OK(sorter.Sort());
+ }
+ return Datum(out);
+ }
+
+ Result<Datum> SortIndices(const Table& table, const SortOptions& options,
+ ExecContext* ctx) const {
+ auto n_sort_keys = options.sort_keys.size();
+ if (n_sort_keys == 0) {
+ return Status::Invalid("Must specify one or more sort keys");
+ }
+ if (n_sort_keys == 1) {
+ auto chunked_array = table.GetColumnByName(options.sort_keys[0].name);
+ if (!chunked_array) {
+ return Status::Invalid("Nonexistent sort key column: ",
+ options.sort_keys[0].name);
+ }
+ return SortIndices(*chunked_array, options, ctx);
+ }
+
+ auto out_type = uint64();
+ auto length = table.num_rows();
+ auto buffer_size = BitUtil::BytesForBits(
+ length * std::static_pointer_cast<UInt64Type>(out_type)->bit_width());
+ std::vector<std::shared_ptr<Buffer>> buffers(2);
+ ARROW_ASSIGN_OR_RAISE(buffers[1],
+ AllocateResizableBuffer(buffer_size, ctx->memory_pool()));
+ auto out = std::make_shared<ArrayData>(out_type, length, buffers, 0);
+ auto out_begin = out->GetMutableValues<uint64_t>(1);
+ auto out_end = out_begin + length;
+ std::iota(out_begin, out_end, 0);
+
+ // TODO: We should choose a suitable sort implementation
+ // automatically. The current TableRadixSorter implementation is
+ // faster than MultipleKeyTableSorter only when the number of
+ // sort keys is 2 and counting sort is used. So we always use
+ // MultipleKeyTableSorter for now.
+ //
+ // TableRadixSorter sorter;
+ // ARROW_RETURN_NOT_OK(sorter.Sort(ctx, out_begin, out_end, table, options));
+ MultipleKeyTableSorter sorter(out_begin, out_end, table, options);
+ ARROW_RETURN_NOT_OK(sorter.Sort());
+ return Datum(out);
+ }
+};
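
A hypothetical call site for the meta function registered above, sorting a table by two keys through the generic compute entry point; the column names "a" and "b" are assumptions for illustration:

    #include <memory>

    #include "arrow/api.h"
    #include "arrow/compute/api.h"

    // Returns a uint64 index array that stably sorts `table` by
    // ("a" descending, "b" ascending).
    arrow::Result<arrow::Datum> SortMyTable(
        const std::shared_ptr<arrow::Table>& table) {
      arrow::compute::SortOptions options(
          {arrow::compute::SortKey("a", arrow::compute::SortOrder::Descending),
           arrow::compute::SortKey("b", arrow::compute::SortOrder::Ascending)});
      return arrow::compute::CallFunction("sort_indices", {arrow::Datum(table)},
                                          &options);
    }
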
+
+const auto kDefaultArraySortOptions = ArraySortOptions::Defaults();
+
+const FunctionDoc array_sort_indices_doc(
+ "Return the indices that would sort an array",
+ ("This function computes an array of indices that define a stable sort\n"
+ "of the input array. Null values are considered greater than any\n"
+ "other value and are therefore sorted at the end of the array.\n"
+ "For floating-point types, NaNs are considered greater than any\n"
+ "other non-null value, but smaller than null values."),
+ {"array"}, "ArraySortOptions");
+
+const FunctionDoc partition_nth_indices_doc(
+ "Return the indices that would partition an array around a pivot",
+ ("This functions computes an array of indices that define a non-stable\n"
+ "partial sort of the input array.\n"
+ "\n"
+ "The output is such that the `N`'th index points to the `N`'th element\n"
+ "of the input in sorted order, and all indices before the `N`'th point\n"
+ "to elements in the input less or equal to elements at or after the `N`'th.\n"
+ "\n"
+ "Null values are considered greater than any other value and are\n"
+ "therefore partitioned towards the end of the array.\n"
+ "For floating-point types, NaNs are considered greater than any\n"
+ "other non-null value, but smaller than null values.\n"
+ "\n"
+ "The pivot index `N` must be given in PartitionNthOptions."),
+ {"array"}, "PartitionNthOptions");
+
+} // namespace
+
void RegisterVectorSort(FunctionRegistry* registry) {
// The kernel outputs into preallocated memory and is never null
VectorKernel base;
base.mem_allocation = MemAllocation::PREALLOCATE;
base.null_handling = NullHandling::OUTPUT_NOT_NULL;
- auto array_sort_indices = std::make_shared<VectorFunction>(
- "array_sort_indices", Arity::Unary(), &array_sort_indices_doc,
- &kDefaultArraySortOptions);
- base.init = ArraySortIndicesState::Init;
- AddSortingKernels<ArraySortIndices>(base, array_sort_indices.get());
- DCHECK_OK(registry->AddFunction(std::move(array_sort_indices)));
+ auto array_sort_indices = std::make_shared<VectorFunction>(
+ "array_sort_indices", Arity::Unary(), &array_sort_indices_doc,
+ &kDefaultArraySortOptions);
+ base.init = ArraySortIndicesState::Init;
+ AddSortingKernels<ArraySortIndices>(base, array_sort_indices.get());
+ DCHECK_OK(registry->AddFunction(std::move(array_sort_indices)));
+
+ DCHECK_OK(registry->AddFunction(std::make_shared<SortIndicesMetaFunction>()));
- DCHECK_OK(registry->AddFunction(std::make_shared<SortIndicesMetaFunction>()));
-
// partition_nth_indices has a parameter so needs its init function
- auto part_indices = std::make_shared<VectorFunction>(
- "partition_nth_indices", Arity::Unary(), &partition_nth_indices_doc);
+ auto part_indices = std::make_shared<VectorFunction>(
+ "partition_nth_indices", Arity::Unary(), &partition_nth_indices_doc);
base.init = PartitionNthToIndicesState::Init;
AddSortingKernels<PartitionNthToIndices>(base, part_indices.get());
DCHECK_OK(registry->AddFunction(std::move(part_indices)));
}
-#undef VISIT_PHYSICAL_TYPES
-
+#undef VISIT_PHYSICAL_TYPES
+
} // namespace internal
} // namespace compute
} // namespace arrow
diff --git a/contrib/libs/apache/arrow/cpp/src/arrow/compute/registry.cc b/contrib/libs/apache/arrow/cpp/src/arrow/compute/registry.cc
index 9f24f7a7008..ca7b6137306 100644
--- a/contrib/libs/apache/arrow/cpp/src/arrow/compute/registry.cc
+++ b/contrib/libs/apache/arrow/cpp/src/arrow/compute/registry.cc
@@ -24,10 +24,10 @@
#include <utility>
#include "arrow/compute/function.h"
-#include "arrow/compute/function_internal.h"
+#include "arrow/compute/function_internal.h"
#include "arrow/compute/registry_internal.h"
#include "arrow/status.h"
-#include "arrow/util/logging.h"
+#include "arrow/util/logging.h"
namespace arrow {
namespace compute {
@@ -35,8 +35,8 @@ namespace compute {
class FunctionRegistry::FunctionRegistryImpl {
public:
Status AddFunction(std::shared_ptr<Function> function, bool allow_overwrite) {
- RETURN_NOT_OK(function->Validate());
-
+ RETURN_NOT_OK(function->Validate());
+
std::lock_guard<std::mutex> mutation_guard(lock_);
const std::string& name = function->name();
@@ -59,20 +59,20 @@ class FunctionRegistry::FunctionRegistryImpl {
return Status::OK();
}
- Status AddFunctionOptionsType(const FunctionOptionsType* options_type,
- bool allow_overwrite = false) {
- std::lock_guard<std::mutex> mutation_guard(lock_);
-
- const std::string name = options_type->type_name();
- auto it = name_to_options_type_.find(name);
- if (it != name_to_options_type_.end() && !allow_overwrite) {
- return Status::KeyError(
- "Already have a function options type registered with name: ", name);
- }
- name_to_options_type_[name] = options_type;
- return Status::OK();
- }
-
+ Status AddFunctionOptionsType(const FunctionOptionsType* options_type,
+ bool allow_overwrite = false) {
+ std::lock_guard<std::mutex> mutation_guard(lock_);
+
+ const std::string name = options_type->type_name();
+ auto it = name_to_options_type_.find(name);
+ if (it != name_to_options_type_.end() && !allow_overwrite) {
+ return Status::KeyError(
+ "Already have a function options type registered with name: ", name);
+ }
+ name_to_options_type_[name] = options_type;
+ return Status::OK();
+ }
+
Result<std::shared_ptr<Function>> GetFunction(const std::string& name) const {
auto it = name_to_function_.find(name);
if (it == name_to_function_.end()) {
@@ -90,21 +90,21 @@ class FunctionRegistry::FunctionRegistryImpl {
return results;
}
- Result<const FunctionOptionsType*> GetFunctionOptionsType(
- const std::string& name) const {
- auto it = name_to_options_type_.find(name);
- if (it == name_to_options_type_.end()) {
- return Status::KeyError("No function options type registered with name: ", name);
- }
- return it->second;
- }
-
+ Result<const FunctionOptionsType*> GetFunctionOptionsType(
+ const std::string& name) const {
+ auto it = name_to_options_type_.find(name);
+ if (it == name_to_options_type_.end()) {
+ return Status::KeyError("No function options type registered with name: ", name);
+ }
+ return it->second;
+ }
+
int num_functions() const { return static_cast<int>(name_to_function_.size()); }
private:
std::mutex lock_;
std::unordered_map<std::string, std::shared_ptr<Function>> name_to_function_;
- std::unordered_map<std::string, const FunctionOptionsType*> name_to_options_type_;
+ std::unordered_map<std::string, const FunctionOptionsType*> name_to_options_type_;
};
std::unique_ptr<FunctionRegistry> FunctionRegistry::Make() {
@@ -125,11 +125,11 @@ Status FunctionRegistry::AddAlias(const std::string& target_name,
return impl_->AddAlias(target_name, source_name);
}
-Status FunctionRegistry::AddFunctionOptionsType(const FunctionOptionsType* options_type,
- bool allow_overwrite) {
- return impl_->AddFunctionOptionsType(options_type, allow_overwrite);
-}
-
+Status FunctionRegistry::AddFunctionOptionsType(const FunctionOptionsType* options_type,
+ bool allow_overwrite) {
+ return impl_->AddFunctionOptionsType(options_type, allow_overwrite);
+}
+
Result<std::shared_ptr<Function>> FunctionRegistry::GetFunction(
const std::string& name) const {
return impl_->GetFunction(name);
@@ -139,11 +139,11 @@ std::vector<std::string> FunctionRegistry::GetFunctionNames() const {
return impl_->GetFunctionNames();
}
-Result<const FunctionOptionsType*> FunctionRegistry::GetFunctionOptionsType(
- const std::string& name) const {
- return impl_->GetFunctionOptionsType(name);
-}
-
+Result<const FunctionOptionsType*> FunctionRegistry::GetFunctionOptionsType(
+ const std::string& name) const {
+ return impl_->GetFunctionOptionsType(name);
+}
+
int FunctionRegistry::num_functions() const { return impl_->num_functions(); }
namespace internal {
@@ -161,30 +161,30 @@ static std::unique_ptr<FunctionRegistry> CreateBuiltInRegistry() {
RegisterScalarStringAscii(registry.get());
RegisterScalarValidity(registry.get());
RegisterScalarFillNull(registry.get());
- RegisterScalarIfElse(registry.get());
- RegisterScalarTemporal(registry.get());
+ RegisterScalarIfElse(registry.get());
+ RegisterScalarTemporal(registry.get());
- RegisterScalarOptions(registry.get());
+ RegisterScalarOptions(registry.get());
// Vector functions
RegisterVectorHash(registry.get());
- RegisterVectorReplace(registry.get());
+ RegisterVectorReplace(registry.get());
RegisterVectorSelection(registry.get());
RegisterVectorNested(registry.get());
RegisterVectorSort(registry.get());
- RegisterVectorOptions(registry.get());
-
- // Aggregate functions
- RegisterScalarAggregateBasic(registry.get());
- RegisterScalarAggregateMode(registry.get());
- RegisterScalarAggregateQuantile(registry.get());
- RegisterScalarAggregateTDigest(registry.get());
- RegisterScalarAggregateVariance(registry.get());
- RegisterHashAggregateBasic(registry.get());
-
- RegisterAggregateOptions(registry.get());
-
+ RegisterVectorOptions(registry.get());
+
+ // Aggregate functions
+ RegisterScalarAggregateBasic(registry.get());
+ RegisterScalarAggregateMode(registry.get());
+ RegisterScalarAggregateQuantile(registry.get());
+ RegisterScalarAggregateTDigest(registry.get());
+ RegisterScalarAggregateVariance(registry.get());
+ RegisterHashAggregateBasic(registry.get());
+
+ RegisterAggregateOptions(registry.get());
+
return registry;
}
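
For reference, the registry API touched by this hunk is consumed roughly as follows; GetFunctionRegistry() returns the process-wide registry populated by CreateBuiltInRegistry(), and "sort_indices" is just an example lookup (a sketch, not part of this change):

    #include "arrow/compute/function.h"
    #include "arrow/compute/registry.h"
    #include "arrow/result.h"
    #include "arrow/status.h"

    arrow::Status Demo() {
      auto* registry = arrow::compute::GetFunctionRegistry();
      ARROW_ASSIGN_OR_RAISE(std::shared_ptr<arrow::compute::Function> func,
                            registry->GetFunction("sort_indices"));
      // num_functions() counts every registered function, including aliases.
      const int n = registry->num_functions();
      (void)func;
      (void)n;
      return arrow::Status::OK();
    }
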
diff --git a/contrib/libs/apache/arrow/cpp/src/arrow/compute/registry.h b/contrib/libs/apache/arrow/cpp/src/arrow/compute/registry.h
index 796eba2fb12..e83036db6ac 100644
--- a/contrib/libs/apache/arrow/cpp/src/arrow/compute/registry.h
+++ b/contrib/libs/apache/arrow/cpp/src/arrow/compute/registry.h
@@ -32,7 +32,7 @@ namespace arrow {
namespace compute {
class Function;
-class FunctionOptionsType;
+class FunctionOptionsType;
/// \brief A mutable central function registry for built-in functions as well
/// as user-defined functions. Functions are implementations of
@@ -59,11 +59,11 @@ class ARROW_EXPORT FunctionRegistry {
/// function with the given name is not registered
Status AddAlias(const std::string& target_name, const std::string& source_name);
- /// \brief Add a new function options type to the registry. Returns Status::KeyError if
- /// a function options type with the same name is already registered
- Status AddFunctionOptionsType(const FunctionOptionsType* options_type,
- bool allow_overwrite = false);
-
+ /// \brief Add a new function options type to the registry. Returns Status::KeyError if
+ /// a function options type with the same name is already registered
+ Status AddFunctionOptionsType(const FunctionOptionsType* options_type,
+ bool allow_overwrite = false);
+
/// \brief Retrieve a function by name from the registry
Result<std::shared_ptr<Function>> GetFunction(const std::string& name) const;
@@ -71,10 +71,10 @@ class ARROW_EXPORT FunctionRegistry {
/// displaying a manifest of available functions
std::vector<std::string> GetFunctionNames() const;
- /// \brief Retrieve a function options type by name from the registry
- Result<const FunctionOptionsType*> GetFunctionOptionsType(
- const std::string& name) const;
-
+ /// \brief Retrieve a function options type by name from the registry
+ Result<const FunctionOptionsType*> GetFunctionOptionsType(
+ const std::string& name) const;
+
/// \brief The number of currently registered functions
int num_functions() const;
diff --git a/contrib/libs/apache/arrow/cpp/src/arrow/compute/registry_internal.h b/contrib/libs/apache/arrow/cpp/src/arrow/compute/registry_internal.h
index bc5a2d734f4..892b54341da 100644
--- a/contrib/libs/apache/arrow/cpp/src/arrow/compute/registry_internal.h
+++ b/contrib/libs/apache/arrow/cpp/src/arrow/compute/registry_internal.h
@@ -34,30 +34,30 @@ void RegisterScalarSetLookup(FunctionRegistry* registry);
void RegisterScalarStringAscii(FunctionRegistry* registry);
void RegisterScalarValidity(FunctionRegistry* registry);
void RegisterScalarFillNull(FunctionRegistry* registry);
-void RegisterScalarIfElse(FunctionRegistry* registry);
-void RegisterScalarTemporal(FunctionRegistry* registry);
+void RegisterScalarIfElse(FunctionRegistry* registry);
+void RegisterScalarTemporal(FunctionRegistry* registry);
+
+void RegisterScalarOptions(FunctionRegistry* registry);
-void RegisterScalarOptions(FunctionRegistry* registry);
-
// Vector functions
void RegisterVectorHash(FunctionRegistry* registry);
-void RegisterVectorReplace(FunctionRegistry* registry);
+void RegisterVectorReplace(FunctionRegistry* registry);
void RegisterVectorSelection(FunctionRegistry* registry);
void RegisterVectorNested(FunctionRegistry* registry);
void RegisterVectorSort(FunctionRegistry* registry);
-void RegisterVectorOptions(FunctionRegistry* registry);
-
+void RegisterVectorOptions(FunctionRegistry* registry);
+
// Aggregate functions
void RegisterScalarAggregateBasic(FunctionRegistry* registry);
-void RegisterScalarAggregateMode(FunctionRegistry* registry);
-void RegisterScalarAggregateQuantile(FunctionRegistry* registry);
-void RegisterScalarAggregateTDigest(FunctionRegistry* registry);
-void RegisterScalarAggregateVariance(FunctionRegistry* registry);
-void RegisterHashAggregateBasic(FunctionRegistry* registry);
-
-void RegisterAggregateOptions(FunctionRegistry* registry);
-
+void RegisterScalarAggregateMode(FunctionRegistry* registry);
+void RegisterScalarAggregateQuantile(FunctionRegistry* registry);
+void RegisterScalarAggregateTDigest(FunctionRegistry* registry);
+void RegisterScalarAggregateVariance(FunctionRegistry* registry);
+void RegisterHashAggregateBasic(FunctionRegistry* registry);
+
+void RegisterAggregateOptions(FunctionRegistry* registry);
+
} // namespace internal
} // namespace compute
} // namespace arrow
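
Every hook in this header follows the same shape: a free function taking the FunctionRegistry to mutate. A user-side registration hook written in the same style might look like the sketch below; RegisterMyAliases is hypothetical, while AddAlias is the registry method shown earlier in this diff.

    // Hypothetical registration hook mirroring the internal Register* convention.
    #include "arrow/compute/registry.h"

    namespace myproject {

    arrow::Status RegisterMyAliases(arrow::compute::FunctionRegistry* registry) {
      // AddAlias fails with Status::KeyError unless "add" is already
      // registered, so call this after Arrow's built-ins are in place.
      return registry->AddAlias("my_add", "add");
    }

    }  // namespace myproject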
diff --git a/contrib/libs/apache/arrow/cpp/src/arrow/compute/type_fwd.h b/contrib/libs/apache/arrow/cpp/src/arrow/compute/type_fwd.h
index 3a3d2ac4b7d..eebc8c1b678 100644
--- a/contrib/libs/apache/arrow/cpp/src/arrow/compute/type_fwd.h
+++ b/contrib/libs/apache/arrow/cpp/src/arrow/compute/type_fwd.h
@@ -20,16 +20,16 @@
namespace arrow {
struct Datum;
-struct ValueDescr;
+struct ValueDescr;
namespace compute {
-class Function;
-class FunctionOptions;
-
-class CastOptions;
-
-struct ExecBatch;
+class Function;
+class FunctionOptions;
+
+class CastOptions;
+
+struct ExecBatch;
class ExecContext;
class KernelContext;
@@ -38,11 +38,11 @@ struct ScalarKernel;
struct ScalarAggregateKernel;
struct VectorKernel;
-struct KernelState;
-
-class Expression;
-class ExecNode;
-class ExecPlan;
-
+struct KernelState;
+
+class Expression;
+class ExecNode;
+class ExecPlan;
+
} // namespace compute
} // namespace arrow
diff --git a/contrib/libs/apache/arrow/cpp/src/arrow/config.cc b/contrib/libs/apache/arrow/cpp/src/arrow/config.cc
index 7d68f638b6c..b93f207161d 100644
--- a/contrib/libs/apache/arrow/cpp/src/arrow/config.cc
+++ b/contrib/libs/apache/arrow/cpp/src/arrow/config.cc
@@ -16,19 +16,19 @@
// under the License.
#include "arrow/config.h"
-
-#include <cstdint>
-
+
+#include <cstdint>
+
#include "arrow/util/config.h"
-#include "arrow/util/cpu_info.h"
+#include "arrow/util/cpu_info.h"
namespace arrow {
-using internal::CpuInfo;
-
-namespace {
-
-const BuildInfo kBuildInfo = {
+using internal::CpuInfo;
+
+namespace {
+
+const BuildInfo kBuildInfo = {
// clang-format off
ARROW_VERSION,
ARROW_VERSION_MAJOR,
@@ -46,33 +46,33 @@ const BuildInfo kBuildInfo = {
// clang-format on
};
-template <typename QueryFlagFunction>
-std::string MakeSimdLevelString(QueryFlagFunction&& query_flag) {
- if (query_flag(CpuInfo::AVX512)) {
- return "avx512";
- } else if (query_flag(CpuInfo::AVX2)) {
- return "avx2";
- } else if (query_flag(CpuInfo::AVX)) {
- return "avx";
- } else if (query_flag(CpuInfo::SSE4_2)) {
- return "sse4_2";
- } else {
- return "none";
- }
-}
-
-}; // namespace
-
+template <typename QueryFlagFunction>
+std::string MakeSimdLevelString(QueryFlagFunction&& query_flag) {
+ if (query_flag(CpuInfo::AVX512)) {
+ return "avx512";
+ } else if (query_flag(CpuInfo::AVX2)) {
+ return "avx2";
+ } else if (query_flag(CpuInfo::AVX)) {
+ return "avx";
+ } else if (query_flag(CpuInfo::SSE4_2)) {
+ return "sse4_2";
+ } else {
+ return "none";
+ }
+}
+
+}; // namespace
+
const BuildInfo& GetBuildInfo() { return kBuildInfo; }
-RuntimeInfo GetRuntimeInfo() {
- RuntimeInfo info;
- auto cpu_info = CpuInfo::GetInstance();
- info.simd_level =
- MakeSimdLevelString([&](int64_t flags) { return cpu_info->IsSupported(flags); });
- info.detected_simd_level =
- MakeSimdLevelString([&](int64_t flags) { return cpu_info->IsDetected(flags); });
- return info;
-}
-
+RuntimeInfo GetRuntimeInfo() {
+ RuntimeInfo info;
+ auto cpu_info = CpuInfo::GetInstance();
+ info.simd_level =
+ MakeSimdLevelString([&](int64_t flags) { return cpu_info->IsSupported(flags); });
+ info.detected_simd_level =
+ MakeSimdLevelString([&](int64_t flags) { return cpu_info->IsDetected(flags); });
+ return info;
+}
+
} // namespace arrow
diff --git a/contrib/libs/apache/arrow/cpp/src/arrow/config.h b/contrib/libs/apache/arrow/cpp/src/arrow/config.h
index a1abc997984..5ae7e223164 100644
--- a/contrib/libs/apache/arrow/cpp/src/arrow/config.h
+++ b/contrib/libs/apache/arrow/cpp/src/arrow/config.h
@@ -45,17 +45,17 @@ struct BuildInfo {
std::string package_kind;
};
-struct RuntimeInfo {
- /// The enabled SIMD level
- ///
- /// This can be less than `detected_simd_level` if the ARROW_USER_SIMD_LEVEL
- /// environment variable is set to another value.
- std::string simd_level;
-
- /// The SIMD level available on the OS and CPU
- std::string detected_simd_level;
-};
-
+struct RuntimeInfo {
+ /// The enabled SIMD level
+ ///
+ /// This can be less than `detected_simd_level` if the ARROW_USER_SIMD_LEVEL
+ /// environment variable is set to another value.
+ std::string simd_level;
+
+ /// The SIMD level available on the OS and CPU
+ std::string detected_simd_level;
+};
+
/// \brief Get runtime build info.
///
/// The returned values correspond to the exact loaded version of the Arrow library,
@@ -64,9 +64,9 @@ struct RuntimeInfo {
ARROW_EXPORT
const BuildInfo& GetBuildInfo();
-/// \brief Get runtime info.
-///
-ARROW_EXPORT
-RuntimeInfo GetRuntimeInfo();
-
+/// \brief Get runtime info.
+///
+ARROW_EXPORT
+RuntimeInfo GetRuntimeInfo();
+
} // namespace arrow
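
Together, GetBuildInfo() and GetRuntimeInfo() let an application report both the compiled and the effective SIMD configuration. A minimal sketch, assuming the BuildInfo::version_string field from the same header:

    // Sketch: printing build and runtime information.
    #include <iostream>
    #include "arrow/config.h"

    void PrintArrowInfo() {
      const arrow::BuildInfo& build = arrow::GetBuildInfo();
      arrow::RuntimeInfo runtime = arrow::GetRuntimeInfo();
      std::cout << "Arrow " << build.version_string << "\n";
      // simd_level can be lower than detected_simd_level when the
      // ARROW_USER_SIMD_LEVEL environment variable caps it.
      std::cout << "SIMD enabled:  " << runtime.simd_level << "\n";
      std::cout << "SIMD detected: " << runtime.detected_simd_level << "\n";
    }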
diff --git a/contrib/libs/apache/arrow/cpp/src/arrow/datum.cc b/contrib/libs/apache/arrow/cpp/src/arrow/datum.cc
index 5be26f62d6e..dd10fce3e4d 100644
--- a/contrib/libs/apache/arrow/cpp/src/arrow/datum.cc
+++ b/contrib/libs/apache/arrow/cpp/src/arrow/datum.cc
@@ -57,20 +57,20 @@ Datum::Datum(std::shared_ptr<RecordBatch> value) : value(std::move(value)) {}
Datum::Datum(std::shared_ptr<Table> value) : value(std::move(value)) {}
Datum::Datum(std::vector<Datum> value) : value(std::move(value)) {}
-Datum::Datum(bool value) : value(std::make_shared<BooleanScalar>(value)) {}
-Datum::Datum(int8_t value) : value(std::make_shared<Int8Scalar>(value)) {}
-Datum::Datum(uint8_t value) : value(std::make_shared<UInt8Scalar>(value)) {}
-Datum::Datum(int16_t value) : value(std::make_shared<Int16Scalar>(value)) {}
-Datum::Datum(uint16_t value) : value(std::make_shared<UInt16Scalar>(value)) {}
-Datum::Datum(int32_t value) : value(std::make_shared<Int32Scalar>(value)) {}
-Datum::Datum(uint32_t value) : value(std::make_shared<UInt32Scalar>(value)) {}
-Datum::Datum(int64_t value) : value(std::make_shared<Int64Scalar>(value)) {}
-Datum::Datum(uint64_t value) : value(std::make_shared<UInt64Scalar>(value)) {}
-Datum::Datum(float value) : value(std::make_shared<FloatScalar>(value)) {}
-Datum::Datum(double value) : value(std::make_shared<DoubleScalar>(value)) {}
-Datum::Datum(std::string value)
- : value(std::make_shared<StringScalar>(std::move(value))) {}
-Datum::Datum(const char* value) : value(std::make_shared<StringScalar>(value)) {}
+Datum::Datum(bool value) : value(std::make_shared<BooleanScalar>(value)) {}
+Datum::Datum(int8_t value) : value(std::make_shared<Int8Scalar>(value)) {}
+Datum::Datum(uint8_t value) : value(std::make_shared<UInt8Scalar>(value)) {}
+Datum::Datum(int16_t value) : value(std::make_shared<Int16Scalar>(value)) {}
+Datum::Datum(uint16_t value) : value(std::make_shared<UInt16Scalar>(value)) {}
+Datum::Datum(int32_t value) : value(std::make_shared<Int32Scalar>(value)) {}
+Datum::Datum(uint32_t value) : value(std::make_shared<UInt32Scalar>(value)) {}
+Datum::Datum(int64_t value) : value(std::make_shared<Int64Scalar>(value)) {}
+Datum::Datum(uint64_t value) : value(std::make_shared<UInt64Scalar>(value)) {}
+Datum::Datum(float value) : value(std::make_shared<FloatScalar>(value)) {}
+Datum::Datum(double value) : value(std::make_shared<DoubleScalar>(value)) {}
+Datum::Datum(std::string value)
+ : value(std::make_shared<StringScalar>(std::move(value))) {}
+Datum::Datum(const char* value) : value(std::make_shared<StringScalar>(value)) {}
Datum::Datum(const ChunkedArray& value)
: value(std::make_shared<ChunkedArray>(value.chunks(), value.type())) {}
@@ -89,26 +89,26 @@ std::shared_ptr<Array> Datum::make_array() const {
std::shared_ptr<DataType> Datum::type() const {
if (this->kind() == Datum::ARRAY) {
return util::get<std::shared_ptr<ArrayData>>(this->value)->type;
- }
- if (this->kind() == Datum::CHUNKED_ARRAY) {
+ }
+ if (this->kind() == Datum::CHUNKED_ARRAY) {
return util::get<std::shared_ptr<ChunkedArray>>(this->value)->type();
- }
- if (this->kind() == Datum::SCALAR) {
+ }
+ if (this->kind() == Datum::SCALAR) {
return util::get<std::shared_ptr<Scalar>>(this->value)->type;
}
- return nullptr;
+ return nullptr;
+}
+
+std::shared_ptr<Schema> Datum::schema() const {
+ if (this->kind() == Datum::RECORD_BATCH) {
+ return util::get<std::shared_ptr<RecordBatch>>(this->value)->schema();
+ }
+ if (this->kind() == Datum::TABLE) {
+ return util::get<std::shared_ptr<Table>>(this->value)->schema();
+ }
+ return nullptr;
}
-std::shared_ptr<Schema> Datum::schema() const {
- if (this->kind() == Datum::RECORD_BATCH) {
- return util::get<std::shared_ptr<RecordBatch>>(this->value)->schema();
- }
- if (this->kind() == Datum::TABLE) {
- return util::get<std::shared_ptr<Table>>(this->value)->schema();
- }
- return nullptr;
-}
-
int64_t Datum::length() const {
if (this->kind() == Datum::ARRAY) {
return util::get<std::shared_ptr<ArrayData>>(this->value)->length;
@@ -211,21 +211,21 @@ static std::string FormatValueDescr(const ValueDescr& descr) {
std::string ValueDescr::ToString() const { return FormatValueDescr(*this); }
-std::string ValueDescr::ToString(const std::vector<ValueDescr>& descrs) {
- std::stringstream ss;
- ss << "(";
- for (size_t i = 0; i < descrs.size(); ++i) {
- if (i > 0) {
- ss << ", ";
- }
- ss << descrs[i].ToString();
- }
- ss << ")";
- return ss.str();
-}
-
-void PrintTo(const ValueDescr& descr, std::ostream* os) { *os << descr.ToString(); }
-
+std::string ValueDescr::ToString(const std::vector<ValueDescr>& descrs) {
+ std::stringstream ss;
+ ss << "(";
+ for (size_t i = 0; i < descrs.size(); ++i) {
+ if (i > 0) {
+ ss << ", ";
+ }
+ ss << descrs[i].ToString();
+ }
+ ss << ")";
+ return ss.str();
+}
+
+void PrintTo(const ValueDescr& descr, std::ostream* os) { *os << descr.ToString(); }
+
std::string Datum::ToString() const {
switch (this->kind()) {
case Datum::NONE:
@@ -250,7 +250,7 @@ std::string Datum::ToString() const {
}
ss << values[i].ToString();
}
- ss << ')';
+ ss << ')';
return ss.str();
}
default:
@@ -262,23 +262,23 @@ std::string Datum::ToString() const {
ValueDescr::Shape GetBroadcastShape(const std::vector<ValueDescr>& args) {
for (const auto& descr : args) {
if (descr.shape == ValueDescr::ARRAY) {
- return ValueDescr::ARRAY;
+ return ValueDescr::ARRAY;
}
}
- return ValueDescr::SCALAR;
+ return ValueDescr::SCALAR;
+}
+
+void PrintTo(const Datum& datum, std::ostream* os) {
+ switch (datum.kind()) {
+ case Datum::SCALAR:
+ *os << datum.scalar()->ToString();
+ break;
+ case Datum::ARRAY:
+ *os << datum.make_array()->ToString();
+ break;
+ default:
+ *os << datum.ToString();
+ }
}
-void PrintTo(const Datum& datum, std::ostream* os) {
- switch (datum.kind()) {
- case Datum::SCALAR:
- *os << datum.scalar()->ToString();
- break;
- case Datum::ARRAY:
- *os << datum.make_array()->ToString();
- break;
- default:
- *os << datum.ToString();
- }
-}
-
} // namespace arrow
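
The accessors restored above have a simple contract: type() is defined for array-like and scalar variants, schema() only for record batches and tables, and each returns nullptr otherwise. A small sketch:

    // Sketch: Datum kind/type/schema behavior.
    #include <cassert>
    #include "arrow/datum.h"

    void DatumBasics() {
      arrow::Datum d(int64_t{42});  // wraps an Int64Scalar
      assert(d.kind() == arrow::Datum::SCALAR);
      assert(d.type()->id() == arrow::Type::INT64);
      // schema() applies to RECORD_BATCH / TABLE only; for a scalar it
      // returns nullptr, matching the implementation above.
      assert(d.schema() == nullptr);
    }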
diff --git a/contrib/libs/apache/arrow/cpp/src/arrow/datum.h b/contrib/libs/apache/arrow/cpp/src/arrow/datum.h
index d7f487c273c..6ba6af7f79e 100644
--- a/contrib/libs/apache/arrow/cpp/src/arrow/datum.h
+++ b/contrib/libs/apache/arrow/cpp/src/arrow/datum.h
@@ -81,17 +81,17 @@ struct ARROW_EXPORT ValueDescr {
}
bool operator==(const ValueDescr& other) const {
- if (shape != other.shape) return false;
- if (type == other.type) return true;
- return type && type->Equals(other.type);
+ if (shape != other.shape) return false;
+ if (type == other.type) return true;
+ return type && type->Equals(other.type);
}
bool operator!=(const ValueDescr& other) const { return !(*this == other); }
std::string ToString() const;
- static std::string ToString(const std::vector<ValueDescr>&);
-
- ARROW_EXPORT friend void PrintTo(const ValueDescr&, std::ostream*);
+ static std::string ToString(const std::vector<ValueDescr>&);
+
+ ARROW_EXPORT friend void PrintTo(const ValueDescr&, std::ostream*);
};
/// \brief For use with scalar functions, returns the broadcasted Value::Shape
@@ -105,25 +105,25 @@ ValueDescr::Shape GetBroadcastShape(const std::vector<ValueDescr>& args);
struct ARROW_EXPORT Datum {
enum Kind { NONE, SCALAR, ARRAY, CHUNKED_ARRAY, RECORD_BATCH, TABLE, COLLECTION };
- struct Empty {};
-
+ struct Empty {};
+
  // Datum variants may have a length. This special value indicates that the
  // current variant does not have a length.
static constexpr int64_t kUnknownLength = -1;
- util::Variant<Empty, std::shared_ptr<Scalar>, std::shared_ptr<ArrayData>,
+ util::Variant<Empty, std::shared_ptr<Scalar>, std::shared_ptr<ArrayData>,
std::shared_ptr<ChunkedArray>, std::shared_ptr<RecordBatch>,
std::shared_ptr<Table>, std::vector<Datum>>
value;
/// \brief Empty datum, to be populated elsewhere
- Datum() = default;
+ Datum() = default;
+
+ Datum(const Datum& other) = default;
+ Datum& operator=(const Datum& other) = default;
+ Datum(Datum&& other) = default;
+ Datum& operator=(Datum&& other) = default;
- Datum(const Datum& other) = default;
- Datum& operator=(const Datum& other) = default;
- Datum(Datum&& other) = default;
- Datum& operator=(Datum&& other) = default;
-
Datum(std::shared_ptr<Scalar> value) // NOLINT implicit conversion
: value(std::move(value)) {}
@@ -163,8 +163,8 @@ struct ARROW_EXPORT Datum {
explicit Datum(uint64_t value);
explicit Datum(float value);
explicit Datum(double value);
- explicit Datum(std::string value);
- explicit Datum(const char* value);
+ explicit Datum(std::string value);
+ explicit Datum(const char* value);
Datum::Kind kind() const {
switch (this->value.index()) {
@@ -216,11 +216,11 @@ struct ARROW_EXPORT Datum {
}
template <typename ExactType>
- std::shared_ptr<ExactType> array_as() const {
- return internal::checked_pointer_cast<ExactType>(this->make_array());
- }
-
- template <typename ExactType>
+ std::shared_ptr<ExactType> array_as() const {
+ return internal::checked_pointer_cast<ExactType>(this->make_array());
+ }
+
+ template <typename ExactType>
const ExactType& scalar_as() const {
return internal::checked_cast<const ExactType&>(*this->scalar());
}
@@ -253,11 +253,11 @@ struct ARROW_EXPORT Datum {
/// \return nullptr if no type
std::shared_ptr<DataType> type() const;
- /// \brief The schema of the variant, if any
- ///
- /// \return nullptr if no schema
- std::shared_ptr<Schema> schema() const;
-
+ /// \brief The schema of the variant, if any
+ ///
+ /// \return nullptr if no schema
+ std::shared_ptr<Schema> schema() const;
+
/// \brief The value length of the variant, if any
///
/// \return kUnknownLength if no type
@@ -274,8 +274,8 @@ struct ARROW_EXPORT Datum {
bool operator!=(const Datum& other) const { return !Equals(other); }
std::string ToString() const;
-
- ARROW_EXPORT friend void PrintTo(const Datum&, std::ostream*);
+
+ ARROW_EXPORT friend void PrintTo(const Datum&, std::ostream*);
};
} // namespace arrow
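
GetBroadcastShape(), declared above, reduces a set of argument descriptors to a single shape: ARRAY if any argument is array-shaped, SCALAR otherwise. A sketch using the ValueDescr factory helpers from this header:

    // Sketch: broadcasting ValueDescr shapes.
    #include <vector>
    #include "arrow/datum.h"
    #include "arrow/type_fwd.h"

    void BroadcastExample() {
      std::vector<arrow::ValueDescr> args = {
          arrow::ValueDescr::Scalar(arrow::int32()),
          arrow::ValueDescr::Array(arrow::int32()),
      };
      // ARRAY, because at least one argument is array-shaped.
      arrow::ValueDescr::Shape shape = arrow::GetBroadcastShape(args);
      (void)shape;
    }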
diff --git a/contrib/libs/apache/arrow/cpp/src/arrow/io/buffered.cc b/contrib/libs/apache/arrow/cpp/src/arrow/io/buffered.cc
index 86893cb5837..7804c130ca1 100644
--- a/contrib/libs/apache/arrow/cpp/src/arrow/io/buffered.cc
+++ b/contrib/libs/apache/arrow/cpp/src/arrow/io/buffered.cc
@@ -476,14 +476,14 @@ Result<std::shared_ptr<Buffer>> BufferedInputStream::DoRead(int64_t nbytes) {
return impl_->Read(nbytes);
}
-Result<std::shared_ptr<const KeyValueMetadata>> BufferedInputStream::ReadMetadata() {
- return impl_->raw()->ReadMetadata();
-}
-
-Future<std::shared_ptr<const KeyValueMetadata>> BufferedInputStream::ReadMetadataAsync(
- const IOContext& io_context) {
- return impl_->raw()->ReadMetadataAsync(io_context);
-}
-
+Result<std::shared_ptr<const KeyValueMetadata>> BufferedInputStream::ReadMetadata() {
+ return impl_->raw()->ReadMetadata();
+}
+
+Future<std::shared_ptr<const KeyValueMetadata>> BufferedInputStream::ReadMetadataAsync(
+ const IOContext& io_context) {
+ return impl_->raw()->ReadMetadataAsync(io_context);
+}
+
} // namespace io
} // namespace arrow
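
Both metadata entry points above are pure delegations to the wrapped stream. A hedged usage sketch, assuming Arrow's BufferedInputStream::Create factory taking a buffer size, memory pool, and raw stream:

    // Sketch: metadata calls pass through to the underlying stream.
    #include <string>
    #include "arrow/io/buffered.h"
    #include "arrow/io/file.h"
    #include "arrow/memory_pool.h"
    #include "arrow/result.h"

    arrow::Status ReadWithBuffer(const std::string& path) {
      ARROW_ASSIGN_OR_RAISE(auto raw, arrow::io::ReadableFile::Open(path));
      ARROW_ASSIGN_OR_RAISE(auto buffered,
                            arrow::io::BufferedInputStream::Create(
                                /*buffer_size=*/1 << 16,
                                arrow::default_memory_pool(), raw));
      // Delegates to raw->ReadMetadata(); plain files typically return null.
      ARROW_ASSIGN_OR_RAISE(auto metadata, buffered->ReadMetadata());
      return arrow::Status::OK();
    }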
diff --git a/contrib/libs/apache/arrow/cpp/src/arrow/io/buffered.h b/contrib/libs/apache/arrow/cpp/src/arrow/io/buffered.h
index 3bcc3a82c1c..8116613fa4e 100644
--- a/contrib/libs/apache/arrow/cpp/src/arrow/io/buffered.h
+++ b/contrib/libs/apache/arrow/cpp/src/arrow/io/buffered.h
@@ -132,9 +132,9 @@ class ARROW_EXPORT BufferedInputStream
// InputStream APIs
bool closed() const override;
- Result<std::shared_ptr<const KeyValueMetadata>> ReadMetadata() override;
- Future<std::shared_ptr<const KeyValueMetadata>> ReadMetadataAsync(
- const IOContext& io_context) override;
+ Result<std::shared_ptr<const KeyValueMetadata>> ReadMetadata() override;
+ Future<std::shared_ptr<const KeyValueMetadata>> ReadMetadataAsync(
+ const IOContext& io_context) override;
private:
friend InputStreamConcurrencyWrapper<BufferedInputStream>;
diff --git a/contrib/libs/apache/arrow/cpp/src/arrow/io/caching.cc b/contrib/libs/apache/arrow/cpp/src/arrow/io/caching.cc
index 8031d897ba5..722026ccd9b 100644
--- a/contrib/libs/apache/arrow/cpp/src/arrow/io/caching.cc
+++ b/contrib/libs/apache/arrow/cpp/src/arrow/io/caching.cc
@@ -16,11 +16,11 @@
// under the License.
#include <algorithm>
-#include <atomic>
+#include <atomic>
#include <cmath>
-#include <mutex>
+#include <mutex>
#include <utility>
-#include <vector>
+#include <vector>
#include "arrow/buffer.h"
#include "arrow/io/caching.h"
@@ -34,16 +34,16 @@ namespace io {
CacheOptions CacheOptions::Defaults() {
return CacheOptions{internal::ReadRangeCache::kDefaultHoleSizeLimit,
- internal::ReadRangeCache::kDefaultRangeSizeLimit,
- /*lazy=*/false};
+ internal::ReadRangeCache::kDefaultRangeSizeLimit,
+ /*lazy=*/false};
+}
+
+CacheOptions CacheOptions::LazyDefaults() {
+ return CacheOptions{internal::ReadRangeCache::kDefaultHoleSizeLimit,
+ internal::ReadRangeCache::kDefaultRangeSizeLimit,
+ /*lazy=*/true};
}
-CacheOptions CacheOptions::LazyDefaults() {
- return CacheOptions{internal::ReadRangeCache::kDefaultHoleSizeLimit,
- internal::ReadRangeCache::kDefaultRangeSizeLimit,
- /*lazy=*/true};
-}
-
CacheOptions CacheOptions::MakeFromNetworkMetrics(int64_t time_to_first_byte_millis,
int64_t transfer_bandwidth_mib_per_sec,
double ideal_bandwidth_utilization_frac,
@@ -125,7 +125,7 @@ CacheOptions CacheOptions::MakeFromNetworkMetrics(int64_t time_to_first_byte_mil
(1 - ideal_bandwidth_utilization_frac))));
DCHECK_GT(range_size_limit, 0) << "Computed range_size_limit must be > 0";
- return {hole_size_limit, range_size_limit, false};
+ return {hole_size_limit, range_size_limit, false};
}
namespace internal {
@@ -134,10 +134,10 @@ struct RangeCacheEntry {
ReadRange range;
Future<std::shared_ptr<Buffer>> future;
- RangeCacheEntry() = default;
- RangeCacheEntry(const ReadRange& range_, Future<std::shared_ptr<Buffer>> future_)
- : range(range_), future(std::move(future_)) {}
-
+ RangeCacheEntry() = default;
+ RangeCacheEntry(const ReadRange& range_, Future<std::shared_ptr<Buffer>> future_)
+ : range(range_), future(std::move(future_)) {}
+
friend bool operator<(const RangeCacheEntry& left, const RangeCacheEntry& right) {
return left.range.offset < right.range.offset;
}
@@ -145,36 +145,36 @@ struct RangeCacheEntry {
struct ReadRangeCache::Impl {
std::shared_ptr<RandomAccessFile> file;
- IOContext ctx;
+ IOContext ctx;
CacheOptions options;
// Ordered by offset (so as to find a matching region by binary search)
std::vector<RangeCacheEntry> entries;
- virtual ~Impl() = default;
-
- // Get the future corresponding to a range
- virtual Future<std::shared_ptr<Buffer>> MaybeRead(RangeCacheEntry* entry) {
- return entry->future;
- }
-
- // Make cache entries for ranges
- virtual std::vector<RangeCacheEntry> MakeCacheEntries(
- const std::vector<ReadRange>& ranges) {
- std::vector<RangeCacheEntry> new_entries;
- new_entries.reserve(ranges.size());
- for (const auto& range : ranges) {
- new_entries.emplace_back(range, file->ReadAsync(ctx, range.offset, range.length));
- }
- return new_entries;
- }
-
- // Add the given ranges to the cache, coalescing them where possible
- virtual Status Cache(std::vector<ReadRange> ranges) {
- ranges = internal::CoalesceReadRanges(std::move(ranges), options.hole_size_limit,
- options.range_size_limit);
- std::vector<RangeCacheEntry> new_entries = MakeCacheEntries(ranges);
- // Add new entries, themselves ordered by offset
+ virtual ~Impl() = default;
+
+ // Get the future corresponding to a range
+ virtual Future<std::shared_ptr<Buffer>> MaybeRead(RangeCacheEntry* entry) {
+ return entry->future;
+ }
+
+ // Make cache entries for ranges
+ virtual std::vector<RangeCacheEntry> MakeCacheEntries(
+ const std::vector<ReadRange>& ranges) {
+ std::vector<RangeCacheEntry> new_entries;
+ new_entries.reserve(ranges.size());
+ for (const auto& range : ranges) {
+ new_entries.emplace_back(range, file->ReadAsync(ctx, range.offset, range.length));
+ }
+ return new_entries;
+ }
+
+ // Add the given ranges to the cache, coalescing them where possible
+ virtual Status Cache(std::vector<ReadRange> ranges) {
+ ranges = internal::CoalesceReadRanges(std::move(ranges), options.hole_size_limit,
+ options.range_size_limit);
+ std::vector<RangeCacheEntry> new_entries = MakeCacheEntries(ranges);
+ // Add new entries, themselves ordered by offset
if (entries.size() > 0) {
std::vector<RangeCacheEntry> merged(entries.size() + new_entries.size());
std::merge(entries.begin(), entries.end(), new_entries.begin(), new_entries.end(),
@@ -183,134 +183,134 @@ struct ReadRangeCache::Impl {
} else {
entries = std::move(new_entries);
}
- // Prefetch immediately, regardless of executor availability, if possible
- return file->WillNeed(ranges);
+ // Prefetch immediately, regardless of executor availability, if possible
+ return file->WillNeed(ranges);
+ }
+
+ // Read the given range from the cache, blocking if needed. Cannot read a range
+ // that spans cache entries.
+ virtual Result<std::shared_ptr<Buffer>> Read(ReadRange range) {
+ if (range.length == 0) {
+ static const uint8_t byte = 0;
+ return std::make_shared<Buffer>(&byte, 0);
+ }
+
+ const auto it = std::lower_bound(
+ entries.begin(), entries.end(), range,
+ [](const RangeCacheEntry& entry, const ReadRange& range) {
+ return entry.range.offset + entry.range.length < range.offset + range.length;
+ });
+ if (it != entries.end() && it->range.Contains(range)) {
+ auto fut = MaybeRead(&*it);
+ ARROW_ASSIGN_OR_RAISE(auto buf, fut.result());
+ return SliceBuffer(std::move(buf), range.offset - it->range.offset, range.length);
+ }
+ return Status::Invalid("ReadRangeCache did not find matching cache entry");
+ }
+
+ virtual Future<> Wait() {
+ std::vector<Future<>> futures;
+ for (auto& entry : entries) {
+ futures.emplace_back(MaybeRead(&entry));
+ }
+ return AllComplete(futures);
+ }
+
+ // Return a Future that completes when the given ranges have been read.
+ virtual Future<> WaitFor(std::vector<ReadRange> ranges) {
+ auto end = std::remove_if(ranges.begin(), ranges.end(),
+ [](const ReadRange& range) { return range.length == 0; });
+ ranges.resize(end - ranges.begin());
+ std::vector<Future<>> futures;
+ futures.reserve(ranges.size());
+ for (auto& range : ranges) {
+ const auto it = std::lower_bound(
+ entries.begin(), entries.end(), range,
+ [](const RangeCacheEntry& entry, const ReadRange& range) {
+ return entry.range.offset + entry.range.length < range.offset + range.length;
+ });
+ if (it != entries.end() && it->range.Contains(range)) {
+ futures.push_back(Future<>(MaybeRead(&*it)));
+ } else {
+ return Status::Invalid("Range was not requested for caching: offset=",
+ range.offset, " length=", range.length);
+ }
+ }
+ return AllComplete(futures);
+ }
+};
+
+// Don't read ranges when they're first added. Instead, wait until they're requested
+// (either through Read or WaitFor).
+struct ReadRangeCache::LazyImpl : public ReadRangeCache::Impl {
+ // Protect against concurrent modification of entries[i]->future
+ std::mutex entry_mutex;
+
+ virtual ~LazyImpl() = default;
+
+ Future<std::shared_ptr<Buffer>> MaybeRead(RangeCacheEntry* entry) override {
+ // Called by superclass Read()/WaitFor() so we have the lock
+ if (!entry->future.is_valid()) {
+ entry->future = file->ReadAsync(ctx, entry->range.offset, entry->range.length);
+ }
+ return entry->future;
+ }
+
+ std::vector<RangeCacheEntry> MakeCacheEntries(
+ const std::vector<ReadRange>& ranges) override {
+ std::vector<RangeCacheEntry> new_entries;
+ new_entries.reserve(ranges.size());
+ for (const auto& range : ranges) {
+ // In the lazy variant, don't read data here - later, a call to Read or WaitFor
+ // will call back to MaybeRead (under the lock) which will fill the future.
+ new_entries.emplace_back(range, Future<std::shared_ptr<Buffer>>());
+ }
+ return new_entries;
+ }
+
+ Status Cache(std::vector<ReadRange> ranges) override {
+ std::unique_lock<std::mutex> guard(entry_mutex);
+ return ReadRangeCache::Impl::Cache(std::move(ranges));
+ }
+
+ Result<std::shared_ptr<Buffer>> Read(ReadRange range) override {
+ std::unique_lock<std::mutex> guard(entry_mutex);
+ return ReadRangeCache::Impl::Read(range);
+ }
+
+ Future<> Wait() override {
+ std::unique_lock<std::mutex> guard(entry_mutex);
+ return ReadRangeCache::Impl::Wait();
+ }
+
+ Future<> WaitFor(std::vector<ReadRange> ranges) override {
+ std::unique_lock<std::mutex> guard(entry_mutex);
+ return ReadRangeCache::Impl::WaitFor(std::move(ranges));
}
-
- // Read the given range from the cache, blocking if needed. Cannot read a range
- // that spans cache entries.
- virtual Result<std::shared_ptr<Buffer>> Read(ReadRange range) {
- if (range.length == 0) {
- static const uint8_t byte = 0;
- return std::make_shared<Buffer>(&byte, 0);
- }
-
- const auto it = std::lower_bound(
- entries.begin(), entries.end(), range,
- [](const RangeCacheEntry& entry, const ReadRange& range) {
- return entry.range.offset + entry.range.length < range.offset + range.length;
- });
- if (it != entries.end() && it->range.Contains(range)) {
- auto fut = MaybeRead(&*it);
- ARROW_ASSIGN_OR_RAISE(auto buf, fut.result());
- return SliceBuffer(std::move(buf), range.offset - it->range.offset, range.length);
- }
- return Status::Invalid("ReadRangeCache did not find matching cache entry");
- }
-
- virtual Future<> Wait() {
- std::vector<Future<>> futures;
- for (auto& entry : entries) {
- futures.emplace_back(MaybeRead(&entry));
- }
- return AllComplete(futures);
- }
-
- // Return a Future that completes when the given ranges have been read.
- virtual Future<> WaitFor(std::vector<ReadRange> ranges) {
- auto end = std::remove_if(ranges.begin(), ranges.end(),
- [](const ReadRange& range) { return range.length == 0; });
- ranges.resize(end - ranges.begin());
- std::vector<Future<>> futures;
- futures.reserve(ranges.size());
- for (auto& range : ranges) {
- const auto it = std::lower_bound(
- entries.begin(), entries.end(), range,
- [](const RangeCacheEntry& entry, const ReadRange& range) {
- return entry.range.offset + entry.range.length < range.offset + range.length;
- });
- if (it != entries.end() && it->range.Contains(range)) {
- futures.push_back(Future<>(MaybeRead(&*it)));
- } else {
- return Status::Invalid("Range was not requested for caching: offset=",
- range.offset, " length=", range.length);
- }
- }
- return AllComplete(futures);
- }
};
-// Don't read ranges when they're first added. Instead, wait until they're requested
-// (either through Read or WaitFor).
-struct ReadRangeCache::LazyImpl : public ReadRangeCache::Impl {
- // Protect against concurrent modification of entries[i]->future
- std::mutex entry_mutex;
-
- virtual ~LazyImpl() = default;
-
- Future<std::shared_ptr<Buffer>> MaybeRead(RangeCacheEntry* entry) override {
- // Called by superclass Read()/WaitFor() so we have the lock
- if (!entry->future.is_valid()) {
- entry->future = file->ReadAsync(ctx, entry->range.offset, entry->range.length);
- }
- return entry->future;
- }
-
- std::vector<RangeCacheEntry> MakeCacheEntries(
- const std::vector<ReadRange>& ranges) override {
- std::vector<RangeCacheEntry> new_entries;
- new_entries.reserve(ranges.size());
- for (const auto& range : ranges) {
- // In the lazy variant, don't read data here - later, a call to Read or WaitFor
- // will call back to MaybeRead (under the lock) which will fill the future.
- new_entries.emplace_back(range, Future<std::shared_ptr<Buffer>>());
- }
- return new_entries;
- }
-
- Status Cache(std::vector<ReadRange> ranges) override {
- std::unique_lock<std::mutex> guard(entry_mutex);
- return ReadRangeCache::Impl::Cache(std::move(ranges));
- }
-
- Result<std::shared_ptr<Buffer>> Read(ReadRange range) override {
- std::unique_lock<std::mutex> guard(entry_mutex);
- return ReadRangeCache::Impl::Read(range);
- }
-
- Future<> Wait() override {
- std::unique_lock<std::mutex> guard(entry_mutex);
- return ReadRangeCache::Impl::Wait();
- }
-
- Future<> WaitFor(std::vector<ReadRange> ranges) override {
- std::unique_lock<std::mutex> guard(entry_mutex);
- return ReadRangeCache::Impl::WaitFor(std::move(ranges));
- }
-};
-
-ReadRangeCache::ReadRangeCache(std::shared_ptr<RandomAccessFile> file, IOContext ctx,
+ReadRangeCache::ReadRangeCache(std::shared_ptr<RandomAccessFile> file, IOContext ctx,
CacheOptions options)
- : impl_(options.lazy ? new LazyImpl() : new Impl()) {
+ : impl_(options.lazy ? new LazyImpl() : new Impl()) {
impl_->file = std::move(file);
impl_->ctx = std::move(ctx);
impl_->options = options;
}
-ReadRangeCache::~ReadRangeCache() = default;
+ReadRangeCache::~ReadRangeCache() = default;
Status ReadRangeCache::Cache(std::vector<ReadRange> ranges) {
- return impl_->Cache(std::move(ranges));
+ return impl_->Cache(std::move(ranges));
}
Result<std::shared_ptr<Buffer>> ReadRangeCache::Read(ReadRange range) {
- return impl_->Read(range);
-}
+ return impl_->Read(range);
+}
+
+Future<> ReadRangeCache::Wait() { return impl_->Wait(); }
-Future<> ReadRangeCache::Wait() { return impl_->Wait(); }
-
-Future<> ReadRangeCache::WaitFor(std::vector<ReadRange> ranges) {
- return impl_->WaitFor(std::move(ranges));
+Future<> ReadRangeCache::WaitFor(std::vector<ReadRange> ranges) {
+ return impl_->WaitFor(std::move(ranges));
}
} // namespace internal
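
The two presets defined above share the default coalescing limits and differ only in the lazy flag, which is also why operator== treats them as distinct. A sketch:

    // Sketch: eager vs. lazy cache presets.
    #include "arrow/io/caching.h"

    void CompareCacheOptions() {
      auto eager = arrow::io::CacheOptions::Defaults();     // lazy == false
      auto lazy = arrow::io::CacheOptions::LazyDefaults();  // lazy == true
      bool same_limits = eager.hole_size_limit == lazy.hole_size_limit &&
                         eager.range_size_limit == lazy.range_size_limit;
      (void)same_limits;  // true: only the lazy flag differs
    }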
diff --git a/contrib/libs/apache/arrow/cpp/src/arrow/io/caching.h b/contrib/libs/apache/arrow/cpp/src/arrow/io/caching.h
index 833b36e31a0..59a9b60e82f 100644
--- a/contrib/libs/apache/arrow/cpp/src/arrow/io/caching.h
+++ b/contrib/libs/apache/arrow/cpp/src/arrow/io/caching.h
@@ -24,7 +24,7 @@
#include <vector>
#include "arrow/io/interfaces.h"
-#include "arrow/util/type_fwd.h"
+#include "arrow/util/type_fwd.h"
#include "arrow/util/visibility.h"
namespace arrow {
@@ -34,19 +34,19 @@ struct ARROW_EXPORT CacheOptions {
static constexpr double kDefaultIdealBandwidthUtilizationFrac = 0.9;
static constexpr int64_t kDefaultMaxIdealRequestSizeMib = 64;
- /// \brief The maximum distance in bytes between two consecutive
+ /// \brief The maximum distance in bytes between two consecutive
/// ranges; beyond this value, ranges are not combined
int64_t hole_size_limit;
- /// \brief The maximum size in bytes of a combined range; if
+ /// \brief The maximum size in bytes of a combined range; if
/// combining two consecutive ranges would produce a range of a
/// size greater than this, they are not combined
int64_t range_size_limit;
- /// \brief A lazy cache does not perform any I/O until requested.
- bool lazy;
+ /// \brief A lazy cache does not perform any I/O until requested.
+ bool lazy;
bool operator==(const CacheOptions& other) const {
return hole_size_limit == other.hole_size_limit &&
- range_size_limit == other.range_size_limit && lazy == other.lazy;
+ range_size_limit == other.range_size_limit && lazy == other.lazy;
}
/// \brief Construct CacheOptions from network storage metrics (e.g. S3).
@@ -69,45 +69,45 @@ struct ARROW_EXPORT CacheOptions {
int64_t max_ideal_request_size_mib = kDefaultMaxIdealRequestSizeMib);
static CacheOptions Defaults();
- static CacheOptions LazyDefaults();
+ static CacheOptions LazyDefaults();
};
namespace internal {
/// \brief A read cache designed to hide IO latencies when reading.
///
-/// This class takes multiple byte ranges that an application expects to read, and
-/// coalesces them into fewer, larger read requests, which benefits performance on some
-/// filesystems, particularly remote ones like Amazon S3. By default, it also issues
-/// these read requests in parallel up front.
-///
-/// To use:
-/// 1. Cache() the ranges you expect to read in the future. Ideally, these ranges have
-/// the exact offset and length that will later be read. The cache will combine those
-/// ranges according to parameters (see constructor).
-///
-/// By default, the cache will also start fetching the combined ranges in parallel in
-/// the background, unless CacheOptions.lazy is set.
-///
-/// 2. Call WaitFor() to be notified when the given ranges have been read. If
-/// CacheOptions.lazy is set, I/O will be triggered in the background here instead.
-/// This can be done in parallel (e.g. if parsing a file, call WaitFor() for each
-/// chunk of the file that can be parsed in parallel).
-///
-/// 3. Call Read() to retrieve the actual data for the given ranges.
-/// A synchronous application may skip WaitFor() and just call Read() - it will still
-/// benefit from coalescing and parallel fetching.
+/// This class takes multiple byte ranges that an application expects to read, and
+/// coalesces them into fewer, larger read requests, which benefits performance on some
+/// filesystems, particularly remote ones like Amazon S3. By default, it also issues
+/// these read requests in parallel up front.
+///
+/// To use:
+/// 1. Cache() the ranges you expect to read in the future. Ideally, these ranges have
+/// the exact offset and length that will later be read. The cache will combine those
+/// ranges according to parameters (see constructor).
+///
+/// By default, the cache will also start fetching the combined ranges in parallel in
+/// the background, unless CacheOptions.lazy is set.
+///
+/// 2. Call WaitFor() to be notified when the given ranges have been read. If
+/// CacheOptions.lazy is set, I/O will be triggered in the background here instead.
+/// This can be done in parallel (e.g. if parsing a file, call WaitFor() for each
+/// chunk of the file that can be parsed in parallel).
+///
+/// 3. Call Read() to retrieve the actual data for the given ranges.
+/// A synchronous application may skip WaitFor() and just call Read() - it will still
+/// benefit from coalescing and parallel fetching.
class ARROW_EXPORT ReadRangeCache {
public:
static constexpr int64_t kDefaultHoleSizeLimit = 8192;
static constexpr int64_t kDefaultRangeSizeLimit = 32 * 1024 * 1024;
/// Construct a read cache with default options
- explicit ReadRangeCache(std::shared_ptr<RandomAccessFile> file, IOContext ctx)
+ explicit ReadRangeCache(std::shared_ptr<RandomAccessFile> file, IOContext ctx)
: ReadRangeCache(file, std::move(ctx), CacheOptions::Defaults()) {}
/// Construct a read cache with given options
- explicit ReadRangeCache(std::shared_ptr<RandomAccessFile> file, IOContext ctx,
+ explicit ReadRangeCache(std::shared_ptr<RandomAccessFile> file, IOContext ctx,
CacheOptions options);
~ReadRangeCache();
@@ -120,16 +120,16 @@ class ARROW_EXPORT ReadRangeCache {
/// \brief Read a range previously given to Cache().
Result<std::shared_ptr<Buffer>> Read(ReadRange range);
- /// \brief Wait until all ranges added so far have been cached.
- Future<> Wait();
-
- /// \brief Wait until all given ranges have been cached.
- Future<> WaitFor(std::vector<ReadRange> ranges);
-
+ /// \brief Wait until all ranges added so far have been cached.
+ Future<> Wait();
+
+ /// \brief Wait until all given ranges have been cached.
+ Future<> WaitFor(std::vector<ReadRange> ranges);
+
protected:
struct Impl;
- struct LazyImpl;
-
+ struct LazyImpl;
+
std::unique_ptr<Impl> impl_;
};
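
The three-step protocol documented above (Cache, then WaitFor, then Read) looks roughly as follows when driving the internal ReadRangeCache directly; normally Arrow's file readers do this on the application's behalf.

    // Minimal sketch of the Cache/WaitFor/Read protocol.
    #include <memory>
    #include <vector>
    #include "arrow/io/caching.h"
    #include "arrow/io/interfaces.h"
    #include "arrow/result.h"
    #include "arrow/status.h"

    arrow::Status CachedReads(std::shared_ptr<arrow::io::RandomAccessFile> file) {
      arrow::io::internal::ReadRangeCache cache(
          file, arrow::io::IOContext(), arrow::io::CacheOptions::Defaults());
      std::vector<arrow::io::ReadRange> ranges = {{0, 100}, {4096, 256}};
      ARROW_RETURN_NOT_OK(cache.Cache(ranges));             // 1. declare + coalesce
      ARROW_RETURN_NOT_OK(cache.WaitFor(ranges).status());  // 2. wait (or trigger, if lazy)
      ARROW_ASSIGN_OR_RAISE(auto buf, cache.Read({0, 100}));  // 3. read back
      return arrow::Status::OK();
    }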
diff --git a/contrib/libs/apache/arrow/cpp/src/arrow/io/compressed.cc b/contrib/libs/apache/arrow/cpp/src/arrow/io/compressed.cc
index 0e6f4dc339a..72977f0f297 100644
--- a/contrib/libs/apache/arrow/cpp/src/arrow/io/compressed.cc
+++ b/contrib/libs/apache/arrow/cpp/src/arrow/io/compressed.cc
@@ -342,7 +342,7 @@ class CompressedInputStream::Impl {
RETURN_NOT_OK(EnsureCompressedData());
if (compressed_pos_ == compressed_->size()) {
// No more data to decompress
- if (!fresh_decompressor_ && !decompressor_->IsFinished()) {
+ if (!fresh_decompressor_ && !decompressor_->IsFinished()) {
return Status::IOError("Truncated compressed stream");
}
*has_data = false;
@@ -437,14 +437,14 @@ Result<std::shared_ptr<Buffer>> CompressedInputStream::DoRead(int64_t nbytes) {
std::shared_ptr<InputStream> CompressedInputStream::raw() const { return impl_->raw(); }
-Result<std::shared_ptr<const KeyValueMetadata>> CompressedInputStream::ReadMetadata() {
- return impl_->raw()->ReadMetadata();
-}
-
-Future<std::shared_ptr<const KeyValueMetadata>> CompressedInputStream::ReadMetadataAsync(
- const IOContext& io_context) {
- return impl_->raw()->ReadMetadataAsync(io_context);
-}
-
+Result<std::shared_ptr<const KeyValueMetadata>> CompressedInputStream::ReadMetadata() {
+ return impl_->raw()->ReadMetadata();
+}
+
+Future<std::shared_ptr<const KeyValueMetadata>> CompressedInputStream::ReadMetadataAsync(
+ const IOContext& io_context) {
+ return impl_->raw()->ReadMetadataAsync(io_context);
+}
+
} // namespace io
} // namespace arrow
diff --git a/contrib/libs/apache/arrow/cpp/src/arrow/io/compressed.h b/contrib/libs/apache/arrow/cpp/src/arrow/io/compressed.h
index 9eb5e44139f..cd1a7f673ce 100644
--- a/contrib/libs/apache/arrow/cpp/src/arrow/io/compressed.h
+++ b/contrib/libs/apache/arrow/cpp/src/arrow/io/compressed.h
@@ -89,9 +89,9 @@ class ARROW_EXPORT CompressedInputStream
// InputStream interface
bool closed() const override;
- Result<std::shared_ptr<const KeyValueMetadata>> ReadMetadata() override;
- Future<std::shared_ptr<const KeyValueMetadata>> ReadMetadataAsync(
- const IOContext& io_context) override;
+ Result<std::shared_ptr<const KeyValueMetadata>> ReadMetadata() override;
+ Future<std::shared_ptr<const KeyValueMetadata>> ReadMetadataAsync(
+ const IOContext& io_context) override;
/// \brief Return the underlying raw input stream.
std::shared_ptr<InputStream> raw() const;
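
CompressedInputStream mirrors BufferedInputStream here: metadata is read from raw(). A hedged sketch of constructing one, assuming the Make factory taking a Codec and a raw stream:

    // Sketch: reading through a decompressing stream.
    #include <memory>
    #include "arrow/io/compressed.h"
    #include "arrow/result.h"
    #include "arrow/util/compression.h"

    arrow::Status ReadGzipped(std::shared_ptr<arrow::io::InputStream> raw) {
      ARROW_ASSIGN_OR_RAISE(auto codec,
                            arrow::util::Codec::Create(arrow::Compression::GZIP));
      ARROW_ASSIGN_OR_RAISE(auto stream, arrow::io::CompressedInputStream::Make(
                                             codec.get(), raw));
      ARROW_ASSIGN_OR_RAISE(auto chunk, stream->Read(4096));  // decompressed bytes
      return arrow::Status::OK();
    }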
diff --git a/contrib/libs/apache/arrow/cpp/src/arrow/io/file.cc b/contrib/libs/apache/arrow/cpp/src/arrow/io/file.cc
index 25308240653..70e15335af2 100644
--- a/contrib/libs/apache/arrow/cpp/src/arrow/io/file.cc
+++ b/contrib/libs/apache/arrow/cpp/src/arrow/io/file.cc
@@ -390,11 +390,11 @@ class MemoryMappedFile::MemoryMap
// An object representing the entire memory-mapped region.
// It can be sliced in order to return individual subregions, which
// will then keep the original region alive as long as necessary.
- class Region : public Buffer {
+ class Region : public Buffer {
public:
Region(std::shared_ptr<MemoryMappedFile::MemoryMap> memory_map, uint8_t* data,
int64_t size)
- : Buffer(data, size) {
+ : Buffer(data, size) {
is_mutable_ = memory_map->writable();
}
@@ -539,8 +539,8 @@ class MemoryMappedFile::MemoryMap
void advance(int64_t nbytes) { position_ = position_ + nbytes; }
- uint8_t* data() { return region_ ? region_->data() : nullptr; }
-
+ uint8_t* data() { return region_ ? region_->data() : nullptr; }
+
uint8_t* head() { return data() + position_; }
bool writable() { return file_->mode() != FileMode::READ; }
@@ -696,7 +696,7 @@ Result<std::shared_ptr<Buffer>> MemoryMappedFile::Read(int64_t nbytes) {
return buffer;
}
-Future<std::shared_ptr<Buffer>> MemoryMappedFile::ReadAsync(const IOContext&,
+Future<std::shared_ptr<Buffer>> MemoryMappedFile::ReadAsync(const IOContext&,
int64_t position,
int64_t nbytes) {
return Future<std::shared_ptr<Buffer>>::MakeFinished(ReadAt(position, nbytes));
diff --git a/contrib/libs/apache/arrow/cpp/src/arrow/io/file.h b/contrib/libs/apache/arrow/cpp/src/arrow/io/file.h
index 4447f82174f..50d4f2c4dfc 100644
--- a/contrib/libs/apache/arrow/cpp/src/arrow/io/file.h
+++ b/contrib/libs/apache/arrow/cpp/src/arrow/io/file.h
@@ -185,7 +185,7 @@ class ARROW_EXPORT MemoryMappedFile : public ReadWriteFileInterface {
Result<int64_t> ReadAt(int64_t position, int64_t nbytes, void* out) override;
// Synchronous ReadAsync override
- Future<std::shared_ptr<Buffer>> ReadAsync(const IOContext&, int64_t position,
+ Future<std::shared_ptr<Buffer>> ReadAsync(const IOContext&, int64_t position,
int64_t nbytes) override;
Status WillNeed(const std::vector<ReadRange>& ranges) override;
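
Because MemoryMappedFile::ReadAsync simply wraps a synchronous ReadAt in MakeFinished (see file.cc above), the returned future never blocks. A sketch:

    // Sketch: "async" reads on a memory-mapped file complete immediately.
    #include <string>
    #include "arrow/io/file.h"
    #include "arrow/result.h"

    arrow::Status MappedRead(const std::string& path) {
      ARROW_ASSIGN_OR_RAISE(auto mmap, arrow::io::MemoryMappedFile::Open(
                                           path, arrow::io::FileMode::READ));
      auto fut = mmap->ReadAsync(arrow::io::IOContext(), /*position=*/0,
                                 /*nbytes=*/64);
      // The future is already finished, so result() does not wait.
      ARROW_ASSIGN_OR_RAISE(auto buffer, fut.result());
      return arrow::Status::OK();
    }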
diff --git a/contrib/libs/apache/arrow/cpp/src/arrow/io/interfaces.cc b/contrib/libs/apache/arrow/cpp/src/arrow/io/interfaces.cc
index cf5d71f2cc8..954c0f37b2d 100644
--- a/contrib/libs/apache/arrow/cpp/src/arrow/io/interfaces.cc
+++ b/contrib/libs/apache/arrow/cpp/src/arrow/io/interfaces.cc
@@ -29,11 +29,11 @@
#include "arrow/buffer.h"
#include "arrow/io/concurrency.h"
-#include "arrow/io/type_fwd.h"
+#include "arrow/io/type_fwd.h"
#include "arrow/io/util_internal.h"
#include "arrow/result.h"
#include "arrow/status.h"
-#include "arrow/util/checked_cast.h"
+#include "arrow/util/checked_cast.h"
#include "arrow/util/future.h"
#include "arrow/util/iterator.h"
#include "arrow/util/logging.h"
@@ -42,36 +42,36 @@
namespace arrow {
-using internal::checked_pointer_cast;
+using internal::checked_pointer_cast;
using internal::Executor;
using internal::TaskHints;
using internal::ThreadPool;
namespace io {
-static IOContext g_default_io_context{};
+static IOContext g_default_io_context{};
-IOContext::IOContext(MemoryPool* pool, StopToken stop_token)
- : IOContext(pool, internal::GetIOThreadPool(), std::move(stop_token)) {}
+IOContext::IOContext(MemoryPool* pool, StopToken stop_token)
+ : IOContext(pool, internal::GetIOThreadPool(), std::move(stop_token)) {}
+
+const IOContext& default_io_context() { return g_default_io_context; }
+
+int GetIOThreadPoolCapacity() { return internal::GetIOThreadPool()->GetCapacity(); }
+
+Status SetIOThreadPoolCapacity(int threads) {
+ return internal::GetIOThreadPool()->SetCapacity(threads);
+}
-const IOContext& default_io_context() { return g_default_io_context; }
-
-int GetIOThreadPoolCapacity() { return internal::GetIOThreadPool()->GetCapacity(); }
-
-Status SetIOThreadPoolCapacity(int threads) {
- return internal::GetIOThreadPool()->SetCapacity(threads);
-}
-
FileInterface::~FileInterface() = default;
Status FileInterface::Abort() { return Close(); }
-namespace {
-
+namespace {
+
class InputStreamBlockIterator {
public:
InputStreamBlockIterator(std::shared_ptr<InputStream> stream, int64_t block_size)
- : stream_(std::move(stream)), block_size_(block_size) {}
+ : stream_(std::move(stream)), block_size_(block_size) {}
Result<std::shared_ptr<Buffer>> Next() {
if (done_) {
@@ -95,10 +95,10 @@ class InputStreamBlockIterator {
bool done_ = false;
};
-} // namespace
-
-const IOContext& Readable::io_context() const { return g_default_io_context; }
-
+} // namespace
+
+const IOContext& Readable::io_context() const { return g_default_io_context; }
+
Status InputStream::Advance(int64_t nbytes) { return Read(nbytes).status(); }
Result<util::string_view> InputStream::Peek(int64_t ARROW_ARG_UNUSED(nbytes)) {
@@ -107,22 +107,22 @@ Result<util::string_view> InputStream::Peek(int64_t ARROW_ARG_UNUSED(nbytes)) {
bool InputStream::supports_zero_copy() const { return false; }
-Result<std::shared_ptr<const KeyValueMetadata>> InputStream::ReadMetadata() {
- return std::shared_ptr<const KeyValueMetadata>{};
-}
-
-// Default ReadMetadataAsync() implementation: simply issue the read on the context's
-// executor
-Future<std::shared_ptr<const KeyValueMetadata>> InputStream::ReadMetadataAsync(
- const IOContext& ctx) {
- auto self = shared_from_this();
- return DeferNotOk(internal::SubmitIO(ctx, [self] { return self->ReadMetadata(); }));
-}
-
-Future<std::shared_ptr<const KeyValueMetadata>> InputStream::ReadMetadataAsync() {
- return ReadMetadataAsync(io_context());
-}
-
+Result<std::shared_ptr<const KeyValueMetadata>> InputStream::ReadMetadata() {
+ return std::shared_ptr<const KeyValueMetadata>{};
+}
+
+// Default ReadMetadataAsync() implementation: simply issue the read on the context's
+// executor
+Future<std::shared_ptr<const KeyValueMetadata>> InputStream::ReadMetadataAsync(
+ const IOContext& ctx) {
+ auto self = shared_from_this();
+ return DeferNotOk(internal::SubmitIO(ctx, [self] { return self->ReadMetadata(); }));
+}
+
+Future<std::shared_ptr<const KeyValueMetadata>> InputStream::ReadMetadataAsync() {
+ return ReadMetadataAsync(io_context());
+}
+
Result<Iterator<std::shared_ptr<Buffer>>> MakeInputStreamIterator(
std::shared_ptr<InputStream> stream, int64_t block_size) {
if (stream->closed()) {
@@ -132,13 +132,13 @@ Result<Iterator<std::shared_ptr<Buffer>>> MakeInputStreamIterator(
return Iterator<std::shared_ptr<Buffer>>(InputStreamBlockIterator(stream, block_size));
}
-struct RandomAccessFile::Impl {
+struct RandomAccessFile::Impl {
std::mutex lock_;
};
RandomAccessFile::~RandomAccessFile() = default;
-RandomAccessFile::RandomAccessFile() : interface_impl_(new Impl()) {}
+RandomAccessFile::RandomAccessFile() : interface_impl_(new Impl()) {}
Result<int64_t> RandomAccessFile::ReadAt(int64_t position, int64_t nbytes, void* out) {
std::lock_guard<std::mutex> lock(interface_impl_->lock_);
@@ -154,26 +154,26 @@ Result<std::shared_ptr<Buffer>> RandomAccessFile::ReadAt(int64_t position,
}
// Default ReadAsync() implementation: simply issue the read on the context's executor
-Future<std::shared_ptr<Buffer>> RandomAccessFile::ReadAsync(const IOContext& ctx,
+Future<std::shared_ptr<Buffer>> RandomAccessFile::ReadAsync(const IOContext& ctx,
int64_t position,
int64_t nbytes) {
- auto self = checked_pointer_cast<RandomAccessFile>(shared_from_this());
- return DeferNotOk(internal::SubmitIO(
- ctx, [self, position, nbytes] { return self->ReadAt(position, nbytes); }));
+ auto self = checked_pointer_cast<RandomAccessFile>(shared_from_this());
+ return DeferNotOk(internal::SubmitIO(
+ ctx, [self, position, nbytes] { return self->ReadAt(position, nbytes); }));
+}
+
+Future<std::shared_ptr<Buffer>> RandomAccessFile::ReadAsync(int64_t position,
+ int64_t nbytes) {
+ return ReadAsync(io_context(), position, nbytes);
}
-Future<std::shared_ptr<Buffer>> RandomAccessFile::ReadAsync(int64_t position,
- int64_t nbytes) {
- return ReadAsync(io_context(), position, nbytes);
-}
-
// Default WillNeed() implementation: no-op
Status RandomAccessFile::WillNeed(const std::vector<ReadRange>& ranges) {
return Status::OK();
}
-Status Writable::Write(util::string_view data) {
- return Write(data.data(), static_cast<int64_t>(data.size()));
+Status Writable::Write(util::string_view data) {
+ return Write(data.data(), static_cast<int64_t>(data.size()));
}
Status Writable::Write(const std::shared_ptr<Buffer>& data) {
@@ -380,15 +380,15 @@ struct ReadRangeCombiner {
auto end = std::remove_if(ranges.begin(), ranges.end(),
[](const ReadRange& range) { return range.length == 0; });
// Sort in position order
- std::sort(ranges.begin(), end,
+ std::sort(ranges.begin(), end,
[](const ReadRange& a, const ReadRange& b) { return a.offset < b.offset; });
- // Remove ranges that overlap 100%
- end = std::unique(ranges.begin(), end,
- [](const ReadRange& left, const ReadRange& right) {
- return right.offset >= left.offset &&
- right.offset + right.length <= left.offset + left.length;
- });
- ranges.resize(end - ranges.begin());
+ // Remove ranges that overlap 100%
+ end = std::unique(ranges.begin(), end,
+ [](const ReadRange& left, const ReadRange& right) {
+ return right.offset >= left.offset &&
+ right.offset + right.length <= left.offset + left.length;
+ });
+ ranges.resize(end - ranges.begin());
// Skip further processing if ranges is empty after removing zero-sized ranges.
if (ranges.empty()) {
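
The thread-pool helpers defined near the top of this file are the supported way to size the global IO pool. A short sketch, assuming their declarations live in arrow/io/interfaces.h:

    // Sketch: resizing the global IO thread pool.
    #include "arrow/io/interfaces.h"

    arrow::Status GrowIOPool() {
      int current = arrow::io::GetIOThreadPoolCapacity();
      // Forwards to the pool's SetCapacity(), per the definition above.
      return arrow::io::SetIOThreadPoolCapacity(current * 2);
    }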
diff --git a/contrib/libs/apache/arrow/cpp/src/arrow/io/interfaces.h b/contrib/libs/apache/arrow/cpp/src/arrow/io/interfaces.h
index 1459b173d89..e524afa99a3 100644
--- a/contrib/libs/apache/arrow/cpp/src/arrow/io/interfaces.h
+++ b/contrib/libs/apache/arrow/cpp/src/arrow/io/interfaces.h
@@ -24,7 +24,7 @@
#include "arrow/io/type_fwd.h"
#include "arrow/type_fwd.h"
-#include "arrow/util/cancel.h"
+#include "arrow/util/cancel.h"
#include "arrow/util/macros.h"
#include "arrow/util/string_view.h"
#include "arrow/util/type_fwd.h"
@@ -49,57 +49,57 @@ struct ReadRange {
}
};
-/// EXPERIMENTAL: options provider for IO tasks
-///
-/// Includes an Executor (which will be used to execute asynchronous reads),
-/// a MemoryPool (which will be used to allocate buffers when zero copy reads
-/// are not possible), and an external id (in case the executor receives tasks from
-/// multiple sources and must distinguish tasks associated with this IOContext).
-struct ARROW_EXPORT IOContext {
- // No specified executor: will use a global IO thread pool
- IOContext() : IOContext(default_memory_pool(), StopToken::Unstoppable()) {}
-
- explicit IOContext(StopToken stop_token)
- : IOContext(default_memory_pool(), std::move(stop_token)) {}
-
- explicit IOContext(MemoryPool* pool, StopToken stop_token = StopToken::Unstoppable());
-
- explicit IOContext(MemoryPool* pool, ::arrow::internal::Executor* executor,
- StopToken stop_token = StopToken::Unstoppable(),
- int64_t external_id = -1)
- : pool_(pool),
- executor_(executor),
- external_id_(external_id),
- stop_token_(std::move(stop_token)) {}
-
- explicit IOContext(::arrow::internal::Executor* executor,
- StopToken stop_token = StopToken::Unstoppable(),
- int64_t external_id = -1)
- : pool_(default_memory_pool()),
- executor_(executor),
- external_id_(external_id),
- stop_token_(std::move(stop_token)) {}
-
- MemoryPool* pool() const { return pool_; }
-
- ::arrow::internal::Executor* executor() const { return executor_; }
-
+/// EXPERIMENTAL: options provider for IO tasks
+///
+/// Includes an Executor (which will be used to execute asynchronous reads),
+/// a MemoryPool (which will be used to allocate buffers when zero copy reads
+/// are not possible), and an external id (in case the executor receives tasks from
+/// multiple sources and must distinguish tasks associated with this IOContext).
+struct ARROW_EXPORT IOContext {
+ // No specified executor: will use a global IO thread pool
+ IOContext() : IOContext(default_memory_pool(), StopToken::Unstoppable()) {}
+
+ explicit IOContext(StopToken stop_token)
+ : IOContext(default_memory_pool(), std::move(stop_token)) {}
+
+ explicit IOContext(MemoryPool* pool, StopToken stop_token = StopToken::Unstoppable());
+
+ explicit IOContext(MemoryPool* pool, ::arrow::internal::Executor* executor,
+ StopToken stop_token = StopToken::Unstoppable(),
+ int64_t external_id = -1)
+ : pool_(pool),
+ executor_(executor),
+ external_id_(external_id),
+ stop_token_(std::move(stop_token)) {}
+
+ explicit IOContext(::arrow::internal::Executor* executor,
+ StopToken stop_token = StopToken::Unstoppable(),
+ int64_t external_id = -1)
+ : pool_(default_memory_pool()),
+ executor_(executor),
+ external_id_(external_id),
+ stop_token_(std::move(stop_token)) {}
+
+ MemoryPool* pool() const { return pool_; }
+
+ ::arrow::internal::Executor* executor() const { return executor_; }
+
// An application-specific ID, forwarded to executor task submissions
- int64_t external_id() const { return external_id_; }
-
- StopToken stop_token() const { return stop_token_; }
-
- private:
- MemoryPool* pool_;
- ::arrow::internal::Executor* executor_;
- int64_t external_id_;
- StopToken stop_token_;
+ int64_t external_id() const { return external_id_; }
+
+ StopToken stop_token() const { return stop_token_; }
+
+ private:
+ MemoryPool* pool_;
+ ::arrow::internal::Executor* executor_;
+ int64_t external_id_;
+ StopToken stop_token_;
+};
+
+struct ARROW_DEPRECATED("renamed to IOContext in 4.0.0") AsyncContext : public IOContext {
+ using IOContext::IOContext;
};
-struct ARROW_DEPRECATED("renamed to IOContext in 4.0.0") AsyncContext : public IOContext {
- using IOContext::IOContext;
-};
-
class ARROW_EXPORT FileInterface {
public:
virtual ~FileInterface() = 0;
@@ -168,7 +168,7 @@ class ARROW_EXPORT Writable {
/// \brief Flush buffered bytes, if any
virtual Status Flush();
- Status Write(util::string_view data);
+ Status Write(util::string_view data);
};
class ARROW_EXPORT Readable {
@@ -189,12 +189,12 @@ class ARROW_EXPORT Readable {
/// In some cases (e.g. a memory-mapped file), this method may avoid a
/// memory copy.
virtual Result<std::shared_ptr<Buffer>> Read(int64_t nbytes) = 0;
-
- /// EXPERIMENTAL: The IOContext associated with this file.
- ///
- /// By default, this is the same as default_io_context(), but it may be
- /// overridden by subclasses.
- virtual const IOContext& io_context() const;
+
+ /// EXPERIMENTAL: The IOContext associated with this file.
+ ///
+ /// By default, this is the same as default_io_context(), but it may be
+ /// overridden by subclasses.
+ virtual const IOContext& io_context() const;
};
class ARROW_EXPORT OutputStream : virtual public FileInterface, public Writable {
@@ -202,9 +202,9 @@ class ARROW_EXPORT OutputStream : virtual public FileInterface, public Writable
OutputStream() = default;
};
-class ARROW_EXPORT InputStream : virtual public FileInterface,
- virtual public Readable,
- public std::enable_shared_from_this<InputStream> {
+class ARROW_EXPORT InputStream : virtual public FileInterface,
+ virtual public Readable,
+ public std::enable_shared_from_this<InputStream> {
public:
  /// \brief Advance or skip the stream by the indicated number of bytes
  /// \param[in] nbytes the number of bytes to move forward
@@ -227,23 +227,23 @@ class ARROW_EXPORT InputStream : virtual public FileInterface,
/// Zero copy reads imply the use of Buffer-returning Read() overloads.
virtual bool supports_zero_copy() const;
- /// \brief Read and return stream metadata
- ///
- /// If the stream implementation doesn't support metadata, empty metadata
- /// is returned. Note that it is allowed to return a null pointer rather
- /// than an allocated empty metadata.
- virtual Result<std::shared_ptr<const KeyValueMetadata>> ReadMetadata();
-
- /// \brief Read stream metadata asynchronously
- virtual Future<std::shared_ptr<const KeyValueMetadata>> ReadMetadataAsync(
- const IOContext& io_context);
- Future<std::shared_ptr<const KeyValueMetadata>> ReadMetadataAsync();
-
+ /// \brief Read and return stream metadata
+ ///
+ /// If the stream implementation doesn't support metadata, empty metadata
+ /// is returned. Note that it is allowed to return a null pointer rather
+ /// than an allocated empty metadata.
+ virtual Result<std::shared_ptr<const KeyValueMetadata>> ReadMetadata();
+
+ /// \brief Read stream metadata asynchronously
+ virtual Future<std::shared_ptr<const KeyValueMetadata>> ReadMetadataAsync(
+ const IOContext& io_context);
+ Future<std::shared_ptr<const KeyValueMetadata>> ReadMetadataAsync();
+
protected:
InputStream() = default;
};
-class ARROW_EXPORT RandomAccessFile : public InputStream, public Seekable {
+class ARROW_EXPORT RandomAccessFile : public InputStream, public Seekable {
public:
/// Necessary because we hold a std::unique_ptr
~RandomAccessFile() override;
@@ -292,12 +292,12 @@ class ARROW_EXPORT RandomAccessFile : public InputStream, public Seekable {
virtual Result<std::shared_ptr<Buffer>> ReadAt(int64_t position, int64_t nbytes);
/// EXPERIMENTAL: Read data asynchronously.
- virtual Future<std::shared_ptr<Buffer>> ReadAsync(const IOContext&, int64_t position,
+ virtual Future<std::shared_ptr<Buffer>> ReadAsync(const IOContext&, int64_t position,
int64_t nbytes);
- /// EXPERIMENTAL: Read data asynchronously, using the file's IOContext.
- Future<std::shared_ptr<Buffer>> ReadAsync(int64_t position, int64_t nbytes);
-
+ /// EXPERIMENTAL: Read data asynchronously, using the file's IOContext.
+ Future<std::shared_ptr<Buffer>> ReadAsync(int64_t position, int64_t nbytes);
+
/// EXPERIMENTAL: Inform that the given ranges may be read soon.
///
/// Some implementations might arrange to prefetch some of the data.
@@ -309,8 +309,8 @@ class ARROW_EXPORT RandomAccessFile : public InputStream, public Seekable {
RandomAccessFile();
private:
- struct ARROW_NO_EXPORT Impl;
- std::unique_ptr<Impl> interface_impl_;
+ struct ARROW_NO_EXPORT Impl;
+ std::unique_ptr<Impl> interface_impl_;
};
class ARROW_EXPORT WritableFile : public OutputStream, public Seekable {
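
The IOContext constructors declared above compose a memory pool, an executor, a stop token, and an external id. A hedged sketch wiring a custom thread pool and stop source into an async read:

    // Sketch: an IOContext with a custom executor and stop token.
    #include <memory>
    #include "arrow/io/interfaces.h"
    #include "arrow/memory_pool.h"
    #include "arrow/result.h"
    #include "arrow/util/cancel.h"
    #include "arrow/util/thread_pool.h"

    arrow::Status AsyncReadWithContext(
        std::shared_ptr<arrow::io::RandomAccessFile> file) {
      ARROW_ASSIGN_OR_RAISE(auto pool, arrow::internal::ThreadPool::Make(4));
      arrow::StopSource stop_source;
      arrow::io::IOContext ctx(arrow::default_memory_pool(), pool.get(),
                               stop_source.token(), /*external_id=*/42);
      auto fut = file->ReadAsync(ctx, /*position=*/0, /*nbytes=*/128);
      ARROW_ASSIGN_OR_RAISE(auto buffer, fut.result());  // blocks until done
      return arrow::Status::OK();
    }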
diff --git a/contrib/libs/apache/arrow/cpp/src/arrow/io/memory.cc b/contrib/libs/apache/arrow/cpp/src/arrow/io/memory.cc
index b52c456fd89..6495242e63b 100644
--- a/contrib/libs/apache/arrow/cpp/src/arrow/io/memory.cc
+++ b/contrib/libs/apache/arrow/cpp/src/arrow/io/memory.cc
@@ -261,10 +261,10 @@ void FixedSizeBufferWriter::set_memcopy_threshold(int64_t threshold) {
// ----------------------------------------------------------------------
// In-memory buffer reader
-BufferReader::BufferReader(std::shared_ptr<Buffer> buffer)
- : buffer_(std::move(buffer)),
- data_(buffer_ ? buffer_->data() : reinterpret_cast<const uint8_t*>("")),
- size_(buffer_ ? buffer_->size() : 0),
+BufferReader::BufferReader(std::shared_ptr<Buffer> buffer)
+ : buffer_(std::move(buffer)),
+ data_(buffer_ ? buffer_->data() : reinterpret_cast<const uint8_t*>("")),
+ size_(buffer_ ? buffer_->size() : 0),
position_(0),
is_open_(true) {}
@@ -320,7 +320,7 @@ Status BufferReader::WillNeed(const std::vector<ReadRange>& ranges) {
return st;
}
-Future<std::shared_ptr<Buffer>> BufferReader::ReadAsync(const IOContext&,
+Future<std::shared_ptr<Buffer>> BufferReader::ReadAsync(const IOContext&,
int64_t position,
int64_t nbytes) {
return Future<std::shared_ptr<Buffer>>::MakeFinished(DoReadAt(position, nbytes));
@@ -344,8 +344,8 @@ Result<std::shared_ptr<Buffer>> BufferReader::DoReadAt(int64_t position, int64_t
DCHECK_GE(nbytes, 0);
// Arrange for data to be paged in
- // RETURN_NOT_OK(::arrow::internal::MemoryAdviseWillNeed(
- // {{const_cast<uint8_t*>(data_ + position), static_cast<size_t>(nbytes)}}));
+ // RETURN_NOT_OK(::arrow::internal::MemoryAdviseWillNeed(
+ // {{const_cast<uint8_t*>(data_ + position), static_cast<size_t>(nbytes)}}));
if (nbytes > 0 && buffer_ != nullptr) {
return SliceBuffer(buffer_, position, nbytes);
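BufferReader::ReadAsync above wraps the synchronous DoReadAt() result in an already-finished future, so awaiting it never blocks on real I/O. A hedged sketch of what that means for a caller (assumed usage, not taken from this diff):

    #include "arrow/buffer.h"
    #include "arrow/io/memory.h"

    arrow::Result<std::shared_ptr<arrow::Buffer>> SliceViaAsync(
        const std::shared_ptr<arrow::Buffer>& buf) {
      arrow::io::BufferReader reader(buf);
      // The future is created with MakeFinished, so result() returns at once,
      // yielding a zero-copy slice of the underlying buffer.
      auto fut = reader.ReadAsync(/*position=*/0, /*nbytes=*/buf->size());
      return fut.result();
    }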
diff --git a/contrib/libs/apache/arrow/cpp/src/arrow/io/memory.h b/contrib/libs/apache/arrow/cpp/src/arrow/io/memory.h
index ff9e179d862..8213439ef74 100644
--- a/contrib/libs/apache/arrow/cpp/src/arrow/io/memory.h
+++ b/contrib/libs/apache/arrow/cpp/src/arrow/io/memory.h
@@ -88,7 +88,7 @@ class ARROW_EXPORT BufferOutputStream : public OutputStream {
uint8_t* mutable_data_;
};
-/// \brief A helper class to track the size of allocations
+/// \brief A helper class to track the size of allocations
///
/// Writes to this stream do not copy or retain any data, they just bump
/// a size counter that can be later used to know exactly which data size
@@ -145,7 +145,7 @@ class ARROW_EXPORT FixedSizeBufferWriter : public WritableFile {
class ARROW_EXPORT BufferReader
: public internal::RandomAccessFileConcurrencyWrapper<BufferReader> {
public:
- explicit BufferReader(std::shared_ptr<Buffer> buffer);
+ explicit BufferReader(std::shared_ptr<Buffer> buffer);
explicit BufferReader(const Buffer& buffer);
BufferReader(const uint8_t* data, int64_t size);
@@ -160,7 +160,7 @@ class ARROW_EXPORT BufferReader
std::shared_ptr<Buffer> buffer() const { return buffer_; }
// Synchronous ReadAsync override
- Future<std::shared_ptr<Buffer>> ReadAsync(const IOContext&, int64_t position,
+ Future<std::shared_ptr<Buffer>> ReadAsync(const IOContext&, int64_t position,
int64_t nbytes) override;
Status WillNeed(const std::vector<ReadRange>& ranges) override;
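The std::shared_ptr constructor declared above keeps the buffer alive for the reader's lifetime, unlike the const Buffer& and raw-pointer overloads. A small usage sketch under that assumption:

    #include "arrow/buffer.h"
    #include "arrow/io/memory.h"
    #include "arrow/result.h"

    arrow::Status ReadBack() {
      std::shared_ptr<arrow::Buffer> buf = arrow::Buffer::FromString("hello world");
      arrow::io::BufferReader reader(buf);  // shared_ptr overload: owns a reference
      ARROW_ASSIGN_OR_RAISE(auto head, reader.Read(5));  // zero-copy slice "hello"
      return reader.Close();
    }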
diff --git a/contrib/libs/apache/arrow/cpp/src/arrow/io/stdio.cc b/contrib/libs/apache/arrow/cpp/src/arrow/io/stdio.cc
index 48ac06de186..7ef4843a224 100644
--- a/contrib/libs/apache/arrow/cpp/src/arrow/io/stdio.cc
+++ b/contrib/libs/apache/arrow/cpp/src/arrow/io/stdio.cc
@@ -1,95 +1,95 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-#include "arrow/io/stdio.h"
-
-#include <iostream>
-
-#include "arrow/buffer.h"
-#include "arrow/result.h"
-
-namespace arrow {
-namespace io {
-
-//
-// StdoutStream implementation
-//
-
-StdoutStream::StdoutStream() : pos_(0) { set_mode(FileMode::WRITE); }
-
-Status StdoutStream::Close() { return Status::OK(); }
-
-bool StdoutStream::closed() const { return false; }
-
-Result<int64_t> StdoutStream::Tell() const { return pos_; }
-
-Status StdoutStream::Write(const void* data, int64_t nbytes) {
- pos_ += nbytes;
- std::cout.write(reinterpret_cast<const char*>(data), nbytes);
- return Status::OK();
-}
-
-//
-// StderrStream implementation
-//
-
-StderrStream::StderrStream() : pos_(0) { set_mode(FileMode::WRITE); }
-
-Status StderrStream::Close() { return Status::OK(); }
-
-bool StderrStream::closed() const { return false; }
-
-Result<int64_t> StderrStream::Tell() const { return pos_; }
-
-Status StderrStream::Write(const void* data, int64_t nbytes) {
- pos_ += nbytes;
- std::cerr.write(reinterpret_cast<const char*>(data), nbytes);
- return Status::OK();
-}
-
-//
-// StdinStream implementation
-//
-
-StdinStream::StdinStream() : pos_(0) { set_mode(FileMode::READ); }
-
-Status StdinStream::Close() { return Status::OK(); }
-
-bool StdinStream::closed() const { return false; }
-
-Result<int64_t> StdinStream::Tell() const { return pos_; }
-
-Result<int64_t> StdinStream::Read(int64_t nbytes, void* out) {
- std::cin.read(reinterpret_cast<char*>(out), nbytes);
- if (std::cin) {
- pos_ += nbytes;
- return nbytes;
- } else {
- return 0;
- }
-}
-
-Result<std::shared_ptr<Buffer>> StdinStream::Read(int64_t nbytes) {
- ARROW_ASSIGN_OR_RAISE(auto buffer, AllocateResizableBuffer(nbytes));
- ARROW_ASSIGN_OR_RAISE(int64_t bytes_read, Read(nbytes, buffer->mutable_data()));
- ARROW_RETURN_NOT_OK(buffer->Resize(bytes_read, false));
- buffer->ZeroPadding();
- return std::move(buffer);
-}
-
-} // namespace io
-} // namespace arrow
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "arrow/io/stdio.h"
+
+#include <iostream>
+
+#include "arrow/buffer.h"
+#include "arrow/result.h"
+
+namespace arrow {
+namespace io {
+
+//
+// StdoutStream implementation
+//
+
+StdoutStream::StdoutStream() : pos_(0) { set_mode(FileMode::WRITE); }
+
+Status StdoutStream::Close() { return Status::OK(); }
+
+bool StdoutStream::closed() const { return false; }
+
+Result<int64_t> StdoutStream::Tell() const { return pos_; }
+
+Status StdoutStream::Write(const void* data, int64_t nbytes) {
+ pos_ += nbytes;
+ std::cout.write(reinterpret_cast<const char*>(data), nbytes);
+ return Status::OK();
+}
+
+//
+// StderrStream implementation
+//
+
+StderrStream::StderrStream() : pos_(0) { set_mode(FileMode::WRITE); }
+
+Status StderrStream::Close() { return Status::OK(); }
+
+bool StderrStream::closed() const { return false; }
+
+Result<int64_t> StderrStream::Tell() const { return pos_; }
+
+Status StderrStream::Write(const void* data, int64_t nbytes) {
+ pos_ += nbytes;
+ std::cerr.write(reinterpret_cast<const char*>(data), nbytes);
+ return Status::OK();
+}
+
+//
+// StdinStream implementation
+//
+
+StdinStream::StdinStream() : pos_(0) { set_mode(FileMode::READ); }
+
+Status StdinStream::Close() { return Status::OK(); }
+
+bool StdinStream::closed() const { return false; }
+
+Result<int64_t> StdinStream::Tell() const { return pos_; }
+
+Result<int64_t> StdinStream::Read(int64_t nbytes, void* out) {
+ std::cin.read(reinterpret_cast<char*>(out), nbytes);
+ if (std::cin) {
+ pos_ += nbytes;
+ return nbytes;
+ } else {
+ return 0;
+ }
+}
+
+Result<std::shared_ptr<Buffer>> StdinStream::Read(int64_t nbytes) {
+ ARROW_ASSIGN_OR_RAISE(auto buffer, AllocateResizableBuffer(nbytes));
+ ARROW_ASSIGN_OR_RAISE(int64_t bytes_read, Read(nbytes, buffer->mutable_data()));
+ ARROW_RETURN_NOT_OK(buffer->Resize(bytes_read, false));
+ buffer->ZeroPadding();
+ return std::move(buffer);
+}
+
+} // namespace io
+} // namespace arrow
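StdinStream::Read above reports 0 bytes once std::cin can no longer satisfy a full read, which gives a natural loop exit. A sketch of piping stdin to stdout with these classes (assumed usage; CopyStdinToStdout is a hypothetical helper):

    #include "arrow/buffer.h"
    #include "arrow/io/stdio.h"
    #include "arrow/result.h"
    #include "arrow/status.h"

    arrow::Status CopyStdinToStdout() {
      arrow::io::StdinStream in;
      arrow::io::StdoutStream out;
      while (true) {
        ARROW_ASSIGN_OR_RAISE(auto chunk, in.Read(4096));
        if (chunk->size() == 0) break;  // 0 once std::cin stops being readable
        ARROW_RETURN_NOT_OK(out.Write(chunk->data(), chunk->size()));
      }
      return arrow::Status::OK();
    }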
diff --git a/contrib/libs/apache/arrow/cpp/src/arrow/io/stdio.h b/contrib/libs/apache/arrow/cpp/src/arrow/io/stdio.h
index 6df07d670af..9484ac77124 100644
--- a/contrib/libs/apache/arrow/cpp/src/arrow/io/stdio.h
+++ b/contrib/libs/apache/arrow/cpp/src/arrow/io/stdio.h
@@ -1,82 +1,82 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-#pragma once
-
-#include <cstdint>
-
-#include "arrow/io/interfaces.h"
-#include "arrow/util/visibility.h"
-
-namespace arrow {
-namespace io {
-
-// Output stream that just writes to stdout.
-class ARROW_EXPORT StdoutStream : public OutputStream {
- public:
- StdoutStream();
- ~StdoutStream() override {}
-
- Status Close() override;
- bool closed() const override;
-
- Result<int64_t> Tell() const override;
-
- Status Write(const void* data, int64_t nbytes) override;
-
- private:
- int64_t pos_;
-};
-
-// Output stream that just writes to stderr.
-class ARROW_EXPORT StderrStream : public OutputStream {
- public:
- StderrStream();
- ~StderrStream() override {}
-
- Status Close() override;
- bool closed() const override;
-
- Result<int64_t> Tell() const override;
-
- Status Write(const void* data, int64_t nbytes) override;
-
- private:
- int64_t pos_;
-};
-
-// Input stream that just reads from stdin.
-class ARROW_EXPORT StdinStream : public InputStream {
- public:
- StdinStream();
- ~StdinStream() override {}
-
- Status Close() override;
- bool closed() const override;
-
- Result<int64_t> Tell() const override;
-
- Result<int64_t> Read(int64_t nbytes, void* out) override;
-
- Result<std::shared_ptr<Buffer>> Read(int64_t nbytes) override;
-
- private:
- int64_t pos_;
-};
-
-} // namespace io
-} // namespace arrow
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+
+#include <cstdint>
+
+#include "arrow/io/interfaces.h"
+#include "arrow/util/visibility.h"
+
+namespace arrow {
+namespace io {
+
+// Output stream that just writes to stdout.
+class ARROW_EXPORT StdoutStream : public OutputStream {
+ public:
+ StdoutStream();
+ ~StdoutStream() override {}
+
+ Status Close() override;
+ bool closed() const override;
+
+ Result<int64_t> Tell() const override;
+
+ Status Write(const void* data, int64_t nbytes) override;
+
+ private:
+ int64_t pos_;
+};
+
+// Output stream that just writes to stderr.
+class ARROW_EXPORT StderrStream : public OutputStream {
+ public:
+ StderrStream();
+ ~StderrStream() override {}
+
+ Status Close() override;
+ bool closed() const override;
+
+ Result<int64_t> Tell() const override;
+
+ Status Write(const void* data, int64_t nbytes) override;
+
+ private:
+ int64_t pos_;
+};
+
+// Input stream that just reads from stdin.
+class ARROW_EXPORT StdinStream : public InputStream {
+ public:
+ StdinStream();
+ ~StdinStream() override {}
+
+ Status Close() override;
+ bool closed() const override;
+
+ Result<int64_t> Tell() const override;
+
+ Result<int64_t> Read(int64_t nbytes, void* out) override;
+
+ Result<std::shared_ptr<Buffer>> Read(int64_t nbytes) override;
+
+ private:
+ int64_t pos_;
+};
+
+} // namespace io
+} // namespace arrow
diff --git a/contrib/libs/apache/arrow/cpp/src/arrow/io/transform.cc b/contrib/libs/apache/arrow/cpp/src/arrow/io/transform.cc
index 50198ad20ef..3fdf5a7a9ba 100644
--- a/contrib/libs/apache/arrow/cpp/src/arrow/io/transform.cc
+++ b/contrib/libs/apache/arrow/cpp/src/arrow/io/transform.cc
@@ -145,18 +145,18 @@ Result<int64_t> TransformInputStream::Tell() const {
return impl_->pos_;
}
-Result<std::shared_ptr<const KeyValueMetadata>> TransformInputStream::ReadMetadata() {
- RETURN_NOT_OK(impl_->CheckClosed());
-
- return impl_->wrapped_->ReadMetadata();
-}
-
-Future<std::shared_ptr<const KeyValueMetadata>> TransformInputStream::ReadMetadataAsync(
- const IOContext& io_context) {
- RETURN_NOT_OK(impl_->CheckClosed());
-
- return impl_->wrapped_->ReadMetadataAsync(io_context);
-}
-
+Result<std::shared_ptr<const KeyValueMetadata>> TransformInputStream::ReadMetadata() {
+ RETURN_NOT_OK(impl_->CheckClosed());
+
+ return impl_->wrapped_->ReadMetadata();
+}
+
+Future<std::shared_ptr<const KeyValueMetadata>> TransformInputStream::ReadMetadataAsync(
+ const IOContext& io_context) {
+ RETURN_NOT_OK(impl_->CheckClosed());
+
+ return impl_->wrapped_->ReadMetadataAsync(io_context);
+}
+
} // namespace io
} // namespace arrow
diff --git a/contrib/libs/apache/arrow/cpp/src/arrow/io/transform.h b/contrib/libs/apache/arrow/cpp/src/arrow/io/transform.h
index 6ecaa6d6101..c117f275929 100644
--- a/contrib/libs/apache/arrow/cpp/src/arrow/io/transform.h
+++ b/contrib/libs/apache/arrow/cpp/src/arrow/io/transform.h
@@ -45,10 +45,10 @@ class ARROW_EXPORT TransformInputStream : public InputStream {
Result<int64_t> Read(int64_t nbytes, void* out) override;
Result<std::shared_ptr<Buffer>> Read(int64_t nbytes) override;
- Result<std::shared_ptr<const KeyValueMetadata>> ReadMetadata() override;
- Future<std::shared_ptr<const KeyValueMetadata>> ReadMetadataAsync(
- const IOContext& io_context) override;
-
+ Result<std::shared_ptr<const KeyValueMetadata>> ReadMetadata() override;
+ Future<std::shared_ptr<const KeyValueMetadata>> ReadMetadataAsync(
+ const IOContext& io_context) override;
+
Result<int64_t> Tell() const override;
protected:
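As the ReadMetadata()/ReadMetadataAsync() overrides above show, TransformInputStream transforms the data path but forwards metadata calls to the wrapped stream. A hedged sketch with an identity transform, assuming the class's documented constructor shape (wrapped stream plus transform function):

    #include <memory>
    #include "arrow/buffer.h"
    #include "arrow/io/transform.h"
    #include "arrow/result.h"

    std::shared_ptr<arrow::io::InputStream> WrapIdentity(
        std::shared_ptr<arrow::io::InputStream> wrapped) {
      auto identity = [](const std::shared_ptr<arrow::Buffer>& buf)
          -> arrow::Result<std::shared_ptr<arrow::Buffer>> { return buf; };
      // Metadata calls on the wrapper delegate to `wrapped`; only the data
      // path goes through the transform.
      return std::make_shared<arrow::io::TransformInputStream>(std::move(wrapped),
                                                               identity);
    }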
diff --git a/contrib/libs/apache/arrow/cpp/src/arrow/io/type_fwd.h b/contrib/libs/apache/arrow/cpp/src/arrow/io/type_fwd.h
index 632616de1fe..a2fd33bf360 100644
--- a/contrib/libs/apache/arrow/cpp/src/arrow/io/type_fwd.h
+++ b/contrib/libs/apache/arrow/cpp/src/arrow/io/type_fwd.h
@@ -17,9 +17,9 @@
#pragma once
-#include "arrow/type_fwd.h"
-#include "arrow/util/visibility.h"
-
+#include "arrow/type_fwd.h"
+#include "arrow/util/visibility.h"
+
namespace arrow {
namespace io {
@@ -27,30 +27,30 @@ struct FileMode {
enum type { READ, WRITE, READWRITE };
};
-struct IOContext;
-struct CacheOptions;
-
-/// EXPERIMENTAL: convenience global singleton for default IOContext settings
-ARROW_EXPORT
-const IOContext& default_io_context();
-
-/// \brief Get the capacity of the global I/O thread pool
-///
-/// Return the number of worker threads in the thread pool to which
-/// Arrow dispatches various I/O-bound tasks. This is an ideal number,
-/// not necessarily the exact number of threads at a given point in time.
-///
-/// You can change this number using SetIOThreadPoolCapacity().
-ARROW_EXPORT int GetIOThreadPoolCapacity();
-
-/// \brief Set the capacity of the global I/O thread pool
-///
-/// Set the number of worker threads in the thread pool to which
-/// Arrow dispatches various I/O-bound tasks.
-///
-/// The current number is returned by GetIOThreadPoolCapacity().
-ARROW_EXPORT Status SetIOThreadPoolCapacity(int threads);
-
+struct IOContext;
+struct CacheOptions;
+
+/// EXPERIMENTAL: convenience global singleton for default IOContext settings
+ARROW_EXPORT
+const IOContext& default_io_context();
+
+/// \brief Get the capacity of the global I/O thread pool
+///
+/// Return the number of worker threads in the thread pool to which
+/// Arrow dispatches various I/O-bound tasks. This is an ideal number,
+/// not necessarily the exact number of threads at a given point in time.
+///
+/// You can change this number using SetIOThreadPoolCapacity().
+ARROW_EXPORT int GetIOThreadPoolCapacity();
+
+/// \brief Set the capacity of the global I/O thread pool
+///
+/// Set the number of worker threads in the thread pool to which
+/// Arrow dispatches various I/O-bound tasks.
+///
+/// The current number is returned by GetIOThreadPoolCapacity().
+ARROW_EXPORT Status SetIOThreadPoolCapacity(int threads);
+
class FileInterface;
class Seekable;
class Writable;
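GetIOThreadPoolCapacity() and SetIOThreadPoolCapacity() declared above are the knobs for the global I/O pool; a short usage sketch (UseWiderIOPool is a hypothetical helper):

    #include "arrow/io/type_fwd.h"
    #include "arrow/status.h"

    arrow::Status UseWiderIOPool() {
      // The getter reports the ideal capacity, not the instantaneous
      // number of live threads.
      ARROW_RETURN_NOT_OK(arrow::io::SetIOThreadPoolCapacity(16));
      int capacity = arrow::io::GetIOThreadPoolCapacity();
      return capacity == 16 ? arrow::Status::OK()
                            : arrow::Status::UnknownError("capacity not applied");
    }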
diff --git a/contrib/libs/apache/arrow/cpp/src/arrow/io/util_internal.h b/contrib/libs/apache/arrow/cpp/src/arrow/io/util_internal.h
index dc9d6781ada..b1d75d1d0bd 100644
--- a/contrib/libs/apache/arrow/cpp/src/arrow/io/util_internal.h
+++ b/contrib/libs/apache/arrow/cpp/src/arrow/io/util_internal.h
@@ -18,11 +18,11 @@
#pragma once
#include <memory>
-#include <utility>
+#include <utility>
#include <vector>
#include "arrow/io/interfaces.h"
-#include "arrow/util/thread_pool.h"
+#include "arrow/util/thread_pool.h"
#include "arrow/util/type_fwd.h"
#include "arrow/util/visibility.h"
@@ -52,15 +52,15 @@ std::vector<ReadRange> CoalesceReadRanges(std::vector<ReadRange> ranges,
ARROW_EXPORT
::arrow::internal::ThreadPool* GetIOThreadPool();
-template <typename... SubmitArgs>
-auto SubmitIO(IOContext io_context, SubmitArgs&&... submit_args)
- -> decltype(std::declval<::arrow::internal::Executor*>()->Submit(submit_args...)) {
- ::arrow::internal::TaskHints hints;
- hints.external_id = io_context.external_id();
- return io_context.executor()->Submit(hints, io_context.stop_token(),
- std::forward<SubmitArgs>(submit_args)...);
-}
-
+template <typename... SubmitArgs>
+auto SubmitIO(IOContext io_context, SubmitArgs&&... submit_args)
+ -> decltype(std::declval<::arrow::internal::Executor*>()->Submit(submit_args...)) {
+ ::arrow::internal::TaskHints hints;
+ hints.external_id = io_context.external_id();
+ return io_context.executor()->Submit(hints, io_context.stop_token(),
+ std::forward<SubmitArgs>(submit_args)...);
+}
+
} // namespace internal
} // namespace io
} // namespace arrow
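SubmitIO above attaches the context's external id and stop token before handing the task to the context's executor. A hedged sketch of calling it (this is an internal API; Submit() returning Result<Future<T>> is assumed from the executor interface):

    #include <memory>
    #include "arrow/io/interfaces.h"
    #include "arrow/io/util_internal.h"
    #include "arrow/result.h"

    arrow::Future<int64_t> FileSizeAsync(
        std::shared_ptr<arrow::io::RandomAccessFile> file,
        const arrow::io::IOContext& ctx) {
      auto maybe_fut = arrow::io::internal::SubmitIO(
          ctx, [file]() -> arrow::Result<int64_t> { return file->GetSize(); });
      if (!maybe_fut.ok()) {
        // Surface a submission failure as a finished, failed future.
        return arrow::Future<int64_t>::MakeFinished(maybe_fut.status());
      }
      return *std::move(maybe_fut);
    }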
diff --git a/contrib/libs/apache/arrow/cpp/src/arrow/ipc/dictionary.cc b/contrib/libs/apache/arrow/cpp/src/arrow/ipc/dictionary.cc
index 13b1424ee5e..3ab2c8b3847 100644
--- a/contrib/libs/apache/arrow/cpp/src/arrow/ipc/dictionary.cc
+++ b/contrib/libs/apache/arrow/cpp/src/arrow/ipc/dictionary.cc
@@ -20,14 +20,14 @@
#include <algorithm>
#include <cstdint>
#include <memory>
-#include <set>
+#include <set>
#include <unordered_map>
#include <utility>
#include <vector>
#include "arrow/array.h"
#include "arrow/array/concatenate.h"
-#include "arrow/array/validate.h"
+#include "arrow/array/validate.h"
#include "arrow/extension_type.h"
#include "arrow/record_batch.h"
#include "arrow/status.h"
@@ -88,16 +88,16 @@ struct DictionaryFieldMapper::Impl {
int num_fields() const { return static_cast<int>(field_path_to_id.size()); }
- int num_dicts() const {
- std::set<int64_t> uniqueIds;
-
- for (auto& kv : field_path_to_id) {
- uniqueIds.insert(kv.second);
- }
-
- return static_cast<int>(uniqueIds.size());
- }
-
+ int num_dicts() const {
+ std::set<int64_t> uniqueIds;
+
+ for (auto& kv : field_path_to_id) {
+ uniqueIds.insert(kv.second);
+ }
+
+ return static_cast<int>(uniqueIds.size());
+ }
+
private:
void ImportFields(const FieldPosition& pos,
const std::vector<std::shared_ptr<Field>>& fields) {
@@ -151,32 +151,32 @@ Result<int64_t> DictionaryFieldMapper::GetFieldId(std::vector<int> field_path) c
int DictionaryFieldMapper::num_fields() const { return impl_->num_fields(); }
-int DictionaryFieldMapper::num_dicts() const { return impl_->num_dicts(); }
-
+int DictionaryFieldMapper::num_dicts() const { return impl_->num_dicts(); }
+
// ----------------------------------------------------------------------
// DictionaryMemo implementation
-namespace {
-
-bool HasUnresolvedNestedDict(const ArrayData& data) {
- if (data.type->id() == Type::DICTIONARY) {
- if (data.dictionary == nullptr) {
- return true;
- }
- if (HasUnresolvedNestedDict(*data.dictionary)) {
- return true;
- }
- }
- for (const auto& child : data.child_data) {
- if (HasUnresolvedNestedDict(*child)) {
- return true;
- }
- }
- return false;
-}
-
-} // namespace
-
+namespace {
+
+bool HasUnresolvedNestedDict(const ArrayData& data) {
+ if (data.type->id() == Type::DICTIONARY) {
+ if (data.dictionary == nullptr) {
+ return true;
+ }
+ if (HasUnresolvedNestedDict(*data.dictionary)) {
+ return true;
+ }
+ }
+ for (const auto& child : data.child_data) {
+ if (HasUnresolvedNestedDict(*child)) {
+ return true;
+ }
+ }
+ return false;
+}
+
+} // namespace
+
struct DictionaryMemo::Impl {
// Map of dictionary id to dictionary array(s) (several in case of deltas)
std::unordered_map<int64_t, ArrayDataVector> id_to_dictionary_;
@@ -205,12 +205,12 @@ struct DictionaryMemo::Impl {
// corrupted data. Full validation is necessary for certain types
// (for example nested dictionaries).
for (const auto& data : *data_vector) {
- if (HasUnresolvedNestedDict(*data)) {
- return Status::NotImplemented(
- "Encountered delta dictionary with an unresolved nested dictionary");
- }
- RETURN_NOT_OK(::arrow::internal::ValidateArray(*data));
- RETURN_NOT_OK(::arrow::internal::ValidateArrayFull(*data));
+ if (HasUnresolvedNestedDict(*data)) {
+ return Status::NotImplemented(
+ "Encountered delta dictionary with an unresolved nested dictionary");
+ }
+ RETURN_NOT_OK(::arrow::internal::ValidateArray(*data));
+ RETURN_NOT_OK(::arrow::internal::ValidateArrayFull(*data));
to_combine.push_back(MakeArray(data));
}
ARROW_ASSIGN_OR_RAISE(auto combined_dict, Concatenate(to_combine, pool));
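The num_dicts() logic above counts distinct dictionary ids because several field paths may share one dictionary. The same counting trick restated standalone (a std::map keyed by the field path stands in for the mapper's internal container):

    #include <cstdint>
    #include <map>
    #include <set>
    #include <vector>

    int CountUniqueDictionaries(
        const std::map<std::vector<int>, int64_t>& field_path_to_id) {
      std::set<int64_t> unique_ids;
      for (const auto& kv : field_path_to_id) {
        unique_ids.insert(kv.second);  // an id may be shared by many paths
      }
      return static_cast<int>(unique_ids.size());
    }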
diff --git a/contrib/libs/apache/arrow/cpp/src/arrow/ipc/dictionary.h b/contrib/libs/apache/arrow/cpp/src/arrow/ipc/dictionary.h
index 25fa70f0dfb..e4287cb1974 100644
--- a/contrib/libs/apache/arrow/cpp/src/arrow/ipc/dictionary.h
+++ b/contrib/libs/apache/arrow/cpp/src/arrow/ipc/dictionary.h
@@ -80,10 +80,10 @@ class ARROW_EXPORT DictionaryFieldMapper {
int num_fields() const;
- /// \brief Returns number of unique dictionaries, taking into
- /// account that different fields can share the same dictionary.
- int num_dicts() const;
-
+ /// \brief Returns number of unique dictionaries, taking into
+ /// account that different fields can share the same dictionary.
+ int num_dicts() const;
+
private:
struct Impl;
std::unique_ptr<Impl> impl_;
diff --git a/contrib/libs/apache/arrow/cpp/src/arrow/ipc/feather.cc b/contrib/libs/apache/arrow/cpp/src/arrow/ipc/feather.cc
index 3354ee930ed..b1c30eec0b3 100644
--- a/contrib/libs/apache/arrow/cpp/src/arrow/ipc/feather.cc
+++ b/contrib/libs/apache/arrow/cpp/src/arrow/ipc/feather.cc
@@ -61,15 +61,15 @@ class ExtensionType;
namespace ipc {
namespace feather {
-namespace {
+namespace {
-using FBB = flatbuffers::FlatBufferBuilder;
+using FBB = flatbuffers::FlatBufferBuilder;
-constexpr const char* kFeatherV1MagicBytes = "FEA1";
-constexpr const int kFeatherDefaultAlignment = 8;
-const uint8_t kPaddingBytes[kFeatherDefaultAlignment] = {0};
-
-inline int64_t PaddedLength(int64_t nbytes) {
+constexpr const char* kFeatherV1MagicBytes = "FEA1";
+constexpr const int kFeatherDefaultAlignment = 8;
+const uint8_t kPaddingBytes[kFeatherDefaultAlignment] = {0};
+
+inline int64_t PaddedLength(int64_t nbytes) {
static const int64_t alignment = kFeatherDefaultAlignment;
return ((nbytes + alignment - 1) / alignment) * alignment;
}
@@ -120,14 +120,14 @@ struct ColumnType {
enum type { PRIMITIVE, CATEGORY, TIMESTAMP, DATE, TIME };
};
-inline TimeUnit::type FromFlatbufferEnum(fbs::TimeUnit unit) {
+inline TimeUnit::type FromFlatbufferEnum(fbs::TimeUnit unit) {
return static_cast<TimeUnit::type>(static_cast<int>(unit));
}
/// For compatibility, we sometimes need to write blank data just to keep producing
/// files that can be read by an older reader.
-Status WritePaddedBlank(io::OutputStream* stream, int64_t length,
- int64_t* bytes_written) {
+Status WritePaddedBlank(io::OutputStream* stream, int64_t length,
+ int64_t* bytes_written) {
const uint8_t null = 0;
for (int64_t i = 0; i < length; i++) {
RETURN_NOT_OK(stream->Write(&null, 1));
@@ -180,7 +180,7 @@ class ReaderV1 : public Reader {
GetDataType(col->values(), col->metadata_type(), col->metadata(), &type));
fields.push_back(::arrow::field(col->name()->str(), type));
}
- schema_ = ::arrow::schema(std::move(fields));
+ schema_ = ::arrow::schema(std::move(fields));
return Status::OK();
}
@@ -343,7 +343,7 @@ class ReaderV1 : public Reader {
columns.emplace_back();
RETURN_NOT_OK(GetColumn(i, &columns.back()));
}
- *out = Table::Make(this->schema(), std::move(columns), this->num_rows());
+ *out = Table::Make(this->schema(), std::move(columns), this->num_rows());
return Status::OK();
}
@@ -360,8 +360,8 @@ class ReaderV1 : public Reader {
RETURN_NOT_OK(GetColumn(field_index, &columns.back()));
fields.push_back(my_schema->field(field_index));
}
- *out = Table::Make(::arrow::schema(std::move(fields)), std::move(columns),
- this->num_rows());
+ *out = Table::Make(::arrow::schema(std::move(fields)), std::move(columns),
+ this->num_rows());
return Status::OK();
}
@@ -380,8 +380,8 @@ class ReaderV1 : public Reader {
RETURN_NOT_OK(GetColumn(field_index, &columns.back()));
fields.push_back(sch->field(field_index));
}
- *out = Table::Make(::arrow::schema(std::move(fields)), std::move(columns),
- this->num_rows());
+ *out = Table::Make(::arrow::schema(std::move(fields)), std::move(columns),
+ this->num_rows());
return Status::OK();
}
@@ -440,14 +440,14 @@ Result<fbs::Type> ToFlatbufferType(const DataType& type) {
}
}
-inline flatbuffers::Offset<fbs::PrimitiveArray> GetPrimitiveArray(
+inline flatbuffers::Offset<fbs::PrimitiveArray> GetPrimitiveArray(
FBB& fbb, const ArrayMetadata& array) {
return fbs::CreatePrimitiveArray(fbb, array.type, fbs::Encoding::PLAIN, array.offset,
array.length, array.null_count, array.total_bytes);
}
// Convert Feather enums to Flatbuffer enums
-inline fbs::TimeUnit ToFlatbufferEnum(TimeUnit::type unit) {
+inline fbs::TimeUnit ToFlatbufferEnum(TimeUnit::type unit) {
return static_cast<fbs::TimeUnit>(static_cast<int>(unit));
}
@@ -459,7 +459,7 @@ const fbs::TypeMetadata COLUMN_TYPE_ENUM_MAPPING[] = {
fbs::TypeMetadata::TimeMetadata // TIME
};
-inline fbs::TypeMetadata ToFlatbufferEnum(ColumnType::type column_type) {
+inline fbs::TypeMetadata ToFlatbufferEnum(ColumnType::type column_type) {
return COLUMN_TYPE_ENUM_MAPPING[column_type];
}
@@ -755,8 +755,8 @@ class ReaderV2 : public Reader {
std::shared_ptr<Schema> schema_;
};
-} // namespace
-
+} // namespace
+
Result<std::shared_ptr<Reader>> Reader::Open(
const std::shared_ptr<io::RandomAccessFile>& source) {
// Pathological issue where the file is smaller than header and footer
@@ -801,8 +801,8 @@ Status WriteTable(const Table& table, io::OutputStream* dst,
return WriteFeatherV1(table, dst);
} else {
IpcWriteOptions ipc_options = IpcWriteOptions::Defaults();
- ipc_options.unify_dictionaries = true;
- ipc_options.allow_64bit = true;
+ ipc_options.unify_dictionaries = true;
+ ipc_options.allow_64bit = true;
ARROW_ASSIGN_OR_RAISE(
ipc_options.codec,
util::Codec::Create(properties.compression, properties.compression_level));
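PaddedLength() above rounds a byte count up to the 8-byte Feather alignment with pure integer arithmetic; the same computation, extracted for clarity:

    #include <cstdint>

    inline int64_t PaddedTo8(int64_t nbytes) {
      constexpr int64_t alignment = 8;  // kFeatherDefaultAlignment
      return ((nbytes + alignment - 1) / alignment) * alignment;
    }

    // PaddedTo8(0) == 0, PaddedTo8(1) == 8, PaddedTo8(8) == 8, PaddedTo8(9) == 16.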
diff --git a/contrib/libs/apache/arrow/cpp/src/arrow/ipc/feather.h b/contrib/libs/apache/arrow/cpp/src/arrow/ipc/feather.h
index 3c43cf7cff7..a32ff6d0a5a 100644
--- a/contrib/libs/apache/arrow/cpp/src/arrow/ipc/feather.h
+++ b/contrib/libs/apache/arrow/cpp/src/arrow/ipc/feather.h
@@ -25,7 +25,7 @@
#include <string>
#include <vector>
-#include "arrow/type_fwd.h"
+#include "arrow/type_fwd.h"
#include "arrow/util/compression.h"
#include "arrow/util/visibility.h"
@@ -128,7 +128,7 @@ struct ARROW_EXPORT WriteProperties {
Compression::type compression = Compression::UNCOMPRESSED;
/// Compressor-specific compression level
- int compression_level = ::arrow::util::kUseDefaultCompressionLevel;
+ int compression_level = ::arrow::util::kUseDefaultCompressionLevel;
};
ARROW_EXPORT
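WriteTable in feather.cc enables dictionary unification and 64-bit lengths for the V2 path before applying the compression settings shown here. Configuring those settings could look like the following sketch (WriteProperties::Defaults() is assumed to exist alongside the fields in this header):

    #include "arrow/ipc/feather.h"
    #include "arrow/util/compression.h"

    arrow::ipc::feather::WriteProperties MakeZstdProperties() {
      auto props = arrow::ipc::feather::WriteProperties::Defaults();
      props.compression = arrow::Compression::ZSTD;
      props.compression_level = 3;  // codec-specific; sentinel default otherwise
      return props;
    }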
diff --git a/contrib/libs/apache/arrow/cpp/src/arrow/ipc/json_simple.h b/contrib/libs/apache/arrow/cpp/src/arrow/ipc/json_simple.h
index 805a0c44354..4dd3a664aa6 100644
--- a/contrib/libs/apache/arrow/cpp/src/arrow/ipc/json_simple.h
+++ b/contrib/libs/apache/arrow/cpp/src/arrow/ipc/json_simple.h
@@ -1,61 +1,61 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-// Implement a simple JSON representation format for arrays
-
-#pragma once
-
-#include <memory>
-#include <string>
-
-#include "arrow/status.h"
-#include "arrow/util/string_view.h"
-#include "arrow/util/visibility.h"
-
-namespace arrow {
-
-class Array;
-class DataType;
-
-namespace ipc {
-namespace internal {
-namespace json {
-
-ARROW_EXPORT
-Status ArrayFromJSON(const std::shared_ptr<DataType>&, const std::string& json,
- std::shared_ptr<Array>* out);
-
-ARROW_EXPORT
-Status ArrayFromJSON(const std::shared_ptr<DataType>&, util::string_view json,
- std::shared_ptr<Array>* out);
-
-ARROW_EXPORT
-Status ArrayFromJSON(const std::shared_ptr<DataType>&, const char* json,
- std::shared_ptr<Array>* out);
-
-ARROW_EXPORT
-Status DictArrayFromJSON(const std::shared_ptr<DataType>&, util::string_view indices_json,
- util::string_view dictionary_json, std::shared_ptr<Array>* out);
-
-ARROW_EXPORT
-Status ScalarFromJSON(const std::shared_ptr<DataType>&, util::string_view json,
- std::shared_ptr<Scalar>* out);
-
-} // namespace json
-} // namespace internal
-} // namespace ipc
-} // namespace arrow
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+// Implement a simple JSON representation format for arrays
+
+#pragma once
+
+#include <memory>
+#include <string>
+
+#include "arrow/status.h"
+#include "arrow/util/string_view.h"
+#include "arrow/util/visibility.h"
+
+namespace arrow {
+
+class Array;
+class DataType;
+
+namespace ipc {
+namespace internal {
+namespace json {
+
+ARROW_EXPORT
+Status ArrayFromJSON(const std::shared_ptr<DataType>&, const std::string& json,
+ std::shared_ptr<Array>* out);
+
+ARROW_EXPORT
+Status ArrayFromJSON(const std::shared_ptr<DataType>&, util::string_view json,
+ std::shared_ptr<Array>* out);
+
+ARROW_EXPORT
+Status ArrayFromJSON(const std::shared_ptr<DataType>&, const char* json,
+ std::shared_ptr<Array>* out);
+
+ARROW_EXPORT
+Status DictArrayFromJSON(const std::shared_ptr<DataType>&, util::string_view indices_json,
+ util::string_view dictionary_json, std::shared_ptr<Array>* out);
+
+ARROW_EXPORT
+Status ScalarFromJSON(const std::shared_ptr<DataType>&, util::string_view json,
+ std::shared_ptr<Scalar>* out);
+
+} // namespace json
+} // namespace internal
+} // namespace ipc
+} // namespace arrow
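The json_simple helpers above parse a JSON literal straight into an Arrow array, which is mostly useful in tests. For instance, with the const char* overload declared above:

    #include <memory>
    #include "arrow/array.h"
    #include "arrow/ipc/json_simple.h"
    #include "arrow/type.h"

    arrow::Status BuildInt32Array(std::shared_ptr<arrow::Array>* out) {
      // Nulls in the JSON literal become null slots in the resulting array.
      return arrow::ipc::internal::json::ArrayFromJSON(arrow::int32(), "[1, 2, null]",
                                                       out);
    }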
diff --git a/contrib/libs/apache/arrow/cpp/src/arrow/ipc/message.cc b/contrib/libs/apache/arrow/cpp/src/arrow/ipc/message.cc
index e047e29c201..197556efcea 100644
--- a/contrib/libs/apache/arrow/cpp/src/arrow/ipc/message.cc
+++ b/contrib/libs/apache/arrow/cpp/src/arrow/ipc/message.cc
@@ -32,8 +32,8 @@
#include "arrow/ipc/options.h"
#include "arrow/ipc/util.h"
#include "arrow/status.h"
-#include "arrow/util/endian.h"
-#include "arrow/util/future.h"
+#include "arrow/util/endian.h"
+#include "arrow/util/future.h"
#include "arrow/util/logging.h"
#include "arrow/util/ubsan.h"
@@ -269,10 +269,10 @@ std::string FormatMessageType(MessageType type) {
return "record batch";
case MessageType::DICTIONARY_BATCH:
return "dictionary";
- case MessageType::TENSOR:
- return "tensor";
- case MessageType::SPARSE_TENSOR:
- return "sparse tensor";
+ case MessageType::TENSOR:
+ return "tensor";
+ case MessageType::SPARSE_TENSOR:
+ return "sparse tensor";
default:
break;
}
@@ -325,60 +325,60 @@ Result<std::unique_ptr<Message>> ReadMessage(int64_t offset, int32_t metadata_le
}
}
-Future<std::shared_ptr<Message>> ReadMessageAsync(int64_t offset, int32_t metadata_length,
- int64_t body_length,
- io::RandomAccessFile* file,
- const io::IOContext& context) {
- struct State {
- std::unique_ptr<Message> result;
- std::shared_ptr<MessageDecoderListener> listener;
- std::shared_ptr<MessageDecoder> decoder;
- };
- auto state = std::make_shared<State>();
- state->listener = std::make_shared<AssignMessageDecoderListener>(&state->result);
- state->decoder = std::make_shared<MessageDecoder>(state->listener);
-
- if (metadata_length < state->decoder->next_required_size()) {
- return Status::Invalid("metadata_length should be at least ",
- state->decoder->next_required_size());
- }
- return file->ReadAsync(context, offset, metadata_length + body_length)
- .Then([=](std::shared_ptr<Buffer> metadata) -> Result<std::shared_ptr<Message>> {
- if (metadata->size() < metadata_length) {
- return Status::Invalid("Expected to read ", metadata_length,
- " metadata bytes but got ", metadata->size());
- }
- ARROW_RETURN_NOT_OK(
- state->decoder->Consume(SliceBuffer(metadata, 0, metadata_length)));
- switch (state->decoder->state()) {
- case MessageDecoder::State::INITIAL:
- return std::move(state->result);
- case MessageDecoder::State::METADATA_LENGTH:
- return Status::Invalid("metadata length is missing. File offset: ", offset,
- ", metadata length: ", metadata_length);
- case MessageDecoder::State::METADATA:
- return Status::Invalid("flatbuffer size ",
- state->decoder->next_required_size(),
- " invalid. File offset: ", offset,
- ", metadata length: ", metadata_length);
- case MessageDecoder::State::BODY: {
- auto body = SliceBuffer(metadata, metadata_length, body_length);
- if (body->size() < state->decoder->next_required_size()) {
- return Status::IOError("Expected to be able to read ",
- state->decoder->next_required_size(),
- " bytes for message body, got ", body->size());
- }
- RETURN_NOT_OK(state->decoder->Consume(body));
- return std::move(state->result);
- }
- case MessageDecoder::State::EOS:
- return Status::Invalid("Unexpected empty message in IPC file format");
- default:
- return Status::Invalid("Unexpected state: ", state->decoder->state());
- }
- });
-}
-
+Future<std::shared_ptr<Message>> ReadMessageAsync(int64_t offset, int32_t metadata_length,
+ int64_t body_length,
+ io::RandomAccessFile* file,
+ const io::IOContext& context) {
+ struct State {
+ std::unique_ptr<Message> result;
+ std::shared_ptr<MessageDecoderListener> listener;
+ std::shared_ptr<MessageDecoder> decoder;
+ };
+ auto state = std::make_shared<State>();
+ state->listener = std::make_shared<AssignMessageDecoderListener>(&state->result);
+ state->decoder = std::make_shared<MessageDecoder>(state->listener);
+
+ if (metadata_length < state->decoder->next_required_size()) {
+ return Status::Invalid("metadata_length should be at least ",
+ state->decoder->next_required_size());
+ }
+ return file->ReadAsync(context, offset, metadata_length + body_length)
+ .Then([=](std::shared_ptr<Buffer> metadata) -> Result<std::shared_ptr<Message>> {
+ if (metadata->size() < metadata_length) {
+ return Status::Invalid("Expected to read ", metadata_length,
+ " metadata bytes but got ", metadata->size());
+ }
+ ARROW_RETURN_NOT_OK(
+ state->decoder->Consume(SliceBuffer(metadata, 0, metadata_length)));
+ switch (state->decoder->state()) {
+ case MessageDecoder::State::INITIAL:
+ return std::move(state->result);
+ case MessageDecoder::State::METADATA_LENGTH:
+ return Status::Invalid("metadata length is missing. File offset: ", offset,
+ ", metadata length: ", metadata_length);
+ case MessageDecoder::State::METADATA:
+ return Status::Invalid("flatbuffer size ",
+ state->decoder->next_required_size(),
+ " invalid. File offset: ", offset,
+ ", metadata length: ", metadata_length);
+ case MessageDecoder::State::BODY: {
+ auto body = SliceBuffer(metadata, metadata_length, body_length);
+ if (body->size() < state->decoder->next_required_size()) {
+ return Status::IOError("Expected to be able to read ",
+ state->decoder->next_required_size(),
+ " bytes for message body, got ", body->size());
+ }
+ RETURN_NOT_OK(state->decoder->Consume(body));
+ return std::move(state->result);
+ }
+ case MessageDecoder::State::EOS:
+ return Status::Invalid("Unexpected empty message in IPC file format");
+ default:
+ return Status::Invalid("Unexpected state: ", state->decoder->state());
+ }
+ });
+}
+
Status AlignStream(io::InputStream* stream, int32_t alignment) {
ARROW_ASSIGN_OR_RAISE(int64_t position, stream->Tell());
return stream->Advance(PaddedLength(position, alignment) - position);
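ReadMessageAsync above issues a single ReadAsync covering metadata plus body and decodes both in the continuation. A blocking caller could look like this sketch (the offset and length arguments would normally come from the file's footer):

    #include <memory>
    #include "arrow/io/interfaces.h"
    #include "arrow/ipc/message.h"
    #include "arrow/result.h"

    arrow::Result<std::shared_ptr<arrow::ipc::Message>> ReadOneMessage(
        arrow::io::RandomAccessFile* file, int64_t offset, int32_t metadata_length,
        int64_t body_length) {
      auto fut = arrow::ipc::ReadMessageAsync(offset, metadata_length, body_length,
                                              file);  // default IOContext
      return fut.result();  // blocks until the read and decode complete
    }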
diff --git a/contrib/libs/apache/arrow/cpp/src/arrow/ipc/message.h b/contrib/libs/apache/arrow/cpp/src/arrow/ipc/message.h
index d437bdfe773..b2683259cb4 100644
--- a/contrib/libs/apache/arrow/cpp/src/arrow/ipc/message.h
+++ b/contrib/libs/apache/arrow/cpp/src/arrow/ipc/message.h
@@ -365,7 +365,7 @@ class ARROW_EXPORT MessageDecoder {
/// memcpy(buffer->mutable_data() + current_buffer_size,
/// small_chunk,
/// small_chunk_size);
- /// if (buffer->size() < decoder.next_required_size()) {
+ /// if (buffer->size() < decoder.next_required_size()) {
/// continue;
/// }
/// std::shared_ptr<arrow::Buffer> chunk(buffer.release());
@@ -459,11 +459,11 @@ Result<std::unique_ptr<Message>> ReadMessage(const int64_t offset,
const int32_t metadata_length,
io::RandomAccessFile* file);
-ARROW_EXPORT
-Future<std::shared_ptr<Message>> ReadMessageAsync(
- const int64_t offset, const int32_t metadata_length, const int64_t body_length,
- io::RandomAccessFile* file, const io::IOContext& context = io::default_io_context());
-
+ARROW_EXPORT
+Future<std::shared_ptr<Message>> ReadMessageAsync(
+ const int64_t offset, const int32_t metadata_length, const int64_t body_length,
+ io::RandomAccessFile* file, const io::IOContext& context = io::default_io_context());
+
/// \brief Advance stream to an 8-byte offset if its position is not a multiple
/// of 8 already
/// \param[in] stream an input stream
diff --git a/contrib/libs/apache/arrow/cpp/src/arrow/ipc/metadata_internal.cc b/contrib/libs/apache/arrow/cpp/src/arrow/ipc/metadata_internal.cc
index 9d0db6a0d8b..4b332bd9e1e 100644
--- a/contrib/libs/apache/arrow/cpp/src/arrow/ipc/metadata_internal.cc
+++ b/contrib/libs/apache/arrow/cpp/src/arrow/ipc/metadata_internal.cc
@@ -271,12 +271,12 @@ Status ConcreteTypeFromFlatbuffer(flatbuf::Type type, const void* type_data,
return Status::OK();
case flatbuf::Type::Decimal: {
auto dec_type = static_cast<const flatbuf::Decimal*>(type_data);
- if (dec_type->bitWidth() == 128) {
- return Decimal128Type::Make(dec_type->precision(), dec_type->scale()).Value(out);
- } else if (dec_type->bitWidth() == 256) {
- return Decimal256Type::Make(dec_type->precision(), dec_type->scale()).Value(out);
- } else {
- return Status::Invalid("Library only supports 128-bit or 256-bit decimal values");
+ if (dec_type->bitWidth() == 128) {
+ return Decimal128Type::Make(dec_type->precision(), dec_type->scale()).Value(out);
+ } else if (dec_type->bitWidth() == 256) {
+ return Decimal256Type::Make(dec_type->precision(), dec_type->scale()).Value(out);
+ } else {
+ return Status::Invalid("Library only supports 128-bit or 256-bit decimal values");
}
}
case flatbuf::Type::Date: {
@@ -428,7 +428,7 @@ static Status GetDictionaryEncoding(FBB& fbb, const std::shared_ptr<Field>& fiel
const DictionaryType& type, int64_t dictionary_id,
DictionaryOffset* out) {
// We assume that the dictionary index type (as an integer) has already been
- // validated elsewhere, so the checked cast below is safe
+ // validated elsewhere, so the checked cast below is safe
const auto& index_type = checked_cast<const IntegerType&>(*type.index_type());
auto index_type_offset =
@@ -594,24 +594,24 @@ class FieldToFlatbufferVisitor {
return Status::OK();
}
- Status Visit(const Decimal128Type& type) {
+ Status Visit(const Decimal128Type& type) {
const auto& dec_type = checked_cast<const Decimal128Type&>(type);
fb_type_ = flatbuf::Type::Decimal;
- type_offset_ = flatbuf::CreateDecimal(fbb_, dec_type.precision(), dec_type.scale(),
- /*bitWidth=*/128)
- .Union();
+ type_offset_ = flatbuf::CreateDecimal(fbb_, dec_type.precision(), dec_type.scale(),
+ /*bitWidth=*/128)
+ .Union();
+ return Status::OK();
+ }
+
+ Status Visit(const Decimal256Type& type) {
+ const auto& dec_type = checked_cast<const Decimal256Type&>(type);
+ fb_type_ = flatbuf::Type::Decimal;
+ type_offset_ = flatbuf::CreateDecimal(fbb_, dec_type.precision(), dec_type.scale(),
+                                          /*bitWidth=*/256)
+ .Union();
return Status::OK();
}
- Status Visit(const Decimal256Type& type) {
- const auto& dec_type = checked_cast<const Decimal256Type&>(type);
- fb_type_ = flatbuf::Type::Decimal;
- type_offset_ = flatbuf::CreateDecimal(fbb_, dec_type.precision(), dec_type.scale(),
-                                           /*bitWidth=*/256)
- .Union();
- return Status::OK();
- }
-
Status Visit(const ListType& type) {
fb_type_ = flatbuf::Type::List;
RETURN_NOT_OK(VisitChildFields(type));
@@ -753,15 +753,15 @@ Status FieldFromFlatbuffer(const flatbuf::Field* field, FieldPosition field_pos,
// Reconstruct the data type
// 1. Data type children
- FieldVector child_fields;
+ FieldVector child_fields;
const auto& children = field->children();
- // As a tolerance, allow for a null children field meaning "no children" (ARROW-12100)
- if (children != nullptr) {
- child_fields.resize(children->size());
- for (int i = 0; i < static_cast<int>(children->size()); ++i) {
- RETURN_NOT_OK(FieldFromFlatbuffer(children->Get(i), field_pos.child(i),
- dictionary_memo, &child_fields[i]));
- }
+ // As a tolerance, allow for a null children field meaning "no children" (ARROW-12100)
+ if (children != nullptr) {
+ child_fields.resize(children->size());
+ for (int i = 0; i < static_cast<int>(children->size()); ++i) {
+ RETURN_NOT_OK(FieldFromFlatbuffer(children->Get(i), field_pos.child(i),
+ dictionary_memo, &child_fields[i]));
+ }
}
// 2. Top-level concrete data type
@@ -871,12 +871,12 @@ Status SchemaToFlatbuffer(FBB& fbb, const Schema& schema,
Result<std::shared_ptr<Buffer>> WriteFBMessage(
FBB& fbb, flatbuf::MessageHeader header_type, flatbuffers::Offset<void> header,
int64_t body_length, MetadataVersion version,
- const std::shared_ptr<const KeyValueMetadata>& custom_metadata, MemoryPool* pool) {
+ const std::shared_ptr<const KeyValueMetadata>& custom_metadata, MemoryPool* pool) {
auto message = flatbuf::CreateMessage(fbb, MetadataVersionToFlatbuffer(version),
header_type, header, body_length,
SerializeCustomMetadata(fbb, custom_metadata));
fbb.Finish(message);
- return WriteFlatbufferBuilder(fbb, pool);
+ return WriteFlatbufferBuilder(fbb, pool);
}
using FieldNodeVector =
@@ -1183,8 +1183,8 @@ Status WriteSchemaMessage(const Schema& schema, const DictionaryFieldMapper& map
flatbuffers::Offset<flatbuf::Schema> fb_schema;
RETURN_NOT_OK(SchemaToFlatbuffer(fbb, schema, mapper, &fb_schema));
return WriteFBMessage(fbb, flatbuf::MessageHeader::Schema, fb_schema.Union(),
- /*body_length=*/0, options.metadata_version,
- /*custom_metadata=*/nullptr, options.memory_pool)
+ /*body_length=*/0, options.metadata_version,
+ /*custom_metadata=*/nullptr, options.memory_pool)
.Value(out);
}
@@ -1198,8 +1198,8 @@ Status WriteRecordBatchMessage(
RETURN_NOT_OK(
MakeRecordBatch(fbb, length, body_length, nodes, buffers, options, &record_batch));
return WriteFBMessage(fbb, flatbuf::MessageHeader::RecordBatch, record_batch.Union(),
- body_length, options.metadata_version, custom_metadata,
- options.memory_pool)
+ body_length, options.metadata_version, custom_metadata,
+ options.memory_pool)
.Value(out);
}
@@ -1233,8 +1233,8 @@ Result<std::shared_ptr<Buffer>> WriteTensorMessage(const Tensor& tensor,
flatbuf::CreateTensor(fbb, fb_type_type, fb_type, fb_shape, fb_strides, &buffer);
return WriteFBMessage(fbb, flatbuf::MessageHeader::Tensor, fb_tensor.Union(),
- body_length, options.metadata_version,
- /*custom_metadata=*/nullptr, options.memory_pool);
+ body_length, options.metadata_version,
+ /*custom_metadata=*/nullptr, options.memory_pool);
}
Result<std::shared_ptr<Buffer>> WriteSparseTensorMessage(
@@ -1245,8 +1245,8 @@ Result<std::shared_ptr<Buffer>> WriteSparseTensorMessage(
RETURN_NOT_OK(
MakeSparseTensor(fbb, sparse_tensor, body_length, buffers, &fb_sparse_tensor));
return WriteFBMessage(fbb, flatbuf::MessageHeader::SparseTensor,
- fb_sparse_tensor.Union(), body_length, options.metadata_version,
- /*custom_metadata=*/nullptr, options.memory_pool);
+ fb_sparse_tensor.Union(), body_length, options.metadata_version,
+ /*custom_metadata=*/nullptr, options.memory_pool);
}
Status WriteDictionaryMessage(
@@ -1261,8 +1261,8 @@ Status WriteDictionaryMessage(
auto dictionary_batch =
flatbuf::CreateDictionaryBatch(fbb, id, record_batch, is_delta).Union();
return WriteFBMessage(fbb, flatbuf::MessageHeader::DictionaryBatch, dictionary_batch,
- body_length, options.metadata_version, custom_metadata,
- options.memory_pool)
+ body_length, options.metadata_version, custom_metadata,
+ options.memory_pool)
.Value(out);
}
@@ -1338,11 +1338,11 @@ Status GetSchema(const void* opaque_schema, DictionaryMemo* dictionary_memo,
std::shared_ptr<KeyValueMetadata> metadata;
RETURN_NOT_OK(internal::GetKeyValueMetadata(schema->custom_metadata(), &metadata));
- // set endianness using the value in flatbuf schema
- auto endianness = schema->endianness() == flatbuf::Endianness::Little
- ? Endianness::Little
- : Endianness::Big;
- *out = ::arrow::schema(std::move(fields), endianness, metadata);
+ // set endianness using the value in flatbuf schema
+ auto endianness = schema->endianness() == flatbuf::Endianness::Little
+ ? Endianness::Little
+ : Endianness::Big;
+ *out = ::arrow::schema(std::move(fields), endianness, metadata);
return Status::OK();
}
@@ -1356,9 +1356,9 @@ Status GetTensorMetadata(const Buffer& metadata, std::shared_ptr<DataType>* type
return Status::IOError("Header-type of flatbuffer-encoded Message is not Tensor.");
}
- flatbuffers::uoffset_t ndim = tensor->shape()->size();
+ flatbuffers::uoffset_t ndim = tensor->shape()->size();
- for (flatbuffers::uoffset_t i = 0; i < ndim; ++i) {
+ for (flatbuffers::uoffset_t i = 0; i < ndim; ++i) {
auto dim = tensor->shape()->Get(i);
shape->push_back(dim->size());
@@ -1366,12 +1366,12 @@ Status GetTensorMetadata(const Buffer& metadata, std::shared_ptr<DataType>* type
}
if (tensor->strides() && tensor->strides()->size() > 0) {
- if (tensor->strides()->size() != ndim) {
- return Status::IOError(
- "The sizes of shape and strides in a tensor are mismatched.");
- }
-
- for (decltype(ndim) i = 0; i < ndim; ++i) {
+ if (tensor->strides()->size() != ndim) {
+ return Status::IOError(
+ "The sizes of shape and strides in a tensor are mismatched.");
+ }
+
+ for (decltype(ndim) i = 0; i < ndim; ++i) {
strides->push_back(tensor->strides()->Get(i));
}
}
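The Decimal branch in ConcreteTypeFromFlatbuffer admits only 128- and 256-bit widths. The same dispatch, extracted as a standalone helper:

    #include <memory>
    #include "arrow/status.h"
    #include "arrow/type.h"

    arrow::Result<std::shared_ptr<arrow::DataType>> DecimalFromBitWidth(
        int32_t bit_width, int32_t precision, int32_t scale) {
      if (bit_width == 128) return arrow::Decimal128Type::Make(precision, scale);
      if (bit_width == 256) return arrow::Decimal256Type::Make(precision, scale);
      return arrow::Status::Invalid(
          "Library only supports 128-bit or 256-bit decimal values");
    }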
diff --git a/contrib/libs/apache/arrow/cpp/src/arrow/ipc/metadata_internal.h b/contrib/libs/apache/arrow/cpp/src/arrow/ipc/metadata_internal.h
index d47b244d324..9cf489dd668 100644
--- a/contrib/libs/apache/arrow/cpp/src/arrow/ipc/metadata_internal.h
+++ b/contrib/libs/apache/arrow/cpp/src/arrow/ipc/metadata_internal.h
@@ -156,22 +156,22 @@ Status GetSparseTensorMetadata(const Buffer& metadata, std::shared_ptr<DataType>
Status GetKeyValueMetadata(const KVVector* fb_metadata,
std::shared_ptr<KeyValueMetadata>* out);
-template <typename RootType>
-bool VerifyFlatbuffers(const uint8_t* data, int64_t size) {
- // Heuristic: tables in an Arrow flatbuffers buffer must take at least 1 bit
- // each on average (ARROW-11559).
- // In particular, the only recursive table (the `Field` table in Schema.fbs)
- // must have a non-empty `type` member.
- flatbuffers::Verifier verifier(
- data, static_cast<size_t>(size),
- /*max_depth=*/128,
- /*max_tables=*/static_cast<flatbuffers::uoffset_t>(8 * size));
- return verifier.VerifyBuffer<RootType>(nullptr);
-}
-
+template <typename RootType>
+bool VerifyFlatbuffers(const uint8_t* data, int64_t size) {
+ // Heuristic: tables in an Arrow flatbuffers buffer must take at least 1 bit
+ // each on average (ARROW-11559).
+ // In particular, the only recursive table (the `Field` table in Schema.fbs)
+ // must have a non-empty `type` member.
+ flatbuffers::Verifier verifier(
+ data, static_cast<size_t>(size),
+ /*max_depth=*/128,
+ /*max_tables=*/static_cast<flatbuffers::uoffset_t>(8 * size));
+ return verifier.VerifyBuffer<RootType>(nullptr);
+}
+
static inline Status VerifyMessage(const uint8_t* data, int64_t size,
const flatbuf::Message** out) {
- if (!VerifyFlatbuffers<flatbuf::Message>(data, size)) {
+ if (!VerifyFlatbuffers<flatbuf::Message>(data, size)) {
return Status::IOError("Invalid flatbuffers message.");
}
*out = flatbuf::GetMessage(data);
@@ -211,11 +211,11 @@ Status WriteDictionaryMessage(
const IpcWriteOptions& options, std::shared_ptr<Buffer>* out);
static inline Result<std::shared_ptr<Buffer>> WriteFlatbufferBuilder(
- flatbuffers::FlatBufferBuilder& fbb, // NOLINT non-const reference
- MemoryPool* pool = default_memory_pool()) {
+ flatbuffers::FlatBufferBuilder& fbb, // NOLINT non-const reference
+ MemoryPool* pool = default_memory_pool()) {
int32_t size = fbb.GetSize();
- ARROW_ASSIGN_OR_RAISE(auto result, AllocateBuffer(size, pool));
+ ARROW_ASSIGN_OR_RAISE(auto result, AllocateBuffer(size, pool));
uint8_t* dst = result->mutable_data();
memcpy(dst, fbb.GetBufferPointer(), size);
diff --git a/contrib/libs/apache/arrow/cpp/src/arrow/ipc/options.h b/contrib/libs/apache/arrow/cpp/src/arrow/ipc/options.h
index 2845a61523a..2e0f800b5ad 100644
--- a/contrib/libs/apache/arrow/cpp/src/arrow/ipc/options.h
+++ b/contrib/libs/apache/arrow/cpp/src/arrow/ipc/options.h
@@ -39,26 +39,26 @@ constexpr int kMaxNestingDepth = 64;
/// \brief Options for writing Arrow IPC messages
struct ARROW_EXPORT IpcWriteOptions {
- /// \brief If true, allow field lengths that don't fit in a signed 32-bit int.
- ///
- /// Some implementations may not be able to parse streams created with this option.
+ /// \brief If true, allow field lengths that don't fit in a signed 32-bit int.
+ ///
+ /// Some implementations may not be able to parse streams created with this option.
bool allow_64bit = false;
-
- /// \brief The maximum permitted schema nesting depth.
+
+ /// \brief The maximum permitted schema nesting depth.
int max_recursion_depth = kMaxNestingDepth;
- /// \brief Write padding after memory buffers up to this multiple of bytes.
+ /// \brief Write padding after memory buffers up to this multiple of bytes.
int32_t alignment = 8;
- /// \brief Write the pre-0.15.0 IPC message format
- ///
- /// This legacy format consists of a 4-byte prefix instead of 8-byte.
+ /// \brief Write the pre-0.15.0 IPC message format
+ ///
+ /// This legacy format consists of a 4-byte prefix instead of 8-byte.
bool write_legacy_ipc_format = false;
/// \brief The memory pool to use for allocations made during IPC writing
- ///
- /// While Arrow IPC is predominantly zero-copy, it may have to allocate
- /// memory in some cases (for example if compression is enabled).
+ ///
+ /// While Arrow IPC is predominantly zero-copy, it may have to allocate
+ /// memory in some cases (for example if compression is enabled).
MemoryPool* memory_pool = default_memory_pool();
/// \brief Compression codec to use for record batch body buffers
@@ -70,38 +70,38 @@ struct ARROW_EXPORT IpcWriteOptions {
/// like compression
bool use_threads = true;
- /// \brief Whether to emit dictionary deltas
- ///
- /// If false, a changed dictionary for a given field will emit a full
- /// dictionary replacement.
- /// If true, a changed dictionary will be compared against the previous
- /// version. If possible, a dictionary delta will be emitted, otherwise
- /// a full dictionary replacement.
- ///
- /// Default is false to maximize stream compatibility.
- ///
- /// Also, note that if a changed dictionary is a nested dictionary,
- /// then a delta is never emitted, for compatibility with the read path.
- bool emit_dictionary_deltas = false;
-
- /// \brief Whether to unify dictionaries for the IPC file format
- ///
- /// The IPC file format doesn't support dictionary replacements or deltas.
- /// Therefore, chunks of a column with a dictionary type must have the same
- /// dictionary in each record batch.
- ///
- /// If this option is true, RecordBatchWriter::WriteTable will attempt
- /// to unify dictionaries across each table column. If this option is
- /// false, unequal dictionaries across a table column will simply raise
- /// an error.
- ///
- /// Note that enabling this option has a runtime cost. Also, not all types
- /// currently support dictionary unification.
- ///
- /// This option is ignored for IPC streams, which support dictionary replacement
- /// and deltas.
- bool unify_dictionaries = false;
-
+ /// \brief Whether to emit dictionary deltas
+ ///
+ /// If false, a changed dictionary for a given field will emit a full
+ /// dictionary replacement.
+ /// If true, a changed dictionary will be compared against the previous
+ /// version. If possible, a dictionary delta will be emitted, otherwise
+ /// a full dictionary replacement.
+ ///
+ /// Default is false to maximize stream compatibility.
+ ///
+ /// Also, note that if a changed dictionary is a nested dictionary,
+ /// then a delta is never emitted, for compatibility with the read path.
+ bool emit_dictionary_deltas = false;
+
+ /// \brief Whether to unify dictionaries for the IPC file format
+ ///
+ /// The IPC file format doesn't support dictionary replacements or deltas.
+ /// Therefore, chunks of a column with a dictionary type must have the same
+ /// dictionary in each record batch.
+ ///
+ /// If this option is true, RecordBatchWriter::WriteTable will attempt
+ /// to unify dictionaries across each table column. If this option is
+ /// false, unequal dictionaries across a table column will simply raise
+ /// an error.
+ ///
+ /// Note that enabling this option has a runtime cost. Also, not all types
+ /// currently support dictionary unification.
+ ///
+ /// This option is ignored for IPC streams, which support dictionary replacement
+ /// and deltas.
+ bool unify_dictionaries = false;
+
/// \brief Format version to use for IPC messages and their metadata.
///
/// Presently using V5 version (readable by 1.0.0 and later).
@@ -115,40 +115,40 @@ struct ARROW_EXPORT IpcWriteOptions {
using IpcOptions = IpcWriteOptions;
#endif
-/// \brief Options for reading Arrow IPC messages
+/// \brief Options for reading Arrow IPC messages
struct ARROW_EXPORT IpcReadOptions {
- /// \brief The maximum permitted schema nesting depth.
+ /// \brief The maximum permitted schema nesting depth.
int max_recursion_depth = kMaxNestingDepth;
- /// \brief The memory pool to use for allocations made during IPC reading
- ///
- /// While Arrow IPC is predominantly zero-copy, it may have to allocate
- /// memory in some cases (for example if compression is enabled).
+ /// \brief The memory pool to use for allocations made during IPC reading
+ ///
+ /// While Arrow IPC is predominantly zero-copy, it may have to allocate
+ /// memory in some cases (for example if compression is enabled).
MemoryPool* memory_pool = default_memory_pool();
/// \brief EXPERIMENTAL: Top-level schema fields to include when
- /// deserializing RecordBatch.
- ///
- /// If empty (the default), return all deserialized fields.
- /// If non-empty, the values are the indices of fields in the top-level schema.
+ /// deserializing RecordBatch.
+ ///
+ /// If empty (the default), return all deserialized fields.
+ /// If non-empty, the values are the indices of fields in the top-level schema.
std::vector<int> included_fields;
/// \brief Use global CPU thread pool to parallelize any computational tasks
/// like decompression
bool use_threads = true;
- /// \brief EXPERIMENTAL: Convert incoming data to platform-native endianness
- ///
- /// If the endianness of the received schema is not equal to platform-native
- /// endianness, then all buffers with endian-sensitive data will be byte-swapped.
- /// This includes the value buffers of numeric types, temporal types, decimal
- /// types, as well as the offset buffers of variable-sized binary and list-like
- /// types.
- ///
- /// Endianness conversion is achieved by the RecordBatchFileReader,
- /// RecordBatchStreamReader and StreamDecoder classes.
- bool ensure_native_endian = true;
-
+ /// \brief EXPERIMENTAL: Convert incoming data to platform-native endianness
+ ///
+ /// If the endianness of the received schema is not equal to platform-native
+ /// endianness, then all buffers with endian-sensitive data will be byte-swapped.
+ /// This includes the value buffers of numeric types, temporal types, decimal
+ /// types, as well as the offset buffers of variable-sized binary and list-like
+ /// types.
+ ///
+ /// Endianness conversion is achieved by the RecordBatchFileReader,
+ /// RecordBatchStreamReader and StreamDecoder classes.
+ bool ensure_native_endian = true;
+
static IpcReadOptions Defaults();
};
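A corresponding read-side sketch, assuming an open arrow::io::InputStream named stream; it selects two top-level fields and requests native-endian buffers:

auto read_options = arrow::ipc::IpcReadOptions::Defaults();
read_options.included_fields = {0, 2};     // indices into the top-level schema
read_options.ensure_native_endian = true;  // byte-swap foreign-endian data
ARROW_ASSIGN_OR_RAISE(
    auto reader, arrow::ipc::RecordBatchStreamReader::Open(stream, read_options));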
diff --git a/contrib/libs/apache/arrow/cpp/src/arrow/ipc/reader.cc b/contrib/libs/apache/arrow/cpp/src/arrow/ipc/reader.cc
index 5e90be7d4e6..a3c345cc440 100644
--- a/contrib/libs/apache/arrow/cpp/src/arrow/ipc/reader.cc
+++ b/contrib/libs/apache/arrow/cpp/src/arrow/ipc/reader.cc
@@ -31,7 +31,7 @@
#include "arrow/array.h"
#include "arrow/buffer.h"
#include "arrow/extension_type.h"
-#include "arrow/io/caching.h"
+#include "arrow/io/caching.h"
#include "arrow/io/interfaces.h"
#include "arrow/io/memory.h"
#include "arrow/ipc/message.h"
@@ -47,14 +47,14 @@
#include "arrow/util/bitmap_ops.h"
#include "arrow/util/checked_cast.h"
#include "arrow/util/compression.h"
-#include "arrow/util/endian.h"
+#include "arrow/util/endian.h"
#include "arrow/util/key_value_metadata.h"
#include "arrow/util/logging.h"
#include "arrow/util/parallel.h"
-#include "arrow/util/string.h"
-#include "arrow/util/thread_pool.h"
+#include "arrow/util/string.h"
+#include "arrow/util/thread_pool.h"
#include "arrow/util/ubsan.h"
-#include "arrow/util/vector.h"
+#include "arrow/util/vector.h"
#include "arrow/visitor_inline.h"
#include "generated/File_generated.h" // IWYU pragma: export
@@ -112,30 +112,30 @@ Status InvalidMessageType(MessageType expected, MessageType actual) {
// ----------------------------------------------------------------------
// Record batch read path
-/// \brief Structure to keep common arguments to be passed
-struct IpcReadContext {
- IpcReadContext(DictionaryMemo* memo, const IpcReadOptions& option, bool swap,
- MetadataVersion version = MetadataVersion::V5,
- Compression::type kind = Compression::UNCOMPRESSED)
- : dictionary_memo(memo),
- options(option),
- metadata_version(version),
- compression(kind),
- swap_endian(swap) {}
-
- DictionaryMemo* dictionary_memo;
-
- const IpcReadOptions& options;
-
- MetadataVersion metadata_version;
-
- Compression::type compression;
-
- /// \brief LoadRecordBatch() or LoadRecordBatchSubset() swaps endianness of elements
- /// if this flag is true
- const bool swap_endian;
-};
-
+/// \brief Structure to keep common arguments to be passed
+struct IpcReadContext {
+ IpcReadContext(DictionaryMemo* memo, const IpcReadOptions& option, bool swap,
+ MetadataVersion version = MetadataVersion::V5,
+ Compression::type kind = Compression::UNCOMPRESSED)
+ : dictionary_memo(memo),
+ options(option),
+ metadata_version(version),
+ compression(kind),
+ swap_endian(swap) {}
+
+ DictionaryMemo* dictionary_memo;
+
+ const IpcReadOptions& options;
+
+ MetadataVersion metadata_version;
+
+ Compression::type compression;
+
+ /// \brief LoadRecordBatch() or LoadRecordBatchSubset() swaps endianness of elements
+ /// if this flag is true
+ const bool swap_endian;
+};
+
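As a reading aid: the call sites below construct this context in one line, e.g. with no endian swap (as in the synchronous ReadRecordBatch path):

IpcReadContext context(&dictionary_memo, options, /*swap=*/false);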
/// The field_index and buffer_index are incremented based on how much of the
/// batch is "consumed" (through nested data reconstruction, for example)
class ArrayLoader {
@@ -467,9 +467,9 @@ Status DecompressBuffers(Compression::type compression, const IpcReadOptions& op
Result<std::shared_ptr<RecordBatch>> LoadRecordBatchSubset(
const flatbuf::RecordBatch* metadata, const std::shared_ptr<Schema>& schema,
- const std::vector<bool>* inclusion_mask, const IpcReadContext& context,
- io::RandomAccessFile* file) {
- ArrayLoader loader(metadata, context.metadata_version, context.options, file);
+ const std::vector<bool>* inclusion_mask, const IpcReadContext& context,
+ io::RandomAccessFile* file) {
+ ArrayLoader loader(metadata, context.metadata_version, context.options, file);
ArrayDataVector columns(schema->num_fields());
ArrayDataVector filtered_columns;
@@ -499,8 +499,8 @@ Result<std::shared_ptr<RecordBatch>> LoadRecordBatchSubset(
// Dictionary resolution needs to happen on the unfiltered columns,
// because fields are mapped structurally (by path in the original schema).
- RETURN_NOT_OK(ResolveDictionaries(columns, *context.dictionary_memo,
- context.options.memory_pool));
+ RETURN_NOT_OK(ResolveDictionaries(columns, *context.dictionary_memo,
+ context.options.memory_pool));
if (inclusion_mask) {
filtered_schema = ::arrow::schema(std::move(filtered_fields), schema->metadata());
@@ -509,30 +509,30 @@ Result<std::shared_ptr<RecordBatch>> LoadRecordBatchSubset(
filtered_schema = schema;
filtered_columns = std::move(columns);
}
- if (context.compression != Compression::UNCOMPRESSED) {
- RETURN_NOT_OK(
- DecompressBuffers(context.compression, context.options, &filtered_columns));
+ if (context.compression != Compression::UNCOMPRESSED) {
+ RETURN_NOT_OK(
+ DecompressBuffers(context.compression, context.options, &filtered_columns));
}
- // swap endian in a set of ArrayData if necessary (swap_endian == true)
- if (context.swap_endian) {
- for (int i = 0; i < static_cast<int>(filtered_columns.size()); ++i) {
- ARROW_ASSIGN_OR_RAISE(filtered_columns[i],
- arrow::internal::SwapEndianArrayData(filtered_columns[i]));
- }
- }
- return RecordBatch::Make(std::move(filtered_schema), metadata->length(),
+ // swap endian in a set of ArrayData if necessary (swap_endian == true)
+ if (context.swap_endian) {
+ for (int i = 0; i < static_cast<int>(filtered_columns.size()); ++i) {
+ ARROW_ASSIGN_OR_RAISE(filtered_columns[i],
+ arrow::internal::SwapEndianArrayData(filtered_columns[i]));
+ }
+ }
+ return RecordBatch::Make(std::move(filtered_schema), metadata->length(),
std::move(filtered_columns));
}
Result<std::shared_ptr<RecordBatch>> LoadRecordBatch(
const flatbuf::RecordBatch* metadata, const std::shared_ptr<Schema>& schema,
- const std::vector<bool>& inclusion_mask, const IpcReadContext& context,
- io::RandomAccessFile* file) {
+ const std::vector<bool>& inclusion_mask, const IpcReadContext& context,
+ io::RandomAccessFile* file) {
if (inclusion_mask.size() > 0) {
- return LoadRecordBatchSubset(metadata, schema, &inclusion_mask, context, file);
+ return LoadRecordBatchSubset(metadata, schema, &inclusion_mask, context, file);
} else {
- return LoadRecordBatchSubset(metadata, schema, /*inclusion_mask=*/nullptr, context, file);
+ return LoadRecordBatchSubset(metadata, schema, /*inclusion_mask=*/nullptr, context, file);
}
}
@@ -569,9 +569,9 @@ Status GetCompressionExperimental(const flatbuf::Message* message,
RETURN_NOT_OK(internal::GetKeyValueMetadata(message->custom_metadata(), &metadata));
int index = metadata->FindKey("ARROW:experimental_compression");
if (index != -1) {
- // Arrow 0.17 stored the string in upper case; internal utils now require lower case
- auto name = arrow::internal::AsciiToLower(metadata->value(index));
- ARROW_ASSIGN_OR_RAISE(*out, util::Codec::GetCompressionType(name));
+ // Arrow 0.17 stored the string in upper case; internal utils now require lower case
+ auto name = arrow::internal::AsciiToLower(metadata->value(index));
+ ARROW_ASSIGN_OR_RAISE(*out, util::Codec::GetCompressionType(name));
}
return internal::CheckCompressionSupported(*out);
}
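The lower-casing above matters because the codec registry matches lower-case names only; a one-line sketch of the lookup:

// "zstd" succeeds; the upper-case "ZSTD" written by Arrow 0.17 would not.
ARROW_ASSIGN_OR_RAISE(arrow::Compression::type codec,
                      arrow::util::Codec::GetCompressionType("zstd"));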
@@ -610,8 +610,8 @@ Result<std::shared_ptr<RecordBatch>> ReadRecordBatch(
Result<std::shared_ptr<RecordBatch>> ReadRecordBatchInternal(
const Buffer& metadata, const std::shared_ptr<Schema>& schema,
- const std::vector<bool>& inclusion_mask, IpcReadContext& context,
- io::RandomAccessFile* file) {
+ const std::vector<bool>& inclusion_mask, IpcReadContext& context,
+ io::RandomAccessFile* file) {
const flatbuf::Message* message = nullptr;
RETURN_NOT_OK(internal::VerifyMessage(metadata.data(), metadata.size(), &message));
auto batch = message->header_as_RecordBatch();
@@ -622,15 +622,15 @@ Result<std::shared_ptr<RecordBatch>> ReadRecordBatchInternal(
Compression::type compression;
RETURN_NOT_OK(GetCompression(batch, &compression));
- if (context.compression == Compression::UNCOMPRESSED &&
+ if (context.compression == Compression::UNCOMPRESSED &&
message->version() == flatbuf::MetadataVersion::V4) {
// Possibly obtain codec information from experimental serialization format
// in 0.17.x
RETURN_NOT_OK(GetCompressionExperimental(message, &compression));
}
- context.compression = compression;
- context.metadata_version = internal::GetMetadataVersion(message->version());
- return LoadRecordBatch(batch, schema, inclusion_mask, context, file);
+ context.compression = compression;
+ context.metadata_version = internal::GetMetadataVersion(message->version());
+ return LoadRecordBatch(batch, schema, inclusion_mask, context, file);
}
// If we are selecting only certain fields, populate an inclusion mask for fast lookups.
@@ -663,8 +663,8 @@ Status GetInclusionMaskAndOutSchema(const std::shared_ptr<Schema>& full_schema,
included_fields.push_back(full_schema->field(i));
}
- *out_schema = schema(std::move(included_fields), full_schema->endianness(),
- full_schema->metadata());
+ *out_schema = schema(std::move(included_fields), full_schema->endianness(),
+ full_schema->metadata());
return Status::OK();
}
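A sketch of the mask this helper produces, with full_schema and included_fields standing in for its arguments; the real code validates each index before use:

std::vector<bool> mask(full_schema->num_fields(), false);
for (int index : included_fields) {
  mask[index] = true;  // field survives into the out-schema
}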
@@ -672,32 +672,32 @@ Status UnpackSchemaMessage(const void* opaque_schema, const IpcReadOptions& opti
DictionaryMemo* dictionary_memo,
std::shared_ptr<Schema>* schema,
std::shared_ptr<Schema>* out_schema,
- std::vector<bool>* field_inclusion_mask, bool* swap_endian) {
+ std::vector<bool>* field_inclusion_mask, bool* swap_endian) {
RETURN_NOT_OK(internal::GetSchema(opaque_schema, dictionary_memo, schema));
// If we are selecting only certain fields, populate the inclusion mask now
// for fast lookups
- RETURN_NOT_OK(GetInclusionMaskAndOutSchema(*schema, options.included_fields,
- field_inclusion_mask, out_schema));
- *swap_endian = options.ensure_native_endian && !out_schema->get()->is_native_endian();
- if (*swap_endian) {
- // create a new schema with native endianness before swapping endian in ArrayData
- *schema = schema->get()->WithEndianness(Endianness::Native);
- *out_schema = out_schema->get()->WithEndianness(Endianness::Native);
- }
- return Status::OK();
+ RETURN_NOT_OK(GetInclusionMaskAndOutSchema(*schema, options.included_fields,
+ field_inclusion_mask, out_schema));
+ *swap_endian = options.ensure_native_endian && !out_schema->get()->is_native_endian();
+ if (*swap_endian) {
+ // create a new schema with native endianness before swapping endian in ArrayData
+ *schema = schema->get()->WithEndianness(Endianness::Native);
+ *out_schema = out_schema->get()->WithEndianness(Endianness::Native);
+ }
+ return Status::OK();
}
Status UnpackSchemaMessage(const Message& message, const IpcReadOptions& options,
DictionaryMemo* dictionary_memo,
std::shared_ptr<Schema>* schema,
std::shared_ptr<Schema>* out_schema,
- std::vector<bool>* field_inclusion_mask, bool* swap_endian) {
+ std::vector<bool>* field_inclusion_mask, bool* swap_endian) {
CHECK_MESSAGE_TYPE(MessageType::SCHEMA, message.type());
CHECK_HAS_NO_BODY(message);
return UnpackSchemaMessage(message.header(), options, dictionary_memo, schema,
- out_schema, field_inclusion_mask, swap_endian);
+ out_schema, field_inclusion_mask, swap_endian);
}
Result<std::shared_ptr<RecordBatch>> ReadRecordBatch(
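The swap decision above reduces to a single predicate; restated with out_schema as the selected-fields schema:

bool swap = options.ensure_native_endian && !out_schema->is_native_endian();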
@@ -707,14 +707,14 @@ Result<std::shared_ptr<RecordBatch>> ReadRecordBatch(
std::shared_ptr<Schema> out_schema;
// Empty means do not use
std::vector<bool> inclusion_mask;
- IpcReadContext context(const_cast<DictionaryMemo*>(dictionary_memo), options, false);
- RETURN_NOT_OK(GetInclusionMaskAndOutSchema(schema, context.options.included_fields,
+ IpcReadContext context(const_cast<DictionaryMemo*>(dictionary_memo), options, false);
+ RETURN_NOT_OK(GetInclusionMaskAndOutSchema(schema, context.options.included_fields,
&inclusion_mask, &out_schema));
- return ReadRecordBatchInternal(metadata, schema, inclusion_mask, context, file);
+ return ReadRecordBatchInternal(metadata, schema, inclusion_mask, context, file);
}
-Status ReadDictionary(const Buffer& metadata, const IpcReadContext& context,
- DictionaryKind* kind, io::RandomAccessFile* file) {
+Status ReadDictionary(const Buffer& metadata, const IpcReadContext& context,
+ DictionaryKind* kind, io::RandomAccessFile* file) {
const flatbuf::Message* message = nullptr;
RETURN_NOT_OK(internal::VerifyMessage(metadata.data(), metadata.size(), &message));
const auto dictionary_batch = message->header_as_DictionaryBatch();
@@ -741,46 +741,46 @@ Status ReadDictionary(const Buffer& metadata, const IpcReadContext& context,
// Look up the dictionary value type, which must have been added to the
// DictionaryMemo already prior to invoking this function
- ARROW_ASSIGN_OR_RAISE(auto value_type, context.dictionary_memo->GetDictionaryType(id));
+ ARROW_ASSIGN_OR_RAISE(auto value_type, context.dictionary_memo->GetDictionaryType(id));
// Load the dictionary data from the dictionary batch
ArrayLoader loader(batch_meta, internal::GetMetadataVersion(message->version()),
- context.options, file);
- auto dict_data = std::make_shared<ArrayData>();
+ context.options, file);
+ auto dict_data = std::make_shared<ArrayData>();
const Field dummy_field("", value_type);
RETURN_NOT_OK(loader.Load(&dummy_field, dict_data.get()));
if (compression != Compression::UNCOMPRESSED) {
ArrayDataVector dict_fields{dict_data};
- RETURN_NOT_OK(DecompressBuffers(compression, context.options, &dict_fields));
+ RETURN_NOT_OK(DecompressBuffers(compression, context.options, &dict_fields));
+ }
+
+ // swap endian in dict_data if necessary (swap_endian == true)
+ if (context.swap_endian) {
+ ARROW_ASSIGN_OR_RAISE(dict_data, ::arrow::internal::SwapEndianArrayData(dict_data));
}
- // swap endian in dict_data if necessary (swap_endian == true)
- if (context.swap_endian) {
- ARROW_ASSIGN_OR_RAISE(dict_data, ::arrow::internal::SwapEndianArrayData(dict_data));
- }
-
if (dictionary_batch->isDelta()) {
if (kind != nullptr) {
*kind = DictionaryKind::Delta;
}
- return context.dictionary_memo->AddDictionaryDelta(id, dict_data);
+ return context.dictionary_memo->AddDictionaryDelta(id, dict_data);
}
ARROW_ASSIGN_OR_RAISE(bool inserted,
- context.dictionary_memo->AddOrReplaceDictionary(id, dict_data));
+ context.dictionary_memo->AddOrReplaceDictionary(id, dict_data));
if (kind != nullptr) {
*kind = inserted ? DictionaryKind::New : DictionaryKind::Replacement;
}
return Status::OK();
}
-Status ReadDictionary(const Message& message, const IpcReadContext& context,
- DictionaryKind* kind) {
+Status ReadDictionary(const Message& message, const IpcReadContext& context,
+ DictionaryKind* kind) {
// Only invoke this method if we already know we have a dictionary message
DCHECK_EQ(message.type(), MessageType::DICTIONARY_BATCH);
CHECK_HAS_BODY(message);
ARROW_ASSIGN_OR_RAISE(auto reader, Buffer::GetReader(message.body()));
- return ReadDictionary(*message.metadata(), context, kind, reader.get());
+ return ReadDictionary(*message.metadata(), context, kind, reader.get());
}
// ----------------------------------------------------------------------
@@ -799,10 +799,10 @@ class RecordBatchStreamReaderImpl : public RecordBatchStreamReader {
return Status::Invalid("Tried reading schema message, was null or length 0");
}
- RETURN_NOT_OK(UnpackSchemaMessage(*message, options, &dictionary_memo_, &schema_,
- &out_schema_, &field_inclusion_mask_,
- &swap_endian_));
- return Status::OK();
+ RETURN_NOT_OK(UnpackSchemaMessage(*message, options, &dictionary_memo_, &schema_,
+ &out_schema_, &field_inclusion_mask_,
+ &swap_endian_));
+ return Status::OK();
}
Status ReadNext(std::shared_ptr<RecordBatch>* batch) override {
@@ -834,9 +834,9 @@ class RecordBatchStreamReaderImpl : public RecordBatchStreamReader {
CHECK_HAS_BODY(*message);
ARROW_ASSIGN_OR_RAISE(auto reader, Buffer::GetReader(message->body()));
- IpcReadContext context(&dictionary_memo_, options_, swap_endian_);
+ IpcReadContext context(&dictionary_memo_, options_, swap_endian_);
return ReadRecordBatchInternal(*message->metadata(), schema_, field_inclusion_mask_,
- context, reader.get())
+ context, reader.get())
.Value(batch);
}
@@ -866,8 +866,8 @@ class RecordBatchStreamReaderImpl : public RecordBatchStreamReader {
// Read dictionary from dictionary batch
Status ReadDictionary(const Message& message) {
DictionaryKind kind;
- IpcReadContext context(&dictionary_memo_, options_, swap_endian_);
- RETURN_NOT_OK(::arrow::ipc::ReadDictionary(message, context, &kind));
+ IpcReadContext context(&dictionary_memo_, options_, swap_endian_);
+ RETURN_NOT_OK(::arrow::ipc::ReadDictionary(message, context, &kind));
switch (kind) {
case DictionaryKind::New:
break;
@@ -888,7 +888,7 @@ class RecordBatchStreamReaderImpl : public RecordBatchStreamReader {
// TODO(wesm): In future, we may want to reconcile the ids in the stream with
// those found in the schema
- const auto num_dicts = dictionary_memo_.fields().num_dicts();
+ const auto num_dicts = dictionary_memo_.fields().num_dicts();
for (int i = 0; i < num_dicts; ++i) {
ARROW_ASSIGN_OR_RAISE(message, ReadNextMessage());
if (!message) {
@@ -933,8 +933,8 @@ class RecordBatchStreamReaderImpl : public RecordBatchStreamReader {
DictionaryMemo dictionary_memo_;
std::shared_ptr<Schema> schema_, out_schema_;
-
- bool swap_endian_;
+
+ bool swap_endian_;
};
// ----------------------------------------------------------------------
@@ -961,94 +961,94 @@ Result<std::shared_ptr<RecordBatchStreamReader>> RecordBatchStreamReader::Open(
// ----------------------------------------------------------------------
// Reader implementation
-// Common functions used in both the random-access file reader and the
-// asynchronous generator
+// Common functions used in both the random-access file reader and the
+// asynchronous generator
static inline FileBlock FileBlockFromFlatbuffer(const flatbuf::Block* block) {
return FileBlock{block->offset(), block->metaDataLength(), block->bodyLength()};
}
-static Result<std::unique_ptr<Message>> ReadMessageFromBlock(const FileBlock& block,
- io::RandomAccessFile* file) {
- if (!BitUtil::IsMultipleOf8(block.offset) ||
- !BitUtil::IsMultipleOf8(block.metadata_length) ||
- !BitUtil::IsMultipleOf8(block.body_length)) {
- return Status::Invalid("Unaligned block in IPC file");
- }
-
- // TODO(wesm): this breaks integration tests, see ARROW-3256
- // DCHECK_EQ((*out)->body_length(), block.body_length);
-
- ARROW_ASSIGN_OR_RAISE(auto message,
- ReadMessage(block.offset, block.metadata_length, file));
- return std::move(message);
-}
-
-static Future<std::shared_ptr<Message>> ReadMessageFromBlockAsync(
- const FileBlock& block, io::RandomAccessFile* file, const io::IOContext& io_context) {
- if (!BitUtil::IsMultipleOf8(block.offset) ||
- !BitUtil::IsMultipleOf8(block.metadata_length) ||
- !BitUtil::IsMultipleOf8(block.body_length)) {
- return Status::Invalid("Unaligned block in IPC file");
- }
-
- // TODO(wesm): this breaks integration tests, see ARROW-3256
- // DCHECK_EQ((*out)->body_length(), block.body_length);
-
- return ReadMessageAsync(block.offset, block.metadata_length, block.body_length, file,
- io_context);
-}
-
-static Status ReadOneDictionary(Message* message, const IpcReadContext& context) {
- CHECK_HAS_BODY(*message);
- ARROW_ASSIGN_OR_RAISE(auto reader, Buffer::GetReader(message->body()));
- DictionaryKind kind;
- RETURN_NOT_OK(ReadDictionary(*message->metadata(), context, &kind, reader.get()));
- if (kind != DictionaryKind::New) {
- return Status::Invalid(
- "Unsupported dictionary replacement or "
- "dictionary delta in IPC file");
- }
- return Status::OK();
-}
-
-class RecordBatchFileReaderImpl;
-
-/// A generator of record batches.
-///
-/// All batches are yielded in order.
-class ARROW_EXPORT IpcFileRecordBatchGenerator {
- public:
- using Item = std::shared_ptr<RecordBatch>;
-
- explicit IpcFileRecordBatchGenerator(
- std::shared_ptr<RecordBatchFileReaderImpl> state,
- std::shared_ptr<io::internal::ReadRangeCache> cached_source,
- const io::IOContext& io_context, arrow::internal::Executor* executor)
- : state_(std::move(state)),
- cached_source_(std::move(cached_source)),
- io_context_(io_context),
- executor_(executor),
- index_(0) {}
-
- Future<Item> operator()();
- Future<std::shared_ptr<Message>> ReadBlock(const FileBlock& block);
-
- static Status ReadDictionaries(
- RecordBatchFileReaderImpl* state,
- std::vector<std::shared_ptr<Message>> dictionary_messages);
- static Result<std::shared_ptr<RecordBatch>> ReadRecordBatch(
- RecordBatchFileReaderImpl* state, Message* message);
-
- private:
- std::shared_ptr<RecordBatchFileReaderImpl> state_;
- std::shared_ptr<io::internal::ReadRangeCache> cached_source_;
- io::IOContext io_context_;
- arrow::internal::Executor* executor_;
- int index_;
- // Odd Future type, but this lets us use All() easily
- Future<> read_dictionaries_;
-};
-
+static Result<std::unique_ptr<Message>> ReadMessageFromBlock(const FileBlock& block,
+ io::RandomAccessFile* file) {
+ if (!BitUtil::IsMultipleOf8(block.offset) ||
+ !BitUtil::IsMultipleOf8(block.metadata_length) ||
+ !BitUtil::IsMultipleOf8(block.body_length)) {
+ return Status::Invalid("Unaligned block in IPC file");
+ }
+
+ // TODO(wesm): this breaks integration tests, see ARROW-3256
+ // DCHECK_EQ((*out)->body_length(), block.body_length);
+
+ ARROW_ASSIGN_OR_RAISE(auto message,
+ ReadMessage(block.offset, block.metadata_length, file));
+ return std::move(message);
+}
+
+static Future<std::shared_ptr<Message>> ReadMessageFromBlockAsync(
+ const FileBlock& block, io::RandomAccessFile* file, const io::IOContext& io_context) {
+ if (!BitUtil::IsMultipleOf8(block.offset) ||
+ !BitUtil::IsMultipleOf8(block.metadata_length) ||
+ !BitUtil::IsMultipleOf8(block.body_length)) {
+ return Status::Invalid("Unaligned block in IPC file");
+ }
+
+ // TODO(wesm): this breaks integration tests, see ARROW-3256
+ // DCHECK_EQ((*out)->body_length(), block.body_length);
+
+ return ReadMessageAsync(block.offset, block.metadata_length, block.body_length, file,
+ io_context);
+}
+
+static Status ReadOneDictionary(Message* message, const IpcReadContext& context) {
+ CHECK_HAS_BODY(*message);
+ ARROW_ASSIGN_OR_RAISE(auto reader, Buffer::GetReader(message->body()));
+ DictionaryKind kind;
+ RETURN_NOT_OK(ReadDictionary(*message->metadata(), context, &kind, reader.get()));
+ if (kind != DictionaryKind::New) {
+ return Status::Invalid(
+ "Unsupported dictionary replacement or "
+ "dictionary delta in IPC file");
+ }
+ return Status::OK();
+}
+
+class RecordBatchFileReaderImpl;
+
+/// A generator of record batches.
+///
+/// All batches are yielded in order.
+class ARROW_EXPORT IpcFileRecordBatchGenerator {
+ public:
+ using Item = std::shared_ptr<RecordBatch>;
+
+ explicit IpcFileRecordBatchGenerator(
+ std::shared_ptr<RecordBatchFileReaderImpl> state,
+ std::shared_ptr<io::internal::ReadRangeCache> cached_source,
+ const io::IOContext& io_context, arrow::internal::Executor* executor)
+ : state_(std::move(state)),
+ cached_source_(std::move(cached_source)),
+ io_context_(io_context),
+ executor_(executor),
+ index_(0) {}
+
+ Future<Item> operator()();
+ Future<std::shared_ptr<Message>> ReadBlock(const FileBlock& block);
+
+ static Status ReadDictionaries(
+ RecordBatchFileReaderImpl* state,
+ std::vector<std::shared_ptr<Message>> dictionary_messages);
+ static Result<std::shared_ptr<RecordBatch>> ReadRecordBatch(
+ RecordBatchFileReaderImpl* state, Message* message);
+
+ private:
+ std::shared_ptr<RecordBatchFileReaderImpl> state_;
+ std::shared_ptr<io::internal::ReadRangeCache> cached_source_;
+ io::IOContext io_context_;
+ arrow::internal::Executor* executor_;
+ int index_;
+ // Odd Future type, but this lets us use All() easily
+ Future<> read_dictionaries_;
+};
+
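Both block readers above reject unaligned blocks; the precondition is simply 8-byte alignment of all three block fields:

bool aligned = arrow::BitUtil::IsMultipleOf8(block.offset) &&
               arrow::BitUtil::IsMultipleOf8(block.metadata_length) &&
               arrow::BitUtil::IsMultipleOf8(block.body_length);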
class RecordBatchFileReaderImpl : public RecordBatchFileReader {
public:
RecordBatchFileReaderImpl() : file_(NULLPTR), footer_offset_(0), footer_(NULLPTR) {}
@@ -1074,33 +1074,33 @@ class RecordBatchFileReaderImpl : public RecordBatchFileReader {
CHECK_HAS_BODY(*message);
ARROW_ASSIGN_OR_RAISE(auto reader, Buffer::GetReader(message->body()));
- IpcReadContext context(&dictionary_memo_, options_, swap_endian_);
- ARROW_ASSIGN_OR_RAISE(auto batch, ReadRecordBatchInternal(
- *message->metadata(), schema_,
- field_inclusion_mask_, context, reader.get()));
+ IpcReadContext context(&dictionary_memo_, options_, swap_endian_);
+ ARROW_ASSIGN_OR_RAISE(auto batch, ReadRecordBatchInternal(
+ *message->metadata(), schema_,
+ field_inclusion_mask_, context, reader.get()));
++stats_.num_record_batches;
return batch;
}
- Result<int64_t> CountRows() override {
- int64_t total = 0;
- for (int i = 0; i < num_record_batches(); i++) {
- ARROW_ASSIGN_OR_RAISE(auto outer_message,
- ReadMessageFromBlock(GetRecordBatchBlock(i)));
- auto metadata = outer_message->metadata();
- const flatbuf::Message* message = nullptr;
- RETURN_NOT_OK(
- internal::VerifyMessage(metadata->data(), metadata->size(), &message));
- auto batch = message->header_as_RecordBatch();
- if (batch == nullptr) {
- return Status::IOError(
- "Header-type of flatbuffer-encoded Message is not RecordBatch.");
- }
- total += batch->length();
- }
- return total;
- }
-
+ Result<int64_t> CountRows() override {
+ int64_t total = 0;
+ for (int i = 0; i < num_record_batches(); i++) {
+ ARROW_ASSIGN_OR_RAISE(auto outer_message,
+ ReadMessageFromBlock(GetRecordBatchBlock(i)));
+ auto metadata = outer_message->metadata();
+ const flatbuf::Message* message = nullptr;
+ RETURN_NOT_OK(
+ internal::VerifyMessage(metadata->data(), metadata->size(), &message));
+ auto batch = message->header_as_RecordBatch();
+ if (batch == nullptr) {
+ return Status::IOError(
+ "Header-type of flatbuffer-encoded Message is not RecordBatch.");
+ }
+ total += batch->length();
+ }
+ return total;
+ }
+
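CountRows decodes only record-batch headers, never buffer data, so it is cheap relative to reading the batches; a caller-side sketch with an assumed open RandomAccessFile named file:

ARROW_ASSIGN_OR_RAISE(auto reader, arrow::ipc::RecordBatchFileReader::Open(file));
ARROW_ASSIGN_OR_RAISE(int64_t total_rows, reader->CountRows());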
Status Open(const std::shared_ptr<io::RandomAccessFile>& file, int64_t footer_offset,
const IpcReadOptions& options) {
owned_file_ = file;
@@ -1116,75 +1116,75 @@ class RecordBatchFileReaderImpl : public RecordBatchFileReader {
// Get the schema and record any observed dictionaries
RETURN_NOT_OK(UnpackSchemaMessage(footer_->schema(), options, &dictionary_memo_,
- &schema_, &out_schema_, &field_inclusion_mask_,
- &swap_endian_));
+ &schema_, &out_schema_, &field_inclusion_mask_,
+ &swap_endian_));
++stats_.num_messages;
return Status::OK();
}
- Future<> OpenAsync(const std::shared_ptr<io::RandomAccessFile>& file,
- int64_t footer_offset, const IpcReadOptions& options) {
- owned_file_ = file;
- return OpenAsync(file.get(), footer_offset, options);
- }
-
- Future<> OpenAsync(io::RandomAccessFile* file, int64_t footer_offset,
- const IpcReadOptions& options) {
- file_ = file;
- options_ = options;
- footer_offset_ = footer_offset;
- auto cpu_executor = ::arrow::internal::GetCpuThreadPool();
- auto self = std::dynamic_pointer_cast<RecordBatchFileReaderImpl>(shared_from_this());
- return ReadFooterAsync(cpu_executor).Then([self, options]() -> Status {
- // Get the schema and record any observed dictionaries
- RETURN_NOT_OK(UnpackSchemaMessage(
- self->footer_->schema(), options, &self->dictionary_memo_, &self->schema_,
- &self->out_schema_, &self->field_inclusion_mask_, &self->swap_endian_));
- ++self->stats_.num_messages;
- return Status::OK();
- });
- }
-
+ Future<> OpenAsync(const std::shared_ptr<io::RandomAccessFile>& file,
+ int64_t footer_offset, const IpcReadOptions& options) {
+ owned_file_ = file;
+ return OpenAsync(file.get(), footer_offset, options);
+ }
+
+ Future<> OpenAsync(io::RandomAccessFile* file, int64_t footer_offset,
+ const IpcReadOptions& options) {
+ file_ = file;
+ options_ = options;
+ footer_offset_ = footer_offset;
+ auto cpu_executor = ::arrow::internal::GetCpuThreadPool();
+ auto self = std::dynamic_pointer_cast<RecordBatchFileReaderImpl>(shared_from_this());
+ return ReadFooterAsync(cpu_executor).Then([self, options]() -> Status {
+ // Get the schema and record any observed dictionaries
+ RETURN_NOT_OK(UnpackSchemaMessage(
+ self->footer_->schema(), options, &self->dictionary_memo_, &self->schema_,
+ &self->out_schema_, &self->field_inclusion_mask_, &self->swap_endian_));
+ ++self->stats_.num_messages;
+ return Status::OK();
+ });
+ }
+
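A hedged sketch of the asynchronous open path; Future<T>::result() blocks and is used here only to keep the example short:

auto reader_future = arrow::ipc::RecordBatchFileReader::OpenAsync(file, options);
ARROW_ASSIGN_OR_RAISE(auto reader, reader_future.result());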
std::shared_ptr<Schema> schema() const override { return out_schema_; }
std::shared_ptr<const KeyValueMetadata> metadata() const override { return metadata_; }
ReadStats stats() const override { return stats_; }
- Result<AsyncGenerator<std::shared_ptr<RecordBatch>>> GetRecordBatchGenerator(
- const bool coalesce, const io::IOContext& io_context,
- const io::CacheOptions cache_options,
- arrow::internal::Executor* executor) override {
- auto state = std::dynamic_pointer_cast<RecordBatchFileReaderImpl>(shared_from_this());
- std::shared_ptr<io::internal::ReadRangeCache> cached_source;
- if (coalesce) {
- if (!owned_file_) return Status::Invalid("Cannot coalesce without an owned file");
- cached_source = std::make_shared<io::internal::ReadRangeCache>(
- owned_file_, io_context, cache_options);
- auto num_dictionaries = this->num_dictionaries();
- auto num_record_batches = this->num_record_batches();
- std::vector<io::ReadRange> ranges(num_dictionaries + num_record_batches);
- for (int i = 0; i < num_dictionaries; i++) {
- auto block = FileBlockFromFlatbuffer(footer_->dictionaries()->Get(i));
- ranges[i].offset = block.offset;
- ranges[i].length = block.metadata_length + block.body_length;
- }
- for (int i = 0; i < num_record_batches; i++) {
- auto block = FileBlockFromFlatbuffer(footer_->recordBatches()->Get(i));
- ranges[num_dictionaries + i].offset = block.offset;
- ranges[num_dictionaries + i].length = block.metadata_length + block.body_length;
- }
- RETURN_NOT_OK(cached_source->Cache(std::move(ranges)));
- }
- return IpcFileRecordBatchGenerator(std::move(state), std::move(cached_source),
- io_context, executor);
- }
-
+ Result<AsyncGenerator<std::shared_ptr<RecordBatch>>> GetRecordBatchGenerator(
+ const bool coalesce, const io::IOContext& io_context,
+ const io::CacheOptions cache_options,
+ arrow::internal::Executor* executor) override {
+ auto state = std::dynamic_pointer_cast<RecordBatchFileReaderImpl>(shared_from_this());
+ std::shared_ptr<io::internal::ReadRangeCache> cached_source;
+ if (coalesce) {
+ if (!owned_file_) return Status::Invalid("Cannot coalesce without an owned file");
+ cached_source = std::make_shared<io::internal::ReadRangeCache>(
+ owned_file_, io_context, cache_options);
+ auto num_dictionaries = this->num_dictionaries();
+ auto num_record_batches = this->num_record_batches();
+ std::vector<io::ReadRange> ranges(num_dictionaries + num_record_batches);
+ for (int i = 0; i < num_dictionaries; i++) {
+ auto block = FileBlockFromFlatbuffer(footer_->dictionaries()->Get(i));
+ ranges[i].offset = block.offset;
+ ranges[i].length = block.metadata_length + block.body_length;
+ }
+ for (int i = 0; i < num_record_batches; i++) {
+ auto block = FileBlockFromFlatbuffer(footer_->recordBatches()->Get(i));
+ ranges[num_dictionaries + i].offset = block.offset;
+ ranges[num_dictionaries + i].length = block.metadata_length + block.body_length;
+ }
+ RETURN_NOT_OK(cached_source->Cache(std::move(ranges)));
+ }
+ return IpcFileRecordBatchGenerator(std::move(state), std::move(cached_source),
+ io_context, executor);
+ }
+
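Consuming the generator, sketched under the assumption that a null RecordBatch pointer (IterationTraits<Item>::End() below) marks the end of iteration:

ARROW_ASSIGN_OR_RAISE(auto generator,
                      reader->GetRecordBatchGenerator(/*coalesce=*/true));
ARROW_ASSIGN_OR_RAISE(auto batch, generator().result());  // blocking for brevity
if (batch == nullptr) { /* no more record batches */ }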
private:
- friend AsyncGenerator<std::shared_ptr<Message>> MakeMessageGenerator(
- std::shared_ptr<RecordBatchFileReaderImpl>, const io::IOContext&);
- friend class IpcFileRecordBatchGenerator;
-
+ friend AsyncGenerator<std::shared_ptr<Message>> MakeMessageGenerator(
+ std::shared_ptr<RecordBatchFileReaderImpl>, const io::IOContext&);
+ friend class IpcFileRecordBatchGenerator;
+
FileBlock GetRecordBatchBlock(int i) const {
return FileBlockFromFlatbuffer(footer_->recordBatches()->Get(i));
}
@@ -1194,28 +1194,28 @@ class RecordBatchFileReaderImpl : public RecordBatchFileReader {
}
Result<std::unique_ptr<Message>> ReadMessageFromBlock(const FileBlock& block) {
- ARROW_ASSIGN_OR_RAISE(auto message, arrow::ipc::ReadMessageFromBlock(block, file_));
+ ARROW_ASSIGN_OR_RAISE(auto message, arrow::ipc::ReadMessageFromBlock(block, file_));
++stats_.num_messages;
return std::move(message);
}
Status ReadDictionaries() {
// Read all the dictionaries
- IpcReadContext context(&dictionary_memo_, options_, swap_endian_);
+ IpcReadContext context(&dictionary_memo_, options_, swap_endian_);
for (int i = 0; i < num_dictionaries(); ++i) {
ARROW_ASSIGN_OR_RAISE(auto message, ReadMessageFromBlock(GetDictionaryBlock(i)));
- RETURN_NOT_OK(ReadOneDictionary(message.get(), context));
+ RETURN_NOT_OK(ReadOneDictionary(message.get(), context));
++stats_.num_dictionary_batches;
}
return Status::OK();
}
Status ReadFooter() {
- auto fut = ReadFooterAsync(/*executor=*/nullptr);
- return fut.status();
- }
-
- Future<> ReadFooterAsync(arrow::internal::Executor* executor) {
+ auto fut = ReadFooterAsync(/*executor=*/nullptr);
+ return fut.status();
+ }
+
+ Future<> ReadFooterAsync(arrow::internal::Executor* executor) {
const int32_t magic_size = static_cast<int>(strlen(kArrowMagicBytes));
if (footer_offset_ <= magic_size * 2 + 4) {
@@ -1223,53 +1223,53 @@ class RecordBatchFileReaderImpl : public RecordBatchFileReader {
}
int file_end_size = static_cast<int>(magic_size + sizeof(int32_t));
- auto self = std::dynamic_pointer_cast<RecordBatchFileReaderImpl>(shared_from_this());
- auto read_magic = file_->ReadAsync(footer_offset_ - file_end_size, file_end_size);
- if (executor) read_magic = executor->Transfer(std::move(read_magic));
- return read_magic
- .Then([=](const std::shared_ptr<Buffer>& buffer)
- -> Future<std::shared_ptr<Buffer>> {
- const int64_t expected_footer_size = magic_size + sizeof(int32_t);
- if (buffer->size() < expected_footer_size) {
- return Status::Invalid("Unable to read ", expected_footer_size,
- "from end of file");
- }
-
- if (memcmp(buffer->data() + sizeof(int32_t), kArrowMagicBytes, magic_size)) {
- return Status::Invalid("Not an Arrow file");
- }
-
- int32_t footer_length = BitUtil::FromLittleEndian(
- *reinterpret_cast<const int32_t*>(buffer->data()));
-
- if (footer_length <= 0 ||
- footer_length > self->footer_offset_ - magic_size * 2 - 4) {
- return Status::Invalid("File is smaller than indicated metadata size");
- }
-
- // Now read the footer
- auto read_footer = self->file_->ReadAsync(
- self->footer_offset_ - footer_length - file_end_size, footer_length);
- if (executor) read_footer = executor->Transfer(std::move(read_footer));
- return read_footer;
- })
- .Then([=](const std::shared_ptr<Buffer>& buffer) -> Status {
- self->footer_buffer_ = buffer;
- const auto data = self->footer_buffer_->data();
- const auto size = self->footer_buffer_->size();
- if (!internal::VerifyFlatbuffers<flatbuf::Footer>(data, size)) {
- return Status::IOError("Verification of flatbuffer-encoded Footer failed.");
- }
- self->footer_ = flatbuf::GetFooter(data);
-
- auto fb_metadata = self->footer_->custom_metadata();
- if (fb_metadata != nullptr) {
- std::shared_ptr<KeyValueMetadata> md;
- RETURN_NOT_OK(internal::GetKeyValueMetadata(fb_metadata, &md));
- self->metadata_ = std::move(md); // const-ify
- }
- return Status::OK();
- });
+ auto self = std::dynamic_pointer_cast<RecordBatchFileReaderImpl>(shared_from_this());
+ auto read_magic = file_->ReadAsync(footer_offset_ - file_end_size, file_end_size);
+ if (executor) read_magic = executor->Transfer(std::move(read_magic));
+ return read_magic
+ .Then([=](const std::shared_ptr<Buffer>& buffer)
+ -> Future<std::shared_ptr<Buffer>> {
+ const int64_t expected_footer_size = magic_size + sizeof(int32_t);
+ if (buffer->size() < expected_footer_size) {
+ return Status::Invalid("Unable to read ", expected_footer_size,
+ "from end of file");
+ }
+
+ if (memcmp(buffer->data() + sizeof(int32_t), kArrowMagicBytes, magic_size)) {
+ return Status::Invalid("Not an Arrow file");
+ }
+
+ int32_t footer_length = BitUtil::FromLittleEndian(
+ *reinterpret_cast<const int32_t*>(buffer->data()));
+
+ if (footer_length <= 0 ||
+ footer_length > self->footer_offset_ - magic_size * 2 - 4) {
+ return Status::Invalid("File is smaller than indicated metadata size");
+ }
+
+ // Now read the footer
+ auto read_footer = self->file_->ReadAsync(
+ self->footer_offset_ - footer_length - file_end_size, footer_length);
+ if (executor) read_footer = executor->Transfer(std::move(read_footer));
+ return read_footer;
+ })
+ .Then([=](const std::shared_ptr<Buffer>& buffer) -> Status {
+ self->footer_buffer_ = buffer;
+ const auto data = self->footer_buffer_->data();
+ const auto size = self->footer_buffer_->size();
+ if (!internal::VerifyFlatbuffers<flatbuf::Footer>(data, size)) {
+ return Status::IOError("Verification of flatbuffer-encoded Footer failed.");
+ }
+ self->footer_ = flatbuf::GetFooter(data);
+
+ auto fb_metadata = self->footer_->custom_metadata();
+ if (fb_metadata != nullptr) {
+ std::shared_ptr<KeyValueMetadata> md;
+ RETURN_NOT_OK(internal::GetKeyValueMetadata(fb_metadata, &md));
+ self->metadata_ = std::move(md); // const-ify
+ }
+ return Status::OK();
+ });
}
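The tail layout this footer logic assumes is: <footer flatbuffer> <little-endian int32 footer length> <6-byte magic "ARROW1">, so the footer's start offset is computed as:

const int64_t kFileEndSize = 6 + sizeof(int32_t);  // strlen(kArrowMagicBytes) + length field
int64_t footer_start = footer_offset - footer_length - kFileEndSize;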
int num_dictionaries() const {
@@ -1300,8 +1300,8 @@ class RecordBatchFileReaderImpl : public RecordBatchFileReader {
std::shared_ptr<Schema> out_schema_;
ReadStats stats_;
-
- bool swap_endian_;
+
+ bool swap_endian_;
};
Result<std::shared_ptr<RecordBatchFileReader>> RecordBatchFileReader::Open(
@@ -1331,109 +1331,109 @@ Result<std::shared_ptr<RecordBatchFileReader>> RecordBatchFileReader::Open(
return result;
}
-Future<std::shared_ptr<RecordBatchFileReader>> RecordBatchFileReader::OpenAsync(
- const std::shared_ptr<io::RandomAccessFile>& file, const IpcReadOptions& options) {
- ARROW_ASSIGN_OR_RAISE(int64_t footer_offset, file->GetSize());
- return OpenAsync(std::move(file), footer_offset, options);
-}
-
-Future<std::shared_ptr<RecordBatchFileReader>> RecordBatchFileReader::OpenAsync(
- io::RandomAccessFile* file, const IpcReadOptions& options) {
- ARROW_ASSIGN_OR_RAISE(int64_t footer_offset, file->GetSize());
- return OpenAsync(file, footer_offset, options);
-}
-
-Future<std::shared_ptr<RecordBatchFileReader>> RecordBatchFileReader::OpenAsync(
- const std::shared_ptr<io::RandomAccessFile>& file, int64_t footer_offset,
- const IpcReadOptions& options) {
- auto result = std::make_shared<RecordBatchFileReaderImpl>();
- return result->OpenAsync(file, footer_offset, options)
- .Then([=]() -> Result<std::shared_ptr<RecordBatchFileReader>> { return result; });
-}
-
-Future<std::shared_ptr<RecordBatchFileReader>> RecordBatchFileReader::OpenAsync(
- io::RandomAccessFile* file, int64_t footer_offset, const IpcReadOptions& options) {
- auto result = std::make_shared<RecordBatchFileReaderImpl>();
- return result->OpenAsync(file, footer_offset, options)
- .Then([=]() -> Result<std::shared_ptr<RecordBatchFileReader>> { return result; });
-}
-
-Future<IpcFileRecordBatchGenerator::Item> IpcFileRecordBatchGenerator::operator()() {
- auto state = state_;
- if (!read_dictionaries_.is_valid()) {
- std::vector<Future<std::shared_ptr<Message>>> messages(state->num_dictionaries());
- for (int i = 0; i < state->num_dictionaries(); i++) {
- auto block = FileBlockFromFlatbuffer(state->footer_->dictionaries()->Get(i));
- messages[i] = ReadBlock(block);
- }
- auto read_messages = All(std::move(messages));
- if (executor_) read_messages = executor_->Transfer(read_messages);
- read_dictionaries_ = read_messages.Then(
- [=](const std::vector<Result<std::shared_ptr<Message>>>& maybe_messages)
- -> Status {
- ARROW_ASSIGN_OR_RAISE(auto messages,
- arrow::internal::UnwrapOrRaise(maybe_messages));
- return ReadDictionaries(state.get(), std::move(messages));
- });
- }
- if (index_ >= state_->num_record_batches()) {
- return Future<Item>::MakeFinished(IterationTraits<Item>::End());
- }
- auto block = FileBlockFromFlatbuffer(state->footer_->recordBatches()->Get(index_++));
- auto read_message = ReadBlock(block);
- auto read_messages = read_dictionaries_.Then([read_message]() { return read_message; });
- // Force transfer. This may be wasteful in some cases, but ensures we get off the
- // I/O threads as soon as possible, and ensures we don't decode record batches
- // synchronously in the case that the message read has already finished.
- if (executor_) {
- auto executor = executor_;
- return read_messages.Then(
- [=](const std::shared_ptr<Message>& message) -> Future<Item> {
- return DeferNotOk(executor->Submit(
- [=]() { return ReadRecordBatch(state.get(), message.get()); }));
- });
- }
- return read_messages.Then([=](const std::shared_ptr<Message>& message) -> Result<Item> {
- return ReadRecordBatch(state.get(), message.get());
- });
-}
-
-Future<std::shared_ptr<Message>> IpcFileRecordBatchGenerator::ReadBlock(
- const FileBlock& block) {
- if (cached_source_) {
- auto cached_source = cached_source_;
- io::ReadRange range{block.offset, block.metadata_length + block.body_length};
- auto pool = state_->options_.memory_pool;
- return cached_source->WaitFor({range}).Then(
- [cached_source, pool, range]() -> Result<std::shared_ptr<Message>> {
- ARROW_ASSIGN_OR_RAISE(auto buffer, cached_source->Read(range));
- io::BufferReader stream(std::move(buffer));
- return ReadMessage(&stream, pool);
- });
- } else {
- return ReadMessageFromBlockAsync(block, state_->file_, io_context_);
- }
-}
-
-Status IpcFileRecordBatchGenerator::ReadDictionaries(
- RecordBatchFileReaderImpl* state,
- std::vector<std::shared_ptr<Message>> dictionary_messages) {
- IpcReadContext context(&state->dictionary_memo_, state->options_, state->swap_endian_);
- for (const auto& message : dictionary_messages) {
- RETURN_NOT_OK(ReadOneDictionary(message.get(), context));
- }
- return Status::OK();
-}
-
-Result<std::shared_ptr<RecordBatch>> IpcFileRecordBatchGenerator::ReadRecordBatch(
- RecordBatchFileReaderImpl* state, Message* message) {
- CHECK_HAS_BODY(*message);
- ARROW_ASSIGN_OR_RAISE(auto reader, Buffer::GetReader(message->body()));
- IpcReadContext context(&state->dictionary_memo_, state->options_, state->swap_endian_);
- return ReadRecordBatchInternal(*message->metadata(), state->schema_,
- state->field_inclusion_mask_, context, reader.get());
-}
-
+Future<std::shared_ptr<RecordBatchFileReader>> RecordBatchFileReader::OpenAsync(
+ const std::shared_ptr<io::RandomAccessFile>& file, const IpcReadOptions& options) {
+ ARROW_ASSIGN_OR_RAISE(int64_t footer_offset, file->GetSize());
+ return OpenAsync(std::move(file), footer_offset, options);
+}
+
+Future<std::shared_ptr<RecordBatchFileReader>> RecordBatchFileReader::OpenAsync(
+ io::RandomAccessFile* file, const IpcReadOptions& options) {
+ ARROW_ASSIGN_OR_RAISE(int64_t footer_offset, file->GetSize());
+ return OpenAsync(file, footer_offset, options);
+}
+
+Future<std::shared_ptr<RecordBatchFileReader>> RecordBatchFileReader::OpenAsync(
+ const std::shared_ptr<io::RandomAccessFile>& file, int64_t footer_offset,
+ const IpcReadOptions& options) {
+ auto result = std::make_shared<RecordBatchFileReaderImpl>();
+ return result->OpenAsync(file, footer_offset, options)
+ .Then([=]() -> Result<std::shared_ptr<RecordBatchFileReader>> { return result; });
+}
+
+Future<std::shared_ptr<RecordBatchFileReader>> RecordBatchFileReader::OpenAsync(
+ io::RandomAccessFile* file, int64_t footer_offset, const IpcReadOptions& options) {
+ auto result = std::make_shared<RecordBatchFileReaderImpl>();
+ return result->OpenAsync(file, footer_offset, options)
+ .Then([=]() -> Result<std::shared_ptr<RecordBatchFileReader>> { return result; });
+}
+
+Future<IpcFileRecordBatchGenerator::Item> IpcFileRecordBatchGenerator::operator()() {
+ auto state = state_;
+ if (!read_dictionaries_.is_valid()) {
+ std::vector<Future<std::shared_ptr<Message>>> messages(state->num_dictionaries());
+ for (int i = 0; i < state->num_dictionaries(); i++) {
+ auto block = FileBlockFromFlatbuffer(state->footer_->dictionaries()->Get(i));
+ messages[i] = ReadBlock(block);
+ }
+ auto read_messages = All(std::move(messages));
+ if (executor_) read_messages = executor_->Transfer(read_messages);
+ read_dictionaries_ = read_messages.Then(
+ [=](const std::vector<Result<std::shared_ptr<Message>>>& maybe_messages)
+ -> Status {
+ ARROW_ASSIGN_OR_RAISE(auto messages,
+ arrow::internal::UnwrapOrRaise(maybe_messages));
+ return ReadDictionaries(state.get(), std::move(messages));
+ });
+ }
+ if (index_ >= state_->num_record_batches()) {
+ return Future<Item>::MakeFinished(IterationTraits<Item>::End());
+ }
+ auto block = FileBlockFromFlatbuffer(state->footer_->recordBatches()->Get(index_++));
+ auto read_message = ReadBlock(block);
+ auto read_messages = read_dictionaries_.Then([read_message]() { return read_message; });
+ // Force transfer. This may be wasteful in some cases, but ensures we get off the
+ // I/O threads as soon as possible, and ensures we don't decode record batches
+ // synchronously in the case that the message read has already finished.
+ if (executor_) {
+ auto executor = executor_;
+ return read_messages.Then(
+ [=](const std::shared_ptr<Message>& message) -> Future<Item> {
+ return DeferNotOk(executor->Submit(
+ [=]() { return ReadRecordBatch(state.get(), message.get()); }));
+ });
+ }
+ return read_messages.Then([=](const std::shared_ptr<Message>& message) -> Result<Item> {
+ return ReadRecordBatch(state.get(), message.get());
+ });
+}
+
+Future<std::shared_ptr<Message>> IpcFileRecordBatchGenerator::ReadBlock(
+ const FileBlock& block) {
+ if (cached_source_) {
+ auto cached_source = cached_source_;
+ io::ReadRange range{block.offset, block.metadata_length + block.body_length};
+ auto pool = state_->options_.memory_pool;
+ return cached_source->WaitFor({range}).Then(
+ [cached_source, pool, range]() -> Result<std::shared_ptr<Message>> {
+ ARROW_ASSIGN_OR_RAISE(auto buffer, cached_source->Read(range));
+ io::BufferReader stream(std::move(buffer));
+ return ReadMessage(&stream, pool);
+ });
+ } else {
+ return ReadMessageFromBlockAsync(block, state_->file_, io_context_);
+ }
+}
+
+Status IpcFileRecordBatchGenerator::ReadDictionaries(
+ RecordBatchFileReaderImpl* state,
+ std::vector<std::shared_ptr<Message>> dictionary_messages) {
+ IpcReadContext context(&state->dictionary_memo_, state->options_, state->swap_endian_);
+ for (const auto& message : dictionary_messages) {
+ RETURN_NOT_OK(ReadOneDictionary(message.get(), context));
+ }
+ return Status::OK();
+}
+
+Result<std::shared_ptr<RecordBatch>> IpcFileRecordBatchGenerator::ReadRecordBatch(
+ RecordBatchFileReaderImpl* state, Message* message) {
+ CHECK_HAS_BODY(*message);
+ ARROW_ASSIGN_OR_RAISE(auto reader, Buffer::GetReader(message->body()));
+ IpcReadContext context(&state->dictionary_memo_, state->options_, state->swap_endian_);
+ return ReadRecordBatchInternal(*message->metadata(), state->schema_,
+ state->field_inclusion_mask_, context, reader.get());
+}
+
Status Listener::OnEOS() { return Status::OK(); }
Status Listener::OnSchemaDecoded(std::shared_ptr<Schema> schema) { return Status::OK(); }
@@ -1452,16 +1452,16 @@ class StreamDecoder::StreamDecoderImpl : public MessageDecoderListener {
};
public:
- explicit StreamDecoderImpl(std::shared_ptr<Listener> listener, IpcReadOptions options)
- : listener_(std::move(listener)),
- options_(std::move(options)),
+ explicit StreamDecoderImpl(std::shared_ptr<Listener> listener, IpcReadOptions options)
+ : listener_(std::move(listener)),
+ options_(std::move(options)),
state_(State::SCHEMA),
message_decoder_(std::shared_ptr<StreamDecoderImpl>(this, [](void*) {}),
options_.memory_pool),
- n_required_dictionaries_(0) {}
+ n_required_dictionaries_(0) {}
Status OnMessageDecoded(std::unique_ptr<Message> message) override {
- ++stats_.num_messages;
+ ++stats_.num_messages;
switch (state_) {
case State::SCHEMA:
ARROW_RETURN_NOT_OK(OnSchemaMessageDecoded(std::move(message)));
@@ -1495,13 +1495,13 @@ class StreamDecoder::StreamDecoderImpl : public MessageDecoderListener {
int64_t next_required_size() const { return message_decoder_.next_required_size(); }
- ReadStats stats() const { return stats_; }
-
+ ReadStats stats() const { return stats_; }
+
private:
Status OnSchemaMessageDecoded(std::unique_ptr<Message> message) {
RETURN_NOT_OK(UnpackSchemaMessage(*message, options_, &dictionary_memo_, &schema_,
- &out_schema_, &field_inclusion_mask_,
- &swap_endian_));
+ &out_schema_, &field_inclusion_mask_,
+ &swap_endian_));
n_required_dictionaries_ = dictionary_memo_.fields().num_fields();
if (n_required_dictionaries_ == 0) {
@@ -1529,54 +1529,54 @@ class StreamDecoder::StreamDecoderImpl : public MessageDecoderListener {
}
Status OnRecordBatchMessageDecoded(std::unique_ptr<Message> message) {
- IpcReadContext context(&dictionary_memo_, options_, swap_endian_);
+ IpcReadContext context(&dictionary_memo_, options_, swap_endian_);
if (message->type() == MessageType::DICTIONARY_BATCH) {
return ReadDictionary(*message);
} else {
CHECK_HAS_BODY(*message);
ARROW_ASSIGN_OR_RAISE(auto reader, Buffer::GetReader(message->body()));
- IpcReadContext context(&dictionary_memo_, options_, swap_endian_);
+ IpcReadContext context(&dictionary_memo_, options_, swap_endian_);
ARROW_ASSIGN_OR_RAISE(
auto batch,
ReadRecordBatchInternal(*message->metadata(), schema_, field_inclusion_mask_,
- context, reader.get()));
- ++stats_.num_record_batches;
+ context, reader.get()));
+ ++stats_.num_record_batches;
return listener_->OnRecordBatchDecoded(std::move(batch));
}
}
// Read dictionary from dictionary batch
Status ReadDictionary(const Message& message) {
- DictionaryKind kind;
- IpcReadContext context(&dictionary_memo_, options_, swap_endian_);
- RETURN_NOT_OK(::arrow::ipc::ReadDictionary(message, context, &kind));
- ++stats_.num_dictionary_batches;
- switch (kind) {
- case DictionaryKind::New:
- break;
- case DictionaryKind::Delta:
- ++stats_.num_dictionary_deltas;
- break;
- case DictionaryKind::Replacement:
- ++stats_.num_replaced_dictionaries;
- break;
- }
- return Status::OK();
+ DictionaryKind kind;
+ IpcReadContext context(&dictionary_memo_, options_, swap_endian_);
+ RETURN_NOT_OK(::arrow::ipc::ReadDictionary(message, context, &kind));
+ ++stats_.num_dictionary_batches;
+ switch (kind) {
+ case DictionaryKind::New:
+ break;
+ case DictionaryKind::Delta:
+ ++stats_.num_dictionary_deltas;
+ break;
+ case DictionaryKind::Replacement:
+ ++stats_.num_replaced_dictionaries;
+ break;
+ }
+ return Status::OK();
}
std::shared_ptr<Listener> listener_;
- const IpcReadOptions options_;
+ const IpcReadOptions options_;
State state_;
MessageDecoder message_decoder_;
std::vector<bool> field_inclusion_mask_;
int n_required_dictionaries_;
DictionaryMemo dictionary_memo_;
std::shared_ptr<Schema> schema_, out_schema_;
- ReadStats stats_;
- bool swap_endian_;
+ ReadStats stats_;
+ bool swap_endian_;
};
-StreamDecoder::StreamDecoder(std::shared_ptr<Listener> listener, IpcReadOptions options) {
+StreamDecoder::StreamDecoder(std::shared_ptr<Listener> listener, IpcReadOptions options) {
impl_.reset(new StreamDecoderImpl(std::move(listener), options));
}
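A push-style usage sketch; CollectingListener is a hypothetical subclass written for this example, while Listener::OnRecordBatchDecoded and StreamDecoder::Consume are the real extension points:

class CollectingListener : public arrow::ipc::Listener {
 public:
  arrow::Status OnRecordBatchDecoded(
      std::shared_ptr<arrow::RecordBatch> batch) override {
    batches.push_back(std::move(batch));
    return arrow::Status::OK();
  }
  std::vector<std::shared_ptr<arrow::RecordBatch>> batches;
};

arrow::ipc::StreamDecoder decoder(std::make_shared<CollectingListener>());
ARROW_RETURN_NOT_OK(decoder.Consume(buffer));  // buffer: std::shared_ptr<arrow::Buffer>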
@@ -1593,8 +1593,8 @@ std::shared_ptr<Schema> StreamDecoder::schema() const { return impl_->schema();
int64_t StreamDecoder::next_required_size() const { return impl_->next_required_size(); }
-ReadStats StreamDecoder::stats() const { return impl_->stats(); }
-
+ReadStats StreamDecoder::stats() const { return impl_->stats(); }
+
Result<std::shared_ptr<Schema>> ReadSchema(io::InputStream* stream,
DictionaryMemo* dictionary_memo) {
std::unique_ptr<MessageReader> reader = MessageReader::Open(stream);
@@ -2059,23 +2059,23 @@ Status FuzzIpcFile(const uint8_t* data, int64_t size) {
return Status::OK();
}
-Status FuzzIpcTensorStream(const uint8_t* data, int64_t size) {
- auto buffer = std::make_shared<Buffer>(data, size);
- io::BufferReader buffer_reader(buffer);
-
- std::shared_ptr<Tensor> tensor;
-
- while (true) {
- ARROW_ASSIGN_OR_RAISE(tensor, ReadTensor(&buffer_reader));
- if (tensor == nullptr) {
- break;
- }
- RETURN_NOT_OK(tensor->Validate());
- }
-
- return Status::OK();
-}
-
+Status FuzzIpcTensorStream(const uint8_t* data, int64_t size) {
+ auto buffer = std::make_shared<Buffer>(data, size);
+ io::BufferReader buffer_reader(buffer);
+
+ std::shared_ptr<Tensor> tensor;
+
+ while (true) {
+ ARROW_ASSIGN_OR_RAISE(tensor, ReadTensor(&buffer_reader));
+ if (tensor == nullptr) {
+ break;
+ }
+ RETURN_NOT_OK(tensor->Validate());
+ }
+
+ return Status::OK();
+}
+
} // namespace internal
} // namespace ipc
} // namespace arrow
diff --git a/contrib/libs/apache/arrow/cpp/src/arrow/ipc/reader.h b/contrib/libs/apache/arrow/cpp/src/arrow/ipc/reader.h
index 60db2837a68..6f2157557f3 100644
--- a/contrib/libs/apache/arrow/cpp/src/arrow/ipc/reader.h
+++ b/contrib/libs/apache/arrow/cpp/src/arrow/ipc/reader.h
@@ -25,14 +25,14 @@
#include <utility>
#include <vector>
-#include "arrow/io/caching.h"
-#include "arrow/io/type_fwd.h"
+#include "arrow/io/caching.h"
+#include "arrow/io/type_fwd.h"
#include "arrow/ipc/message.h"
#include "arrow/ipc/options.h"
#include "arrow/record_batch.h"
#include "arrow/result.h"
-#include "arrow/type_fwd.h"
-#include "arrow/util/async_generator.h"
+#include "arrow/type_fwd.h"
+#include "arrow/util/async_generator.h"
#include "arrow/util/macros.h"
#include "arrow/util/visibility.h"
@@ -101,8 +101,8 @@ class ARROW_EXPORT RecordBatchStreamReader : public RecordBatchReader {
};
/// \brief Reads the record batch file format
-class ARROW_EXPORT RecordBatchFileReader
- : public std::enable_shared_from_this<RecordBatchFileReader> {
+class ARROW_EXPORT RecordBatchFileReader
+ : public std::enable_shared_from_this<RecordBatchFileReader> {
public:
virtual ~RecordBatchFileReader() = default;
@@ -150,26 +150,26 @@ class ARROW_EXPORT RecordBatchFileReader
const std::shared_ptr<io::RandomAccessFile>& file, int64_t footer_offset,
const IpcReadOptions& options = IpcReadOptions::Defaults());
- /// \brief Open a file asynchronously (owns the file).
- static Future<std::shared_ptr<RecordBatchFileReader>> OpenAsync(
- const std::shared_ptr<io::RandomAccessFile>& file,
- const IpcReadOptions& options = IpcReadOptions::Defaults());
-
- /// \brief Open a file asynchronously (borrows the file).
- static Future<std::shared_ptr<RecordBatchFileReader>> OpenAsync(
- io::RandomAccessFile* file,
- const IpcReadOptions& options = IpcReadOptions::Defaults());
-
- /// \brief Open a file asynchronously (owns the file).
- static Future<std::shared_ptr<RecordBatchFileReader>> OpenAsync(
- const std::shared_ptr<io::RandomAccessFile>& file, int64_t footer_offset,
- const IpcReadOptions& options = IpcReadOptions::Defaults());
-
- /// \brief Open a file asynchronously (borrows the file).
- static Future<std::shared_ptr<RecordBatchFileReader>> OpenAsync(
- io::RandomAccessFile* file, int64_t footer_offset,
- const IpcReadOptions& options = IpcReadOptions::Defaults());
-
+ /// \brief Open a file asynchronously (owns the file).
+ static Future<std::shared_ptr<RecordBatchFileReader>> OpenAsync(
+ const std::shared_ptr<io::RandomAccessFile>& file,
+ const IpcReadOptions& options = IpcReadOptions::Defaults());
+
+ /// \brief Open a file asynchronously (borrows the file).
+ static Future<std::shared_ptr<RecordBatchFileReader>> OpenAsync(
+ io::RandomAccessFile* file,
+ const IpcReadOptions& options = IpcReadOptions::Defaults());
+
+ /// \brief Open a file asynchronously (owns the file).
+ static Future<std::shared_ptr<RecordBatchFileReader>> OpenAsync(
+ const std::shared_ptr<io::RandomAccessFile>& file, int64_t footer_offset,
+ const IpcReadOptions& options = IpcReadOptions::Defaults());
+
+ /// \brief Open a file asynchronously (borrows the file).
+ static Future<std::shared_ptr<RecordBatchFileReader>> OpenAsync(
+ io::RandomAccessFile* file, int64_t footer_offset,
+ const IpcReadOptions& options = IpcReadOptions::Defaults());
+
/// \brief The schema read from the file
virtual std::shared_ptr<Schema> schema() const = 0;
@@ -192,24 +192,24 @@ class ARROW_EXPORT RecordBatchFileReader
/// \brief Return current read statistics
virtual ReadStats stats() const = 0;
-
- /// \brief Computes the total number of rows in the file.
- virtual Result<int64_t> CountRows() = 0;
-
- /// \brief Get a reentrant generator of record batches.
- ///
- /// \param[in] coalesce If true, enable I/O coalescing.
- /// \param[in] io_context The IOContext to use (controls which thread pool
- /// is used for I/O).
- /// \param[in] cache_options Options for coalescing (if enabled).
- /// \param[in] executor Optionally, an executor to use for decoding record
- /// batches. This is generally only a benefit for very wide and/or
- /// compressed batches.
- virtual Result<AsyncGenerator<std::shared_ptr<RecordBatch>>> GetRecordBatchGenerator(
- const bool coalesce = false,
- const io::IOContext& io_context = io::default_io_context(),
- const io::CacheOptions cache_options = io::CacheOptions::LazyDefaults(),
- arrow::internal::Executor* executor = NULLPTR) = 0;
+
+ /// \brief Computes the total number of rows in the file.
+ virtual Result<int64_t> CountRows() = 0;
+
+ /// \brief Get a reentrant generator of record batches.
+ ///
+ /// \param[in] coalesce If true, enable I/O coalescing.
+ /// \param[in] io_context The IOContext to use (controls which thread pool
+ /// is used for I/O).
+ /// \param[in] cache_options Options for coalescing (if enabled).
+ /// \param[in] executor Optionally, an executor to use for decoding record
+ /// batches. This is generally only a benefit for very wide and/or
+ /// compressed batches.
+ virtual Result<AsyncGenerator<std::shared_ptr<RecordBatch>>> GetRecordBatchGenerator(
+ const bool coalesce = false,
+ const io::IOContext& io_context = io::default_io_context(),
+ const io::CacheOptions cache_options = io::CacheOptions::LazyDefaults(),
+ arrow::internal::Executor* executor = NULLPTR) = 0;
};
/// \brief A general listener class to receive events.
@@ -304,7 +304,7 @@ class ARROW_EXPORT StreamDecoder {
/// Listener::OnRecordBatchDecoded() to receive decoded record batches
/// \param[in] options any IPC reading options (optional)
StreamDecoder(std::shared_ptr<Listener> listener,
- IpcReadOptions options = IpcReadOptions::Defaults());
+ IpcReadOptions options = IpcReadOptions::Defaults());
virtual ~StreamDecoder();
@@ -380,7 +380,7 @@ class ARROW_EXPORT StreamDecoder {
/// memcpy(buffer->mutable_data() + current_buffer_size,
/// small_chunk,
/// small_chunk_size);
- /// if (buffer->size() < decoder.next_required_size()) {
+ /// if (buffer->size() < decoder.next_required_size()) {
/// continue;
/// }
/// std::shared_ptr<arrow::Buffer> chunk(buffer.release());
@@ -397,9 +397,9 @@ class ARROW_EXPORT StreamDecoder {
/// decoder
int64_t next_required_size() const;
- /// \brief Return current read statistics
- ReadStats stats() const;
-
+ /// \brief Return current read statistics
+ ReadStats stats() const;
+
private:
class StreamDecoderImpl;
std::unique_ptr<StreamDecoderImpl> impl_;
@@ -526,8 +526,8 @@ Result<std::shared_ptr<SparseTensor>> ReadSparseTensorPayload(const IpcPayload&
ARROW_EXPORT
Status FuzzIpcStream(const uint8_t* data, int64_t size);
ARROW_EXPORT
-Status FuzzIpcTensorStream(const uint8_t* data, int64_t size);
-ARROW_EXPORT
+Status FuzzIpcTensorStream(const uint8_t* data, int64_t size);
+ARROW_EXPORT
Status FuzzIpcFile(const uint8_t* data, int64_t size);
} // namespace internal
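
Note: the four OpenAsync() overloads restored above mirror the synchronous
Open() variants: the shared_ptr overloads keep the file alive, the raw-pointer
overloads merely borrow it, and the footer_offset variants handle IPC files
embedded in a larger stream. A hedged usage sketch (the file path is
illustrative):

#include "arrow/io/file.h"
#include "arrow/ipc/reader.h"
#include "arrow/util/macros.h"

arrow::Status ReadFooterAsync() {
  ARROW_ASSIGN_OR_RAISE(auto file,
                        arrow::io::ReadableFile::Open("/tmp/batches.arrow"));
  // Owning overload: the returned reader keeps `file` alive.
  auto future = arrow::ipc::RecordBatchFileReader::OpenAsync(file);
  ARROW_ASSIGN_OR_RAISE(auto reader, future.result());  // blocks until ready
  // CountRows() reads batch metadata without decoding the batch bodies.
  ARROW_ASSIGN_OR_RAISE(int64_t num_rows, reader->CountRows());
  ARROW_UNUSED(num_rows);
  return arrow::Status::OK();
}
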
diff --git a/contrib/libs/apache/arrow/cpp/src/arrow/ipc/type_fwd.h b/contrib/libs/apache/arrow/cpp/src/arrow/ipc/type_fwd.h
index abb1dbc2dd6..3493c4f1409 100644
--- a/contrib/libs/apache/arrow/cpp/src/arrow/ipc/type_fwd.h
+++ b/contrib/libs/apache/arrow/cpp/src/arrow/ipc/type_fwd.h
@@ -47,9 +47,9 @@ enum class MessageType {
SPARSE_TENSOR
};
-struct IpcReadOptions;
-struct IpcWriteOptions;
-
+struct IpcReadOptions;
+struct IpcWriteOptions;
+
class MessageReader;
class RecordBatchStreamReader;
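
Note: type_fwd.h exists so that other headers can name IPC types without
including their full definitions; the hunk above keeps the options structs in
that list. An illustrative consumer (the function is hypothetical, shown only
for the include pattern):

#include "arrow/ipc/type_fwd.h"

// A reference to the options struct needs only the forward declaration; the
// full arrow/ipc/options.h include can stay in the implementation file.
void ConfigureReader(const arrow::ipc::IpcReadOptions& options);
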
diff --git a/contrib/libs/apache/arrow/cpp/src/arrow/ipc/writer.cc b/contrib/libs/apache/arrow/cpp/src/arrow/ipc/writer.cc
index 7bb86316497..7b9254b7e59 100644
--- a/contrib/libs/apache/arrow/cpp/src/arrow/ipc/writer.cc
+++ b/contrib/libs/apache/arrow/cpp/src/arrow/ipc/writer.cc
@@ -49,7 +49,7 @@
#include "arrow/util/bitmap_ops.h"
#include "arrow/util/checked_cast.h"
#include "arrow/util/compression.h"
-#include "arrow/util/endian.h"
+#include "arrow/util/endian.h"
#include "arrow/util/key_value_metadata.h"
#include "arrow/util/logging.h"
#include "arrow/util/make_unique.h"
@@ -70,18 +70,18 @@ using internal::kArrowMagicBytes;
namespace {
-bool HasNestedDict(const ArrayData& data) {
- if (data.type->id() == Type::DICTIONARY) {
- return true;
- }
- for (const auto& child : data.child_data) {
- if (HasNestedDict(*child)) {
- return true;
- }
- }
- return false;
-}
-
+bool HasNestedDict(const ArrayData& data) {
+ if (data.type->id() == Type::DICTIONARY) {
+ return true;
+ }
+ for (const auto& child : data.child_data) {
+ if (HasNestedDict(*child)) {
+ return true;
+ }
+ }
+ return false;
+}
+
Status GetTruncatedBitmap(int64_t offset, int64_t length,
const std::shared_ptr<Buffer> input, MemoryPool* pool,
std::shared_ptr<Buffer>* buffer) {
@@ -557,7 +557,7 @@ class DictionarySerializer : public RecordBatchSerializer {
Status Assemble(const std::shared_ptr<Array>& dictionary) {
// Make a dummy record batch. A bit tedious as we have to make a schema
auto schema = arrow::schema({arrow::field("dictionary", dictionary->type())});
- auto batch = RecordBatch::Make(std::move(schema), dictionary->length(), {dictionary});
+ auto batch = RecordBatch::Make(std::move(schema), dictionary->length(), {dictionary});
return RecordBatchSerializer::Assemble(*batch);
}
@@ -997,21 +997,21 @@ class ARROW_EXPORT IpcFormatWriter : public RecordBatchWriter {
IpcPayload payload;
RETURN_NOT_OK(GetRecordBatchPayload(batch, options_, &payload));
- RETURN_NOT_OK(WritePayload(payload));
- ++stats_.num_record_batches;
- return Status::OK();
+ RETURN_NOT_OK(WritePayload(payload));
+ ++stats_.num_record_batches;
+ return Status::OK();
+ }
+
+ Status WriteTable(const Table& table, int64_t max_chunksize) override {
+ if (is_file_format_ && options_.unify_dictionaries) {
+ ARROW_ASSIGN_OR_RAISE(auto unified_table,
+ DictionaryUnifier::UnifyTable(table, options_.memory_pool));
+ return RecordBatchWriter::WriteTable(*unified_table, max_chunksize);
+ } else {
+ return RecordBatchWriter::WriteTable(table, max_chunksize);
+ }
}
- Status WriteTable(const Table& table, int64_t max_chunksize) override {
- if (is_file_format_ && options_.unify_dictionaries) {
- ARROW_ASSIGN_OR_RAISE(auto unified_table,
- DictionaryUnifier::UnifyTable(table, options_.memory_pool));
- return RecordBatchWriter::WriteTable(*unified_table, max_chunksize);
- } else {
- return RecordBatchWriter::WriteTable(table, max_chunksize);
- }
- }
-
Status Close() override {
RETURN_NOT_OK(CheckStarted());
return payload_writer_->Close();
@@ -1023,11 +1023,11 @@ class ARROW_EXPORT IpcFormatWriter : public RecordBatchWriter {
IpcPayload payload;
RETURN_NOT_OK(GetSchemaPayload(schema_, options_, mapper_, &payload));
- return WritePayload(payload);
+ return WritePayload(payload);
}
- WriteStats stats() const override { return stats_; }
-
+ WriteStats stats() const override { return stats_; }
+
protected:
Status CheckStarted() {
if (!started_) {
@@ -1038,7 +1038,7 @@ class ARROW_EXPORT IpcFormatWriter : public RecordBatchWriter {
Status WriteDictionaries(const RecordBatch& batch) {
ARROW_ASSIGN_OR_RAISE(const auto dictionaries, CollectDictionaries(batch, mapper_));
- const auto equal_options = EqualOptions().nans_equal(true);
+ const auto equal_options = EqualOptions().nans_equal(true);
for (const auto& pair : dictionaries) {
int64_t dictionary_id = pair.first;
@@ -1047,57 +1047,57 @@ class ARROW_EXPORT IpcFormatWriter : public RecordBatchWriter {
// If a dictionary with this id was already emitted, check if it was the same.
auto* last_dictionary = &last_dictionaries_[dictionary_id];
const bool dictionary_exists = (*last_dictionary != nullptr);
- int64_t delta_start = 0;
+ int64_t delta_start = 0;
if (dictionary_exists) {
if ((*last_dictionary)->data() == dictionary->data()) {
// Fast shortcut for a common case.
// Same dictionary data by pointer => no need to emit it again
continue;
}
- const int64_t last_length = (*last_dictionary)->length();
- const int64_t new_length = dictionary->length();
- if (new_length == last_length &&
- ((*last_dictionary)->Equals(dictionary, equal_options))) {
+ const int64_t last_length = (*last_dictionary)->length();
+ const int64_t new_length = dictionary->length();
+ if (new_length == last_length &&
+ ((*last_dictionary)->Equals(dictionary, equal_options))) {
// Same dictionary by value => no need to emit it again
// (while this can have a CPU cost, this code path is required
// for the IPC file format)
continue;
}
- if (is_file_format_) {
- return Status::Invalid(
- "Dictionary replacement detected when writing IPC file format. "
- "Arrow IPC files only support a single dictionary for a given field "
- "across all batches.");
- }
-
- // (the read path doesn't support outer dictionary deltas, don't emit them)
- if (new_length > last_length && options_.emit_dictionary_deltas &&
- !HasNestedDict(*dictionary->data()) &&
- ((*last_dictionary)
- ->RangeEquals(dictionary, 0, last_length, 0, equal_options))) {
- // New dictionary starts with the current dictionary
- delta_start = last_length;
- }
+ if (is_file_format_) {
+ return Status::Invalid(
+ "Dictionary replacement detected when writing IPC file format. "
+ "Arrow IPC files only support a single dictionary for a given field "
+ "across all batches.");
+ }
+
+      // (the read path doesn't support outer dictionary deltas, so don't emit them)
+ if (new_length > last_length && options_.emit_dictionary_deltas &&
+ !HasNestedDict(*dictionary->data()) &&
+ ((*last_dictionary)
+ ->RangeEquals(dictionary, 0, last_length, 0, equal_options))) {
+ // New dictionary starts with the current dictionary
+ delta_start = last_length;
+ }
}
- IpcPayload payload;
- if (delta_start) {
- RETURN_NOT_OK(GetDictionaryPayload(dictionary_id, /*is_delta=*/true,
- dictionary->Slice(delta_start), options_,
- &payload));
- } else {
- RETURN_NOT_OK(
- GetDictionaryPayload(dictionary_id, dictionary, options_, &payload));
+ IpcPayload payload;
+ if (delta_start) {
+ RETURN_NOT_OK(GetDictionaryPayload(dictionary_id, /*is_delta=*/true,
+ dictionary->Slice(delta_start), options_,
+ &payload));
+ } else {
+ RETURN_NOT_OK(
+ GetDictionaryPayload(dictionary_id, dictionary, options_, &payload));
+ }
+ RETURN_NOT_OK(WritePayload(payload));
+ ++stats_.num_dictionary_batches;
+ if (dictionary_exists) {
+ if (delta_start) {
+ ++stats_.num_dictionary_deltas;
+ } else {
+ ++stats_.num_replaced_dictionaries;
+ }
}
- RETURN_NOT_OK(WritePayload(payload));
- ++stats_.num_dictionary_batches;
- if (dictionary_exists) {
- if (delta_start) {
- ++stats_.num_dictionary_deltas;
- } else {
- ++stats_.num_replaced_dictionaries;
- }
- }
// Remember dictionary for next batches
*last_dictionary = dictionary;
@@ -1105,12 +1105,12 @@ class ARROW_EXPORT IpcFormatWriter : public RecordBatchWriter {
return Status::OK();
}
- Status WritePayload(const IpcPayload& payload) {
- RETURN_NOT_OK(payload_writer_->WritePayload(payload));
- ++stats_.num_messages;
- return Status::OK();
- }
-
+ Status WritePayload(const IpcPayload& payload) {
+ RETURN_NOT_OK(payload_writer_->WritePayload(payload));
+ ++stats_.num_messages;
+ return Status::OK();
+ }
+
std::unique_ptr<IpcPayloadWriter> payload_writer_;
std::shared_ptr<Schema> shared_schema_;
const Schema& schema_;
@@ -1126,7 +1126,7 @@ class ARROW_EXPORT IpcFormatWriter : public RecordBatchWriter {
bool started_ = false;
IpcWriteOptions options_;
- WriteStats stats_;
+ WriteStats stats_;
};
class StreamBookKeeper {
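
Note: per the WriteDictionaries() hunks above, a delta is emitted only when
emit_dictionary_deltas is set, the dictionary contains no nested dictionary,
and the new dictionary prefix-extends the previous one (RangeEquals over the
old length); anything else is a replacement, which is an error in the file
format. A hedged sketch of observing this through WriteStats, assuming
batch2's dictionary extends batch1's:

#include "arrow/io/memory.h"
#include "arrow/ipc/writer.h"
#include "arrow/record_batch.h"

arrow::Status WriteWithDeltas(const std::shared_ptr<arrow::RecordBatch>& batch1,
                              const std::shared_ptr<arrow::RecordBatch>& batch2) {
  ARROW_ASSIGN_OR_RAISE(auto sink, arrow::io::BufferOutputStream::Create());
  auto options = arrow::ipc::IpcWriteOptions::Defaults();
  options.emit_dictionary_deltas = true;  // honored by the stream format only
  ARROW_ASSIGN_OR_RAISE(
      auto writer,
      arrow::ipc::MakeStreamWriter(sink, batch1->schema(), options));
  ARROW_RETURN_NOT_OK(writer->WriteRecordBatch(*batch1));
  ARROW_RETURN_NOT_OK(writer->WriteRecordBatch(*batch2));
  ARROW_RETURN_NOT_OK(writer->Close());
  // Expect exactly one delta if batch2's dictionary prefix-extended batch1's.
  return writer->stats().num_dictionary_deltas == 1
             ? arrow::Status::OK()
             : arrow::Status::Invalid("expected a dictionary delta");
}
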
diff --git a/contrib/libs/apache/arrow/cpp/src/arrow/ipc/writer.h b/contrib/libs/apache/arrow/cpp/src/arrow/ipc/writer.h
index 05d62d1bcad..0ea83d7630a 100644
--- a/contrib/libs/apache/arrow/cpp/src/arrow/ipc/writer.h
+++ b/contrib/libs/apache/arrow/cpp/src/arrow/ipc/writer.h
@@ -60,23 +60,23 @@ struct IpcPayload {
int64_t body_length = 0;
};
-struct WriteStats {
- /// Number of IPC messages written.
- int64_t num_messages = 0;
- /// Number of record batches written.
- int64_t num_record_batches = 0;
- /// Number of dictionary batches written.
- ///
- /// Note: num_dictionary_batches >= num_dictionary_deltas + num_replaced_dictionaries
- int64_t num_dictionary_batches = 0;
-
- /// Number of dictionary deltas written.
- int64_t num_dictionary_deltas = 0;
- /// Number of replaced dictionaries (i.e. where a dictionary batch replaces
- /// an existing dictionary with an unrelated new dictionary).
- int64_t num_replaced_dictionaries = 0;
-};
-
+struct WriteStats {
+ /// Number of IPC messages written.
+ int64_t num_messages = 0;
+ /// Number of record batches written.
+ int64_t num_record_batches = 0;
+ /// Number of dictionary batches written.
+ ///
+ /// Note: num_dictionary_batches >= num_dictionary_deltas + num_replaced_dictionaries
+ int64_t num_dictionary_batches = 0;
+
+ /// Number of dictionary deltas written.
+ int64_t num_dictionary_deltas = 0;
+ /// Number of replaced dictionaries (i.e. where a dictionary batch replaces
+ /// an existing dictionary with an unrelated new dictionary).
+ int64_t num_replaced_dictionaries = 0;
+};
+
/// \class RecordBatchWriter
/// \brief Abstract interface for writing a stream of record batches
class ARROW_EXPORT RecordBatchWriter {
@@ -96,25 +96,25 @@ class ARROW_EXPORT RecordBatchWriter {
/// \brief Write Table with a particular chunksize
/// \param[in] table table to write
- /// \param[in] max_chunksize maximum length of table chunks. To indicate
- /// that no maximum should be enforced, pass -1.
+ /// \param[in] max_chunksize maximum length of table chunks. To indicate
+ /// that no maximum should be enforced, pass -1.
/// \return Status
- virtual Status WriteTable(const Table& table, int64_t max_chunksize);
+ virtual Status WriteTable(const Table& table, int64_t max_chunksize);
/// \brief Perform any logic necessary to finish the stream
///
/// \return Status
virtual Status Close() = 0;
-
- /// \brief Return current write statistics
- virtual WriteStats stats() const = 0;
+
+ /// \brief Return current write statistics
+ virtual WriteStats stats() const = 0;
};
-/// \defgroup record-batch-writer-factories Functions for creating RecordBatchWriter
-/// instances
-///
-/// @{
-
+/// \defgroup record-batch-writer-factories Functions for creating RecordBatchWriter
+/// instances
+///
+/// @{
+
/// Create a new IPC stream writer from stream sink and schema. User is
/// responsible for closing the actual OutputStream.
///
@@ -165,14 +165,14 @@ Result<std::shared_ptr<RecordBatchWriter>> MakeFileWriter(
const IpcWriteOptions& options = IpcWriteOptions::Defaults(),
const std::shared_ptr<const KeyValueMetadata>& metadata = NULLPTR);
-/// @}
-
-ARROW_DEPRECATED("Use MakeStreamWriter")
-ARROW_EXPORT
-Result<std::shared_ptr<RecordBatchWriter>> NewStreamWriter(
- io::OutputStream* sink, const std::shared_ptr<Schema>& schema,
- const IpcWriteOptions& options = IpcWriteOptions::Defaults());
-
+/// @}
+
+ARROW_DEPRECATED("Use MakeStreamWriter")
+ARROW_EXPORT
+Result<std::shared_ptr<RecordBatchWriter>> NewStreamWriter(
+ io::OutputStream* sink, const std::shared_ptr<Schema>& schema,
+ const IpcWriteOptions& options = IpcWriteOptions::Defaults());
+
ARROW_DEPRECATED("Use MakeFileWriter")
ARROW_EXPORT
Result<std::shared_ptr<RecordBatchWriter>> NewFileWriter(
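
Note: per the doc comment above, WriteTable() splits a Table into record
batches of at most max_chunksize rows, and -1 disables the limit. A minimal
sketch, assuming a populated table:

#include "arrow/io/memory.h"
#include "arrow/ipc/writer.h"
#include "arrow/table.h"

arrow::Status WriteChunked(const std::shared_ptr<arrow::Table>& table) {
  ARROW_ASSIGN_OR_RAISE(auto sink, arrow::io::BufferOutputStream::Create());
  ARROW_ASSIGN_OR_RAISE(auto writer,
                        arrow::ipc::MakeStreamWriter(sink, table->schema()));
  // Emit batches of at most 1024 rows; pass -1 to keep chunks as they are.
  ARROW_RETURN_NOT_OK(writer->WriteTable(*table, /*max_chunksize=*/1024));
  return writer->Close();
}
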
diff --git a/contrib/libs/apache/arrow/cpp/src/arrow/memory_pool.cc b/contrib/libs/apache/arrow/cpp/src/arrow/memory_pool.cc
index eb5c3643dd4..2d6f3176224 100644
--- a/contrib/libs/apache/arrow/cpp/src/arrow/memory_pool.cc
+++ b/contrib/libs/apache/arrow/cpp/src/arrow/memory_pool.cc
@@ -18,32 +18,32 @@
#include "arrow/memory_pool.h"
#include <algorithm> // IWYU pragma: keep
-#include <atomic>
-#include <cstdlib> // IWYU pragma: keep
-#include <cstring> // IWYU pragma: keep
-#include <iostream> // IWYU pragma: keep
+#include <atomic>
+#include <cstdlib> // IWYU pragma: keep
+#include <cstring> // IWYU pragma: keep
+#include <iostream> // IWYU pragma: keep
#include <limits>
#include <memory>
-#if defined(sun) || defined(__sun)
-#include <stdlib.h>
-#endif
-
-#include "arrow/buffer.h"
-#include "arrow/io/util_internal.h"
-#include "arrow/result.h"
+#if defined(sun) || defined(__sun)
+#include <stdlib.h>
+#endif
+
+#include "arrow/buffer.h"
+#include "arrow/io/util_internal.h"
+#include "arrow/result.h"
#include "arrow/status.h"
-#include "arrow/util/bit_util.h"
-#include "arrow/util/io_util.h"
+#include "arrow/util/bit_util.h"
+#include "arrow/util/io_util.h"
#include "arrow/util/logging.h" // IWYU pragma: keep
-#include "arrow/util/optional.h"
-#include "arrow/util/string.h"
-#include "arrow/util/thread_pool.h"
-
-#ifdef __GLIBC__
-#include <malloc.h>
-#endif
-
+#include "arrow/util/optional.h"
+#include "arrow/util/string.h"
+#include "arrow/util/thread_pool.h"
+
+#ifdef __GLIBC__
+#include <malloc.h>
+#endif
+
#ifdef ARROW_JEMALLOC
// Needed to support jemalloc 3 and 4
#define JEMALLOC_MANGLE
@@ -101,88 +101,88 @@ const char* je_arrow_malloc_conf =
namespace arrow {
-namespace {
-
+namespace {
+
constexpr size_t kAlignment = 64;
-constexpr char kDefaultBackendEnvVar[] = "ARROW_DEFAULT_MEMORY_POOL";
-
-enum class MemoryPoolBackend : uint8_t { System, Jemalloc, Mimalloc };
-
-struct SupportedBackend {
- const char* name;
- MemoryPoolBackend backend;
-};
-
-// See ARROW-12248 for why we use static in-function singletons rather than
-// global constants below (in SupportedBackends() and UserSelectedBackend()).
-// In some contexts (especially R bindings) `default_memory_pool()` may be
-// called before all globals are initialized, and then the ARROW_DEFAULT_MEMORY_POOL
-// environment variable would be ignored.
-
-const std::vector<SupportedBackend>& SupportedBackends() {
- static std::vector<SupportedBackend> backends = {
- // ARROW-12316: Apple => mimalloc first, then jemalloc
- // non-Apple => jemalloc first, then mimalloc
-#if defined(ARROW_JEMALLOC) && !defined(__APPLE__)
- {"jemalloc", MemoryPoolBackend::Jemalloc},
-#endif
-#ifdef ARROW_MIMALLOC
- {"mimalloc", MemoryPoolBackend::Mimalloc},
-#endif
-#if defined(ARROW_JEMALLOC) && defined(__APPLE__)
- {"jemalloc", MemoryPoolBackend::Jemalloc},
-#endif
- {"system", MemoryPoolBackend::System}
- };
- return backends;
-}
-
-// Return the MemoryPoolBackend selected by the user through the
-// ARROW_DEFAULT_MEMORY_POOL environment variable, if any.
-util::optional<MemoryPoolBackend> UserSelectedBackend() {
- static auto user_selected_backend = []() -> util::optional<MemoryPoolBackend> {
- auto unsupported_backend = [](const std::string& name) {
- std::vector<std::string> supported;
- for (const auto backend : SupportedBackends()) {
- supported.push_back(std::string("'") + backend.name + "'");
- }
- ARROW_LOG(WARNING) << "Unsupported backend '" << name << "' specified in "
- << kDefaultBackendEnvVar << " (supported backends are "
- << internal::JoinStrings(supported, ", ") << ")";
- };
-
- auto maybe_name = internal::GetEnvVar(kDefaultBackendEnvVar);
- if (!maybe_name.ok()) {
- return {};
- }
- const auto name = *std::move(maybe_name);
- if (name.empty()) {
- // An empty environment variable is considered missing
- return {};
- }
- const auto found = std::find_if(
- SupportedBackends().begin(), SupportedBackends().end(),
- [&](const SupportedBackend& backend) { return name == backend.name; });
- if (found != SupportedBackends().end()) {
- return found->backend;
- }
- unsupported_backend(name);
- return {};
- }();
-
- return user_selected_backend;
-}
-
-MemoryPoolBackend DefaultBackend() {
- auto backend = UserSelectedBackend();
- if (backend.has_value()) {
- return backend.value();
- }
- struct SupportedBackend default_backend = SupportedBackends().front();
- return default_backend.backend;
-}
-
+constexpr char kDefaultBackendEnvVar[] = "ARROW_DEFAULT_MEMORY_POOL";
+
+enum class MemoryPoolBackend : uint8_t { System, Jemalloc, Mimalloc };
+
+struct SupportedBackend {
+ const char* name;
+ MemoryPoolBackend backend;
+};
+
+// See ARROW-12248 for why we use static in-function singletons rather than
+// global constants below (in SupportedBackends() and UserSelectedBackend()).
+// In some contexts (especially R bindings) `default_memory_pool()` may be
+// called before all globals are initialized, and then the ARROW_DEFAULT_MEMORY_POOL
+// environment variable would be ignored.
+
+const std::vector<SupportedBackend>& SupportedBackends() {
+ static std::vector<SupportedBackend> backends = {
+ // ARROW-12316: Apple => mimalloc first, then jemalloc
+ // non-Apple => jemalloc first, then mimalloc
+#if defined(ARROW_JEMALLOC) && !defined(__APPLE__)
+ {"jemalloc", MemoryPoolBackend::Jemalloc},
+#endif
+#ifdef ARROW_MIMALLOC
+ {"mimalloc", MemoryPoolBackend::Mimalloc},
+#endif
+#if defined(ARROW_JEMALLOC) && defined(__APPLE__)
+ {"jemalloc", MemoryPoolBackend::Jemalloc},
+#endif
+ {"system", MemoryPoolBackend::System}
+ };
+ return backends;
+}
+
+// Return the MemoryPoolBackend selected by the user through the
+// ARROW_DEFAULT_MEMORY_POOL environment variable, if any.
+util::optional<MemoryPoolBackend> UserSelectedBackend() {
+ static auto user_selected_backend = []() -> util::optional<MemoryPoolBackend> {
+ auto unsupported_backend = [](const std::string& name) {
+ std::vector<std::string> supported;
+ for (const auto backend : SupportedBackends()) {
+ supported.push_back(std::string("'") + backend.name + "'");
+ }
+ ARROW_LOG(WARNING) << "Unsupported backend '" << name << "' specified in "
+ << kDefaultBackendEnvVar << " (supported backends are "
+ << internal::JoinStrings(supported, ", ") << ")";
+ };
+
+ auto maybe_name = internal::GetEnvVar(kDefaultBackendEnvVar);
+ if (!maybe_name.ok()) {
+ return {};
+ }
+ const auto name = *std::move(maybe_name);
+ if (name.empty()) {
+ // An empty environment variable is considered missing
+ return {};
+ }
+ const auto found = std::find_if(
+ SupportedBackends().begin(), SupportedBackends().end(),
+ [&](const SupportedBackend& backend) { return name == backend.name; });
+ if (found != SupportedBackends().end()) {
+ return found->backend;
+ }
+ unsupported_backend(name);
+ return {};
+ }();
+
+ return user_selected_backend;
+}
+
+MemoryPoolBackend DefaultBackend() {
+ auto backend = UserSelectedBackend();
+ if (backend.has_value()) {
+ return backend.value();
+ }
+ struct SupportedBackend default_backend = SupportedBackends().front();
+ return default_backend.backend;
+}
+
// A static piece of memory for 0-size allocations, so as to return
// an aligned non-null pointer.
alignas(kAlignment) static uint8_t zero_size_area[1];
@@ -204,11 +204,11 @@ class SystemAllocator {
if (!*out) {
return Status::OutOfMemory("malloc of size ", size, " failed");
}
-#elif defined(sun) || defined(__sun)
- *out = reinterpret_cast<uint8_t*>(memalign(kAlignment, static_cast<size_t>(size)));
- if (!*out) {
- return Status::OutOfMemory("malloc of size ", size, " failed");
- }
+#elif defined(sun) || defined(__sun)
+ *out = reinterpret_cast<uint8_t*>(memalign(kAlignment, static_cast<size_t>(size)));
+ if (!*out) {
+ return Status::OutOfMemory("malloc of size ", size, " failed");
+ }
#else
const int result = posix_memalign(reinterpret_cast<void**>(out), kAlignment,
static_cast<size_t>(size));
@@ -262,14 +262,14 @@ class SystemAllocator {
#endif
}
}
-
- static void ReleaseUnused() {
-#ifdef __GLIBC__
- // The return value of malloc_trim is not an error but to inform
- // you if memory was actually released or not, which we do not care about here
- ARROW_UNUSED(malloc_trim(0));
-#endif
- }
+
+ static void ReleaseUnused() {
+#ifdef __GLIBC__
+    // malloc_trim's return value only indicates whether memory was actually
+    // released, which we do not care about here
+ ARROW_UNUSED(malloc_trim(0));
+#endif
+ }
};
#ifdef ARROW_JEMALLOC
@@ -317,10 +317,10 @@ class JemallocAllocator {
dallocx(ptr, MALLOCX_ALIGN(kAlignment));
}
}
-
- static void ReleaseUnused() {
- mallctl("arena." ARROW_STRINGIFY(MALLCTL_ARENAS_ALL) ".purge", NULL, NULL, NULL, 0);
- }
+
+ static void ReleaseUnused() {
+ mallctl("arena." ARROW_STRINGIFY(MALLCTL_ARENAS_ALL) ".purge", NULL, NULL, NULL, 0);
+ }
};
#endif // defined(ARROW_JEMALLOC)
@@ -343,8 +343,8 @@ class MimallocAllocator {
return Status::OK();
}
- static void ReleaseUnused() { mi_collect(true); }
-
+ static void ReleaseUnused() { mi_collect(true); }
+
static Status ReallocateAligned(int64_t old_size, int64_t new_size, uint8_t** ptr) {
uint8_t* previous_ptr = *ptr;
if (previous_ptr == zero_size_area) {
@@ -451,8 +451,8 @@ class BaseMemoryPoolImpl : public MemoryPool {
stats_.UpdateAllocatedBytes(-size);
}
- void ReleaseUnused() override { Allocator::ReleaseUnused(); }
-
+ void ReleaseUnused() override { Allocator::ReleaseUnused(); }
+
int64_t bytes_allocated() const override { return stats_.bytes_allocated(); }
int64_t max_memory() const override { return stats_.max_memory(); }
@@ -480,46 +480,46 @@ class MimallocMemoryPool : public BaseMemoryPoolImpl<MimallocAllocator> {
};
#endif
-std::unique_ptr<MemoryPool> MemoryPool::CreateDefault() {
- auto backend = DefaultBackend();
- switch (backend) {
- case MemoryPoolBackend::System:
- return std::unique_ptr<MemoryPool>(new SystemMemoryPool);
+std::unique_ptr<MemoryPool> MemoryPool::CreateDefault() {
+ auto backend = DefaultBackend();
+ switch (backend) {
+ case MemoryPoolBackend::System:
+ return std::unique_ptr<MemoryPool>(new SystemMemoryPool);
#ifdef ARROW_JEMALLOC
- case MemoryPoolBackend::Jemalloc:
- return std::unique_ptr<MemoryPool>(new JemallocMemoryPool);
+ case MemoryPoolBackend::Jemalloc:
+ return std::unique_ptr<MemoryPool>(new JemallocMemoryPool);
+#endif
+#ifdef ARROW_MIMALLOC
+ case MemoryPoolBackend::Mimalloc:
+ return std::unique_ptr<MemoryPool>(new MimallocMemoryPool);
#endif
-#ifdef ARROW_MIMALLOC
- case MemoryPoolBackend::Mimalloc:
- return std::unique_ptr<MemoryPool>(new MimallocMemoryPool);
-#endif
- default:
- ARROW_LOG(FATAL) << "Internal error: cannot create default memory pool";
- return nullptr;
- }
+ default:
+ ARROW_LOG(FATAL) << "Internal error: cannot create default memory pool";
+ return nullptr;
+ }
}
-static struct GlobalState {
- ~GlobalState() { finalizing.store(true, std::memory_order_relaxed); }
-
- bool is_finalizing() const { return finalizing.load(std::memory_order_relaxed); }
-
- std::atomic<bool> finalizing{false}; // constructed first, destroyed last
-
- SystemMemoryPool system_pool;
+static struct GlobalState {
+ ~GlobalState() { finalizing.store(true, std::memory_order_relaxed); }
+
+ bool is_finalizing() const { return finalizing.load(std::memory_order_relaxed); }
+
+ std::atomic<bool> finalizing{false}; // constructed first, destroyed last
+
+ SystemMemoryPool system_pool;
#ifdef ARROW_JEMALLOC
- JemallocMemoryPool jemalloc_pool;
+ JemallocMemoryPool jemalloc_pool;
#endif
#ifdef ARROW_MIMALLOC
- MimallocMemoryPool mimalloc_pool;
+ MimallocMemoryPool mimalloc_pool;
#endif
-} global_state;
+} global_state;
-MemoryPool* system_memory_pool() { return &global_state.system_pool; }
+MemoryPool* system_memory_pool() { return &global_state.system_pool; }
Status jemalloc_memory_pool(MemoryPool** out) {
#ifdef ARROW_JEMALLOC
- *out = &global_state.jemalloc_pool;
+ *out = &global_state.jemalloc_pool;
return Status::OK();
#else
return Status::NotImplemented("This Arrow build does not enable jemalloc");
@@ -528,7 +528,7 @@ Status jemalloc_memory_pool(MemoryPool** out) {
Status mimalloc_memory_pool(MemoryPool** out) {
#ifdef ARROW_MIMALLOC
- *out = &global_state.mimalloc_pool;
+ *out = &global_state.mimalloc_pool;
return Status::OK();
#else
return Status::NotImplemented("This Arrow build does not enable mimalloc");
@@ -536,22 +536,22 @@ Status mimalloc_memory_pool(MemoryPool** out) {
}
MemoryPool* default_memory_pool() {
- auto backend = DefaultBackend();
- switch (backend) {
- case MemoryPoolBackend::System:
- return &global_state.system_pool;
+ auto backend = DefaultBackend();
+ switch (backend) {
+ case MemoryPoolBackend::System:
+ return &global_state.system_pool;
#ifdef ARROW_JEMALLOC
- case MemoryPoolBackend::Jemalloc:
- return &global_state.jemalloc_pool;
+ case MemoryPoolBackend::Jemalloc:
+ return &global_state.jemalloc_pool;
+#endif
+#ifdef ARROW_MIMALLOC
+ case MemoryPoolBackend::Mimalloc:
+ return &global_state.mimalloc_pool;
#endif
-#ifdef ARROW_MIMALLOC
- case MemoryPoolBackend::Mimalloc:
- return &global_state.mimalloc_pool;
-#endif
- default:
- ARROW_LOG(FATAL) << "Internal error: cannot create default memory pool";
- return nullptr;
- }
+ default:
+ ARROW_LOG(FATAL) << "Internal error: cannot create default memory pool";
+ return nullptr;
+ }
}
#define RETURN_IF_JEMALLOC_ERROR(ERR) \
@@ -674,124 +674,124 @@ int64_t ProxyMemoryPool::max_memory() const { return impl_->max_memory(); }
std::string ProxyMemoryPool::backend_name() const { return impl_->backend_name(); }
-std::vector<std::string> SupportedMemoryBackendNames() {
- std::vector<std::string> supported;
- for (const auto backend : SupportedBackends()) {
- supported.push_back(backend.name);
- }
- return supported;
-}
-
-// -----------------------------------------------------------------------
-// Pool buffer and allocation
-
-/// A Buffer whose lifetime is tied to a particular MemoryPool
-class PoolBuffer final : public ResizableBuffer {
- public:
- explicit PoolBuffer(std::shared_ptr<MemoryManager> mm, MemoryPool* pool)
- : ResizableBuffer(nullptr, 0, std::move(mm)), pool_(pool) {}
-
- ~PoolBuffer() override {
- // Avoid calling pool_->Free if the global pools are destroyed
- // (XXX this will not work with user-defined pools)
-
- // This can happen if a Future is destructing on one thread while or
- // after memory pools are destructed on the main thread (as there is
- // no guarantee of destructor order between thread/memory pools)
- uint8_t* ptr = mutable_data();
- if (ptr && !global_state.is_finalizing()) {
- pool_->Free(ptr, capacity_);
- }
- }
-
- Status Reserve(const int64_t capacity) override {
- if (capacity < 0) {
- return Status::Invalid("Negative buffer capacity: ", capacity);
- }
- uint8_t* ptr = mutable_data();
- if (!ptr || capacity > capacity_) {
- int64_t new_capacity = BitUtil::RoundUpToMultipleOf64(capacity);
- if (ptr) {
- RETURN_NOT_OK(pool_->Reallocate(capacity_, new_capacity, &ptr));
- } else {
- RETURN_NOT_OK(pool_->Allocate(new_capacity, &ptr));
- }
- data_ = ptr;
- capacity_ = new_capacity;
- }
- return Status::OK();
- }
-
- Status Resize(const int64_t new_size, bool shrink_to_fit = true) override {
- if (ARROW_PREDICT_FALSE(new_size < 0)) {
- return Status::Invalid("Negative buffer resize: ", new_size);
- }
- uint8_t* ptr = mutable_data();
- if (ptr && shrink_to_fit && new_size <= size_) {
- // Buffer is non-null and is not growing, so shrink to the requested size without
- // excess space.
- int64_t new_capacity = BitUtil::RoundUpToMultipleOf64(new_size);
- if (capacity_ != new_capacity) {
- // Buffer hasn't got yet the requested size.
- RETURN_NOT_OK(pool_->Reallocate(capacity_, new_capacity, &ptr));
- data_ = ptr;
- capacity_ = new_capacity;
- }
- } else {
- RETURN_NOT_OK(Reserve(new_size));
- }
- size_ = new_size;
-
- return Status::OK();
- }
-
- static std::shared_ptr<PoolBuffer> MakeShared(MemoryPool* pool) {
- std::shared_ptr<MemoryManager> mm;
- if (pool == nullptr) {
- pool = default_memory_pool();
- mm = default_cpu_memory_manager();
- } else {
- mm = CPUDevice::memory_manager(pool);
- }
- return std::make_shared<PoolBuffer>(std::move(mm), pool);
- }
-
- static std::unique_ptr<PoolBuffer> MakeUnique(MemoryPool* pool) {
- std::shared_ptr<MemoryManager> mm;
- if (pool == nullptr) {
- pool = default_memory_pool();
- mm = default_cpu_memory_manager();
- } else {
- mm = CPUDevice::memory_manager(pool);
- }
- return std::unique_ptr<PoolBuffer>(new PoolBuffer(std::move(mm), pool));
- }
-
- private:
- MemoryPool* pool_;
-};
-
-namespace {
-// A utility that does most of the work of the `AllocateBuffer` and
-// `AllocateResizableBuffer` methods. The argument `buffer` should be a smart pointer to
-// a PoolBuffer.
-template <typename BufferPtr, typename PoolBufferPtr>
-inline Result<BufferPtr> ResizePoolBuffer(PoolBufferPtr&& buffer, const int64_t size) {
- RETURN_NOT_OK(buffer->Resize(size));
- buffer->ZeroPadding();
- return std::move(buffer);
-}
-
-} // namespace
-
-Result<std::unique_ptr<Buffer>> AllocateBuffer(const int64_t size, MemoryPool* pool) {
- return ResizePoolBuffer<std::unique_ptr<Buffer>>(PoolBuffer::MakeUnique(pool), size);
-}
-
-Result<std::unique_ptr<ResizableBuffer>> AllocateResizableBuffer(const int64_t size,
- MemoryPool* pool) {
- return ResizePoolBuffer<std::unique_ptr<ResizableBuffer>>(PoolBuffer::MakeUnique(pool),
- size);
-}
-
+std::vector<std::string> SupportedMemoryBackendNames() {
+ std::vector<std::string> supported;
+ for (const auto backend : SupportedBackends()) {
+ supported.push_back(backend.name);
+ }
+ return supported;
+}
+
+// -----------------------------------------------------------------------
+// Pool buffer and allocation
+
+/// A Buffer whose lifetime is tied to a particular MemoryPool
+class PoolBuffer final : public ResizableBuffer {
+ public:
+ explicit PoolBuffer(std::shared_ptr<MemoryManager> mm, MemoryPool* pool)
+ : ResizableBuffer(nullptr, 0, std::move(mm)), pool_(pool) {}
+
+ ~PoolBuffer() override {
+ // Avoid calling pool_->Free if the global pools are destroyed
+ // (XXX this will not work with user-defined pools)
+
+    // This can happen if a Future is destructing on one thread while, or
+    // after, memory pools are destroyed on the main thread (there is no
+    // guarantee of destruction order between threads and memory pools)
+ uint8_t* ptr = mutable_data();
+ if (ptr && !global_state.is_finalizing()) {
+ pool_->Free(ptr, capacity_);
+ }
+ }
+
+ Status Reserve(const int64_t capacity) override {
+ if (capacity < 0) {
+ return Status::Invalid("Negative buffer capacity: ", capacity);
+ }
+ uint8_t* ptr = mutable_data();
+ if (!ptr || capacity > capacity_) {
+ int64_t new_capacity = BitUtil::RoundUpToMultipleOf64(capacity);
+ if (ptr) {
+ RETURN_NOT_OK(pool_->Reallocate(capacity_, new_capacity, &ptr));
+ } else {
+ RETURN_NOT_OK(pool_->Allocate(new_capacity, &ptr));
+ }
+ data_ = ptr;
+ capacity_ = new_capacity;
+ }
+ return Status::OK();
+ }
+
+ Status Resize(const int64_t new_size, bool shrink_to_fit = true) override {
+ if (ARROW_PREDICT_FALSE(new_size < 0)) {
+ return Status::Invalid("Negative buffer resize: ", new_size);
+ }
+ uint8_t* ptr = mutable_data();
+ if (ptr && shrink_to_fit && new_size <= size_) {
+ // Buffer is non-null and is not growing, so shrink to the requested size without
+ // excess space.
+ int64_t new_capacity = BitUtil::RoundUpToMultipleOf64(new_size);
+ if (capacity_ != new_capacity) {
+        // Buffer capacity does not yet match the requested size.
+ RETURN_NOT_OK(pool_->Reallocate(capacity_, new_capacity, &ptr));
+ data_ = ptr;
+ capacity_ = new_capacity;
+ }
+ } else {
+ RETURN_NOT_OK(Reserve(new_size));
+ }
+ size_ = new_size;
+
+ return Status::OK();
+ }
+
+ static std::shared_ptr<PoolBuffer> MakeShared(MemoryPool* pool) {
+ std::shared_ptr<MemoryManager> mm;
+ if (pool == nullptr) {
+ pool = default_memory_pool();
+ mm = default_cpu_memory_manager();
+ } else {
+ mm = CPUDevice::memory_manager(pool);
+ }
+ return std::make_shared<PoolBuffer>(std::move(mm), pool);
+ }
+
+ static std::unique_ptr<PoolBuffer> MakeUnique(MemoryPool* pool) {
+ std::shared_ptr<MemoryManager> mm;
+ if (pool == nullptr) {
+ pool = default_memory_pool();
+ mm = default_cpu_memory_manager();
+ } else {
+ mm = CPUDevice::memory_manager(pool);
+ }
+ return std::unique_ptr<PoolBuffer>(new PoolBuffer(std::move(mm), pool));
+ }
+
+ private:
+ MemoryPool* pool_;
+};
+
+namespace {
+// A utility that does most of the work of the `AllocateBuffer` and
+// `AllocateResizableBuffer` methods. The argument `buffer` should be a smart pointer to
+// a PoolBuffer.
+template <typename BufferPtr, typename PoolBufferPtr>
+inline Result<BufferPtr> ResizePoolBuffer(PoolBufferPtr&& buffer, const int64_t size) {
+ RETURN_NOT_OK(buffer->Resize(size));
+ buffer->ZeroPadding();
+ return std::move(buffer);
+}
+
+} // namespace
+
+Result<std::unique_ptr<Buffer>> AllocateBuffer(const int64_t size, MemoryPool* pool) {
+ return ResizePoolBuffer<std::unique_ptr<Buffer>>(PoolBuffer::MakeUnique(pool), size);
+}
+
+Result<std::unique_ptr<ResizableBuffer>> AllocateResizableBuffer(const int64_t size,
+ MemoryPool* pool) {
+ return ResizePoolBuffer<std::unique_ptr<ResizableBuffer>>(PoolBuffer::MakeUnique(pool),
+ size);
+}
+
} // namespace arrow
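
Note: DefaultBackend() above consults ARROW_DEFAULT_MEMORY_POOL exactly once
through a function-local static (see the ARROW-12248 comment), warns on an
unrecognized name, and otherwise falls back to the first compiled-in backend.
The selection can be observed at run time:

#include <iostream>
#include <string>

#include "arrow/memory_pool.h"

int main() {
  // e.g. launch with: ARROW_DEFAULT_MEMORY_POOL=system ./pool_info
  arrow::MemoryPool* pool = arrow::default_memory_pool();
  std::cout << "backend: " << pool->backend_name() << "\n";
  for (const auto& name : arrow::SupportedMemoryBackendNames()) {
    std::cout << "supported: " << name << "\n";
  }
  return 0;
}
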
diff --git a/contrib/libs/apache/arrow/cpp/src/arrow/memory_pool.h b/contrib/libs/apache/arrow/cpp/src/arrow/memory_pool.h
index 45c49ff5cc8..81b1b112dc7 100644
--- a/contrib/libs/apache/arrow/cpp/src/arrow/memory_pool.h
+++ b/contrib/libs/apache/arrow/cpp/src/arrow/memory_pool.h
@@ -63,7 +63,7 @@ class MemoryPoolStats {
/// take care of the required 64-byte alignment.
class ARROW_EXPORT MemoryPool {
public:
- virtual ~MemoryPool() = default;
+ virtual ~MemoryPool() = default;
/// \brief EXPERIMENTAL. Create a new instance of the default MemoryPool
static std::unique_ptr<MemoryPool> CreateDefault();
@@ -87,13 +87,13 @@ class ARROW_EXPORT MemoryPool {
/// faster deallocation if supported by its backend.
virtual void Free(uint8_t* buffer, int64_t size) = 0;
- /// Return unused memory to the OS
- ///
- /// Only applies to allocators that hold onto unused memory. This will be
- /// best effort, a memory pool may not implement this feature or may be
- /// unable to fulfill the request due to fragmentation.
- virtual void ReleaseUnused() {}
-
+ /// Return unused memory to the OS
+ ///
+  /// Only applies to allocators that hold onto unused memory. This is
+  /// best-effort: a memory pool may not implement this feature, or may be
+  /// unable to fulfill the request due to fragmentation.
+ virtual void ReleaseUnused() {}
+
/// The number of bytes that were allocated and not yet free'd through
/// this allocator.
virtual int64_t bytes_allocated() const = 0;
@@ -104,11 +104,11 @@ class ARROW_EXPORT MemoryPool {
/// returns -1
virtual int64_t max_memory() const;
- /// The name of the backend used by this MemoryPool (e.g. "system" or "jemalloc").
+ /// The name of the backend used by this MemoryPool (e.g. "system" or "jemalloc").
virtual std::string backend_name() const = 0;
protected:
- MemoryPool() = default;
+ MemoryPool() = default;
};
class ARROW_EXPORT LoggingMemoryPool : public MemoryPool {
@@ -156,10 +156,10 @@ class ARROW_EXPORT ProxyMemoryPool : public MemoryPool {
std::unique_ptr<ProxyMemoryPoolImpl> impl_;
};
-/// \brief Return a process-wide memory pool based on the system allocator.
+/// \brief Return a process-wide memory pool based on the system allocator.
ARROW_EXPORT MemoryPool* system_memory_pool();
-/// \brief Return a process-wide memory pool based on jemalloc.
+/// \brief Return a process-wide memory pool based on jemalloc.
///
/// May return NotImplemented if jemalloc is not available.
ARROW_EXPORT Status jemalloc_memory_pool(MemoryPool** out);
@@ -175,11 +175,11 @@ ARROW_EXPORT Status jemalloc_memory_pool(MemoryPool** out);
ARROW_EXPORT
Status jemalloc_set_decay_ms(int ms);
-/// \brief Return a process-wide memory pool based on mimalloc.
+/// \brief Return a process-wide memory pool based on mimalloc.
///
/// May return NotImplemented if mimalloc is not available.
ARROW_EXPORT Status mimalloc_memory_pool(MemoryPool** out);
-ARROW_EXPORT std::vector<std::string> SupportedMemoryBackendNames();
-
+ARROW_EXPORT std::vector<std::string> SupportedMemoryBackendNames();
+
} // namespace arrow
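
Note: ReleaseUnused() above is best-effort: per the memory_pool.cc hunks
earlier in this diff it maps to malloc_trim(0) under glibc, an arena purge
under jemalloc, and mi_collect(true) under mimalloc, and may otherwise be a
no-op. A small sketch exercising it after a transient allocation:

#include "arrow/buffer.h"
#include "arrow/memory_pool.h"
#include "arrow/result.h"

arrow::Status Churn() {
  arrow::MemoryPool* pool = arrow::default_memory_pool();
  ARROW_ASSIGN_OR_RAISE(auto buffer, arrow::AllocateBuffer(1 << 20, pool));
  buffer.reset();         // hand the megabyte back to the pool
  pool->ReleaseUnused();  // then ask the pool to return memory to the OS
  return arrow::Status::OK();
}
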
diff --git a/contrib/libs/apache/arrow/cpp/src/arrow/pretty_print.cc b/contrib/libs/apache/arrow/cpp/src/arrow/pretty_print.cc
index 8187af43345..8d1c16e0ed6 100644
--- a/contrib/libs/apache/arrow/cpp/src/arrow/pretty_print.cc
+++ b/contrib/libs/apache/arrow/cpp/src/arrow/pretty_print.cc
@@ -15,8 +15,8 @@
// specific language governing permissions and limitations
// under the License.
-#include "arrow/pretty_print.h"
-
+#include "arrow/pretty_print.h"
+
#include <algorithm>
#include <chrono>
#include <cstddef>
@@ -69,12 +69,12 @@ class PrettyPrinter {
};
void PrettyPrinter::OpenArray(const Array& array) {
- if (!options_.skip_new_lines) {
- Indent();
- }
+ if (!options_.skip_new_lines) {
+ Indent();
+ }
(*sink_) << "[";
if (array.length() > 0) {
- Newline();
+ Newline();
indent_ += options_.indent_size;
}
}
@@ -125,15 +125,15 @@ class ArrayPrinter : public PrettyPrinter {
if (skip_comma) {
skip_comma = false;
} else {
- (*sink_) << ",";
- Newline();
+ (*sink_) << ",";
+ Newline();
+ }
+ if (!options_.skip_new_lines) {
+ Indent();
}
- if (!options_.skip_new_lines) {
- Indent();
- }
if ((i >= options_.window) && (i < (array.length() - options_.window))) {
- (*sink_) << "...";
- Newline();
+ (*sink_) << "...";
+ Newline();
i = array.length() - options_.window - 1;
skip_comma = true;
} else if (array.IsNull(i)) {
@@ -142,7 +142,7 @@ class ArrayPrinter : public PrettyPrinter {
func(i);
}
}
- Newline();
+ Newline();
}
Status WriteDataValues(const BooleanArray& array) {
@@ -232,11 +232,11 @@ class ArrayPrinter : public PrettyPrinter {
return Status::OK();
}
- Status WriteDataValues(const Decimal256Array& array) {
- WriteValues(array, [&](int64_t i) { (*sink_) << array.FormatValue(i); });
- return Status::OK();
- }
-
+ Status WriteDataValues(const Decimal256Array& array) {
+ WriteValues(array, [&](int64_t i) { (*sink_) << array.FormatValue(i); });
+ return Status::OK();
+ }
+
template <typename T>
enable_if_list_like<typename T::TypeClass, Status> WriteDataValues(const T& array) {
bool skip_comma = true;
@@ -244,13 +244,13 @@ class ArrayPrinter : public PrettyPrinter {
if (skip_comma) {
skip_comma = false;
} else {
- (*sink_) << ",";
- Newline();
+ (*sink_) << ",";
+ Newline();
}
if ((i >= options_.window) && (i < (array.length() - options_.window))) {
Indent();
- (*sink_) << "...";
- Newline();
+ (*sink_) << "...";
+ Newline();
i = array.length() - options_.window - 1;
skip_comma = true;
} else if (array.IsNull(i)) {
@@ -259,11 +259,11 @@ class ArrayPrinter : public PrettyPrinter {
} else {
std::shared_ptr<Array> slice =
array.values()->Slice(array.value_offset(i), array.value_length(i));
- RETURN_NOT_OK(
- PrettyPrint(*slice, PrettyPrintOptions{indent_, options_.window}, sink_));
+ RETURN_NOT_OK(
+ PrettyPrint(*slice, PrettyPrintOptions{indent_, options_.window}, sink_));
}
}
- Newline();
+ Newline();
return Status::OK();
}
@@ -273,36 +273,36 @@ class ArrayPrinter : public PrettyPrinter {
if (skip_comma) {
skip_comma = false;
} else {
- (*sink_) << ",";
- Newline();
+ (*sink_) << ",";
+ Newline();
}
-
- if (!options_.skip_new_lines) {
- Indent();
- }
-
+
+ if (!options_.skip_new_lines) {
+ Indent();
+ }
+
if ((i >= options_.window) && (i < (array.length() - options_.window))) {
- (*sink_) << "...";
- Newline();
+ (*sink_) << "...";
+ Newline();
i = array.length() - options_.window - 1;
skip_comma = true;
} else if (array.IsNull(i)) {
(*sink_) << options_.null_rep;
} else {
- (*sink_) << "keys:";
- Newline();
+ (*sink_) << "keys:";
+ Newline();
auto keys_slice =
array.keys()->Slice(array.value_offset(i), array.value_length(i));
- RETURN_NOT_OK(PrettyPrint(*keys_slice,
- PrettyPrintOptions{indent_, options_.window}, sink_));
- Newline();
+ RETURN_NOT_OK(PrettyPrint(*keys_slice,
+ PrettyPrintOptions{indent_, options_.window}, sink_));
+ Newline();
Indent();
- (*sink_) << "values:";
- Newline();
+ (*sink_) << "values:";
+ Newline();
auto values_slice =
array.items()->Slice(array.value_offset(i), array.value_length(i));
- RETURN_NOT_OK(PrettyPrint(*values_slice,
- PrettyPrintOptions{indent_, options_.window}, sink_));
+ RETURN_NOT_OK(PrettyPrint(*values_slice,
+ PrettyPrintOptions{indent_, options_.window}, sink_));
}
}
(*sink_) << "\n";
@@ -341,7 +341,7 @@ class ArrayPrinter : public PrettyPrinter {
int64_t length) {
for (size_t i = 0; i < fields.size(); ++i) {
Newline();
- Indent();
+ Indent();
std::stringstream ss;
ss << "-- child " << i << " type: " << fields[i]->type()->ToString() << "\n";
Write(ss.str());
@@ -369,14 +369,14 @@ class ArrayPrinter : public PrettyPrinter {
RETURN_NOT_OK(WriteValidityBitmap(array));
Newline();
- Indent();
+ Indent();
Write("-- type_ids: ");
UInt8Array type_codes(array.length(), array.type_codes(), nullptr, 0, array.offset());
RETURN_NOT_OK(PrettyPrint(type_codes, indent_ + options_.indent_size, sink_));
if (array.mode() == UnionMode::DENSE) {
Newline();
- Indent();
+ Indent();
Write("-- value_offsets: ");
Int32Array value_offsets(
array.length(), checked_cast<const DenseUnionArray&>(array).value_offsets(),
@@ -395,13 +395,13 @@ class ArrayPrinter : public PrettyPrinter {
Status Visit(const DictionaryArray& array) {
Newline();
- Indent();
+ Indent();
Write("-- dictionary:\n");
RETURN_NOT_OK(
PrettyPrint(*array.dictionary(), indent_ + options_.indent_size, sink_));
Newline();
- Indent();
+ Indent();
Write("-- indices:\n");
return PrettyPrint(*array.indices(), indent_ + options_.indent_size, sink_);
}
@@ -452,7 +452,7 @@ Status ArrayPrinter::WriteValidityBitmap(const Array& array) {
if (array.null_count() > 0) {
Newline();
- Indent();
+ Indent();
BooleanArray is_valid(array.length(), array.null_bitmap(), nullptr, 0,
array.offset());
return PrettyPrint(is_valid, indent_ + options_.indent_size, sink_);
@@ -492,28 +492,28 @@ Status PrettyPrint(const ChunkedArray& chunked_arr, const PrettyPrintOptions& op
for (int i = 0; i < indent; ++i) {
(*sink) << " ";
}
- (*sink) << "[";
- if (!options.skip_new_lines) {
- *sink << "\n";
- }
+ (*sink) << "[";
+ if (!options.skip_new_lines) {
+ *sink << "\n";
+ }
bool skip_comma = true;
for (int i = 0; i < num_chunks; ++i) {
if (skip_comma) {
skip_comma = false;
} else {
- (*sink) << ",";
- if (!options.skip_new_lines) {
- *sink << "\n";
- }
+ (*sink) << ",";
+ if (!options.skip_new_lines) {
+ *sink << "\n";
+ }
}
if ((i >= window) && (i < (num_chunks - window))) {
for (int i = 0; i < indent; ++i) {
(*sink) << " ";
}
- (*sink) << "...";
- if (!options.skip_new_lines) {
- *sink << "\n";
- }
+ (*sink) << "...";
+ if (!options.skip_new_lines) {
+ *sink << "\n";
+ }
i = num_chunks - window - 1;
skip_comma = true;
} else {
@@ -523,9 +523,9 @@ Status PrettyPrint(const ChunkedArray& chunked_arr, const PrettyPrintOptions& op
RETURN_NOT_OK(printer.Print(*chunked_arr.chunk(i)));
}
}
- if (!options.skip_new_lines) {
- *sink << "\n";
- }
+ if (!options.skip_new_lines) {
+ *sink << "\n";
+ }
for (int i = 0; i < indent; ++i) {
(*sink) << " ";
@@ -605,7 +605,7 @@ class SchemaPrinter : public PrettyPrinter {
void PrintVerboseMetadata(const KeyValueMetadata& metadata) {
for (int64_t i = 0; i < metadata.size(); ++i) {
Newline();
- Indent();
+ Indent();
Write(metadata.key(i) + ": '" + metadata.value(i) + "'");
}
}
@@ -613,7 +613,7 @@ class SchemaPrinter : public PrettyPrinter {
void PrintTruncatedMetadata(const KeyValueMetadata& metadata) {
for (int64_t i = 0; i < metadata.size(); ++i) {
Newline();
- Indent();
+ Indent();
size_t size = metadata.value(i).size();
size_t truncated_size = std::max<size_t>(10, 70 - metadata.key(i).size() - indent_);
if (size <= truncated_size) {
@@ -629,7 +629,7 @@ class SchemaPrinter : public PrettyPrinter {
void PrintMetadata(const std::string& metadata_type, const KeyValueMetadata& metadata) {
if (metadata.size() > 0) {
Newline();
- Indent();
+ Indent();
Write(metadata_type);
if (options_.truncate_metadata) {
PrintTruncatedMetadata(metadata);
@@ -643,7 +643,7 @@ class SchemaPrinter : public PrettyPrinter {
for (int i = 0; i < schema_.num_fields(); ++i) {
if (i > 0) {
Newline();
- Indent();
+ Indent();
} else {
Indent();
}
@@ -668,7 +668,7 @@ Status SchemaPrinter::PrintType(const DataType& type, bool nullable) {
}
for (int i = 0; i < type.num_fields(); ++i) {
Newline();
- Indent();
+ Indent();
std::stringstream ss;
ss << "child " << i << ", ";
diff --git a/contrib/libs/apache/arrow/cpp/src/arrow/pretty_print.h b/contrib/libs/apache/arrow/cpp/src/arrow/pretty_print.h
index d85684cf460..1bc086a6889 100644
--- a/contrib/libs/apache/arrow/cpp/src/arrow/pretty_print.h
+++ b/contrib/libs/apache/arrow/cpp/src/arrow/pretty_print.h
@@ -19,7 +19,7 @@
#include <iosfwd>
#include <string>
-#include <utility>
+#include <utility>
#include "arrow/util/visibility.h"
@@ -35,14 +35,14 @@ class Table;
struct PrettyPrintOptions {
PrettyPrintOptions() = default;
- PrettyPrintOptions(int indent_arg, // NOLINT runtime/explicit
- int window_arg = 10, int indent_size_arg = 2,
+ PrettyPrintOptions(int indent_arg, // NOLINT runtime/explicit
+ int window_arg = 10, int indent_size_arg = 2,
std::string null_rep_arg = "null", bool skip_new_lines_arg = false,
bool truncate_metadata_arg = true)
: indent(indent_arg),
indent_size(indent_size_arg),
window(window_arg),
- null_rep(std::move(null_rep_arg)),
+ null_rep(std::move(null_rep_arg)),
skip_new_lines(skip_new_lines_arg),
truncate_metadata(truncate_metadata_arg) {}
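
Note: PrettyPrintOptions takes null_rep_arg by value and moves it into the
member, so a temporary string is moved rather than copied. Sketch:

#include <string>

#include "arrow/pretty_print.h"

arrow::PrettyPrintOptions MakeOptions() {
  // indent 2, window 5, indent_size 2; the temporary "NA" string is moved.
  return arrow::PrettyPrintOptions(/*indent_arg=*/2, /*window_arg=*/5,
                                   /*indent_size_arg=*/2, std::string("NA"));
}
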
diff --git a/contrib/libs/apache/arrow/cpp/src/arrow/record_batch.cc b/contrib/libs/apache/arrow/cpp/src/arrow/record_batch.cc
index 21703f3cf24..66f9e932b58 100644
--- a/contrib/libs/apache/arrow/cpp/src/arrow/record_batch.cc
+++ b/contrib/libs/apache/arrow/cpp/src/arrow/record_batch.cc
@@ -69,14 +69,14 @@ class SimpleRecordBatch : public RecordBatch {
boxed_columns_.resize(schema_->num_fields());
}
- const std::vector<std::shared_ptr<Array>>& columns() const override {
- for (int i = 0; i < num_columns(); ++i) {
- // Force all columns to be boxed
- column(i);
- }
- return boxed_columns_;
- }
-
+ const std::vector<std::shared_ptr<Array>>& columns() const override {
+ for (int i = 0; i < num_columns(); ++i) {
+ // Force all columns to be boxed
+ column(i);
+ }
+ return boxed_columns_;
+ }
+
std::shared_ptr<Array> column(int i) const override {
std::shared_ptr<Array> result = internal::atomic_load(&boxed_columns_[i]);
if (!result) {
@@ -88,7 +88,7 @@ class SimpleRecordBatch : public RecordBatch {
std::shared_ptr<ArrayData> column_data(int i) const override { return columns_[i]; }
- const ArrayDataVector& column_data() const override { return columns_; }
+ const ArrayDataVector& column_data() const override { return columns_; }
Result<std::shared_ptr<RecordBatch>> AddColumn(
int i, const std::shared_ptr<Field>& field,
@@ -97,9 +97,9 @@ class SimpleRecordBatch : public RecordBatch {
ARROW_CHECK(column != nullptr);
if (!field->type()->Equals(column->type())) {
- return Status::TypeError("Column data type ", field->type()->name(),
- " does not match field data type ",
- column->type()->name());
+ return Status::TypeError("Column data type ", field->type()->name(),
+ " does not match field data type ",
+ column->type()->name());
}
if (column->length() != num_rows_) {
return Status::Invalid(
@@ -108,42 +108,42 @@ class SimpleRecordBatch : public RecordBatch {
}
ARROW_ASSIGN_OR_RAISE(auto new_schema, schema_->AddField(i, field));
- return RecordBatch::Make(std::move(new_schema), num_rows_,
+ return RecordBatch::Make(std::move(new_schema), num_rows_,
internal::AddVectorElement(columns_, i, column->data()));
}
- Result<std::shared_ptr<RecordBatch>> SetColumn(
- int i, const std::shared_ptr<Field>& field,
- const std::shared_ptr<Array>& column) const override {
- ARROW_CHECK(field != nullptr);
- ARROW_CHECK(column != nullptr);
-
- if (!field->type()->Equals(column->type())) {
- return Status::TypeError("Column data type ", field->type()->name(),
- " does not match field data type ",
- column->type()->name());
- }
- if (column->length() != num_rows_) {
- return Status::Invalid(
- "Added column's length must match record batch's length. Expected length ",
- num_rows_, " but got length ", column->length());
- }
-
- ARROW_ASSIGN_OR_RAISE(auto new_schema, schema_->SetField(i, field));
- return RecordBatch::Make(std::move(new_schema), num_rows_,
- internal::ReplaceVectorElement(columns_, i, column->data()));
- }
-
+ Result<std::shared_ptr<RecordBatch>> SetColumn(
+ int i, const std::shared_ptr<Field>& field,
+ const std::shared_ptr<Array>& column) const override {
+ ARROW_CHECK(field != nullptr);
+ ARROW_CHECK(column != nullptr);
+
+ if (!field->type()->Equals(column->type())) {
+ return Status::TypeError("Column data type ", field->type()->name(),
+ " does not match field data type ",
+ column->type()->name());
+ }
+ if (column->length() != num_rows_) {
+ return Status::Invalid(
+ "Added column's length must match record batch's length. Expected length ",
+ num_rows_, " but got length ", column->length());
+ }
+
+ ARROW_ASSIGN_OR_RAISE(auto new_schema, schema_->SetField(i, field));
+ return RecordBatch::Make(std::move(new_schema), num_rows_,
+ internal::ReplaceVectorElement(columns_, i, column->data()));
+ }
+
Result<std::shared_ptr<RecordBatch>> RemoveColumn(int i) const override {
ARROW_ASSIGN_OR_RAISE(auto new_schema, schema_->RemoveField(i));
- return RecordBatch::Make(std::move(new_schema), num_rows_,
+ return RecordBatch::Make(std::move(new_schema), num_rows_,
internal::DeleteVectorElement(columns_, i));
}
std::shared_ptr<RecordBatch> ReplaceSchemaMetadata(
const std::shared_ptr<const KeyValueMetadata>& metadata) const override {
auto new_schema = schema_->WithMetadata(metadata);
- return RecordBatch::Make(std::move(new_schema), num_rows_, columns_);
+ return RecordBatch::Make(std::move(new_schema), num_rows_, columns_);
}
std::shared_ptr<RecordBatch> Slice(int64_t offset, int64_t length) const override {
@@ -191,8 +191,8 @@ std::shared_ptr<RecordBatch> RecordBatch::Make(
Result<std::shared_ptr<RecordBatch>> RecordBatch::FromStructArray(
const std::shared_ptr<Array>& array) {
if (array->type_id() != Type::STRUCT) {
- return Status::TypeError("Cannot construct record batch from array of type ",
- *array->type());
+ return Status::TypeError("Cannot construct record batch from array of type ",
+ *array->type());
}
if (array->null_count() != 0) {
return Status::Invalid(
@@ -251,27 +251,27 @@ bool RecordBatch::ApproxEquals(const RecordBatch& other) const {
return true;
}
-Result<std::shared_ptr<RecordBatch>> RecordBatch::SelectColumns(
- const std::vector<int>& indices) const {
- int n = static_cast<int>(indices.size());
-
- FieldVector fields(n);
- ArrayVector columns(n);
-
- for (int i = 0; i < n; i++) {
- int pos = indices[i];
- if (pos < 0 || pos > num_columns() - 1) {
- return Status::Invalid("Invalid column index ", pos, " to select columns.");
- }
- fields[i] = schema()->field(pos);
- columns[i] = column(pos);
- }
-
- auto new_schema =
- std::make_shared<arrow::Schema>(std::move(fields), schema()->metadata());
- return RecordBatch::Make(std::move(new_schema), num_rows(), std::move(columns));
-}
-
+Result<std::shared_ptr<RecordBatch>> RecordBatch::SelectColumns(
+ const std::vector<int>& indices) const {
+ int n = static_cast<int>(indices.size());
+
+ FieldVector fields(n);
+ ArrayVector columns(n);
+
+ for (int i = 0; i < n; i++) {
+ int pos = indices[i];
+ if (pos < 0 || pos > num_columns() - 1) {
+ return Status::Invalid("Invalid column index ", pos, " to select columns.");
+ }
+ fields[i] = schema()->field(pos);
+ columns[i] = column(pos);
+ }
+
+ auto new_schema =
+ std::make_shared<arrow::Schema>(std::move(fields), schema()->metadata());
+ return RecordBatch::Make(std::move(new_schema), num_rows(), std::move(columns));
+}
+
std::shared_ptr<RecordBatch> RecordBatch::Slice(int64_t offset) const {
return Slice(offset, this->num_rows() - offset);
}
@@ -304,7 +304,7 @@ Status RecordBatch::ValidateFull() const {
RETURN_NOT_OK(Validate());
for (int i = 0; i < num_columns(); ++i) {
const auto& array = *this->column(i);
- RETURN_NOT_OK(internal::ValidateArrayFull(array));
+ RETURN_NOT_OK(internal::ValidateArrayFull(array));
}
return Status::OK();
}
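
Note: SelectColumns() restored above bounds-checks each index, then builds a
new schema (preserving metadata) and column vector in the given order. A
minimal sketch:

#include "arrow/record_batch.h"

arrow::Result<std::shared_ptr<arrow::RecordBatch>> FirstTwoColumns(
    const std::shared_ptr<arrow::RecordBatch>& batch) {
  // Returns Status::Invalid if an index is out of range; duplicate and
  // reordered indices are allowed.
  return batch->SelectColumns({0, 1});
}
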
diff --git a/contrib/libs/apache/arrow/cpp/src/arrow/record_batch.h b/contrib/libs/apache/arrow/cpp/src/arrow/record_batch.h
index 735d4f6f06b..3dc1f54a083 100644
--- a/contrib/libs/apache/arrow/cpp/src/arrow/record_batch.h
+++ b/contrib/libs/apache/arrow/cpp/src/arrow/record_batch.h
@@ -87,10 +87,10 @@ class ARROW_EXPORT RecordBatch {
  /// \return the record batch's schema
- const std::shared_ptr<Schema>& schema() const { return schema_; }
+ const std::shared_ptr<Schema>& schema() const { return schema_; }
/// \brief Retrieve all columns at once
- virtual const std::vector<std::shared_ptr<Array>>& columns() const = 0;
+ virtual const std::vector<std::shared_ptr<Array>>& columns() const = 0;
/// \brief Retrieve an array from the record batch
/// \param[in] i field index, does not boundscheck
@@ -108,7 +108,7 @@ class ARROW_EXPORT RecordBatch {
virtual std::shared_ptr<ArrayData> column_data(int i) const = 0;
/// \brief Retrieve all arrays' internal data from the record batch.
- virtual const ArrayDataVector& column_data() const = 0;
+ virtual const ArrayDataVector& column_data() const = 0;
/// \brief Add column to the record batch, producing a new RecordBatch
///
@@ -130,11 +130,11 @@ class ARROW_EXPORT RecordBatch {
virtual Result<std::shared_ptr<RecordBatch>> AddColumn(
int i, std::string field_name, const std::shared_ptr<Array>& column) const;
- /// \brief Replace a column in the table, producing a new Table
- virtual Result<std::shared_ptr<RecordBatch>> SetColumn(
- int i, const std::shared_ptr<Field>& field,
- const std::shared_ptr<Array>& column) const = 0;
-
+ /// \brief Replace a column in the record batch, producing a new RecordBatch
+ virtual Result<std::shared_ptr<RecordBatch>> SetColumn(
+ int i, const std::shared_ptr<Field>& field,
+ const std::shared_ptr<Array>& column) const = 0;
+
/// \brief Remove column from the record batch, producing a new RecordBatch
///
/// \param[in] i field index, does boundscheck
@@ -166,10 +166,10 @@ class ARROW_EXPORT RecordBatch {
/// \return PrettyPrint representation suitable for debugging
std::string ToString() const;
- /// \brief Return new record batch with specified columns
- Result<std::shared_ptr<RecordBatch>> SelectColumns(
- const std::vector<int>& indices) const;
-
+ /// \brief Return new record batch with specified columns
+ Result<std::shared_ptr<RecordBatch>> SelectColumns(
+ const std::vector<int>& indices) const;
+
/// \brief Perform cheap validation checks to determine obvious inconsistencies
/// within the record batch's schema and internal data.
///
@@ -199,8 +199,8 @@ class ARROW_EXPORT RecordBatch {
/// \brief Abstract interface for reading stream of record batches
class ARROW_EXPORT RecordBatchReader {
public:
- using ValueType = std::shared_ptr<RecordBatch>;
-
+ using ValueType = std::shared_ptr<RecordBatch>;
+
virtual ~RecordBatchReader() = default;
/// \return the shared schema of the record batches in the stream
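
Aside: the ValueType alias added above advertises what a reader yields; a minimal drain loop might look like this (sketch, not part of the diff):

#include "arrow/api.h"

arrow::Status ConsumeAll(arrow::RecordBatchReader* reader) {
  arrow::RecordBatchReader::ValueType batch;  // std::shared_ptr<RecordBatch>
  while (true) {
    ARROW_RETURN_NOT_OK(reader->ReadNext(&batch));
    if (batch == nullptr) break;  // end of stream
    // ... process batch ...
  }
  return arrow::Status::OK();
}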
diff --git a/contrib/libs/apache/arrow/cpp/src/arrow/result.h b/contrib/libs/apache/arrow/cpp/src/arrow/result.h
index 21483c89533..cb7437cd242 100644
--- a/contrib/libs/apache/arrow/cpp/src/arrow/result.h
+++ b/contrib/libs/apache/arrow/cpp/src/arrow/result.h
@@ -18,7 +18,7 @@
#pragma once
-#include <cstddef>
+#include <cstddef>
#include <new>
#include <string>
#include <type_traits>
@@ -29,9 +29,9 @@
namespace arrow {
-template <typename>
-struct EnsureResult;
-
+template <typename>
+struct EnsureResult;
+
namespace internal {
#if __cplusplus >= 201703L
@@ -317,7 +317,7 @@ class ARROW_MUST_USE_TYPE Result : public util::EqualityComparable<Result<T>> {
return ValueUnsafe();
}
const T& operator*() const& { return ValueOrDie(); }
- const T* operator->() const { return &ValueOrDie(); }
+ const T* operator->() const { return &ValueOrDie(); }
/// Gets a mutable reference to the stored `T` value.
///
@@ -332,7 +332,7 @@ class ARROW_MUST_USE_TYPE Result : public util::EqualityComparable<Result<T>> {
return ValueUnsafe();
}
T& operator*() & { return ValueOrDie(); }
- T* operator->() { return &ValueOrDie(); }
+ T* operator->() { return &ValueOrDie(); }
/// Moves and returns the internally-stored `T` value.
///
@@ -385,7 +385,7 @@ class ARROW_MUST_USE_TYPE Result : public util::EqualityComparable<Result<T>> {
/// Apply a function to the internally stored value to produce a new result or propagate
/// the stored error.
template <typename M>
- typename EnsureResult<typename std::result_of<M && (T)>::type>::type Map(M&& m) && {
+ typename EnsureResult<typename std::result_of<M && (T)>::type>::type Map(M&& m) && {
if (!ok()) {
return status();
}
@@ -395,36 +395,36 @@ class ARROW_MUST_USE_TYPE Result : public util::EqualityComparable<Result<T>> {
/// Apply a function to the internally stored value to produce a new result or propagate
/// the stored error.
template <typename M>
- typename EnsureResult<typename std::result_of<M && (const T&)>::type>::type Map(
- M&& m) const& {
+ typename EnsureResult<typename std::result_of<M && (const T&)>::type>::type Map(
+ M&& m) const& {
if (!ok()) {
return status();
}
return std::forward<M>(m)(ValueUnsafe());
}
- /// Cast the internally stored value to produce a new result or propagate the stored
- /// error.
- template <typename U, typename E = typename std::enable_if<
- std::is_constructible<U, T>::value>::type>
- Result<U> As() && {
- if (!ok()) {
- return status();
- }
- return U(MoveValueUnsafe());
- }
-
- /// Cast the internally stored value to produce a new result or propagate the stored
- /// error.
- template <typename U, typename E = typename std::enable_if<
- std::is_constructible<U, const T&>::value>::type>
- Result<U> As() const& {
- if (!ok()) {
- return status();
- }
- return U(ValueUnsafe());
- }
-
+ /// Cast the internally stored value to produce a new result or propagate the stored
+ /// error.
+ template <typename U, typename E = typename std::enable_if<
+ std::is_constructible<U, T>::value>::type>
+ Result<U> As() && {
+ if (!ok()) {
+ return status();
+ }
+ return U(MoveValueUnsafe());
+ }
+
+ /// Cast the internally stored value to produce a new result or propagate the stored
+ /// error.
+ template <typename U, typename E = typename std::enable_if<
+ std::is_constructible<U, const T&>::value>::type>
+ Result<U> As() const& {
+ if (!ok()) {
+ return status();
+ }
+ return U(ValueUnsafe());
+ }
+
const T& ValueUnsafe() const& {
return *internal::launder(reinterpret_cast<const T*>(&data_));
}
@@ -448,16 +448,16 @@ class ARROW_MUST_USE_TYPE Result : public util::EqualityComparable<Result<T>> {
void Destroy() {
if (ARROW_PREDICT_TRUE(status_.ok())) {
- static_assert(offsetof(Result<T>, status_) == 0,
- "Status is guaranteed to be at the start of Result<>");
+ static_assert(offsetof(Result<T>, status_) == 0,
+ "Status is guaranteed to be at the start of Result<>");
internal::launder(reinterpret_cast<const T*>(&data_))->~T();
}
}
};
-#define ARROW_ASSIGN_OR_RAISE_IMPL(result_name, lhs, rexpr) \
- auto&& result_name = (rexpr); \
- ARROW_RETURN_IF_(!(result_name).ok(), (result_name).status(), ARROW_STRINGIFY(rexpr)); \
+#define ARROW_ASSIGN_OR_RAISE_IMPL(result_name, lhs, rexpr) \
+ auto&& result_name = (rexpr); \
+ ARROW_RETURN_IF_(!(result_name).ok(), (result_name).status(), ARROW_STRINGIFY(rexpr)); \
lhs = std::move(result_name).ValueUnsafe();
#define ARROW_ASSIGN_OR_RAISE_NAME(x, y) ARROW_CONCAT(x, y)
@@ -475,14 +475,14 @@ class ARROW_MUST_USE_TYPE Result : public util::EqualityComparable<Result<T>> {
/// WARNING: ARROW_ASSIGN_OR_RAISE expands into multiple statements;
/// it cannot be used in a single statement (e.g. as the body of an if
/// statement without {})!
-///
-/// WARNING: ARROW_ASSIGN_OR_RAISE `std::move`s its right operand. If you have
-/// an lvalue Result which you *don't* want to move out of cast appropriately.
-///
-/// WARNING: ARROW_ASSIGN_OR_RAISE is not a single expression; it will not
-/// maintain lifetimes of all temporaries in `rexpr` (e.g.
-/// `ARROW_ASSIGN_OR_RAISE(auto x, MakeTemp().GetResultRef());`
-/// will most likely segfault)!
+///
+/// WARNING: ARROW_ASSIGN_OR_RAISE `std::move`s its right operand. If you have
+/// an lvalue Result which you *don't* want to move out of, cast appropriately.
+///
+/// WARNING: ARROW_ASSIGN_OR_RAISE is not a single expression; it will not
+/// maintain lifetimes of all temporaries in `rexpr` (e.g.
+/// `ARROW_ASSIGN_OR_RAISE(auto x, MakeTemp().GetResultRef());`
+/// will most likely segfault)!
#define ARROW_ASSIGN_OR_RAISE(lhs, rexpr) \
ARROW_ASSIGN_OR_RAISE_IMPL(ARROW_ASSIGN_OR_RAISE_NAME(_error_or_value, __COUNTER__), \
lhs, rexpr);
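
Aside: a sketch of the pitfalls documented above; MakeBuffer is a hypothetical helper returning arrow::Result<std::shared_ptr<arrow::Buffer>>:

#include "arrow/api.h"

arrow::Result<std::shared_ptr<arrow::Buffer>> MakeBuffer();  // hypothetical

arrow::Status UseBuffer() {
  // OK: the right operand is a prvalue, so moving out of it is safe.
  ARROW_ASSIGN_OR_RAISE(auto buf, MakeBuffer());
  // NOT OK without braces: the macro expands to multiple statements, so
  //   if (condition) ARROW_ASSIGN_OR_RAISE(auto b2, MakeBuffer());
  // would only guard part of the expansion.
  return arrow::Status::OK();
}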
@@ -490,7 +490,7 @@ class ARROW_MUST_USE_TYPE Result : public util::EqualityComparable<Result<T>> {
namespace internal {
template <typename T>
-inline const Status& GenericToStatus(const Result<T>& res) {
+inline const Status& GenericToStatus(const Result<T>& res) {
return res.status();
}
@@ -501,19 +501,19 @@ inline Status GenericToStatus(Result<T>&& res) {
} // namespace internal
-template <typename T, typename R = typename EnsureResult<T>::type>
-R ToResult(T t) {
- return R(std::move(t));
+template <typename T, typename R = typename EnsureResult<T>::type>
+R ToResult(T t) {
+ return R(std::move(t));
}
-template <typename T>
-struct EnsureResult {
- using type = Result<T>;
-};
-
-template <typename T>
-struct EnsureResult<Result<T>> {
- using type = Result<T>;
-};
-
+template <typename T>
+struct EnsureResult {
+ using type = Result<T>;
+};
+
+template <typename T>
+struct EnsureResult<Result<T>> {
+ using type = Result<T>;
+};
+
} // namespace arrow
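
Aside: a sketch of the Map combinator and the ToResult/EnsureResult plumbing defined above; ParsePort is a hypothetical helper:

#include <string>
#include "arrow/result.h"

arrow::Result<int> ParsePort(const std::string& s);  // hypothetical

arrow::Result<std::string> Describe(const std::string& s) {
  // Map applies the callable to the stored value or propagates the error;
  // EnsureResult wraps the callable's return type in Result<> if needed.
  return ParsePort(s).Map([](int port) { return "port " + std::to_string(port); });
}

// ToResult lifts a plain value; thanks to the EnsureResult<Result<T>>
// specialization, applying it to an existing Result<T> does not double-wrap.
auto lifted = arrow::ToResult(42);  // arrow::Result<int>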
diff --git a/contrib/libs/apache/arrow/cpp/src/arrow/scalar.cc b/contrib/libs/apache/arrow/cpp/src/arrow/scalar.cc
index 4f9d94a0a38..cb7755ba3f1 100644
--- a/contrib/libs/apache/arrow/cpp/src/arrow/scalar.cc
+++ b/contrib/libs/apache/arrow/cpp/src/arrow/scalar.cc
@@ -18,7 +18,7 @@
#include "arrow/scalar.h"
#include <memory>
-#include <sstream>
+#include <sstream>
#include <string>
#include <utility>
@@ -45,10 +45,10 @@ bool Scalar::Equals(const Scalar& other, const EqualOptions& options) const {
return ScalarEquals(*this, other, options);
}
-bool Scalar::ApproxEquals(const Scalar& other, const EqualOptions& options) const {
- return ScalarApproxEquals(*this, other, options);
-}
-
+bool Scalar::ApproxEquals(const Scalar& other, const EqualOptions& options) const {
+ return ScalarApproxEquals(*this, other, options);
+}
+
struct ScalarHashImpl {
static std::hash<std::string> string_hash;
@@ -74,14 +74,14 @@ struct ScalarHashImpl {
return StdHash(s.value.low_bits()) & StdHash(s.value.high_bits());
}
- Status Visit(const Decimal256Scalar& s) {
- Status status = Status::OK();
- for (uint64_t elem : s.value.little_endian_array()) {
- status &= StdHash(elem);
- }
- return status;
- }
-
+ Status Visit(const Decimal256Scalar& s) {
+ Status status = Status::OK();
+ for (uint64_t elem : s.value.little_endian_array()) {
+ status &= StdHash(elem);
+ }
+ return status;
+ }
+
Status Visit(const BaseListScalar& s) { return ArrayHash(*s.value); }
Status Visit(const StructScalar& s) {
@@ -91,11 +91,11 @@ struct ScalarHashImpl {
return Status::OK();
}
- Status Visit(const DictionaryScalar& s) {
- AccumulateHashFrom(*s.value.index);
- return Status::OK();
- }
-
+ Status Visit(const DictionaryScalar& s) {
+ AccumulateHashFrom(*s.value.index);
+ return Status::OK();
+ }
+
// TODO(bkietz) implement less wimpy hashing when these have ValueType
Status Visit(const UnionScalar& s) { return Status::OK(); }
Status Visit(const ExtensionScalar& s) { return Status::OK(); }
@@ -132,21 +132,21 @@ struct ScalarHashImpl {
return Status::OK();
}
- explicit ScalarHashImpl(const Scalar& scalar) : hash_(scalar.type->Hash()) {
- if (scalar.is_valid) {
- AccumulateHashFrom(scalar);
- }
- }
+ explicit ScalarHashImpl(const Scalar& scalar) : hash_(scalar.type->Hash()) {
+ if (scalar.is_valid) {
+ AccumulateHashFrom(scalar);
+ }
+ }
void AccumulateHashFrom(const Scalar& scalar) {
DCHECK_OK(StdHash(scalar.type->fingerprint()));
DCHECK_OK(VisitScalarInline(scalar, this));
}
- size_t hash_;
+ size_t hash_;
};
-size_t Scalar::hash() const { return ScalarHashImpl(*this).hash_; }
+size_t Scalar::hash() const { return ScalarHashImpl(*this).hash_; }
StringScalar::StringScalar(std::string s)
: StringScalar(Buffer::FromString(std::move(s))) {}
@@ -193,20 +193,20 @@ FixedSizeListScalar::FixedSizeListScalar(std::shared_ptr<Array> value)
: BaseListScalar(
value, fixed_size_list(value->type(), static_cast<int32_t>(value->length()))) {}
-Result<std::shared_ptr<StructScalar>> StructScalar::Make(
- ScalarVector values, std::vector<std::string> field_names) {
- if (values.size() != field_names.size()) {
- return Status::Invalid("Mismatching number of field names and child scalars");
- }
-
- FieldVector fields(field_names.size());
- for (size_t i = 0; i < fields.size(); ++i) {
- fields[i] = arrow::field(std::move(field_names[i]), values[i]->type);
- }
-
- return std::make_shared<StructScalar>(std::move(values), struct_(std::move(fields)));
-}
-
+Result<std::shared_ptr<StructScalar>> StructScalar::Make(
+ ScalarVector values, std::vector<std::string> field_names) {
+ if (values.size() != field_names.size()) {
+ return Status::Invalid("Mismatching number of field names and child scalars");
+ }
+
+ FieldVector fields(field_names.size());
+ for (size_t i = 0; i < fields.size(); ++i) {
+ fields[i] = arrow::field(std::move(field_names[i]), values[i]->type);
+ }
+
+ return std::make_shared<StructScalar>(std::move(values), struct_(std::move(fields)));
+}
+
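Aside: a sketch of the StructScalar::Make factory added above (MakeScalar is the existing scalar convenience factory; the field layout is illustrative):

#include "arrow/api.h"

arrow::Result<std::shared_ptr<arrow::StructScalar>> MakeRow() {
  // Pairs child scalars with field names; mismatched sizes -> Invalid.
  return arrow::StructScalar::Make(
      {arrow::MakeScalar(int64_t(1)), arrow::MakeScalar(true)},
      {"id", "flag"});  // resulting type: struct<id: int64, flag: bool>
}
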
Result<std::shared_ptr<Scalar>> StructScalar::field(FieldRef ref) const {
ARROW_ASSIGN_OR_RAISE(auto path, ref.FindOne(*type));
if (path.indices().size() != 1) {
@@ -277,13 +277,13 @@ Result<std::shared_ptr<Scalar>> DictionaryScalar::GetEncodedValue() const {
return value.dictionary->GetScalar(index_value);
}
-std::shared_ptr<DictionaryScalar> DictionaryScalar::Make(std::shared_ptr<Scalar> index,
- std::shared_ptr<Array> dict) {
- auto type = dictionary(index->type, dict->type());
- return std::make_shared<DictionaryScalar>(ValueType{std::move(index), std::move(dict)},
- std::move(type));
-}
-
+std::shared_ptr<DictionaryScalar> DictionaryScalar::Make(std::shared_ptr<Scalar> index,
+ std::shared_ptr<Array> dict) {
+ auto type = dictionary(index->type, dict->type());
+ return std::make_shared<DictionaryScalar>(ValueType{std::move(index), std::move(dict)},
+ std::move(type));
+}
+
template <typename T>
using scalar_constructor_has_arrow_type =
std::is_constructible<typename TypeTraits<T>::ScalarType, std::shared_ptr<DataType>>;
@@ -551,31 +551,31 @@ Status CastImpl(const ScalarType& from, StringScalar* to) {
return Status::OK();
}
-Status CastImpl(const Decimal128Scalar& from, StringScalar* to) {
- auto from_type = checked_cast<const Decimal128Type*>(from.type.get());
- to->value = Buffer::FromString(from.value.ToString(from_type->scale()));
- return Status::OK();
-}
-
-Status CastImpl(const Decimal256Scalar& from, StringScalar* to) {
- auto from_type = checked_cast<const Decimal256Type*>(from.type.get());
- to->value = Buffer::FromString(from.value.ToString(from_type->scale()));
- return Status::OK();
-}
-
-Status CastImpl(const StructScalar& from, StringScalar* to) {
- std::stringstream ss;
- ss << '{';
- for (int i = 0; static_cast<size_t>(i) < from.value.size(); i++) {
- if (i > 0) ss << ", ";
- ss << from.type->field(i)->name() << ':' << from.type->field(i)->type()->ToString()
- << " = " << from.value[i]->ToString();
- }
- ss << '}';
- to->value = Buffer::FromString(ss.str());
- return Status::OK();
-}
-
+Status CastImpl(const Decimal128Scalar& from, StringScalar* to) {
+ auto from_type = checked_cast<const Decimal128Type*>(from.type.get());
+ to->value = Buffer::FromString(from.value.ToString(from_type->scale()));
+ return Status::OK();
+}
+
+Status CastImpl(const Decimal256Scalar& from, StringScalar* to) {
+ auto from_type = checked_cast<const Decimal256Type*>(from.type.get());
+ to->value = Buffer::FromString(from.value.ToString(from_type->scale()));
+ return Status::OK();
+}
+
+Status CastImpl(const StructScalar& from, StringScalar* to) {
+ std::stringstream ss;
+ ss << '{';
+ for (int i = 0; static_cast<size_t>(i) < from.value.size(); i++) {
+ if (i > 0) ss << ", ";
+ ss << from.type->field(i)->name() << ':' << from.type->field(i)->type()->ToString()
+ << " = " << from.value[i]->ToString();
+ }
+ ss << '}';
+ to->value = Buffer::FromString(ss.str());
+ return Status::OK();
+}
+
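Aside: these overloads back Scalar::CastTo to utf8 for decimal and struct inputs; a sketch, assuming the decimal128() type factory from this Arrow vintage (value 123 at scale 2 renders as "1.23"):

#include "arrow/api.h"

arrow::Result<std::shared_ptr<arrow::Scalar>> DecimalToString() {
  arrow::Decimal128Scalar s(arrow::Decimal128(123), arrow::decimal128(10, 2));
  return s.CastTo(arrow::utf8());  // a StringScalar holding "1.23"
}
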
struct CastImplVisitor {
Status NotImplemented() {
return Status::NotImplemented("cast to ", *to_type_, " from ", *from_.type);
diff --git a/contrib/libs/apache/arrow/cpp/src/arrow/scalar.h b/contrib/libs/apache/arrow/cpp/src/arrow/scalar.h
index 1d5e2c93ff4..24744859686 100644
--- a/contrib/libs/apache/arrow/cpp/src/arrow/scalar.h
+++ b/contrib/libs/apache/arrow/cpp/src/arrow/scalar.h
@@ -65,19 +65,19 @@ struct ARROW_EXPORT Scalar : public util::EqualityComparable<Scalar> {
bool Equals(const Scalar& other,
const EqualOptions& options = EqualOptions::Defaults()) const;
- bool ApproxEquals(const Scalar& other,
- const EqualOptions& options = EqualOptions::Defaults()) const;
-
+ bool ApproxEquals(const Scalar& other,
+ const EqualOptions& options = EqualOptions::Defaults()) const;
+
struct ARROW_EXPORT Hash {
- size_t operator()(const Scalar& scalar) const { return scalar.hash(); }
+ size_t operator()(const Scalar& scalar) const { return scalar.hash(); }
size_t operator()(const std::shared_ptr<Scalar>& scalar) const {
- return scalar->hash();
+ return scalar->hash();
}
};
- size_t hash() const;
-
+ size_t hash() const;
+
std::string ToString() const;
static Result<std::shared_ptr<Scalar>> Parse(const std::shared_ptr<DataType>& type,
@@ -350,17 +350,17 @@ struct ARROW_EXPORT Decimal128Scalar : public Scalar {
Decimal128 value;
};
-struct ARROW_EXPORT Decimal256Scalar : public Scalar {
- using Scalar::Scalar;
- using TypeClass = Decimal256Type;
- using ValueType = Decimal256;
-
- Decimal256Scalar(Decimal256 value, std::shared_ptr<DataType> type)
- : Scalar(std::move(type), true), value(value) {}
-
- Decimal256 value;
-};
-
+struct ARROW_EXPORT Decimal256Scalar : public Scalar {
+ using Scalar::Scalar;
+ using TypeClass = Decimal256Type;
+ using ValueType = Decimal256;
+
+ Decimal256Scalar(Decimal256 value, std::shared_ptr<DataType> type)
+ : Scalar(std::move(type), true), value(value) {}
+
+ Decimal256 value;
+};
+
struct ARROW_EXPORT BaseListScalar : public Scalar {
using Scalar::Scalar;
using ValueType = std::shared_ptr<Array>;
@@ -411,9 +411,9 @@ struct ARROW_EXPORT StructScalar : public Scalar {
StructScalar(ValueType value, std::shared_ptr<DataType> type)
: Scalar(std::move(type), true), value(std::move(value)) {}
- static Result<std::shared_ptr<StructScalar>> Make(ValueType value,
- std::vector<std::string> field_names);
-
+ static Result<std::shared_ptr<StructScalar>> Make(ValueType value,
+ std::vector<std::string> field_names);
+
explicit StructScalar(std::shared_ptr<DataType> type) : Scalar(std::move(type)) {}
};
@@ -448,9 +448,9 @@ struct ARROW_EXPORT DictionaryScalar : public Scalar {
DictionaryScalar(ValueType value, std::shared_ptr<DataType> type, bool is_valid = true)
: Scalar(std::move(type), is_valid), value(std::move(value)) {}
- static std::shared_ptr<DictionaryScalar> Make(std::shared_ptr<Scalar> index,
- std::shared_ptr<Array> dict);
-
+ static std::shared_ptr<DictionaryScalar> Make(std::shared_ptr<Scalar> index,
+ std::shared_ptr<Array> dict);
+
Result<std::shared_ptr<Scalar>> GetEncodedValue() const;
};
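
Aside: a sketch of DictionaryScalar::Make and GetEncodedValue declared above; `dict` is assumed to be a utf8 Array holding ["a", "b", "c"] (construction elided):

#include "arrow/api.h"

arrow::Result<std::shared_ptr<arrow::Scalar>> LookUp(
    const std::shared_ptr<arrow::Array>& dict) {
  // Make derives dictionary<values=utf8, indices=int32> from the pieces.
  auto ds = arrow::DictionaryScalar::Make(arrow::MakeScalar(int32_t(1)), dict);
  return ds->GetEncodedValue();  // StringScalar "b" for dict ["a", "b", "c"]
}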
diff --git a/contrib/libs/apache/arrow/cpp/src/arrow/status.cc b/contrib/libs/apache/arrow/cpp/src/arrow/status.cc
index d6399f8bfce..0f02cb57a23 100644
--- a/contrib/libs/apache/arrow/cpp/src/arrow/status.cc
+++ b/contrib/libs/apache/arrow/cpp/src/arrow/status.cc
@@ -68,9 +68,9 @@ std::string Status::CodeAsString(StatusCode code) {
case StatusCode::Invalid:
type = "Invalid";
break;
- case StatusCode::Cancelled:
- type = "Cancelled";
- break;
+ case StatusCode::Cancelled:
+ type = "Cancelled";
+ break;
case StatusCode::IOError:
type = "IOError";
break;
@@ -135,7 +135,7 @@ void Status::Abort(const std::string& message) const {
void Status::AddContextLine(const char* filename, int line, const char* expr) {
ARROW_CHECK(!ok()) << "Cannot add context line to ok status";
std::stringstream ss;
- ss << "\n" << filename << ":" << line << " " << expr;
+ ss << "\n" << filename << ":" << line << " " << expr;
state_->msg += ss.str();
}
#endif
diff --git a/contrib/libs/apache/arrow/cpp/src/arrow/status.h b/contrib/libs/apache/arrow/cpp/src/arrow/status.h
index 9fbc840a541..056d60d6f32 100644
--- a/contrib/libs/apache/arrow/cpp/src/arrow/status.h
+++ b/contrib/libs/apache/arrow/cpp/src/arrow/status.h
@@ -83,7 +83,7 @@ enum class StatusCode : char {
IOError = 5,
CapacityError = 6,
IndexError = 7,
- Cancelled = 8,
+ Cancelled = 8,
UnknownError = 9,
NotImplemented = 10,
SerializationError = 11,
@@ -205,12 +205,12 @@ class ARROW_MUST_USE_TYPE ARROW_EXPORT Status : public util::EqualityComparable<
return Status::FromArgs(StatusCode::Invalid, std::forward<Args>(args)...);
}
- /// Return an error status for cancelled operation
- template <typename... Args>
- static Status Cancelled(Args&&... args) {
- return Status::FromArgs(StatusCode::Cancelled, std::forward<Args>(args)...);
- }
-
+ /// Return an error status for a cancelled operation
+ template <typename... Args>
+ static Status Cancelled(Args&&... args) {
+ return Status::FromArgs(StatusCode::Cancelled, std::forward<Args>(args)...);
+ }
+
/// Return an error status when an index is out of bounds
template <typename... Args>
static Status IndexError(Args&&... args) {
@@ -270,8 +270,8 @@ class ARROW_MUST_USE_TYPE ARROW_EXPORT Status : public util::EqualityComparable<
bool IsKeyError() const { return code() == StatusCode::KeyError; }
/// Return true iff the status indicates invalid data.
bool IsInvalid() const { return code() == StatusCode::Invalid; }
- /// Return true iff the status indicates a cancelled operation.
- bool IsCancelled() const { return code() == StatusCode::Cancelled; }
+ /// Return true iff the status indicates a cancelled operation.
+ bool IsCancelled() const { return code() == StatusCode::Cancelled; }
/// Return true iff the status indicates an IO-related failure.
bool IsIOError() const { return code() == StatusCode::IOError; }
/// Return true iff the status indicates a container reaching capacity limits.
@@ -312,10 +312,10 @@ class ARROW_MUST_USE_TYPE ARROW_EXPORT Status : public util::EqualityComparable<
StatusCode code() const { return ok() ? StatusCode::OK : state_->code; }
/// \brief Return the specific error message attached to this status.
- const std::string& message() const {
- static const std::string no_message = "";
- return ok() ? no_message : state_->msg;
- }
+ const std::string& message() const {
+ static const std::string no_message = "";
+ return ok() ? no_message : state_->msg;
+ }
/// \brief Return the status detail attached to this message.
const std::shared_ptr<StatusDetail>& detail() const {
@@ -443,7 +443,7 @@ namespace internal {
// Extract Status from Status or Result<T>
// Useful for the status check macros such as RETURN_NOT_OK.
-inline const Status& GenericToStatus(const Status& st) { return st; }
+inline const Status& GenericToStatus(const Status& st) { return st; }
inline Status GenericToStatus(Status&& st) { return std::move(st); }
} // namespace internal
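
Aside: the new Cancelled plumbing composes like the other status factories (sketch):

#include "arrow/status.h"

arrow::Status st = arrow::Status::Cancelled("interrupted by user");
// st.IsCancelled() == true; st.CodeAsString() == "Cancelled"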
diff --git a/contrib/libs/apache/arrow/cpp/src/arrow/stl_iterator.h b/contrib/libs/apache/arrow/cpp/src/arrow/stl_iterator.h
index c996923ca67..6225a89aae4 100644
--- a/contrib/libs/apache/arrow/cpp/src/arrow/stl_iterator.h
+++ b/contrib/libs/apache/arrow/cpp/src/arrow/stl_iterator.h
@@ -1,146 +1,146 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-#pragma once
-
-#include <cstddef>
-#include <iterator>
-#include <utility>
-
-#include "arrow/type_fwd.h"
-#include "arrow/util/macros.h"
-#include "arrow/util/optional.h"
-
-namespace arrow {
-namespace stl {
-
-namespace detail {
-
-template <typename ArrayType>
-struct DefaultValueAccessor {
- using ValueType = decltype(std::declval<ArrayType>().GetView(0));
-
- ValueType operator()(const ArrayType& array, int64_t index) {
- return array.GetView(index);
- }
-};
-
-} // namespace detail
-
-template <typename ArrayType,
- typename ValueAccessor = detail::DefaultValueAccessor<ArrayType>>
-class ArrayIterator {
- public:
- using value_type = arrow::util::optional<typename ValueAccessor::ValueType>;
- using difference_type = int64_t;
- using pointer = value_type*;
- using reference = value_type&;
- using iterator_category = std::random_access_iterator_tag;
-
- // Some algorithms need to default-construct an iterator
- ArrayIterator() : array_(NULLPTR), index_(0) {}
-
- explicit ArrayIterator(const ArrayType& array, int64_t index = 0)
- : array_(&array), index_(index) {}
-
- // Value access
- value_type operator*() const {
- return array_->IsNull(index_) ? value_type{} : array_->GetView(index_);
- }
-
- value_type operator[](difference_type n) const {
- return array_->IsNull(index_ + n) ? value_type{} : array_->GetView(index_ + n);
- }
-
- int64_t index() const { return index_; }
-
- // Forward / backward
- ArrayIterator& operator++() {
- ++index_;
- return *this;
- }
- ArrayIterator& operator--() {
- --index_;
- return *this;
- }
- ArrayIterator operator++(int) {
- ArrayIterator tmp(*this);
- ++index_;
- return tmp;
- }
- ArrayIterator operator--(int) {
- ArrayIterator tmp(*this);
- --index_;
- return tmp;
- }
-
- // Arithmetic
- difference_type operator-(const ArrayIterator& other) const {
- return index_ - other.index_;
- }
- ArrayIterator operator+(difference_type n) const {
- return ArrayIterator(*array_, index_ + n);
- }
- ArrayIterator operator-(difference_type n) const {
- return ArrayIterator(*array_, index_ - n);
- }
- friend inline ArrayIterator operator+(difference_type diff,
- const ArrayIterator& other) {
- return ArrayIterator(*other.array_, diff + other.index_);
- }
- friend inline ArrayIterator operator-(difference_type diff,
- const ArrayIterator& other) {
- return ArrayIterator(*other.array_, diff - other.index_);
- }
- ArrayIterator& operator+=(difference_type n) {
- index_ += n;
- return *this;
- }
- ArrayIterator& operator-=(difference_type n) {
- index_ -= n;
- return *this;
- }
-
- // Comparisons
- bool operator==(const ArrayIterator& other) const { return index_ == other.index_; }
- bool operator!=(const ArrayIterator& other) const { return index_ != other.index_; }
- bool operator<(const ArrayIterator& other) const { return index_ < other.index_; }
- bool operator>(const ArrayIterator& other) const { return index_ > other.index_; }
- bool operator<=(const ArrayIterator& other) const { return index_ <= other.index_; }
- bool operator>=(const ArrayIterator& other) const { return index_ >= other.index_; }
-
- private:
- const ArrayType* array_;
- int64_t index_;
-};
-
-} // namespace stl
-} // namespace arrow
-
-namespace std {
-
-template <typename ArrayType>
-struct iterator_traits<::arrow::stl::ArrayIterator<ArrayType>> {
- using IteratorType = ::arrow::stl::ArrayIterator<ArrayType>;
- using difference_type = typename IteratorType::difference_type;
- using value_type = typename IteratorType::value_type;
- using pointer = typename IteratorType::pointer;
- using reference = typename IteratorType::reference;
- using iterator_category = typename IteratorType::iterator_category;
-};
-
-} // namespace std
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+
+#include <cstddef>
+#include <iterator>
+#include <utility>
+
+#include "arrow/type_fwd.h"
+#include "arrow/util/macros.h"
+#include "arrow/util/optional.h"
+
+namespace arrow {
+namespace stl {
+
+namespace detail {
+
+template <typename ArrayType>
+struct DefaultValueAccessor {
+ using ValueType = decltype(std::declval<ArrayType>().GetView(0));
+
+ ValueType operator()(const ArrayType& array, int64_t index) {
+ return array.GetView(index);
+ }
+};
+
+} // namespace detail
+
+template <typename ArrayType,
+ typename ValueAccessor = detail::DefaultValueAccessor<ArrayType>>
+class ArrayIterator {
+ public:
+ using value_type = arrow::util::optional<typename ValueAccessor::ValueType>;
+ using difference_type = int64_t;
+ using pointer = value_type*;
+ using reference = value_type&;
+ using iterator_category = std::random_access_iterator_tag;
+
+ // Some algorithms need to default-construct an iterator
+ ArrayIterator() : array_(NULLPTR), index_(0) {}
+
+ explicit ArrayIterator(const ArrayType& array, int64_t index = 0)
+ : array_(&array), index_(index) {}
+
+ // Value access
+ value_type operator*() const {
+ return array_->IsNull(index_) ? value_type{} : array_->GetView(index_);
+ }
+
+ value_type operator[](difference_type n) const {
+ return array_->IsNull(index_ + n) ? value_type{} : array_->GetView(index_ + n);
+ }
+
+ int64_t index() const { return index_; }
+
+ // Forward / backward
+ ArrayIterator& operator++() {
+ ++index_;
+ return *this;
+ }
+ ArrayIterator& operator--() {
+ --index_;
+ return *this;
+ }
+ ArrayIterator operator++(int) {
+ ArrayIterator tmp(*this);
+ ++index_;
+ return tmp;
+ }
+ ArrayIterator operator--(int) {
+ ArrayIterator tmp(*this);
+ --index_;
+ return tmp;
+ }
+
+ // Arithmetic
+ difference_type operator-(const ArrayIterator& other) const {
+ return index_ - other.index_;
+ }
+ ArrayIterator operator+(difference_type n) const {
+ return ArrayIterator(*array_, index_ + n);
+ }
+ ArrayIterator operator-(difference_type n) const {
+ return ArrayIterator(*array_, index_ - n);
+ }
+ friend inline ArrayIterator operator+(difference_type diff,
+ const ArrayIterator& other) {
+ return ArrayIterator(*other.array_, diff + other.index_);
+ }
+ friend inline ArrayIterator operator-(difference_type diff,
+ const ArrayIterator& other) {
+ return ArrayIterator(*other.array_, diff - other.index_);
+ }
+ ArrayIterator& operator+=(difference_type n) {
+ index_ += n;
+ return *this;
+ }
+ ArrayIterator& operator-=(difference_type n) {
+ index_ -= n;
+ return *this;
+ }
+
+ // Comparisons
+ bool operator==(const ArrayIterator& other) const { return index_ == other.index_; }
+ bool operator!=(const ArrayIterator& other) const { return index_ != other.index_; }
+ bool operator<(const ArrayIterator& other) const { return index_ < other.index_; }
+ bool operator>(const ArrayIterator& other) const { return index_ > other.index_; }
+ bool operator<=(const ArrayIterator& other) const { return index_ <= other.index_; }
+ bool operator>=(const ArrayIterator& other) const { return index_ >= other.index_; }
+
+ private:
+ const ArrayType* array_;
+ int64_t index_;
+};
+
+} // namespace stl
+} // namespace arrow
+
+namespace std {
+
+template <typename ArrayType>
+struct iterator_traits<::arrow::stl::ArrayIterator<ArrayType>> {
+ using IteratorType = ::arrow::stl::ArrayIterator<ArrayType>;
+ using difference_type = typename IteratorType::difference_type;
+ using value_type = typename IteratorType::value_type;
+ using pointer = typename IteratorType::pointer;
+ using reference = typename IteratorType::reference;
+ using iterator_category = typename IteratorType::iterator_category;
+};
+
+} // namespace std
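
Aside: a sketch of the iterator above driving an STL algorithm; its value_type is util::optional<T>, which is empty for null slots:

#include <algorithm>
#include "arrow/api.h"
#include "arrow/stl_iterator.h"

int64_t CountNulls(const arrow::Int64Array& values) {
  arrow::stl::ArrayIterator<arrow::Int64Array> begin(values);
  arrow::stl::ArrayIterator<arrow::Int64Array> end(values, values.length());
  // Null slots dereference to an empty optional.
  return std::count_if(begin, end,
                       [](const arrow::util::optional<int64_t>& v) { return !v; });
}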
diff --git a/contrib/libs/apache/arrow/cpp/src/arrow/table.cc b/contrib/libs/apache/arrow/cpp/src/arrow/table.cc
index 6b5362c873d..d4c7802c834 100644
--- a/contrib/libs/apache/arrow/cpp/src/arrow/table.cc
+++ b/contrib/libs/apache/arrow/cpp/src/arrow/table.cc
@@ -92,10 +92,10 @@ class SimpleTable : public Table {
std::shared_ptr<ChunkedArray> column(int i) const override { return columns_[i]; }
- const std::vector<std::shared_ptr<ChunkedArray>>& columns() const override {
- return columns_;
- }
-
+ const std::vector<std::shared_ptr<ChunkedArray>>& columns() const override {
+ return columns_;
+ }
+
std::shared_ptr<Table> Slice(int64_t offset, int64_t length) const override {
auto sliced = columns_;
int64_t num_rows = length;
@@ -103,13 +103,13 @@ class SimpleTable : public Table {
column = column->Slice(offset, length);
num_rows = column->length();
}
- return Table::Make(schema_, std::move(sliced), num_rows);
+ return Table::Make(schema_, std::move(sliced), num_rows);
}
Result<std::shared_ptr<Table>> RemoveColumn(int i) const override {
ARROW_ASSIGN_OR_RAISE(auto new_schema, schema_->RemoveField(i));
- return Table::Make(std::move(new_schema), internal::DeleteVectorElement(columns_, i),
+ return Table::Make(std::move(new_schema), internal::DeleteVectorElement(columns_, i),
this->num_rows());
}
@@ -129,7 +129,7 @@ class SimpleTable : public Table {
}
ARROW_ASSIGN_OR_RAISE(auto new_schema, schema_->AddField(i, field_arg));
- return Table::Make(std::move(new_schema),
+ return Table::Make(std::move(new_schema),
internal::AddVectorElement(columns_, i, std::move(col)));
}
@@ -149,14 +149,14 @@ class SimpleTable : public Table {
}
ARROW_ASSIGN_OR_RAISE(auto new_schema, schema_->SetField(i, field_arg));
- return Table::Make(std::move(new_schema),
+ return Table::Make(std::move(new_schema),
internal::ReplaceVectorElement(columns_, i, std::move(col)));
}
std::shared_ptr<Table> ReplaceSchemaMetadata(
const std::shared_ptr<const KeyValueMetadata>& metadata) const override {
auto new_schema = schema_->WithMetadata(metadata);
- return Table::Make(std::move(new_schema), columns_);
+ return Table::Make(std::move(new_schema), columns_);
}
Result<std::shared_ptr<Table>> Flatten(MemoryPool* pool) const override {
@@ -374,7 +374,7 @@ Result<std::shared_ptr<Table>> Table::SelectColumns(
auto new_schema =
std::make_shared<arrow::Schema>(std::move(fields), schema()->metadata());
- return Table::Make(std::move(new_schema), std::move(columns), num_rows());
+ return Table::Make(std::move(new_schema), std::move(columns), num_rows());
}
std::string Table::ToString() const {
@@ -435,7 +435,7 @@ Result<std::shared_ptr<Table>> ConcatenateTables(
}
columns[i] = std::make_shared<ChunkedArray>(column_arrays, schema->field(i)->type());
}
- return Table::Make(std::move(schema), std::move(columns));
+ return Table::Make(std::move(schema), std::move(columns));
}
Result<std::shared_ptr<Table>> PromoteTableToSchema(const std::shared_ptr<Table>& table,
@@ -564,7 +564,7 @@ Result<std::shared_ptr<Table>> Table::CombineChunks(MemoryPool* pool) const {
compacted_columns[i] = std::make_shared<ChunkedArray>(compacted);
}
}
- return Table::Make(schema(), std::move(compacted_columns), num_rows_);
+ return Table::Make(schema(), std::move(compacted_columns), num_rows_);
}
// ----------------------------------------------------------------------
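
Aside: a sketch of the CombineChunks contract touched above; every column of the result holds at most one chunk:

#include "arrow/api.h"

arrow::Result<std::shared_ptr<arrow::Table>> Compact(
    const std::shared_ptr<arrow::Table>& table) {
  // Concatenates each column's chunks into a single chunk (a per-column
  // no-op when the column already has <= 1 chunk).
  return table->CombineChunks(arrow::default_memory_pool());
}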
diff --git a/contrib/libs/apache/arrow/cpp/src/arrow/table.h b/contrib/libs/apache/arrow/cpp/src/arrow/table.h
index 96b50e002f9..f1e5f23eed8 100644
--- a/contrib/libs/apache/arrow/cpp/src/arrow/table.h
+++ b/contrib/libs/apache/arrow/cpp/src/arrow/table.h
@@ -98,7 +98,7 @@ class ARROW_EXPORT Table {
virtual std::shared_ptr<ChunkedArray> column(int i) const = 0;
/// \brief Return vector of all columns for table
- virtual const std::vector<std::shared_ptr<ChunkedArray>>& columns() const = 0;
+ virtual const std::vector<std::shared_ptr<ChunkedArray>>& columns() const = 0;
/// Return a column's field by index
std::shared_ptr<Field> field(int i) const { return schema_->field(i); }
@@ -151,7 +151,7 @@ class ARROW_EXPORT Table {
/// \brief Return new table with specified columns
Result<std::shared_ptr<Table>> SelectColumns(const std::vector<int>& indices) const;
- /// \brief Replace schema key-value metadata with new metadata
+ /// \brief Replace schema key-value metadata with new metadata
/// \since 0.5.0
///
/// \param[in] metadata new KeyValueMetadata
diff --git a/contrib/libs/apache/arrow/cpp/src/arrow/table_builder.cc b/contrib/libs/apache/arrow/cpp/src/arrow/table_builder.cc
index 170dfc70c3c..c026c355758 100644
--- a/contrib/libs/apache/arrow/cpp/src/arrow/table_builder.cc
+++ b/contrib/libs/apache/arrow/cpp/src/arrow/table_builder.cc
@@ -21,7 +21,7 @@
#include <utility>
#include "arrow/array/array_base.h"
-#include "arrow/array/builder_base.h"
+#include "arrow/array/builder_base.h"
#include "arrow/record_batch.h"
#include "arrow/status.h"
#include "arrow/type.h"
@@ -74,9 +74,9 @@ Status RecordBatchBuilder::Flush(bool reset_builders,
}
}
std::shared_ptr<Schema> schema =
- std::make_shared<Schema>(std::move(schema_fields), schema_->metadata());
+ std::make_shared<Schema>(std::move(schema_fields), schema_->metadata());
- *batch = RecordBatch::Make(std::move(schema), length, std::move(fields));
+ *batch = RecordBatch::Make(std::move(schema), length, std::move(fields));
if (reset_builders) {
return InitBuilders();
} else {
diff --git a/contrib/libs/apache/arrow/cpp/src/arrow/tensor.cc b/contrib/libs/apache/arrow/cpp/src/arrow/tensor.cc
index 91d5975715b..d591bacff02 100644
--- a/contrib/libs/apache/arrow/cpp/src/arrow/tensor.cc
+++ b/contrib/libs/apache/arrow/cpp/src/arrow/tensor.cc
@@ -31,7 +31,7 @@
#include "arrow/type.h"
#include "arrow/type_traits.h"
#include "arrow/util/checked_cast.h"
-#include "arrow/util/int_util_internal.h"
+#include "arrow/util/int_util_internal.h"
#include "arrow/util/logging.h"
#include "arrow/visitor_inline.h"
@@ -41,72 +41,72 @@ using internal::checked_cast;
namespace internal {
-Status ComputeRowMajorStrides(const FixedWidthType& type,
- const std::vector<int64_t>& shape,
- std::vector<int64_t>* strides) {
+Status ComputeRowMajorStrides(const FixedWidthType& type,
+ const std::vector<int64_t>& shape,
+ std::vector<int64_t>* strides) {
const int byte_width = GetByteWidth(type);
- const size_t ndim = shape.size();
-
- int64_t remaining = 0;
- if (!shape.empty() && shape.front() > 0) {
- remaining = byte_width;
- for (size_t i = 1; i < ndim; ++i) {
- if (internal::MultiplyWithOverflow(remaining, shape[i], &remaining)) {
- return Status::Invalid(
- "Row-major strides computed from shape would not fit in 64-bit integer");
- }
- }
+ const size_t ndim = shape.size();
+
+ int64_t remaining = 0;
+ if (!shape.empty() && shape.front() > 0) {
+ remaining = byte_width;
+ for (size_t i = 1; i < ndim; ++i) {
+ if (internal::MultiplyWithOverflow(remaining, shape[i], &remaining)) {
+ return Status::Invalid(
+ "Row-major strides computed from shape would not fit in 64-bit integer");
+ }
+ }
}
if (remaining == 0) {
strides->assign(shape.size(), byte_width);
- return Status::OK();
+ return Status::OK();
}
- strides->push_back(remaining);
- for (size_t i = 1; i < ndim; ++i) {
- remaining /= shape[i];
+ strides->push_back(remaining);
+ for (size_t i = 1; i < ndim; ++i) {
+ remaining /= shape[i];
strides->push_back(remaining);
}
-
- return Status::OK();
+
+ return Status::OK();
}
-Status ComputeColumnMajorStrides(const FixedWidthType& type,
- const std::vector<int64_t>& shape,
- std::vector<int64_t>* strides) {
- const int byte_width = internal::GetByteWidth(type);
- const size_t ndim = shape.size();
-
- int64_t total = 0;
- if (!shape.empty() && shape.back() > 0) {
- total = byte_width;
- for (size_t i = 0; i < ndim - 1; ++i) {
- if (internal::MultiplyWithOverflow(total, shape[i], &total)) {
- return Status::Invalid(
- "Column-major strides computed from shape would not fit in 64-bit "
- "integer");
- }
+Status ComputeColumnMajorStrides(const FixedWidthType& type,
+ const std::vector<int64_t>& shape,
+ std::vector<int64_t>* strides) {
+ const int byte_width = internal::GetByteWidth(type);
+ const size_t ndim = shape.size();
+
+ int64_t total = 0;
+ if (!shape.empty() && shape.back() > 0) {
+ total = byte_width;
+ for (size_t i = 0; i < ndim - 1; ++i) {
+ if (internal::MultiplyWithOverflow(total, shape[i], &total)) {
+ return Status::Invalid(
+ "Column-major strides computed from shape would not fit in 64-bit "
+ "integer");
+ }
}
}
-
- if (total == 0) {
- strides->assign(shape.size(), byte_width);
- return Status::OK();
- }
-
- total = byte_width;
- for (size_t i = 0; i < ndim - 1; ++i) {
+
+ if (total == 0) {
+ strides->assign(shape.size(), byte_width);
+ return Status::OK();
+ }
+
+ total = byte_width;
+ for (size_t i = 0; i < ndim - 1; ++i) {
strides->push_back(total);
- total *= shape[i];
+ total *= shape[i];
}
- strides->push_back(total);
-
- return Status::OK();
+ strides->push_back(total);
+
+ return Status::OK();
}
-} // namespace internal
-
+} // namespace internal
+
namespace {
inline bool IsTensorStridesRowMajor(const std::shared_ptr<DataType>& type,
@@ -114,11 +114,11 @@ inline bool IsTensorStridesRowMajor(const std::shared_ptr<DataType>& type,
const std::vector<int64_t>& strides) {
std::vector<int64_t> c_strides;
const auto& fw_type = checked_cast<const FixedWidthType&>(*type);
- if (internal::ComputeRowMajorStrides(fw_type, shape, &c_strides).ok()) {
- return strides == c_strides;
- } else {
- return false;
- }
+ if (internal::ComputeRowMajorStrides(fw_type, shape, &c_strides).ok()) {
+ return strides == c_strides;
+ } else {
+ return false;
+ }
}
inline bool IsTensorStridesColumnMajor(const std::shared_ptr<DataType>& type,
@@ -126,11 +126,11 @@ inline bool IsTensorStridesColumnMajor(const std::shared_ptr<DataType>& type,
const std::vector<int64_t>& strides) {
std::vector<int64_t> f_strides;
const auto& fw_type = checked_cast<const FixedWidthType&>(*type);
- if (internal::ComputeColumnMajorStrides(fw_type, shape, &f_strides).ok()) {
- return strides == f_strides;
- } else {
- return false;
- }
+ if (internal::ComputeColumnMajorStrides(fw_type, shape, &f_strides).ok()) {
+ return strides == f_strides;
+ } else {
+ return false;
+ }
}
inline Status CheckTensorValidity(const std::shared_ptr<DataType>& type,
@@ -162,29 +162,29 @@ Status CheckTensorStridesValidity(const std::shared_ptr<Buffer>& data,
return Status::OK();
}
- // Check the largest offset can be computed without overflow
- const size_t ndim = shape.size();
- int64_t largest_offset = 0;
- for (size_t i = 0; i < ndim; ++i) {
- if (shape[i] == 0) continue;
- if (strides[i] < 0) {
- // TODO(mrkn): Support negative strides for sharing views
- return Status::Invalid("negative strides not supported");
- }
-
- int64_t dim_offset;
- if (!internal::MultiplyWithOverflow(shape[i] - 1, strides[i], &dim_offset)) {
- if (!internal::AddWithOverflow(largest_offset, dim_offset, &largest_offset)) {
- continue;
- }
- }
-
- return Status::Invalid(
- "offsets computed from shape and strides would not fit in 64-bit integer");
+ // Check the largest offset can be computed without overflow
+ const size_t ndim = shape.size();
+ int64_t largest_offset = 0;
+ for (size_t i = 0; i < ndim; ++i) {
+ if (shape[i] == 0) continue;
+ if (strides[i] < 0) {
+ // TODO(mrkn): Support negative strides for sharing views
+ return Status::Invalid("negative strides not supported");
+ }
+
+ int64_t dim_offset;
+ if (!internal::MultiplyWithOverflow(shape[i] - 1, strides[i], &dim_offset)) {
+ if (!internal::AddWithOverflow(largest_offset, dim_offset, &largest_offset)) {
+ continue;
+ }
+ }
+
+ return Status::Invalid(
+ "offsets computed from shape and strides would not fit in 64-bit integer");
}
-
+
const int byte_width = internal::GetByteWidth(*type);
- if (largest_offset > data->size() - byte_width) {
+ if (largest_offset > data->size() - byte_width) {
return Status::Invalid("strides must not involve buffer over run");
}
return Status::OK();
@@ -209,10 +209,10 @@ Status ValidateTensorParameters(const std::shared_ptr<DataType>& type,
RETURN_NOT_OK(CheckTensorValidity(type, data, shape));
if (!strides.empty()) {
RETURN_NOT_OK(CheckTensorStridesValidity(data, shape, strides, type));
- } else {
- std::vector<int64_t> tmp_strides;
- RETURN_NOT_OK(ComputeRowMajorStrides(checked_cast<const FixedWidthType&>(*type),
- shape, &tmp_strides));
+ } else {
+ std::vector<int64_t> tmp_strides;
+ RETURN_NOT_OK(ComputeRowMajorStrides(checked_cast<const FixedWidthType&>(*type),
+ shape, &tmp_strides));
}
if (dim_names.size() > shape.size()) {
return Status::Invalid("too many dim_names are supplied");
@@ -229,8 +229,8 @@ Tensor::Tensor(const std::shared_ptr<DataType>& type, const std::shared_ptr<Buff
: type_(type), data_(data), shape_(shape), strides_(strides), dim_names_(dim_names) {
ARROW_CHECK(is_tensor_supported(type->id()));
if (shape.size() > 0 && strides.size() == 0) {
- ARROW_CHECK_OK(internal::ComputeRowMajorStrides(
- checked_cast<const FixedWidthType&>(*type_), shape, &strides_));
+ ARROW_CHECK_OK(internal::ComputeRowMajorStrides(
+ checked_cast<const FixedWidthType&>(*type_), shape, &strides_));
}
}
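
Aside: a worked example of the stride computation above. For int32 (4-byte) elements and shape {2, 3, 4}, row-major strides are {48, 16, 4} and column-major strides are {4, 8, 24}:

#include "arrow/api.h"
#include "arrow/util/checked_cast.h"

void StrideExample() {
  std::vector<int64_t> strides;
  ARROW_CHECK_OK(arrow::internal::ComputeRowMajorStrides(
      arrow::internal::checked_cast<const arrow::FixedWidthType&>(*arrow::int32()),
      {2, 3, 4}, &strides));
  // strides == {48, 16, 4}: 4 * (3*4), 4 * 4, and 4 bytes respectively.
}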
diff --git a/contrib/libs/apache/arrow/cpp/src/arrow/tensor.h b/contrib/libs/apache/arrow/cpp/src/arrow/tensor.h
index eebb488272e..91e9ad26066 100644
--- a/contrib/libs/apache/arrow/cpp/src/arrow/tensor.h
+++ b/contrib/libs/apache/arrow/cpp/src/arrow/tensor.h
@@ -56,16 +56,16 @@ static inline bool is_tensor_supported(Type::type type_id) {
namespace internal {
ARROW_EXPORT
-Status ComputeRowMajorStrides(const FixedWidthType& type,
- const std::vector<int64_t>& shape,
- std::vector<int64_t>* strides);
+Status ComputeRowMajorStrides(const FixedWidthType& type,
+ const std::vector<int64_t>& shape,
+ std::vector<int64_t>* strides);
+
+ARROW_EXPORT
+Status ComputeColumnMajorStrides(const FixedWidthType& type,
+ const std::vector<int64_t>& shape,
+ std::vector<int64_t>* strides);
ARROW_EXPORT
-Status ComputeColumnMajorStrides(const FixedWidthType& type,
- const std::vector<int64_t>& shape,
- std::vector<int64_t>* strides);
-
-ARROW_EXPORT
bool IsTensorStridesContiguous(const std::shared_ptr<DataType>& type,
const std::vector<int64_t>& shape,
const std::vector<int64_t>& strides);
@@ -180,10 +180,10 @@ class ARROW_EXPORT Tensor {
return *ptr;
}
- Status Validate() const {
- return internal::ValidateTensorParameters(type_, data_, shape_, strides_, dim_names_);
- }
-
+ Status Validate() const {
+ return internal::ValidateTensorParameters(type_, data_, shape_, strides_, dim_names_);
+ }
+
protected:
Tensor() {}
diff --git a/contrib/libs/apache/arrow/cpp/src/arrow/tensor/coo_converter.cc b/contrib/libs/apache/arrow/cpp/src/arrow/tensor/coo_converter.cc
index d79739240af..2124d0a4e4b 100644
--- a/contrib/libs/apache/arrow/cpp/src/arrow/tensor/coo_converter.cc
+++ b/contrib/libs/apache/arrow/cpp/src/arrow/tensor/coo_converter.cc
@@ -213,9 +213,9 @@ class SparseCOOTensorConverter : private SparseTensorConverterMixin {
// make results
const std::vector<int64_t> indices_shape = {nonzero_count, ndim};
std::vector<int64_t> indices_strides;
- RETURN_NOT_OK(internal::ComputeRowMajorStrides(
+ RETURN_NOT_OK(internal::ComputeRowMajorStrides(
checked_cast<const FixedWidthType&>(*index_value_type_), indices_shape,
- &indices_strides));
+ &indices_strides));
auto coords = std::make_shared<Tensor>(index_value_type_, std::move(indices_buffer),
indices_shape, indices_strides);
ARROW_ASSIGN_OR_RAISE(sparse_index, SparseCOOIndex::Make(coords, true));
@@ -305,7 +305,7 @@ Result<std::shared_ptr<Tensor>> MakeTensorFromSparseCOOTensor(
std::fill_n(values, value_elsize * sparse_tensor->size(), 0);
std::vector<int64_t> strides;
- RETURN_NOT_OK(ComputeRowMajorStrides(value_type, sparse_tensor->shape(), &strides));
+ RETURN_NOT_OK(ComputeRowMajorStrides(value_type, sparse_tensor->shape(), &strides));
const auto* raw_data = sparse_tensor->raw_data();
const int ndim = sparse_tensor->ndim();
diff --git a/contrib/libs/apache/arrow/cpp/src/arrow/tensor/csf_converter.cc b/contrib/libs/apache/arrow/cpp/src/arrow/tensor/csf_converter.cc
index 27173dbc697..77a71d8a12e 100644
--- a/contrib/libs/apache/arrow/cpp/src/arrow/tensor/csf_converter.cc
+++ b/contrib/libs/apache/arrow/cpp/src/arrow/tensor/csf_converter.cc
@@ -211,7 +211,7 @@ class TensorBuilderFromSparseCSFTensor : private SparseTensorConverterMixin {
}
Result<std::shared_ptr<Tensor>> Build() {
- RETURN_NOT_OK(internal::ComputeRowMajorStrides(value_type_, shape_, &strides_));
+ RETURN_NOT_OK(internal::ComputeRowMajorStrides(value_type_, shape_, &strides_));
ARROW_ASSIGN_OR_RAISE(values_buffer_,
AllocateBuffer(value_elsize_ * tensor_size_, pool_));
diff --git a/contrib/libs/apache/arrow/cpp/src/arrow/tensor/csx_converter.cc b/contrib/libs/apache/arrow/cpp/src/arrow/tensor/csx_converter.cc
index 8c71b1efdaf..137b5d3202f 100644
--- a/contrib/libs/apache/arrow/cpp/src/arrow/tensor/csx_converter.cc
+++ b/contrib/libs/apache/arrow/cpp/src/arrow/tensor/csx_converter.cc
@@ -177,7 +177,7 @@ Result<std::shared_ptr<Tensor>> MakeTensorFromSparseCSXMatrix(
std::fill_n(values, value_elsize * tensor_size, 0);
std::vector<int64_t> strides;
- RETURN_NOT_OK(ComputeRowMajorStrides(fw_value_type, shape, &strides));
+ RETURN_NOT_OK(ComputeRowMajorStrides(fw_value_type, shape, &strides));
const auto nc = shape[1];
diff --git a/contrib/libs/apache/arrow/cpp/src/arrow/type.cc b/contrib/libs/apache/arrow/cpp/src/arrow/type.cc
index 6551b31575d..41914f43663 100644
--- a/contrib/libs/apache/arrow/cpp/src/arrow/type.cc
+++ b/contrib/libs/apache/arrow/cpp/src/arrow/type.cc
@@ -68,8 +68,8 @@ constexpr Type::type StructType::type_id;
constexpr Type::type Decimal128Type::type_id;
-constexpr Type::type Decimal256Type::type_id;
-
+constexpr Type::type Decimal256Type::type_id;
+
constexpr Type::type SparseUnionType::type_id;
constexpr Type::type DenseUnionType::type_id;
@@ -130,8 +130,8 @@ std::string ToString(Type::type id) {
TO_STRING_CASE(HALF_FLOAT)
TO_STRING_CASE(FLOAT)
TO_STRING_CASE(DOUBLE)
- TO_STRING_CASE(DECIMAL128)
- TO_STRING_CASE(DECIMAL256)
+ TO_STRING_CASE(DECIMAL128)
+ TO_STRING_CASE(DECIMAL256)
TO_STRING_CASE(DATE32)
TO_STRING_CASE(DATE64)
TO_STRING_CASE(TIME32)
@@ -188,32 +188,32 @@ int GetByteWidth(const DataType& type) {
namespace {
-struct PhysicalTypeVisitor {
- const std::shared_ptr<DataType>& real_type;
- std::shared_ptr<DataType> result;
-
- Status Visit(const DataType&) {
- result = real_type;
- return Status::OK();
- }
-
- template <typename Type, typename PhysicalType = typename Type::PhysicalType>
- Status Visit(const Type&) {
- result = TypeTraits<PhysicalType>::type_singleton();
- return Status::OK();
- }
-};
-
-} // namespace
-
-std::shared_ptr<DataType> GetPhysicalType(const std::shared_ptr<DataType>& real_type) {
- PhysicalTypeVisitor visitor{real_type, {}};
- ARROW_CHECK_OK(VisitTypeInline(*real_type, &visitor));
- return std::move(visitor.result);
-}
-
-namespace {
-
+struct PhysicalTypeVisitor {
+ const std::shared_ptr<DataType>& real_type;
+ std::shared_ptr<DataType> result;
+
+ Status Visit(const DataType&) {
+ result = real_type;
+ return Status::OK();
+ }
+
+ template <typename Type, typename PhysicalType = typename Type::PhysicalType>
+ Status Visit(const Type&) {
+ result = TypeTraits<PhysicalType>::type_singleton();
+ return Status::OK();
+ }
+};
+
+} // namespace
+
+std::shared_ptr<DataType> GetPhysicalType(const std::shared_ptr<DataType>& real_type) {
+ PhysicalTypeVisitor visitor{real_type, {}};
+ ARROW_CHECK_OK(VisitTypeInline(*real_type, &visitor));
+ return std::move(visitor.result);
+}
+
+namespace {
+
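Aside: a sketch of what GetPhysicalType (moved above) yields, assuming arrow/api.h pulls in its declaration; logical types map to their storage representation, all others map to themselves:

#include "arrow/api.h"

auto t1 = arrow::GetPhysicalType(arrow::date32());                            // int32
auto t2 = arrow::GetPhysicalType(arrow::timestamp(arrow::TimeUnit::MILLI));   // int64
auto t3 = arrow::GetPhysicalType(arrow::utf8());                              // utf8, unchanged
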
using internal::checked_cast;
// Merges `existing` and `other` if one of them is of NullType, otherwise
@@ -771,44 +771,44 @@ std::vector<std::shared_ptr<Field>> StructType::GetAllFieldsByName(
return result;
}
-Result<std::shared_ptr<DataType>> DecimalType::Make(Type::type type_id, int32_t precision,
- int32_t scale) {
- if (type_id == Type::DECIMAL128) {
- return Decimal128Type::Make(precision, scale);
- } else if (type_id == Type::DECIMAL256) {
- return Decimal256Type::Make(precision, scale);
- } else {
- return Status::Invalid("Not a decimal type_id: ", type_id);
- }
-}
-
-// Taken from the Apache Impala codebase. The comments next
-// to the return values are the maximum value that can be represented in 2's
-// complement with the returned number of bytes.
-int32_t DecimalType::DecimalSize(int32_t precision) {
- DCHECK_GE(precision, 1) << "decimal precision must be greater than or equal to 1, got "
- << precision;
-
- // Generated in python with:
- // >>> decimal_size = lambda prec: int(math.ceil((prec * math.log2(10) + 1) / 8))
- // >>> [-1] + [decimal_size(i) for i in range(1, 77)]
- constexpr int32_t kBytes[] = {
- -1, 1, 1, 2, 2, 3, 3, 4, 4, 4, 5, 5, 6, 6, 6, 7, 7, 8, 8, 9,
- 9, 9, 10, 10, 11, 11, 11, 12, 12, 13, 13, 13, 14, 14, 15, 15, 16, 16, 16, 17,
- 17, 18, 18, 18, 19, 19, 20, 20, 21, 21, 21, 22, 22, 23, 23, 23, 24, 24, 25, 25,
- 26, 26, 26, 27, 27, 28, 28, 28, 29, 29, 30, 30, 31, 31, 31, 32, 32};
-
- if (precision <= 76) {
- return kBytes[precision];
- }
- return static_cast<int32_t>(std::ceil((precision / 8.0) * std::log2(10) + 1));
-}
-
+Result<std::shared_ptr<DataType>> DecimalType::Make(Type::type type_id, int32_t precision,
+ int32_t scale) {
+ if (type_id == Type::DECIMAL128) {
+ return Decimal128Type::Make(precision, scale);
+ } else if (type_id == Type::DECIMAL256) {
+ return Decimal256Type::Make(precision, scale);
+ } else {
+ return Status::Invalid("Not a decimal type_id: ", type_id);
+ }
+}
+
+// Taken from the Apache Impala codebase. The comments next
+// to the return values are the maximum value that can be represented in 2's
+// complement with the returned number of bytes.
+int32_t DecimalType::DecimalSize(int32_t precision) {
+ DCHECK_GE(precision, 1) << "decimal precision must be greater than or equal to 1, got "
+ << precision;
+
+ // Generated in python with:
+ // >>> decimal_size = lambda prec: int(math.ceil((prec * math.log2(10) + 1) / 8))
+ // >>> [-1] + [decimal_size(i) for i in range(1, 77)]
+ constexpr int32_t kBytes[] = {
+ -1, 1, 1, 2, 2, 3, 3, 4, 4, 4, 5, 5, 6, 6, 6, 7, 7, 8, 8, 9,
+ 9, 9, 10, 10, 11, 11, 11, 12, 12, 13, 13, 13, 14, 14, 15, 15, 16, 16, 16, 17,
+ 17, 18, 18, 18, 19, 19, 20, 20, 21, 21, 21, 22, 22, 23, 23, 23, 24, 24, 25, 25,
+ 26, 26, 26, 27, 27, 28, 28, 28, 29, 29, 30, 30, 31, 31, 31, 32, 32};
+
+ if (precision <= 76) {
+ return kBytes[precision];
+ }
+ return static_cast<int32_t>(std::ceil((precision / 8.0) * std::log2(10) + 1));
+}
+
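Aside: a quick check of the formula above: precision 38 (the Decimal128 maximum) needs ceil((38 * log2(10) + 1) / 8) = ceil(127.23 / 8) = 16 bytes, matching kBytes[38] == 16, and precision 76 (the Decimal256 maximum) similarly yields 32 bytes, matching the table's last entry.
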
// ----------------------------------------------------------------------
// Decimal128 type
Decimal128Type::Decimal128Type(int32_t precision, int32_t scale)
- : DecimalType(type_id, 16, precision, scale) {
+ : DecimalType(type_id, 16, precision, scale) {
ARROW_CHECK_GE(precision, kMinPrecision);
ARROW_CHECK_LE(precision, kMaxPrecision);
}
@@ -821,22 +821,22 @@ Result<std::shared_ptr<DataType>> Decimal128Type::Make(int32_t precision, int32_
}
// ----------------------------------------------------------------------
-// Decimal256 type
-
-Decimal256Type::Decimal256Type(int32_t precision, int32_t scale)
- : DecimalType(type_id, 32, precision, scale) {
- ARROW_CHECK_GE(precision, kMinPrecision);
- ARROW_CHECK_LE(precision, kMaxPrecision);
-}
-
-Result<std::shared_ptr<DataType>> Decimal256Type::Make(int32_t precision, int32_t scale) {
- if (precision < kMinPrecision || precision > kMaxPrecision) {
- return Status::Invalid("Decimal precision out of range: ", precision);
- }
- return std::make_shared<Decimal256Type>(precision, scale);
-}
-
-// ----------------------------------------------------------------------
+// Decimal256 type
+
+Decimal256Type::Decimal256Type(int32_t precision, int32_t scale)
+ : DecimalType(type_id, 32, precision, scale) {
+ ARROW_CHECK_GE(precision, kMinPrecision);
+ ARROW_CHECK_LE(precision, kMaxPrecision);
+}
+
+Result<std::shared_ptr<DataType>> Decimal256Type::Make(int32_t precision, int32_t scale) {
+ if (precision < kMinPrecision || precision > kMaxPrecision) {
+ return Status::Invalid("Decimal precision out of range: ", precision);
+ }
+ return std::make_shared<Decimal256Type>(precision, scale);
+}
+
+// ----------------------------------------------------------------------
// Dictionary-encoded type
Status DictionaryType::ValidateParameters(const DataType& index_type,
@@ -894,15 +894,15 @@ size_t FieldPath::hash() const {
}
std::string FieldPath::ToString() const {
- if (this->indices().empty()) {
- return "FieldPath(empty)";
- }
-
+ if (this->indices().empty()) {
+ return "FieldPath(empty)";
+ }
+
std::string repr = "FieldPath(";
for (auto index : this->indices()) {
repr += std::to_string(index) + " ";
}
- repr.back() = ')';
+ repr.back() = ')';
return repr;
}
@@ -964,10 +964,10 @@ struct FieldPathGetImpl {
int depth = 0;
const T* out;
for (int index : path->indices()) {
- if (children == nullptr) {
- return Status::NotImplemented("Get child data of non-struct array");
- }
-
+ if (children == nullptr) {
+ return Status::NotImplemented("Get child data of non-struct array");
+ }
+
if (index < 0 || static_cast<size_t>(index) >= children->size()) {
*out_of_range_depth = depth;
return nullptr;
@@ -1005,11 +1005,11 @@ struct FieldPathGetImpl {
const ArrayDataVector& child_data) {
return FieldPathGetImpl::Get(
path, &child_data,
- [](const std::shared_ptr<ArrayData>& data) -> const ArrayDataVector* {
- if (data->type->id() != Type::STRUCT) {
- return nullptr;
+ [](const std::shared_ptr<ArrayData>& data) -> const ArrayDataVector* {
+ if (data->type->id() != Type::STRUCT) {
+ return nullptr;
}
- return &data->child_data;
+ return &data->child_data;
});
}
};
@@ -1032,21 +1032,21 @@ Result<std::shared_ptr<Field>> FieldPath::Get(const FieldVector& fields) const {
Result<std::shared_ptr<Array>> FieldPath::Get(const RecordBatch& batch) const {
ARROW_ASSIGN_OR_RAISE(auto data, FieldPathGetImpl::Get(this, batch.column_data()));
- return MakeArray(std::move(data));
+ return MakeArray(std::move(data));
}
-Result<std::shared_ptr<Array>> FieldPath::Get(const Array& array) const {
- ARROW_ASSIGN_OR_RAISE(auto data, Get(*array.data()));
- return MakeArray(std::move(data));
+Result<std::shared_ptr<Array>> FieldPath::Get(const Array& array) const {
+ ARROW_ASSIGN_OR_RAISE(auto data, Get(*array.data()));
+ return MakeArray(std::move(data));
+}
+
+Result<std::shared_ptr<ArrayData>> FieldPath::Get(const ArrayData& data) const {
+ if (data.type->id() != Type::STRUCT) {
+ return Status::NotImplemented("Get child data of non-struct array");
+ }
+ return FieldPathGetImpl::Get(this, data.child_data);
}
-Result<std::shared_ptr<ArrayData>> FieldPath::Get(const ArrayData& data) const {
- if (data.type->id() != Type::STRUCT) {
- return Status::NotImplemented("Get child data of non-struct array");
- }
- return FieldPathGetImpl::Get(this, data.child_data);
-}
-
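The `FieldPath::Get` overloads above resolve a path of child indices, and the `ArrayData` overload guards against non-struct inputs. A hedged sketch (the `struct_array` variable is assumed for illustration, not part of the patch):

    // Assumes `struct_array` is a std::shared_ptr<arrow::Array> of struct type.
    arrow::FieldPath path({0});  // first child of the struct
    arrow::Result<std::shared_ptr<arrow::Array>> child = path.Get(*struct_array);
    // On a non-struct array this yields Status::NotImplemented, per the
    // ArrayData overload above.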
FieldRef::FieldRef(FieldPath indices) : impl_(std::move(indices)) {
DCHECK_GT(util::get<FieldPath>(impl_).indices().size(), 0);
}
@@ -1054,13 +1054,13 @@ FieldRef::FieldRef(FieldPath indices) : impl_(std::move(indices)) {
void FieldRef::Flatten(std::vector<FieldRef> children) {
// flatten children
struct Visitor {
- void operator()(std::string* name) { *out++ = FieldRef(std::move(*name)); }
+ void operator()(std::string* name) { *out++ = FieldRef(std::move(*name)); }
- void operator()(FieldPath* indices) { *out++ = FieldRef(std::move(*indices)); }
+ void operator()(FieldPath* indices) { *out++ = FieldRef(std::move(*indices)); }
- void operator()(std::vector<FieldRef>* children) {
- for (auto& child : *children) {
- util::visit(*this, &child.impl_);
+ void operator()(std::vector<FieldRef>* children) {
+ for (auto& child : *children) {
+ util::visit(*this, &child.impl_);
}
}
@@ -1069,7 +1069,7 @@ void FieldRef::Flatten(std::vector<FieldRef> children) {
std::vector<FieldRef> out;
Visitor visitor{std::back_inserter(out)};
- visitor(&children);
+ visitor(&children);
DCHECK(!out.empty());
DCHECK(std::none_of(out.begin(), out.end(),
@@ -1195,10 +1195,10 @@ std::string FieldRef::ToString() const {
}
std::vector<FieldPath> FieldRef::FindAll(const Schema& schema) const {
- if (auto name = this->name()) {
- return internal::MapVector([](int i) { return FieldPath{i}; },
- schema.GetAllFieldIndices(*name));
- }
+ if (auto name = this->name()) {
+ return internal::MapVector([](int i) { return FieldPath{i}; },
+ schema.GetAllFieldIndices(*name));
+ }
return FindAll(schema.fields());
}
@@ -1296,11 +1296,11 @@ std::vector<FieldPath> FieldRef::FindAll(const FieldVector& fields) const {
return util::visit(Visitor{fields}, impl_);
}
-std::vector<FieldPath> FieldRef::FindAll(const ArrayData& array) const {
- return FindAll(*array.type);
+std::vector<FieldPath> FieldRef::FindAll(const ArrayData& array) const {
+ return FindAll(*array.type);
}
-std::vector<FieldPath> FieldRef::FindAll(const Array& array) const {
+std::vector<FieldPath> FieldRef::FindAll(const Array& array) const {
return FindAll(*array.type());
}
@@ -1313,56 +1313,56 @@ void PrintTo(const FieldRef& ref, std::ostream* os) { *os << ref.ToString(); }
// ----------------------------------------------------------------------
// Schema implementation
-std::string EndiannessToString(Endianness endianness) {
- switch (endianness) {
- case Endianness::Little:
- return "little";
- case Endianness::Big:
- return "big";
- default:
- DCHECK(false) << "invalid endianness";
- return "???";
- }
-}
-
+std::string EndiannessToString(Endianness endianness) {
+ switch (endianness) {
+ case Endianness::Little:
+ return "little";
+ case Endianness::Big:
+ return "big";
+ default:
+ DCHECK(false) << "invalid endianness";
+ return "???";
+ }
+}
+
class Schema::Impl {
public:
- Impl(std::vector<std::shared_ptr<Field>> fields, Endianness endianness,
+ Impl(std::vector<std::shared_ptr<Field>> fields, Endianness endianness,
std::shared_ptr<const KeyValueMetadata> metadata)
: fields_(std::move(fields)),
- endianness_(endianness),
+ endianness_(endianness),
name_to_index_(CreateNameToIndexMap(fields_)),
metadata_(std::move(metadata)) {}
std::vector<std::shared_ptr<Field>> fields_;
- Endianness endianness_;
+ Endianness endianness_;
std::unordered_multimap<std::string, int> name_to_index_;
std::shared_ptr<const KeyValueMetadata> metadata_;
};
-Schema::Schema(std::vector<std::shared_ptr<Field>> fields, Endianness endianness,
- std::shared_ptr<const KeyValueMetadata> metadata)
- : detail::Fingerprintable(),
- impl_(new Impl(std::move(fields), endianness, std::move(metadata))) {}
-
+Schema::Schema(std::vector<std::shared_ptr<Field>> fields, Endianness endianness,
+ std::shared_ptr<const KeyValueMetadata> metadata)
+ : detail::Fingerprintable(),
+ impl_(new Impl(std::move(fields), endianness, std::move(metadata))) {}
+
Schema::Schema(std::vector<std::shared_ptr<Field>> fields,
std::shared_ptr<const KeyValueMetadata> metadata)
: detail::Fingerprintable(),
- impl_(new Impl(std::move(fields), Endianness::Native, std::move(metadata))) {}
+ impl_(new Impl(std::move(fields), Endianness::Native, std::move(metadata))) {}
Schema::Schema(const Schema& schema)
: detail::Fingerprintable(), impl_(new Impl(*schema.impl_)) {}
-Schema::~Schema() = default;
+Schema::~Schema() = default;
+
+std::shared_ptr<Schema> Schema::WithEndianness(Endianness endianness) const {
+ return std::make_shared<Schema>(impl_->fields_, endianness, impl_->metadata_);
+}
+
+Endianness Schema::endianness() const { return impl_->endianness_; }
+
+bool Schema::is_native_endian() const { return impl_->endianness_ == Endianness::Native; }
-std::shared_ptr<Schema> Schema::WithEndianness(Endianness endianness) const {
- return std::make_shared<Schema>(impl_->fields_, endianness, impl_->metadata_);
-}
-
-Endianness Schema::endianness() const { return impl_->endianness_; }
-
-bool Schema::is_native_endian() const { return impl_->endianness_ == Endianness::Native; }
-
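Together these accessors let callers pin a schema to an explicit byte order without mutating it; `WithEndianness` copies fields and metadata into a fresh `Schema`. A short sketch:

    auto s = arrow::schema({arrow::field("x", arrow::int32())});
    // A freshly constructed schema reports the platform's byte order.
    bool native = s->is_native_endian();  // true
    auto big = s->WithEndianness(arrow::Endianness::Big);
    // big->endianness() == arrow::Endianness::Big; `s` itself is unchanged.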
int Schema::num_fields() const { return static_cast<int>(impl_->fields_.size()); }
const std::shared_ptr<Field>& Schema::field(int i) const {
@@ -1380,11 +1380,11 @@ bool Schema::Equals(const Schema& other, bool check_metadata) const {
return true;
}
- // checks endianness equality
- if (endianness() != other.endianness()) {
- return false;
- }
-
+ // checks endianness equality
+ if (endianness() != other.endianness()) {
+ return false;
+ }
+
// checks field equality
if (num_fields() != other.num_fields()) {
return false;
@@ -1509,7 +1509,7 @@ std::shared_ptr<Schema> Schema::WithMetadata(
return std::make_shared<Schema>(impl_->fields_, metadata);
}
-const std::shared_ptr<const KeyValueMetadata>& Schema::metadata() const {
+const std::shared_ptr<const KeyValueMetadata>& Schema::metadata() const {
return impl_->metadata_;
}
@@ -1529,10 +1529,10 @@ std::string Schema::ToString(bool show_metadata) const {
++i;
}
- if (impl_->endianness_ != Endianness::Native) {
- buffer << "\n-- endianness: " << EndiannessToString(impl_->endianness_) << " --";
- }
-
+ if (impl_->endianness_ != Endianness::Native) {
+ buffer << "\n-- endianness: " << EndiannessToString(impl_->endianness_) << " --";
+ }
+
if (show_metadata && HasMetadata()) {
buffer << impl_->metadata_->ToString();
}
@@ -1712,12 +1712,12 @@ std::shared_ptr<Schema> schema(std::vector<std::shared_ptr<Field>> fields,
return std::make_shared<Schema>(std::move(fields), std::move(metadata));
}
-std::shared_ptr<Schema> schema(std::vector<std::shared_ptr<Field>> fields,
- Endianness endianness,
- std::shared_ptr<const KeyValueMetadata> metadata) {
- return std::make_shared<Schema>(std::move(fields), endianness, std::move(metadata));
-}
-
+std::shared_ptr<Schema> schema(std::vector<std::shared_ptr<Field>> fields,
+ Endianness endianness,
+ std::shared_ptr<const KeyValueMetadata> metadata) {
+ return std::make_shared<Schema>(std::move(fields), endianness, std::move(metadata));
+}
+
Result<std::shared_ptr<Schema>> UnifySchemas(
const std::vector<std::shared_ptr<Schema>>& schemas,
const Field::MergeOptions field_merge_options) {
@@ -1876,7 +1876,7 @@ std::string Schema::ComputeFingerprint() const {
}
ss << field_fingerprint << ";";
}
- ss << (endianness() == Endianness::Little ? "L" : "B");
+ ss << (endianness() == Endianness::Little ? "L" : "B");
ss << "}";
return ss.str();
}
@@ -2248,35 +2248,35 @@ std::shared_ptr<Field> field(std::string name, std::shared_ptr<DataType> type,
std::move(metadata));
}
-std::shared_ptr<Field> field(std::string name, std::shared_ptr<DataType> type,
- std::shared_ptr<const KeyValueMetadata> metadata) {
- return std::make_shared<Field>(std::move(name), std::move(type), /*nullable=*/true,
- std::move(metadata));
-}
-
+std::shared_ptr<Field> field(std::string name, std::shared_ptr<DataType> type,
+ std::shared_ptr<const KeyValueMetadata> metadata) {
+ return std::make_shared<Field>(std::move(name), std::move(type), /*nullable=*/true,
+ std::move(metadata));
+}
+
std::shared_ptr<DataType> decimal(int32_t precision, int32_t scale) {
- return precision <= Decimal128Type::kMaxPrecision ? decimal128(precision, scale)
- : decimal256(precision, scale);
-}
-
-std::shared_ptr<DataType> decimal128(int32_t precision, int32_t scale) {
+ return precision <= Decimal128Type::kMaxPrecision ? decimal128(precision, scale)
+ : decimal256(precision, scale);
+}
+
+std::shared_ptr<DataType> decimal128(int32_t precision, int32_t scale) {
return std::make_shared<Decimal128Type>(precision, scale);
}
-std::shared_ptr<DataType> decimal256(int32_t precision, int32_t scale) {
- return std::make_shared<Decimal256Type>(precision, scale);
-}
-
+std::shared_ptr<DataType> decimal256(int32_t precision, int32_t scale) {
+ return std::make_shared<Decimal256Type>(precision, scale);
+}
+
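The `decimal` convenience factory above picks the narrowest type for the requested precision: up to 38 digits yields `decimal128`, anything larger `decimal256`. For example:

    auto a = arrow::decimal(10, 2);     // decimal128(10, 2)
    auto b = arrow::decimal(45, 2);     // 45 > 38, so decimal256(45, 2)
    auto c = arrow::decimal128(38, 0);  // force the 128-bit variant explicitly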
std::string Decimal128Type::ToString() const {
std::stringstream s;
- s << "decimal128(" << precision_ << ", " << scale_ << ")";
+ s << "decimal128(" << precision_ << ", " << scale_ << ")";
+ return s.str();
+}
+
+std::string Decimal256Type::ToString() const {
+ std::stringstream s;
+ s << "decimal256(" << precision_ << ", " << scale_ << ")";
return s.str();
}
-std::string Decimal256Type::ToString() const {
- std::stringstream s;
- s << "decimal256(" << precision_ << ", " << scale_ << ")";
- return s.str();
-}
-
} // namespace arrow
diff --git a/contrib/libs/apache/arrow/cpp/src/arrow/type.h b/contrib/libs/apache/arrow/cpp/src/arrow/type.h
index eb65603e0ea..b933da66089 100644
--- a/contrib/libs/apache/arrow/cpp/src/arrow/type.h
+++ b/contrib/libs/apache/arrow/cpp/src/arrow/type.h
@@ -30,7 +30,7 @@
#include "arrow/result.h"
#include "arrow/type_fwd.h" // IWYU pragma: export
#include "arrow/util/checked_cast.h"
-#include "arrow/util/endian.h"
+#include "arrow/util/endian.h"
#include "arrow/util/macros.h"
#include "arrow/util/variant.h"
#include "arrow/util/visibility.h"
@@ -127,7 +127,7 @@ class ARROW_EXPORT DataType : public detail::Fingerprintable {
ARROW_DEPRECATED("Use field(i)")
const std::shared_ptr<Field>& child(int i) const { return field(i); }
- /// Returns the child-field at index i.
+ /// Returns the child-field at index i.
const std::shared_ptr<Field>& field(int i) const { return children_[i]; }
ARROW_DEPRECATED("Use fields()")
@@ -182,18 +182,18 @@ class ARROW_EXPORT DataType : public detail::Fingerprintable {
ARROW_EXPORT
std::ostream& operator<<(std::ostream& os, const DataType& type);
-/// \brief Return the compatible physical data type
-///
-/// Some types may have distinct logical meanings but the exact same physical
-/// representation. For example, TimestampType has Int64Type as a physical
-/// type (defined as TimestampType::PhysicalType).
-///
-/// The return value is as follows:
-/// - if a `PhysicalType` alias exists in the concrete type class, return
-/// an instance of `PhysicalType`.
-/// - otherwise, return the input type itself.
-std::shared_ptr<DataType> GetPhysicalType(const std::shared_ptr<DataType>& type);
-
+/// \brief Return the compatible physical data type
+///
+/// Some types may have distinct logical meanings but the exact same physical
+/// representation. For example, TimestampType has Int64Type as a physical
+/// type (defined as TimestampType::PhysicalType).
+///
+/// The return value is as follows:
+/// - if a `PhysicalType` alias exists in the concrete type class, return
+/// an instance of `PhysicalType`.
+/// - otherwise, return the input type itself.
+std::shared_ptr<DataType> GetPhysicalType(const std::shared_ptr<DataType>& type);
+
/// \brief Base class for all fixed-width data types
class ARROW_EXPORT FixedWidthType : public DataType {
public:
@@ -626,10 +626,10 @@ class ARROW_EXPORT LargeListType : public BaseListType {
/// \brief Concrete type class for map data
///
/// Map data is nested data where each value is a variable number of
-/// key-item pairs. Its physical representation is the same as
-/// a list of `{key, item}` structs.
-///
-/// Maps can be recursively nested, for example map(utf8, map(utf8, int32)).
+/// key-item pairs. Its physical representation is the same as
+/// a list of `{key, item}` structs.
+///
+/// Maps can be recursively nested, for example map(utf8, map(utf8, int32)).
class ARROW_EXPORT MapType : public ListType {
public:
static constexpr Type::type type_id = Type::MAP;
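As the comment notes, a map is physically a list of `{key, item}` structs, and the `arrow::map` factory composes recursively. A sketch:

    // Physically list<struct<key: utf8, value: int32>> with non-nullable keys.
    auto simple = arrow::map(arrow::utf8(), arrow::int32());
    // Recursive nesting, mirroring the comment's map(utf8, map(utf8, int32)):
    auto nested = arrow::map(arrow::utf8(),
                             arrow::map(arrow::utf8(), arrow::int32()));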
@@ -876,22 +876,22 @@ class ARROW_EXPORT StructType : public NestedType {
/// \brief Base type class for (fixed-size) decimal data
class ARROW_EXPORT DecimalType : public FixedSizeBinaryType {
public:
- explicit DecimalType(Type::type type_id, int32_t byte_width, int32_t precision,
- int32_t scale)
- : FixedSizeBinaryType(byte_width, type_id), precision_(precision), scale_(scale) {}
-
- /// Constructs concrete decimal types
- static Result<std::shared_ptr<DataType>> Make(Type::type type_id, int32_t precision,
- int32_t scale);
-
+ explicit DecimalType(Type::type type_id, int32_t byte_width, int32_t precision,
+ int32_t scale)
+ : FixedSizeBinaryType(byte_width, type_id), precision_(precision), scale_(scale) {}
+
+ /// Constructs concrete decimal types
+ static Result<std::shared_ptr<DataType>> Make(Type::type type_id, int32_t precision,
+ int32_t scale);
+
int32_t precision() const { return precision_; }
int32_t scale() const { return scale_; }
- /// \brief Returns the number of bytes needed for precision.
- ///
- /// precision must be >= 1
- static int32_t DecimalSize(int32_t precision);
-
+ /// \brief Returns the number of bytes needed for precision.
+ ///
+ /// precision must be >= 1
+ static int32_t DecimalSize(int32_t precision);
+
protected:
std::string ComputeFingerprint() const override;
@@ -900,24 +900,24 @@ class ARROW_EXPORT DecimalType : public FixedSizeBinaryType {
};
/// \brief Concrete type class for 128-bit decimal data
-///
-/// Arrow decimals are fixed-point decimal numbers encoded as a scaled
-/// integer. The precision is the number of significant digits that the
-/// decimal type can represent; the scale is the number of digits after
-/// the decimal point (note the scale can be negative).
-///
-/// As an example, `Decimal128Type(7, 3)` can exactly represent the numbers
-/// 1234.567 and -1234.567 (encoded internally as the 128-bit integers
-/// 1234567 and -1234567, respectively), but neither 12345.67 nor 123.4567.
-///
-/// Decimal128Type has a maximum precision of 38 significant digits
-/// (also available as Decimal128Type::kMaxPrecision).
-/// If higher precision is needed, consider using Decimal256Type.
+///
+/// Arrow decimals are fixed-point decimal numbers encoded as a scaled
+/// integer. The precision is the number of significant digits that the
+/// decimal type can represent; the scale is the number of digits after
+/// the decimal point (note the scale can be negative).
+///
+/// As an example, `Decimal128Type(7, 3)` can exactly represent the numbers
+/// 1234.567 and -1234.567 (encoded internally as the 128-bit integers
+/// 1234567 and -1234567, respectively), but neither 12345.67 nor 123.4567.
+///
+/// Decimal128Type has a maximum precision of 38 significant digits
+/// (also available as Decimal128Type::kMaxPrecision).
+/// If higher precision is needed, consider using Decimal256Type.
class ARROW_EXPORT Decimal128Type : public DecimalType {
public:
- static constexpr Type::type type_id = Type::DECIMAL128;
+ static constexpr Type::type type_id = Type::DECIMAL128;
- static constexpr const char* type_name() { return "decimal128"; }
+ static constexpr const char* type_name() { return "decimal128"; }
/// Decimal128Type constructor that aborts on invalid input.
explicit Decimal128Type(int32_t precision, int32_t scale);
@@ -926,47 +926,47 @@ class ARROW_EXPORT Decimal128Type : public DecimalType {
static Result<std::shared_ptr<DataType>> Make(int32_t precision, int32_t scale);
std::string ToString() const override;
- std::string name() const override { return "decimal128"; }
+ std::string name() const override { return "decimal128"; }
static constexpr int32_t kMinPrecision = 1;
static constexpr int32_t kMaxPrecision = 38;
- static constexpr int32_t kByteWidth = 16;
+ static constexpr int32_t kByteWidth = 16;
+};
+
+/// \brief Concrete type class for 256-bit decimal data
+///
+/// Arrow decimals are fixed-point decimal numbers encoded as a scaled
+/// integer. The precision is the number of significant digits that the
+/// decimal type can represent; the scale is the number of digits after
+/// the decimal point (note the scale can be negative).
+///
+/// Decimal256Type has a maximum precision of 76 significant digits
+/// (also available as Decimal256Type::kMaxPrecision).
+///
+/// For most use cases, the maximum precision offered by Decimal128Type
+/// is sufficient, and it will result in a more compact and more efficient
+/// encoding.
+class ARROW_EXPORT Decimal256Type : public DecimalType {
+ public:
+ static constexpr Type::type type_id = Type::DECIMAL256;
+
+ static constexpr const char* type_name() { return "decimal256"; }
+
+ /// Decimal256Type constructor that aborts on invalid input.
+ explicit Decimal256Type(int32_t precision, int32_t scale);
+
+ /// Decimal256Type constructor that returns an error on invalid input.
+ static Result<std::shared_ptr<DataType>> Make(int32_t precision, int32_t scale);
+
+ std::string ToString() const override;
+ std::string name() const override { return "decimal256"; }
+
+ static constexpr int32_t kMinPrecision = 1;
+ static constexpr int32_t kMaxPrecision = 76;
+ static constexpr int32_t kByteWidth = 32;
};
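The doc comment's `Decimal128Type(7, 3)` example is easy to verify directly: values are stored as the scaled integer and re-scaled on formatting. A sketch using `arrow::Decimal128` from arrow/util/decimal.h:

    #include "arrow/util/decimal.h"

    // 1234.567 at scale 3 is stored as the 128-bit integer 1234567.
    arrow::Decimal128 v(1234567);
    std::string text = v.ToString(/*scale=*/3);  // "1234.567"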
-/// \brief Concrete type class for 256-bit decimal data
-///
-/// Arrow decimals are fixed-point decimal numbers encoded as a scaled
-/// integer. The precision is the number of significant digits that the
-/// decimal type can represent; the scale is the number of digits after
-/// the decimal point (note the scale can be negative).
-///
-/// Decimal256Type has a maximum precision of 76 significant digits.
-/// (also available as Decimal256Type::kMaxPrecision).
-///
-/// For most use cases, the maximum precision offered by Decimal128Type
-/// is sufficient, and it will result in a more compact and more efficient
-/// encoding.
-class ARROW_EXPORT Decimal256Type : public DecimalType {
- public:
- static constexpr Type::type type_id = Type::DECIMAL256;
-
- static constexpr const char* type_name() { return "decimal256"; }
-
- /// Decimal256Type constructor that aborts on invalid input.
- explicit Decimal256Type(int32_t precision, int32_t scale);
-
- /// Decimal256Type constructor that returns an error on invalid input.
- static Result<std::shared_ptr<DataType>> Make(int32_t precision, int32_t scale);
-
- std::string ToString() const override;
- std::string name() const override { return "decimal256"; }
-
- static constexpr int32_t kMinPrecision = 1;
- static constexpr int32_t kMaxPrecision = 76;
- static constexpr int32_t kByteWidth = 32;
-};
-
-/// \brief Base type class for union data
+/// \brief Base type class for union data
class ARROW_EXPORT UnionType : public NestedType {
public:
static constexpr int8_t kMaxTypeCode = 127;
@@ -1014,17 +1014,17 @@ class ARROW_EXPORT UnionType : public NestedType {
std::vector<int> child_ids_;
};
-/// \brief Concrete type class for sparse union data
-///
-/// A sparse union is a nested type where each logical value is taken from
-/// a single child. A buffer of 8-bit type ids indicates which child
-/// a given logical value is to be taken from.
-///
-/// In a sparse union, each child array should have the same length as the
-/// union array, regardless of the actual number of union values that
-/// refer to it.
-///
-/// Note that, unlike most other types, unions don't have a top-level validity bitmap.
+/// \brief Concrete type class for sparse union data
+///
+/// A sparse union is a nested type where each logical value is taken from
+/// a single child. A buffer of 8-bit type ids indicates which child
+/// a given logical value is to be taken from.
+///
+/// In a sparse union, each child array should have the same length as the
+/// union array, regardless of the actual number of union values that
+/// refer to it.
+///
+/// Note that, unlike most other types, unions don't have a top-level validity bitmap.
class ARROW_EXPORT SparseUnionType : public UnionType {
public:
static constexpr Type::type type_id = Type::SPARSE_UNION;
@@ -1041,20 +1041,20 @@ class ARROW_EXPORT SparseUnionType : public UnionType {
std::string name() const override { return "sparse_union"; }
};
-/// \brief Concrete type class for dense union data
-///
-/// A dense union is a nested type where each logical value is taken from
-/// a single child, at a specific offset. A buffer of 8-bit type ids
-/// indicates which child a given logical value is to be taken from,
-/// and a buffer of 32-bit offsets indicates at which physical position
-/// in the given child array the logical value is to be taken from.
-///
-/// Unlike a sparse union, a dense union allows encoding only the child array
-/// values which are actually referred to by the union array. This is
-/// counterbalanced by the additional footprint of the offsets buffer, and
-/// the additional indirection cost when looking up values.
-///
-/// Note that, unlike most other types, unions don't have a top-level validity bitmap.
+/// \brief Concrete type class for dense union data
+///
+/// A dense union is a nested type where each logical value is taken from
+/// a single child, at a specific offset. A buffer of 8-bit type ids
+/// indicates which child a given logical value is to be taken from,
+/// and a buffer of 32-bit offsets indicates at which physical position
+/// in the given child array the logical value is to be taken from.
+///
+/// Unlike a sparse union, a dense union allows encoding only the child array
+/// values which are actually referred to by the union array. This is
+/// counterbalanced by the additional footprint of the offsets buffer, and
+/// the additional indirection cost when looking up values.
+///
+/// Note that, unlike most other types, unions don't have a top-level validity bitmap.
class ARROW_EXPORT DenseUnionType : public UnionType {
public:
static constexpr Type::type type_id = Type::DENSE_UNION;
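The sparse/dense distinction described above is purely physical; both variants are built from the same child fields. A hedged sketch using the `sparse_union`/`dense_union` factories (type codes default to 0..n-1 when omitted):

    arrow::FieldVector children = {arrow::field("i", arrow::int32()),
                                   arrow::field("s", arrow::utf8())};
    auto sparse = arrow::sparse_union(children);  // children span the full length
    auto dense = arrow::dense_union(children);    // adds a 32-bit offsets buffer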
@@ -1413,7 +1413,7 @@ class ARROW_EXPORT DictionaryType : public FixedWidthType {
/// FieldPaths provide a number of accessors for drilling down to potentially nested
/// children. They are overloaded for convenience to support Schema (returns a field),
/// DataType (returns a child field), Field (returns a child field of this field's type)
-/// Array (returns a child array), RecordBatch (returns a column).
+/// Array (returns a child array), RecordBatch (returns a column).
class ARROW_EXPORT FieldPath {
public:
FieldPath() = default;
@@ -1427,11 +1427,11 @@ class ARROW_EXPORT FieldPath {
std::string ToString() const;
size_t hash() const;
- struct Hash {
- size_t operator()(const FieldPath& path) const { return path.hash(); }
- };
+ struct Hash {
+ size_t operator()(const FieldPath& path) const { return path.hash(); }
+ };
- bool empty() const { return indices_.empty(); }
+ bool empty() const { return indices_.empty(); }
bool operator==(const FieldPath& other) const { return indices() == other.indices(); }
bool operator!=(const FieldPath& other) const { return indices() != other.indices(); }
@@ -1449,9 +1449,9 @@ class ARROW_EXPORT FieldPath {
/// \brief Retrieve the referenced column from a RecordBatch or Table
Result<std::shared_ptr<Array>> Get(const RecordBatch& batch) const;
- /// \brief Retrieve the referenced child from an Array or ArrayData
+ /// \brief Retrieve the referenced child from an Array or ArrayData
Result<std::shared_ptr<Array>> Get(const Array& array) const;
- Result<std::shared_ptr<ArrayData>> Get(const ArrayData& data) const;
+ Result<std::shared_ptr<ArrayData>> Get(const ArrayData& data) const;
private:
std::vector<int> indices_;
@@ -1543,13 +1543,13 @@ class ARROW_EXPORT FieldRef {
std::string ToString() const;
size_t hash() const;
- struct Hash {
- size_t operator()(const FieldRef& ref) const { return ref.hash(); }
- };
+ struct Hash {
+ size_t operator()(const FieldRef& ref) const { return ref.hash(); }
+ };
+
+ explicit operator bool() const { return Equals(FieldPath{}); }
+ bool operator!() const { return !Equals(FieldPath{}); }
- explicit operator bool() const { return Equals(FieldPath{}); }
- bool operator!() const { return !Equals(FieldPath{}); }
-
bool IsFieldPath() const { return util::holds_alternative<FieldPath>(impl_); }
bool IsName() const { return util::holds_alternative<std::string>(impl_); }
bool IsNested() const {
@@ -1558,13 +1558,13 @@ class ARROW_EXPORT FieldRef {
return true;
}
- const FieldPath* field_path() const {
- return IsFieldPath() ? &util::get<FieldPath>(impl_) : NULLPTR;
- }
- const std::string* name() const {
- return IsName() ? &util::get<std::string>(impl_) : NULLPTR;
- }
-
+ const FieldPath* field_path() const {
+ return IsFieldPath() ? &util::get<FieldPath>(impl_) : NULLPTR;
+ }
+ const std::string* name() const {
+ return IsName() ? &util::get<std::string>(impl_) : NULLPTR;
+ }
+
/// \brief Retrieve FieldPath of every child field which matches this FieldRef.
std::vector<FieldPath> FindAll(const Schema& schema) const;
std::vector<FieldPath> FindAll(const Field& field) const;
@@ -1572,7 +1572,7 @@ class ARROW_EXPORT FieldRef {
std::vector<FieldPath> FindAll(const FieldVector& fields) const;
/// \brief Convenience function which applies FindAll to arg's type or schema.
- std::vector<FieldPath> FindAll(const ArrayData& array) const;
+ std::vector<FieldPath> FindAll(const ArrayData& array) const;
std::vector<FieldPath> FindAll(const Array& array) const;
std::vector<FieldPath> FindAll(const RecordBatch& batch) const;
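Because field names may repeat, `FindAll` returns every matching `FieldPath` rather than a single hit. A sketch against a schema with a duplicated name:

    auto s = arrow::schema({arrow::field("a", arrow::int32()),
                            arrow::field("a", arrow::utf8())});
    std::vector<arrow::FieldPath> hits = arrow::FieldRef("a").FindAll(*s);
    // hits.size() == 2: FieldPath{0} and FieldPath{1}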
@@ -1644,16 +1644,16 @@ class ARROW_EXPORT FieldRef {
template <typename T>
Result<GetType<T>> GetOneOrNone(const T& root) const {
ARROW_ASSIGN_OR_RAISE(auto match, FindOneOrNone(root));
- if (match.empty()) {
- return static_cast<GetType<T>>(NULLPTR);
+ if (match.empty()) {
+ return static_cast<GetType<T>>(NULLPTR);
}
- return match.Get(root).ValueOrDie();
+ return match.Get(root).ValueOrDie();
}
private:
void Flatten(std::vector<FieldRef> children);
- util::Variant<FieldPath, std::string, std::vector<FieldRef>> impl_;
+ util::Variant<FieldPath, std::string, std::vector<FieldRef>> impl_;
ARROW_EXPORT friend void PrintTo(const FieldRef& ref, std::ostream* os);
};
@@ -1661,16 +1661,16 @@ class ARROW_EXPORT FieldRef {
// ----------------------------------------------------------------------
// Schema
-enum class Endianness {
- Little = 0,
- Big = 1,
-#if ARROW_LITTLE_ENDIAN
- Native = Little
-#else
- Native = Big
-#endif
-};
-
+enum class Endianness {
+ Little = 0,
+ Big = 1,
+#if ARROW_LITTLE_ENDIAN
+ Native = Little
+#else
+ Native = Big
+#endif
+};
+
/// \class Schema
/// \brief Sequence of arrow::Field objects describing the columns of a record
/// batch or table data structure
@@ -1678,12 +1678,12 @@ class ARROW_EXPORT Schema : public detail::Fingerprintable,
public util::EqualityComparable<Schema>,
public util::ToStringOstreamable<Schema> {
public:
- explicit Schema(FieldVector fields, Endianness endianness,
+ explicit Schema(FieldVector fields, Endianness endianness,
+ std::shared_ptr<const KeyValueMetadata> metadata = NULLPTR);
+
+ explicit Schema(FieldVector fields,
std::shared_ptr<const KeyValueMetadata> metadata = NULLPTR);
- explicit Schema(FieldVector fields,
- std::shared_ptr<const KeyValueMetadata> metadata = NULLPTR);
-
Schema(const Schema&);
~Schema() override;
@@ -1692,24 +1692,24 @@ class ARROW_EXPORT Schema : public detail::Fingerprintable,
bool Equals(const Schema& other, bool check_metadata = false) const;
bool Equals(const std::shared_ptr<Schema>& other, bool check_metadata = false) const;
- /// \brief Set endianness in the schema
- ///
- /// \return new Schema
- std::shared_ptr<Schema> WithEndianness(Endianness endianness) const;
-
- /// \brief Return endianness in the schema
- Endianness endianness() const;
-
- /// \brief Indicate if endianness is equal to platform-native endianness
- bool is_native_endian() const;
-
+ /// \brief Set endianness in the schema
+ ///
+ /// \return new Schema
+ std::shared_ptr<Schema> WithEndianness(Endianness endianness) const;
+
+ /// \brief Return endianness in the schema
+ Endianness endianness() const;
+
+ /// \brief Indicate if endianness is equal to platform-native endianness
+ bool is_native_endian() const;
+
/// \brief Return the number of fields (columns) in the schema
int num_fields() const;
/// Return the i-th schema element. Does not bounds-check.
const std::shared_ptr<Field>& field(int i) const;
- const FieldVector& fields() const;
+ const FieldVector& fields() const;
std::vector<std::string> field_names() const;
@@ -1717,7 +1717,7 @@ class ARROW_EXPORT Schema : public detail::Fingerprintable,
std::shared_ptr<Field> GetFieldByName(const std::string& name) const;
/// \brief Return the indices of all fields having this name in sorted order
- FieldVector GetAllFieldsByName(const std::string& name) const;
+ FieldVector GetAllFieldsByName(const std::string& name) const;
/// Returns -1 if name not found
int GetFieldIndex(const std::string& name) const;
@@ -1731,7 +1731,7 @@ class ARROW_EXPORT Schema : public detail::Fingerprintable,
/// \brief The custom key-value metadata, if any
///
/// \return metadata may be null
- const std::shared_ptr<const KeyValueMetadata>& metadata() const;
+ const std::shared_ptr<const KeyValueMetadata>& metadata() const;
/// \brief Render a string representation of the schema suitable for debugging
/// \param[in] show_metadata when true, if KeyValueMetadata is non-empty,
@@ -1771,9 +1771,9 @@ class ARROW_EXPORT Schema : public detail::Fingerprintable,
std::unique_ptr<Impl> impl_;
};
-ARROW_EXPORT
-std::string EndiannessToString(Endianness endianness);
-
+ARROW_EXPORT
+std::string EndiannessToString(Endianness endianness);
+
// ----------------------------------------------------------------------
/// \brief Convenience class to incrementally construct/merge schemas.
@@ -1802,18 +1802,18 @@ class ARROW_EXPORT SchemaBuilder {
};
/// \brief Construct an empty SchemaBuilder
- /// `field_merge_options` is only effective when `conflict_policy` == `CONFLICT_MERGE`.
+ /// `field_merge_options` is only effective when `conflict_policy` == `CONFLICT_MERGE`.
SchemaBuilder(
ConflictPolicy conflict_policy = CONFLICT_APPEND,
Field::MergeOptions field_merge_options = Field::MergeOptions::Defaults());
/// \brief Construct a SchemaBuilder from a list of fields
- /// `field_merge_options` is only effective when `conflict_policy` == `CONFLICT_MERGE`.
+ /// `field_merge_options` is only effective when `conflict_policy` == `CONFLICT_MERGE`.
SchemaBuilder(
std::vector<std::shared_ptr<Field>> fields,
ConflictPolicy conflict_policy = CONFLICT_APPEND,
Field::MergeOptions field_merge_options = Field::MergeOptions::Defaults());
/// \brief Construct a SchemaBuilder from a schema, preserving the metadata
- /// `field_merge_options` is only effective when `conflict_policy` == `CONFLICT_MERGE`.
+ /// `field_merge_options` is only effective when `conflict_policy` == `CONFLICT_MERGE`.
SchemaBuilder(
const std::shared_ptr<Schema>& schema,
ConflictPolicy conflict_policy = CONFLICT_APPEND,
diff --git a/contrib/libs/apache/arrow/cpp/src/arrow/type_fwd.h b/contrib/libs/apache/arrow/cpp/src/arrow/type_fwd.h
index 80b8345b625..7e564106bbe 100644
--- a/contrib/libs/apache/arrow/cpp/src/arrow/type_fwd.h
+++ b/contrib/libs/apache/arrow/cpp/src/arrow/type_fwd.h
@@ -29,20 +29,20 @@ namespace arrow {
template <typename T>
class Iterator;
-template <typename T>
-struct IterationTraits;
+template <typename T>
+struct IterationTraits;
template <typename T>
class Result;
class Status;
-namespace internal {
-struct Empty;
-} // namespace internal
-template <typename T = internal::Empty>
-class Future;
-
+namespace internal {
+struct Empty;
+} // namespace internal
+template <typename T = internal::Empty>
+class Future;
+
namespace util {
class Codec;
} // namespace util
@@ -60,7 +60,7 @@ class DataType;
class Field;
class FieldRef;
class KeyValueMetadata;
-enum class Endianness;
+enum class Endianness;
class Schema;
using DataTypeVector = std::vector<std::shared_ptr<DataType>>;
@@ -80,9 +80,9 @@ class RecordBatch;
class RecordBatchReader;
class Table;
-struct Datum;
-struct ValueDescr;
-
+struct Datum;
+struct ValueDescr;
+
using ChunkedArrayVector = std::vector<std::shared_ptr<ChunkedArray>>;
using RecordBatchVector = std::vector<std::shared_ptr<RecordBatch>>;
using RecordBatchIterator = Iterator<std::shared_ptr<RecordBatch>>;
@@ -154,16 +154,16 @@ class StructBuilder;
struct StructScalar;
class Decimal128;
-class Decimal256;
+class Decimal256;
class DecimalType;
class Decimal128Type;
-class Decimal256Type;
+class Decimal256Type;
class Decimal128Array;
-class Decimal256Array;
+class Decimal256Array;
class Decimal128Builder;
-class Decimal256Builder;
+class Decimal256Builder;
struct Decimal128Scalar;
-struct Decimal256Scalar;
+struct Decimal256Scalar;
struct UnionMode {
enum type { SPARSE, DENSE };
@@ -262,9 +262,9 @@ class ExtensionType;
class ExtensionArray;
struct ExtensionScalar;
-class Tensor;
-class SparseTensor;
-
+class Tensor;
+class SparseTensor;
+
// ----------------------------------------------------------------------
struct Type {
@@ -345,15 +345,15 @@ struct Type {
/// DAY_TIME interval in SQL style
INTERVAL_DAY_TIME,
- /// Precision- and scale-based decimal type with 128 bits.
- DECIMAL128,
+ /// Precision- and scale-based decimal type with 128 bits.
+ DECIMAL128,
+
+ /// Defined for backward-compatibility.
+ DECIMAL = DECIMAL128,
+
+ /// Precision- and scale-based decimal type with 256 bits.
+ DECIMAL256,
- /// Defined for backward-compatibility.
- DECIMAL = DECIMAL128,
-
- /// Precision- and scale-based decimal type with 256 bits.
- DECIMAL256,
-
/// A list of some logical data type
LIST,
@@ -447,21 +447,21 @@ std::shared_ptr<DataType> ARROW_EXPORT date64();
ARROW_EXPORT
std::shared_ptr<DataType> fixed_size_binary(int32_t byte_width);
-/// \brief Create a DecimalType instance depending on the precision
-///
-/// If the precision is greater than 38, a Decimal256Type is returned,
-/// otherwise a Decimal128Type.
+/// \brief Create a DecimalType instance depending on the precision
+///
+/// If the precision is greater than 38, a Decimal256Type is returned,
+/// otherwise a Decimal128Type.
ARROW_EXPORT
std::shared_ptr<DataType> decimal(int32_t precision, int32_t scale);
-/// \brief Create a Decimal128Type instance
-ARROW_EXPORT
-std::shared_ptr<DataType> decimal128(int32_t precision, int32_t scale);
-
-/// \brief Create a Decimal256Type instance
-ARROW_EXPORT
-std::shared_ptr<DataType> decimal256(int32_t precision, int32_t scale);
-
+/// \brief Create a Decimal128Type instance
+ARROW_EXPORT
+std::shared_ptr<DataType> decimal128(int32_t precision, int32_t scale);
+
+/// \brief Create a Decimal256Type instance
+ARROW_EXPORT
+std::shared_ptr<DataType> decimal256(int32_t precision, int32_t scale);
+
/// \brief Create a ListType instance from its child Field type
ARROW_EXPORT
std::shared_ptr<DataType> list(const std::shared_ptr<Field>& value_type);
@@ -502,7 +502,7 @@ ARROW_EXPORT
std::shared_ptr<DataType> fixed_size_list(const std::shared_ptr<DataType>& value_type,
int32_t list_size);
/// \brief Return a Duration instance (naming uses _type to avoid a namespace conflict
-/// with built-in time classes).
+/// with built-in time classes).
std::shared_ptr<DataType> ARROW_EXPORT duration(TimeUnit::type unit);
/// \brief Return a DayTimeIntervalType instance
@@ -638,17 +638,17 @@ std::shared_ptr<Field> ARROW_EXPORT
field(std::string name, std::shared_ptr<DataType> type, bool nullable = true,
std::shared_ptr<const KeyValueMetadata> metadata = NULLPTR);
-/// \brief Create a Field instance with metadata
-///
-/// The field will be assumed to be nullable.
-///
-/// \param name the field name
-/// \param type the field value type
-/// \param metadata any custom key-value metadata
-std::shared_ptr<Field> ARROW_EXPORT
-field(std::string name, std::shared_ptr<DataType> type,
- std::shared_ptr<const KeyValueMetadata> metadata);
-
+/// \brief Create a Field instance with metadata
+///
+/// The field will be assumed to be nullable.
+///
+/// \param name the field name
+/// \param type the field value type
+/// \param metadata any custom key-value metadata
+std::shared_ptr<Field> ARROW_EXPORT
+field(std::string name, std::shared_ptr<DataType> type,
+ std::shared_ptr<const KeyValueMetadata> metadata);
+
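This overload exists so callers can attach metadata without spelling out `nullable`; the field defaults to nullable, as the comment states. A sketch (using the `key_value_metadata` factory):

    auto md = arrow::key_value_metadata({"origin"}, {"sensor-7"});
    auto f = arrow::field("reading", arrow::float64(), md);
    // f->nullable() == true; only name, type, and metadata were supplied.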
/// \brief Create a Schema instance
///
/// \param fields the schema's fields
@@ -659,17 +659,17 @@ std::shared_ptr<Schema> schema(
std::vector<std::shared_ptr<Field>> fields,
std::shared_ptr<const KeyValueMetadata> metadata = NULLPTR);
-/// \brief Create a Schema instance
-///
-/// \param fields the schema's fields
-/// \param endianness the endianness of the data
-/// \param metadata any custom key-value metadata, default null
-/// \return schema shared_ptr to Schema
-ARROW_EXPORT
-std::shared_ptr<Schema> schema(
- std::vector<std::shared_ptr<Field>> fields, Endianness endianness,
- std::shared_ptr<const KeyValueMetadata> metadata = NULLPTR);
-
+/// \brief Create a Schema instance
+///
+/// \param fields the schema's fields
+/// \param endianness the endianness of the data
+/// \param metadata any custom key-value metadata, default null
+/// \return schema shared_ptr to Schema
+ARROW_EXPORT
+std::shared_ptr<Schema> schema(
+ std::vector<std::shared_ptr<Field>> fields, Endianness endianness,
+ std::shared_ptr<const KeyValueMetadata> metadata = NULLPTR);
+
/// @}
/// Return the process-wide default memory pool.
diff --git a/contrib/libs/apache/arrow/cpp/src/arrow/type_traits.h b/contrib/libs/apache/arrow/cpp/src/arrow/type_traits.h
index c9637e09ed5..e4d809967f9 100644
--- a/contrib/libs/apache/arrow/cpp/src/arrow/type_traits.h
+++ b/contrib/libs/apache/arrow/cpp/src/arrow/type_traits.h
@@ -66,8 +66,8 @@ TYPE_ID_TRAIT(TIMESTAMP, TimestampType)
TYPE_ID_TRAIT(INTERVAL_DAY_TIME, DayTimeIntervalType)
TYPE_ID_TRAIT(INTERVAL_MONTHS, MonthIntervalType)
TYPE_ID_TRAIT(DURATION, DurationType)
-TYPE_ID_TRAIT(DECIMAL128, Decimal128Type)
-TYPE_ID_TRAIT(DECIMAL256, Decimal256Type)
+TYPE_ID_TRAIT(DECIMAL128, Decimal128Type)
+TYPE_ID_TRAIT(DECIMAL256, Decimal256Type)
TYPE_ID_TRAIT(STRUCT, StructType)
TYPE_ID_TRAIT(LIST, ListType)
TYPE_ID_TRAIT(LARGE_LIST, LargeListType)
@@ -233,7 +233,7 @@ struct TypeTraits<MonthIntervalType> {
using ArrayType = MonthIntervalArray;
using BuilderType = MonthIntervalBuilder;
using ScalarType = MonthIntervalScalar;
- using CType = MonthIntervalType::c_type;
+ using CType = MonthIntervalType::c_type;
static constexpr int64_t bytes_required(int64_t elements) {
return elements * static_cast<int64_t>(sizeof(int32_t));
@@ -291,14 +291,14 @@ struct TypeTraits<Decimal128Type> {
};
template <>
-struct TypeTraits<Decimal256Type> {
- using ArrayType = Decimal256Array;
- using BuilderType = Decimal256Builder;
- using ScalarType = Decimal256Scalar;
- constexpr static bool is_parameter_free = false;
-};
-
-template <>
+struct TypeTraits<Decimal256Type> {
+ using ArrayType = Decimal256Array;
+ using BuilderType = Decimal256Builder;
+ using ScalarType = Decimal256Scalar;
+ constexpr static bool is_parameter_free = false;
+};
+
+template <>
struct TypeTraits<BinaryType> {
using ArrayType = BinaryArray;
using BuilderType = BinaryBuilder;
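The `TypeTraits<Decimal256Type>` specialization above wires the 256-bit decimal into the generic machinery, so templated code can recover the companion array/builder/scalar classes from the type alone. A sketch:

    #include <type_traits>
    #include "arrow/type_traits.h"

    using Builder = arrow::TypeTraits<arrow::Decimal256Type>::BuilderType;
    static_assert(std::is_same<Builder, arrow::Decimal256Builder>::value,
                  "trait resolves to the matching builder");
    static_assert(!arrow::TypeTraits<arrow::Decimal256Type>::is_parameter_free,
                  "decimal types carry precision/scale parameters");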
@@ -587,18 +587,18 @@ using is_decimal_type = std::is_base_of<DecimalType, T>;
template <typename T, typename R = void>
using enable_if_decimal = enable_if_t<is_decimal_type<T>::value, R>;
-template <typename T>
-using is_decimal128_type = std::is_base_of<Decimal128Type, T>;
-
-template <typename T, typename R = void>
-using enable_if_decimal128 = enable_if_t<is_decimal128_type<T>::value, R>;
-
-template <typename T>
-using is_decimal256_type = std::is_base_of<Decimal256Type, T>;
-
-template <typename T, typename R = void>
-using enable_if_decimal256 = enable_if_t<is_decimal256_type<T>::value, R>;
-
+template <typename T>
+using is_decimal128_type = std::is_base_of<Decimal128Type, T>;
+
+template <typename T, typename R = void>
+using enable_if_decimal128 = enable_if_t<is_decimal128_type<T>::value, R>;
+
+template <typename T>
+using is_decimal256_type = std::is_base_of<Decimal256Type, T>;
+
+template <typename T, typename R = void>
+using enable_if_decimal256 = enable_if_t<is_decimal256_type<T>::value, R>;
+
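These aliases gate templates on the decimal family via SFINAE. A hypothetical helper (the name `MaxDecimalPrecision` is illustrative) that only instantiates for 256-bit decimals:

    // Compiles only when T derives from Decimal256Type; other types are
    // removed from overload resolution.
    template <typename T>
    arrow::enable_if_decimal256<T, int32_t> MaxDecimalPrecision() {
      return T::kMaxPrecision;  // 76 for Decimal256Type
    }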
// Nested Types
template <typename T>
@@ -636,7 +636,7 @@ template <typename T>
using is_list_type =
std::integral_constant<bool, std::is_same<T, ListType>::value ||
std::is_same<T, LargeListType>::value ||
- std::is_same<T, FixedSizeListType>::value>;
+ std::is_same<T, FixedSizeListType>::value>;
template <typename T, typename R = void>
using enable_if_list_type = enable_if_t<is_list_type<T>::value, R>;
@@ -846,17 +846,17 @@ static inline bool is_floating(Type::type type_id) {
return false;
}
-static inline bool is_decimal(Type::type type_id) {
- switch (type_id) {
- case Type::DECIMAL128:
- case Type::DECIMAL256:
- return true;
- default:
- break;
- }
- return false;
-}
-
+static inline bool is_decimal(Type::type type_id) {
+ switch (type_id) {
+ case Type::DECIMAL128:
+ case Type::DECIMAL256:
+ return true;
+ default:
+ break;
+ }
+ return false;
+}
+
static inline bool is_primitive(Type::type type_id) {
switch (type_id) {
case Type::BOOL:
@@ -927,8 +927,8 @@ static inline bool is_dictionary(Type::type type_id) {
static inline bool is_fixed_size_binary(Type::type type_id) {
switch (type_id) {
- case Type::DECIMAL128:
- case Type::DECIMAL256:
+ case Type::DECIMAL128:
+ case Type::DECIMAL256:
case Type::FIXED_SIZE_BINARY:
return true;
default:
@@ -941,52 +941,52 @@ static inline bool is_fixed_width(Type::type type_id) {
return is_primitive(type_id) || is_dictionary(type_id) || is_fixed_size_binary(type_id);
}
-static inline int bit_width(Type::type type_id) {
- switch (type_id) {
- case Type::BOOL:
- return 1;
- case Type::UINT8:
- case Type::INT8:
- return 8;
- case Type::UINT16:
- case Type::INT16:
- return 16;
- case Type::UINT32:
- case Type::INT32:
- case Type::DATE32:
- case Type::TIME32:
- return 32;
- case Type::UINT64:
- case Type::INT64:
- case Type::DATE64:
- case Type::TIME64:
- case Type::TIMESTAMP:
- case Type::DURATION:
- return 64;
-
- case Type::HALF_FLOAT:
- return 16;
- case Type::FLOAT:
- return 32;
- case Type::DOUBLE:
- return 64;
-
- case Type::INTERVAL_MONTHS:
- return 32;
- case Type::INTERVAL_DAY_TIME:
- return 64;
-
- case Type::DECIMAL128:
- return 128;
- case Type::DECIMAL256:
- return 256;
-
- default:
- break;
- }
- return 0;
-}
-
+static inline int bit_width(Type::type type_id) {
+ switch (type_id) {
+ case Type::BOOL:
+ return 1;
+ case Type::UINT8:
+ case Type::INT8:
+ return 8;
+ case Type::UINT16:
+ case Type::INT16:
+ return 16;
+ case Type::UINT32:
+ case Type::INT32:
+ case Type::DATE32:
+ case Type::TIME32:
+ return 32;
+ case Type::UINT64:
+ case Type::INT64:
+ case Type::DATE64:
+ case Type::TIME64:
+ case Type::TIMESTAMP:
+ case Type::DURATION:
+ return 64;
+
+ case Type::HALF_FLOAT:
+ return 16;
+ case Type::FLOAT:
+ return 32;
+ case Type::DOUBLE:
+ return 64;
+
+ case Type::INTERVAL_MONTHS:
+ return 32;
+ case Type::INTERVAL_DAY_TIME:
+ return 64;
+
+ case Type::DECIMAL128:
+ return 128;
+ case Type::DECIMAL256:
+ return 256;
+
+ default:
+ break;
+ }
+ return 0;
+}
+
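`is_decimal` and `bit_width` are runtime counterparts to the traits above, switching on the type id. For instance:

    #include <cassert>

    void TypeIdQueries() {
      assert(arrow::is_decimal(arrow::Type::DECIMAL256));
      assert(arrow::bit_width(arrow::Type::DECIMAL128) == 128);
      assert(arrow::bit_width(arrow::Type::STRING) == 0);  // not fixed-width
    }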
static inline bool is_nested(Type::type type_id) {
switch (type_id) {
case Type::LIST:
@@ -1003,22 +1003,22 @@ static inline bool is_nested(Type::type type_id) {
return false;
}
-static inline int offset_bit_width(Type::type type_id) {
- switch (type_id) {
- case Type::STRING:
- case Type::BINARY:
- case Type::LIST:
- case Type::MAP:
- case Type::DENSE_UNION:
- return 32;
- case Type::LARGE_STRING:
- case Type::LARGE_BINARY:
- case Type::LARGE_LIST:
- return 64;
- default:
- break;
- }
- return 0;
-}
-
+static inline int offset_bit_width(Type::type type_id) {
+ switch (type_id) {
+ case Type::STRING:
+ case Type::BINARY:
+ case Type::LIST:
+ case Type::MAP:
+ case Type::DENSE_UNION:
+ return 32;
+ case Type::LARGE_STRING:
+ case Type::LARGE_BINARY:
+ case Type::LARGE_LIST:
+ return 64;
+ default:
+ break;
+ }
+ return 0;
+}
+
} // namespace arrow
diff --git a/contrib/libs/apache/arrow/cpp/src/arrow/util/algorithm.h b/contrib/libs/apache/arrow/cpp/src/arrow/util/algorithm.h
index 8f9ae1f7706..2a0e6ba709d 100644
--- a/contrib/libs/apache/arrow/cpp/src/arrow/util/algorithm.h
+++ b/contrib/libs/apache/arrow/cpp/src/arrow/util/algorithm.h
@@ -1,33 +1,33 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-#pragma once
-
-#include "arrow/result.h"
-
-namespace arrow {
-
-template <typename InputIterator, typename OutputIterator, typename UnaryOperation>
-Status MaybeTransform(InputIterator first, InputIterator last, OutputIterator out,
- UnaryOperation unary_op) {
- for (; first != last; ++first, (void)++out) {
- ARROW_ASSIGN_OR_RAISE(*out, unary_op(*first));
- }
- return Status::OK();
-}
-
-} // namespace arrow
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+
+#include "arrow/result.h"
+
+namespace arrow {
+
+template <typename InputIterator, typename OutputIterator, typename UnaryOperation>
+Status MaybeTransform(InputIterator first, InputIterator last, OutputIterator out,
+ UnaryOperation unary_op) {
+ for (; first != last; ++first, (void)++out) {
+ ARROW_ASSIGN_OR_RAISE(*out, unary_op(*first));
+ }
+ return Status::OK();
+}
+
+} // namespace arrow
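`MaybeTransform` is a `Result`-aware `std::transform`: the first failing element short-circuits the loop via `ARROW_ASSIGN_OR_RAISE`. A sketch with a hypothetical fallible op:

    #include <vector>
    #include "arrow/util/algorithm.h"

    arrow::Status DoubleAll(const std::vector<int>& in, std::vector<int>* out) {
      out->resize(in.size());
      return arrow::MaybeTransform(
          in.begin(), in.end(), out->begin(),
          [](int x) -> arrow::Result<int> { return x * 2; });
    }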
diff --git a/contrib/libs/apache/arrow/cpp/src/arrow/util/async_generator.h b/contrib/libs/apache/arrow/cpp/src/arrow/util/async_generator.h
index c672ebab778..9d1021edff5 100644
--- a/contrib/libs/apache/arrow/cpp/src/arrow/util/async_generator.h
+++ b/contrib/libs/apache/arrow/cpp/src/arrow/util/async_generator.h
@@ -1,1614 +1,1614 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-#pragma once
-
-#include <atomic>
-#include <cassert>
-#include <cstring>
-#include <deque>
-#include <limits>
-#include <queue>
-
-#include "arrow/util/functional.h"
-#include "arrow/util/future.h"
-#include "arrow/util/io_util.h"
-#include "arrow/util/iterator.h"
-#include "arrow/util/mutex.h"
-#include "arrow/util/optional.h"
-#include "arrow/util/queue.h"
-#include "arrow/util/thread_pool.h"
-
-namespace arrow {
-
-// The methods in this file create, modify, and utilize AsyncGenerator which is an
-// iterator of futures. This allows an asynchronous source (like file input) to be run
-// through a pipeline in the same way that iterators can be used to create pipelined
-// workflows.
-//
-// In order to support pipeline parallelism we introduce the concept of asynchronous
-// reentrancy. This is different than synchronous reentrancy. With synchronous code a
-// function is reentrant if the function can be called again while a previous call to that
-// function is still running. Unless otherwise specified none of these generators are
-// synchronously reentrant. Care should be taken to avoid calling them in such a way (and
-// the utilities Visit/Collect/Await take care to do this).
-//
-// Asynchronous reentrancy on the other hand means the function is called again before the
-// future returned by the function is marked finished (but after the call to get the
-// future returns). Some of these generators are async-reentrant while others (e.g.
-// those that depend on ordered processing like decompression) are not. Read the MakeXYZ
-// function comments to determine which generators support async reentrancy.
-//
-// Note: Generators that are not asynchronously reentrant can still support readahead
-// (\see MakeSerialReadaheadGenerator).
-//
-// Readahead operators, and some other operators, may introduce queueing. Any operators
-// that introduce buffering should detail the amount of buffering they introduce in their
-// MakeXYZ function comments.
-template <typename T>
-using AsyncGenerator = std::function<Future<T>()>;
-
-template <typename T>
-struct IterationTraits<AsyncGenerator<T>> {
- /// \brief by default when iterating through a sequence of AsyncGenerator<T>,
- /// an empty function indicates the end of iteration.
- static AsyncGenerator<T> End() { return AsyncGenerator<T>(); }
-
- static bool IsEnd(const AsyncGenerator<T>& val) { return !val; }
-};
-
-template <typename T>
-Future<T> AsyncGeneratorEnd() {
- return Future<T>::MakeFinished(IterationTraits<T>::End());
-}
-
-/// returning a future that completes when all have been visited
-template <typename T, typename Visitor>
-Future<> VisitAsyncGenerator(AsyncGenerator<T> generator, Visitor visitor) {
- struct LoopBody {
- struct Callback {
- Result<ControlFlow<>> operator()(const T& next) {
- if (IsIterationEnd(next)) {
- return Break();
- } else {
- auto visited = visitor(next);
- if (visited.ok()) {
- return Continue();
- } else {
- return visited;
- }
- }
- }
-
- Visitor visitor;
- };
-
- Future<ControlFlow<>> operator()() {
- Callback callback{visitor};
- auto next = generator();
- return next.Then(std::move(callback));
- }
-
- AsyncGenerator<T> generator;
- Visitor visitor;
- };
-
- return Loop(LoopBody{std::move(generator), std::move(visitor)});
-}
-
-/// \brief Waits for an async generator to complete, discarding results.
-template <typename T>
-Future<> DiscardAllFromAsyncGenerator(AsyncGenerator<T> generator) {
- std::function<Status(T)> visitor = [](const T&) { return Status::OK(); };
- return VisitAsyncGenerator(generator, visitor);
-}
-
-/// \brief Collects the results of an async generator into a vector
-template <typename T>
-Future<std::vector<T>> CollectAsyncGenerator(AsyncGenerator<T> generator) {
- auto vec = std::make_shared<std::vector<T>>();
- struct LoopBody {
- Future<ControlFlow<std::vector<T>>> operator()() {
- auto next = generator_();
- auto vec = vec_;
- return next.Then([vec](const T& result) -> Result<ControlFlow<std::vector<T>>> {
- if (IsIterationEnd(result)) {
- return Break(*vec);
- } else {
- vec->push_back(result);
- return Continue();
- }
- });
- }
- AsyncGenerator<T> generator_;
- std::shared_ptr<std::vector<T>> vec_;
- };
- return Loop(LoopBody{std::move(generator), std::move(vec)});
-}
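An `AsyncGenerator<T>` is just a callable returning futures, with the end signalled by `IterationTraits<T>::End()` — the default-constructed value, hence the pointer element type in this sketch so that the end token is a null pointer. A deliberately synchronous example that `CollectAsyncGenerator` can drain (real sources would yield genuinely pending futures):

    using Item = std::shared_ptr<int>;  // end token is a null pointer

    // Emits 0, 1, 2 as already-finished futures, then the end token.
    // A trivial source like this is not async-reentrant.
    arrow::AsyncGenerator<Item> counting = [i = 0]() mutable -> arrow::Future<Item> {
      if (i < 3) return arrow::Future<Item>::MakeFinished(std::make_shared<int>(i++));
      return arrow::AsyncGeneratorEnd<Item>();
    };
    arrow::Future<std::vector<Item>> all = arrow::CollectAsyncGenerator(counting);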
-
-/// \see MakeMappedGenerator
-template <typename T, typename V>
-class MappingGenerator {
- public:
- MappingGenerator(AsyncGenerator<T> source, std::function<Future<V>(const T&)> map)
- : state_(std::make_shared<State>(std::move(source), std::move(map))) {}
-
- Future<V> operator()() {
- auto future = Future<V>::Make();
- bool should_trigger;
- {
- auto guard = state_->mutex.Lock();
- if (state_->finished) {
- return AsyncGeneratorEnd<V>();
- }
- should_trigger = state_->waiting_jobs.empty();
- state_->waiting_jobs.push_back(future);
- }
- if (should_trigger) {
- state_->source().AddCallback(Callback{state_});
- }
- return future;
- }
-
- private:
- struct State {
- State(AsyncGenerator<T> source, std::function<Future<V>(const T&)> map)
- : source(std::move(source)),
- map(std::move(map)),
- waiting_jobs(),
- mutex(),
- finished(false) {}
-
- void Purge() {
- // This might be called by an original callback (if the source iterator fails or
- // ends) or by a mapped callback (if the map function fails or ends prematurely).
- // Either way it should only be called once and after finished is set so there is no
- // need to guard access to `waiting_jobs`.
- while (!waiting_jobs.empty()) {
- waiting_jobs.front().MarkFinished(IterationTraits<V>::End());
- waiting_jobs.pop_front();
- }
- }
-
- AsyncGenerator<T> source;
- std::function<Future<V>(const T&)> map;
- std::deque<Future<V>> waiting_jobs;
- util::Mutex mutex;
- bool finished;
- };
-
- struct Callback;
-
- struct MappedCallback {
- void operator()(const Result<V>& maybe_next) {
- bool end = !maybe_next.ok() || IsIterationEnd(*maybe_next);
- bool should_purge = false;
- if (end) {
- {
- auto guard = state->mutex.Lock();
- should_purge = !state->finished;
- state->finished = true;
- }
- }
- sink.MarkFinished(maybe_next);
- if (should_purge) {
- state->Purge();
- }
- }
- std::shared_ptr<State> state;
- Future<V> sink;
- };
-
- struct Callback {
- void operator()(const Result<T>& maybe_next) {
- Future<V> sink;
- bool end = !maybe_next.ok() || IsIterationEnd(*maybe_next);
- bool should_purge = false;
- bool should_trigger;
- {
- auto guard = state->mutex.Lock();
- if (end) {
- should_purge = !state->finished;
- state->finished = true;
- }
- sink = state->waiting_jobs.front();
- state->waiting_jobs.pop_front();
- should_trigger = !end && !state->waiting_jobs.empty();
- }
- if (should_purge) {
- state->Purge();
- }
- if (should_trigger) {
- state->source().AddCallback(Callback{state});
- }
- if (maybe_next.ok()) {
- const T& val = maybe_next.ValueUnsafe();
- if (IsIterationEnd(val)) {
- sink.MarkFinished(IterationTraits<V>::End());
- } else {
- Future<V> mapped_fut = state->map(val);
- mapped_fut.AddCallback(MappedCallback{std::move(state), std::move(sink)});
- }
- } else {
- sink.MarkFinished(maybe_next.status());
- }
- }
-
- std::shared_ptr<State> state;
- };
-
- std::shared_ptr<State> state_;
-};
-
-/// \brief Creates a generator that will apply the map function to each element of
-/// source. The map function is not called on the end token.
-///
-/// Note: This function makes a copy of `map` for each item
-/// Note: Errors returned from the `map` function will be propagated
-///
-/// If the source generator is async-reentrant then this generator will be as well
-template <typename T, typename MapFn,
- typename Mapped = detail::result_of_t<MapFn(const T&)>,
- typename V = typename EnsureFuture<Mapped>::type::ValueType>
-AsyncGenerator<V> MakeMappedGenerator(AsyncGenerator<T> source_generator, MapFn map) {
- struct MapCallback {
- MapFn map_;
-
- Future<V> operator()(const T& val) { return ToFuture(map_(val)); }
- };
-
- return MappingGenerator<T, V>(std::move(source_generator), MapCallback{std::move(map)});
-}
-
-/// \see MakeSequencingGenerator
-template <typename T, typename ComesAfter, typename IsNext>
-class SequencingGenerator {
- public:
- SequencingGenerator(AsyncGenerator<T> source, ComesAfter compare, IsNext is_next,
- T initial_value)
- : state_(std::make_shared<State>(std::move(source), std::move(compare),
- std::move(is_next), std::move(initial_value))) {}
-
- Future<T> operator()() {
- {
- auto guard = state_->mutex.Lock();
- // We can send a result immediately if the top of the queue is either an
- // error or the next item
- if (!state_->queue.empty() &&
- (!state_->queue.top().ok() ||
- state_->is_next(state_->previous_value, *state_->queue.top()))) {
- auto result = std::move(state_->queue.top());
- if (result.ok()) {
- state_->previous_value = *result;
- }
- state_->queue.pop();
- return Future<T>::MakeFinished(result);
- }
- if (state_->finished) {
- return AsyncGeneratorEnd<T>();
- }
- // The next item is not in the queue so we will need to wait
- auto new_waiting_fut = Future<T>::Make();
- state_->waiting_future = new_waiting_fut;
- guard.Unlock();
- state_->source().AddCallback(Callback{state_});
- return new_waiting_fut;
- }
- }
-
- private:
- struct WrappedComesAfter {
- bool operator()(const Result<T>& left, const Result<T>& right) {
- if (!left.ok() || !right.ok()) {
- // Should never happen
- return false;
- }
- return compare(*left, *right);
- }
- ComesAfter compare;
- };
-
- struct State {
- State(AsyncGenerator<T> source, ComesAfter compare, IsNext is_next, T initial_value)
- : source(std::move(source)),
- is_next(std::move(is_next)),
- previous_value(std::move(initial_value)),
- waiting_future(),
- queue(WrappedComesAfter{compare}),
- finished(false),
- mutex() {}
-
- AsyncGenerator<T> source;
- IsNext is_next;
- T previous_value;
- Future<T> waiting_future;
- std::priority_queue<Result<T>, std::vector<Result<T>>, WrappedComesAfter> queue;
- bool finished;
- util::Mutex mutex;
- };
-
- class Callback {
- public:
- explicit Callback(std::shared_ptr<State> state) : state_(std::move(state)) {}
-
- void operator()(const Result<T> result) {
- Future<T> to_deliver;
- bool finished;
- {
- auto guard = state_->mutex.Lock();
- bool ready_to_deliver = false;
- if (!result.ok()) {
- // Clear any cached results
- while (!state_->queue.empty()) {
- state_->queue.pop();
- }
- ready_to_deliver = true;
- state_->finished = true;
- } else if (IsIterationEnd<T>(result.ValueUnsafe())) {
- ready_to_deliver = state_->queue.empty();
- state_->finished = true;
- } else {
- ready_to_deliver = state_->is_next(state_->previous_value, *result);
- }
-
- if (ready_to_deliver && state_->waiting_future.is_valid()) {
- to_deliver = state_->waiting_future;
- if (result.ok()) {
- state_->previous_value = *result;
- }
- } else {
- state_->queue.push(result);
- }
- // Capture state_->finished so we can access it outside the mutex
- finished = state_->finished;
- }
- // Must deliver result outside of the mutex
- if (to_deliver.is_valid()) {
- to_deliver.MarkFinished(result);
- } else {
- // Otherwise, if we didn't get the next item (or a terminal item), we
- // need to keep looking
- if (!finished) {
- state_->source().AddCallback(Callback{state_});
- }
- }
- }
-
- private:
- const std::shared_ptr<State> state_;
- };
-
- const std::shared_ptr<State> state_;
-};
-
-/// \brief Buffers an AsyncGenerator to return values in sequence order. ComesAfter
-/// and IsNext determine the sequence order.
-///
-/// ComesAfter should be a BinaryPredicate that only returns true if `a` comes after `b`
-///
-/// IsNext should be a BinaryPredicate that returns true, given `a` and `b`, only if
-/// `b` follows immediately after `a`. It should return true given `initial_value` and
-/// `b` if `b` is the first item in the sequence.
-///
-/// This operator will queue unboundedly while waiting for the next item. It is intended
-/// for jittery sources that might scatter an ordered sequence. It is NOT intended to
-/// sort. Using it to try and sort could result in excessive RAM usage. This generator
-/// will queue up to N blocks where N is the max "out of order"ness of the source.
-///
-/// For example, if the source is 1,6,2,5,4,3 it will queue 3 blocks because 3 is 3
-/// blocks beyond where it belongs.
-///
-/// This generator is not async-reentrant but it consists only of a simple log(n)
-/// insertion into a priority queue.
-template <typename T, typename ComesAfter, typename IsNext>
-AsyncGenerator<T> MakeSequencingGenerator(AsyncGenerator<T> source_generator,
- ComesAfter compare, IsNext is_next,
- T initial_value) {
- return SequencingGenerator<T, ComesAfter, IsNext>(
- std::move(source_generator), std::move(compare), std::move(is_next),
- std::move(initial_value));
-}
-
-/// \see MakeTransformedGenerator
-template <typename T, typename V>
-class TransformingGenerator {
- // The transforming generator state will be referenced as an async generator but will
- // also be referenced via callback to various futures. If the async generator owner
- // moves it around we need the state to be consistent for future callbacks.
- struct TransformingGeneratorState
- : std::enable_shared_from_this<TransformingGeneratorState> {
- TransformingGeneratorState(AsyncGenerator<T> generator, Transformer<T, V> transformer)
- : generator_(std::move(generator)),
- transformer_(std::move(transformer)),
- last_value_(),
- finished_() {}
-
- Future<V> operator()() {
- while (true) {
- auto maybe_next_result = Pump();
- if (!maybe_next_result.ok()) {
- return Future<V>::MakeFinished(maybe_next_result.status());
- }
- auto maybe_next = std::move(maybe_next_result).ValueUnsafe();
- if (maybe_next.has_value()) {
- return Future<V>::MakeFinished(*std::move(maybe_next));
- }
-
- auto next_fut = generator_();
- // If finished already, process results immediately inside the loop to avoid
- // stack overflow
- if (next_fut.is_finished()) {
- auto next_result = next_fut.result();
- if (next_result.ok()) {
- last_value_ = *next_result;
- } else {
- return Future<V>::MakeFinished(next_result.status());
- }
- // Otherwise, if not finished immediately, add callback to process results
- } else {
- auto self = this->shared_from_this();
- return next_fut.Then([self](const T& next_result) {
- self->last_value_ = next_result;
- return (*self)();
- });
- }
- }
- }
-
- // See comment on TransformingIterator::Pump
- Result<util::optional<V>> Pump() {
- if (!finished_ && last_value_.has_value()) {
- ARROW_ASSIGN_OR_RAISE(TransformFlow<V> next, transformer_(*last_value_));
- if (next.ReadyForNext()) {
- if (IsIterationEnd(*last_value_)) {
- finished_ = true;
- }
- last_value_.reset();
- }
- if (next.Finished()) {
- finished_ = true;
- }
- if (next.HasValue()) {
- return next.Value();
- }
- }
- if (finished_) {
- return IterationTraits<V>::End();
- }
- return util::nullopt;
- }
-
- AsyncGenerator<T> generator_;
- Transformer<T, V> transformer_;
- util::optional<T> last_value_;
- bool finished_;
- };
-
- public:
- explicit TransformingGenerator(AsyncGenerator<T> generator,
- Transformer<T, V> transformer)
- : state_(std::make_shared<TransformingGeneratorState>(std::move(generator),
- std::move(transformer))) {}
-
- Future<V> operator()() { return (*state_)(); }
-
- protected:
- std::shared_ptr<TransformingGeneratorState> state_;
-};
-
-/// \brief Transforms an async generator using a transformer function returning a new
-/// AsyncGenerator
-///
-/// The transform function here behaves exactly the same as the transform function in
-/// MakeTransformedIterator and you can safely use the same transform function to
-/// transform both synchronous and asynchronous streams.
-///
-/// This generator is not async-reentrant
-///
-/// This generator may queue up to 1 instance of T but will not delay
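-///
-/// Example (a minimal sketch of a filtering transformer, assuming the
-/// TransformYield/TransformSkip helpers declared in arrow/util/iterator.h):
-///
-/// \code
-/// Transformer<int, int> keep_even = [](int v) -> Result<TransformFlow<int>> {
-///   if (v % 2 == 0) return TransformYield(v);
-///   return TransformSkip<int>();
-/// };
-/// auto evens = MakeTransformedGenerator(
-///     MakeVectorGenerator(std::vector<int>{1, 2, 3, 4}), keep_even);
-/// \endcode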
-template <typename T, typename V>
-AsyncGenerator<V> MakeTransformedGenerator(AsyncGenerator<T> generator,
- Transformer<T, V> transformer) {
- return TransformingGenerator<T, V>(generator, transformer);
-}
-
-/// \see MakeSerialReadaheadGenerator
-template <typename T>
-class SerialReadaheadGenerator {
- public:
- SerialReadaheadGenerator(AsyncGenerator<T> source_generator, int max_readahead)
- : state_(std::make_shared<State>(std::move(source_generator), max_readahead)) {}
-
- Future<T> operator()() {
- if (state_->first_) {
- // Lazy generator, need to wait for the first ask to prime the pump
- state_->first_ = false;
- auto next = state_->source_();
- return next.Then(Callback{state_}, ErrCallback{state_});
- }
-
- // This generator is not async-reentrant. We won't be called until the last
- // future has finished, so we know there is something in the queue
- auto finished = state_->finished_.load();
- if (finished && state_->readahead_queue_.IsEmpty()) {
- return AsyncGeneratorEnd<T>();
- }
-
- std::shared_ptr<Future<T>> next;
- if (!state_->readahead_queue_.Read(next)) {
- return Status::UnknownError("Could not read from readahead_queue");
- }
-
- auto last_available = state_->spaces_available_.fetch_add(1);
- if (last_available == 0 && !finished) {
- // Reader idled out, we need to restart it
- ARROW_RETURN_NOT_OK(state_->Pump(state_));
- }
- return *next;
- }
-
- private:
- struct State {
- State(AsyncGenerator<T> source, int max_readahead)
- : first_(true),
- source_(std::move(source)),
- finished_(false),
- // There is one extra "space" for the in-flight request
- spaces_available_(max_readahead + 1),
- // The SPSC queue has size-1 "usable" slots so we need to overallocate 1
- readahead_queue_(max_readahead + 1) {}
-
- Status Pump(const std::shared_ptr<State>& self) {
- // Can't do readahead_queue.write(source().Then(...)) because then the
- // callback might run immediately and add itself to the queue before this gets added
- // to the queue, messing up the order.
- auto next_slot = std::make_shared<Future<T>>();
- auto written = readahead_queue_.Write(next_slot);
- if (!written) {
- return Status::UnknownError("Could not write to readahead_queue");
- }
- // If this Pump is being called from a callback it is possible for the source to
- // poll and read from the queue between the Write and this spot where we fill the
- // value in. However, it is not possible for the future to read this value we are
- // writing. That is because this callback (the callback for future X) must be
- // finished before future X is marked complete and this source is not pulled
- // reentrantly so it will not poll for future X+1 until this callback has completed.
- *next_slot = source_().Then(Callback{self}, ErrCallback{self});
- return Status::OK();
- }
-
- // Only accessed by the consumer end
- bool first_;
- // Accessed by both threads
- AsyncGenerator<T> source_;
- std::atomic<bool> finished_;
- // The queue has a size but it is not atomic. We keep track of how many spaces are
- // left in the queue here so we know if we've just written the last value and we need
- // to stop reading ahead or if we've just read from a full queue and we need to
- // restart reading ahead
- std::atomic<uint32_t> spaces_available_;
- // Needs to be a queue of shared_ptr and not Future because we set the value of the
- // future after we add it to the queue
- util::SpscQueue<std::shared_ptr<Future<T>>> readahead_queue_;
- };
-
- struct Callback {
- Result<T> operator()(const T& next) {
- if (IsIterationEnd(next)) {
- state_->finished_.store(true);
- return next;
- }
- auto last_available = state_->spaces_available_.fetch_sub(1);
- if (last_available > 1) {
- ARROW_RETURN_NOT_OK(state_->Pump(state_));
- }
- return next;
- }
-
- std::shared_ptr<State> state_;
- };
-
- struct ErrCallback {
- Result<T> operator()(const Status& st) {
- state_->finished_.store(true);
- return st;
- }
-
- std::shared_ptr<State> state_;
- };
-
- std::shared_ptr<State> state_;
-};
-
-/// \see MakeFromFuture
-template <typename T>
-class FutureFirstGenerator {
- public:
- explicit FutureFirstGenerator(Future<AsyncGenerator<T>> future)
- : state_(std::make_shared<State>(std::move(future))) {}
-
- Future<T> operator()() {
- if (state_->source_) {
- return state_->source_();
- } else {
- auto state = state_;
- return state_->future_.Then([state](const AsyncGenerator<T>& source) {
- state->source_ = source;
- return state->source_();
- });
- }
- }
-
- private:
- struct State {
- explicit State(Future<AsyncGenerator<T>> future) : future_(future), source_() {}
-
- Future<AsyncGenerator<T>> future_;
- AsyncGenerator<T> source_;
- };
-
- std::shared_ptr<State> state_;
-};
-
-/// \brief Transforms a Future<AsyncGenerator<T>> into an AsyncGenerator<T>
-/// that waits for the future to complete as part of the first item.
-///
-/// This generator is not async-reentrant (even if the generator yielded by future is)
-///
-/// This generator does not queue
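-///
-/// Example (a minimal sketch; in practice the future usually comes from an
-/// asynchronous open call rather than MakeFinished):
-///
-/// \code
-/// auto gen_fut = Future<AsyncGenerator<int>>::MakeFinished(
-///     MakeVectorGenerator(std::vector<int>{1, 2, 3}));
-/// AsyncGenerator<int> gen = MakeFromFuture(std::move(gen_fut));
-/// \endcode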
-template <typename T>
-AsyncGenerator<T> MakeFromFuture(Future<AsyncGenerator<T>> future) {
- return FutureFirstGenerator<T>(std::move(future));
-}
-
-/// \brief Creates a generator that will pull from the source into a queue. Unlike
-/// MakeReadaheadGenerator this will not pull reentrantly from the source.
-///
-/// The source generator does not need to be async-reentrant
-///
-/// This generator is not async-reentrant (even if the source is)
-///
-/// This generator may queue up to max_readahead additional instances of T
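-///
-/// Example (a minimal sketch; the source is only polled again after each
-/// result is delivered, so it need not be async-reentrant):
-///
-/// \code
-/// auto gen = MakeVectorGenerator(std::vector<int>{1, 2, 3, 4, 5});
-/// auto readahead = MakeSerialReadaheadGenerator(std::move(gen), /*max_readahead=*/2);
-/// \endcode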
-template <typename T>
-AsyncGenerator<T> MakeSerialReadaheadGenerator(AsyncGenerator<T> source_generator,
- int max_readahead) {
- return SerialReadaheadGenerator<T>(std::move(source_generator), max_readahead);
-}
-
-/// \see MakeReadaheadGenerator
-template <typename T>
-class ReadaheadGenerator {
- public:
- ReadaheadGenerator(AsyncGenerator<T> source_generator, int max_readahead)
- : state_(std::make_shared<State>(std::move(source_generator), max_readahead)) {}
-
- Future<T> AddMarkFinishedContinuation(Future<T> fut) {
- auto state = state_;
- return fut.Then(
- [state](const T& result) -> Result<T> {
- state->MarkFinishedIfDone(result);
- return result;
- },
- [state](const Status& err) -> Result<T> {
- state->finished.store(true);
- return err;
- });
- }
-
- Future<T> operator()() {
- if (state_->readahead_queue.empty()) {
- // This is the first request, let's pump the underlying queue
- for (int i = 0; i < state_->max_readahead; i++) {
- auto next = state_->source_generator();
- auto next_after_check = AddMarkFinishedContinuation(std::move(next));
- state_->readahead_queue.push(std::move(next_after_check));
- }
- }
- // Pop one and add one
- auto result = state_->readahead_queue.front();
- state_->readahead_queue.pop();
- if (state_->finished.load()) {
- state_->readahead_queue.push(AsyncGeneratorEnd<T>());
- } else {
- auto back_of_queue = state_->source_generator();
- auto back_of_queue_after_check =
- AddMarkFinishedContinuation(std::move(back_of_queue));
- state_->readahead_queue.push(std::move(back_of_queue_after_check));
- }
- return result;
- }
-
- private:
- struct State {
- State(AsyncGenerator<T> source_generator, int max_readahead)
- : source_generator(std::move(source_generator)), max_readahead(max_readahead) {
- finished.store(false);
- }
-
- void MarkFinishedIfDone(const T& next_result) {
- if (IsIterationEnd(next_result)) {
- finished.store(true);
- }
- }
-
- AsyncGenerator<T> source_generator;
- int max_readahead;
- std::atomic<bool> finished;
- std::queue<Future<T>> readahead_queue;
- };
-
- std::shared_ptr<State> state_;
-};
-
-/// \brief A generator where the producer pushes items on a queue.
-///
-/// No back-pressure is applied, so this generator is mostly useful when
-/// producing the values is neither CPU- nor memory-expensive (e.g. fetching
-/// filesystem metadata).
-///
-/// This generator is not async-reentrant.
-template <typename T>
-class PushGenerator {
- struct State {
- util::Mutex mutex;
- std::deque<Result<T>> result_q;
- util::optional<Future<T>> consumer_fut;
- bool finished = false;
- };
-
- public:
- /// Producer API for PushGenerator
- class Producer {
- public:
- explicit Producer(const std::shared_ptr<State>& state) : weak_state_(state) {}
-
- /// \brief Push a value on the queue
- ///
- /// True is returned if the value was pushed, false if the generator is
- /// already closed or destroyed. If the latter, it is recommended to stop
- /// producing any further values.
- bool Push(Result<T> result) {
- auto state = weak_state_.lock();
- if (!state) {
- // Generator was destroyed
- return false;
- }
- auto lock = state->mutex.Lock();
- if (state->finished) {
- // Closed early
- return false;
- }
- if (state->consumer_fut.has_value()) {
- auto fut = std::move(state->consumer_fut.value());
- state->consumer_fut.reset();
- lock.Unlock(); // unlock before potentially invoking a callback
- fut.MarkFinished(std::move(result));
- } else {
- state->result_q.push_back(std::move(result));
- }
- return true;
- }
-
- /// \brief Tell the consumer we have finished producing
- ///
- /// It is allowed to call this and later call Push() again ("early close").
- /// In this case, calls to Push() after the queue is closed are silently
- /// ignored. This can help implement non-trivial cancellation cases.
- ///
- /// True is returned on success, false if the generator is already closed
- /// or destroyed.
- bool Close() {
- auto state = weak_state_.lock();
- if (!state) {
- // Generator was destroyed
- return false;
- }
- auto lock = state->mutex.Lock();
- if (state->finished) {
- // Already closed
- return false;
- }
- state->finished = true;
- if (state->consumer_fut.has_value()) {
- auto fut = std::move(state->consumer_fut.value());
- state->consumer_fut.reset();
- lock.Unlock(); // unlock before potentially invoking a callback
- fut.MarkFinished(IterationTraits<T>::End());
- }
- return true;
- }
-
- /// Return whether the generator was closed or destroyed.
- bool is_closed() const {
- auto state = weak_state_.lock();
- if (!state) {
- // Generator was destroyed
- return true;
- }
- auto lock = state->mutex.Lock();
- return state->finished;
- }
-
- private:
- const std::weak_ptr<State> weak_state_;
- };
-
- PushGenerator() : state_(std::make_shared<State>()) {}
-
- /// Read an item from the queue
- Future<T> operator()() {
- auto lock = state_->mutex.Lock();
- assert(!state_->consumer_fut.has_value()); // Non-reentrant
- if (!state_->result_q.empty()) {
- auto fut = Future<T>::MakeFinished(std::move(state_->result_q.front()));
- state_->result_q.pop_front();
- return fut;
- }
- if (state_->finished) {
- return AsyncGeneratorEnd<T>();
- }
- auto fut = Future<T>::Make();
- state_->consumer_fut = fut;
- return fut;
- }
-
- /// \brief Return producer-side interface
- ///
- /// The returned object must be used by the producer to push values on the queue.
- /// Only a single Producer object should be instantiated.
- Producer producer() { return Producer{state_}; }
-
- private:
- const std::shared_ptr<State> state_;
-};
-
-/// \brief Creates a generator that pulls reentrantly from a source.
-/// This generator will pull reentrantly from a source, ensuring that max_readahead
-/// requests are active at any given time.
-///
-/// The source generator must be async-reentrant
-///
-/// This generator itself is async-reentrant.
-///
-/// This generator may queue up to max_readahead instances of T
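-///
-/// Example (a minimal sketch; `reentrant_source` is a placeholder for a source
-/// that is safe to poll again before its previous future finishes, which
-/// MakeVectorGenerator happens to be):
-///
-/// \code
-/// AsyncGenerator<int> reentrant_source =
-///     MakeVectorGenerator(std::vector<int>{1, 2, 3});
-/// auto gen = MakeReadaheadGenerator(std::move(reentrant_source), /*max_readahead=*/8);
-/// \endcode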
-template <typename T>
-AsyncGenerator<T> MakeReadaheadGenerator(AsyncGenerator<T> source_generator,
- int max_readahead) {
- return ReadaheadGenerator<T>(std::move(source_generator), max_readahead);
-}
-
-/// \brief Creates a generator that will yield finished futures from a vector
-///
-/// This generator is async-reentrant
-template <typename T>
-AsyncGenerator<T> MakeVectorGenerator(std::vector<T> vec) {
- struct State {
- explicit State(std::vector<T> vec_) : vec(std::move(vec_)), vec_idx(0) {}
-
- std::vector<T> vec;
- std::atomic<std::size_t> vec_idx;
- };
-
- auto state = std::make_shared<State>(std::move(vec));
- return [state]() {
- auto idx = state->vec_idx.fetch_add(1);
- if (idx >= state->vec.size()) {
- // Eagerly return memory
- state->vec.clear();
- return AsyncGeneratorEnd<T>();
- }
- return Future<T>::MakeFinished(state->vec[idx]);
- };
-}
-
-/// \see MakeMergedGenerator
-template <typename T>
-class MergedGenerator {
- public:
- explicit MergedGenerator(AsyncGenerator<AsyncGenerator<T>> source,
- int max_subscriptions)
- : state_(std::make_shared<State>(std::move(source), max_subscriptions)) {}
-
- Future<T> operator()() {
- Future<T> waiting_future;
- std::shared_ptr<DeliveredJob> delivered_job;
- {
- auto guard = state_->mutex.Lock();
- if (!state_->delivered_jobs.empty()) {
- delivered_job = std::move(state_->delivered_jobs.front());
- state_->delivered_jobs.pop_front();
- } else if (state_->finished) {
- return IterationTraits<T>::End();
- } else {
- waiting_future = Future<T>::Make();
- state_->waiting_jobs.push_back(std::make_shared<Future<T>>(waiting_future));
- }
- }
- if (delivered_job) {
- // deliverer will be invalid if outer callback encounters an error and delivers a
- // failed result
- if (delivered_job->deliverer) {
- delivered_job->deliverer().AddCallback(
- InnerCallback{state_, delivered_job->index});
- }
- return std::move(delivered_job->value);
- }
- if (state_->first) {
- state_->first = false;
- for (std::size_t i = 0; i < state_->active_subscriptions.size(); i++) {
- state_->PullSource().AddCallback(OuterCallback{state_, i});
- }
- }
- return waiting_future;
- }
-
- private:
- struct DeliveredJob {
- explicit DeliveredJob(AsyncGenerator<T> deliverer_, Result<T> value_,
- std::size_t index_)
- : deliverer(deliverer_), value(std::move(value_)), index(index_) {}
-
- AsyncGenerator<T> deliverer;
- Result<T> value;
- std::size_t index;
- };
-
- struct State {
- State(AsyncGenerator<AsyncGenerator<T>> source, int max_subscriptions)
- : source(std::move(source)),
- active_subscriptions(max_subscriptions),
- delivered_jobs(),
- waiting_jobs(),
- mutex(),
- first(true),
- source_exhausted(false),
- finished(false),
- num_active_subscriptions(max_subscriptions) {}
-
- Future<AsyncGenerator<T>> PullSource() {
- // Need to guard access to source() so we don't pull sync-reentrantly which
- // is never valid.
- auto lock = mutex.Lock();
- return source();
- }
-
- AsyncGenerator<AsyncGenerator<T>> source;
- // active_subscriptions and delivered_jobs will be bounded by max_subscriptions
- std::vector<AsyncGenerator<T>> active_subscriptions;
- std::deque<std::shared_ptr<DeliveredJob>> delivered_jobs;
- // waiting_jobs is unbounded, reentrant pulls (e.g. AddReadahead) will provide the
- // backpressure
- std::deque<std::shared_ptr<Future<T>>> waiting_jobs;
- util::Mutex mutex;
- bool first;
- bool source_exhausted;
- bool finished;
- int num_active_subscriptions;
- };
-
- struct InnerCallback {
- void operator()(const Result<T>& maybe_next) {
- Future<T> sink;
- bool sub_finished = maybe_next.ok() && IsIterationEnd(*maybe_next);
- {
- auto guard = state->mutex.Lock();
- if (state->finished) {
- // We've errored out so just ignore this result and don't keep pumping
- return;
- }
- if (!sub_finished) {
- if (state->waiting_jobs.empty()) {
- state->delivered_jobs.push_back(std::make_shared<DeliveredJob>(
- state->active_subscriptions[index], maybe_next, index));
- } else {
- sink = std::move(*state->waiting_jobs.front());
- state->waiting_jobs.pop_front();
- }
- }
- }
- if (sub_finished) {
- state->PullSource().AddCallback(OuterCallback{state, index});
- } else if (sink.is_valid()) {
- sink.MarkFinished(maybe_next);
- if (maybe_next.ok()) {
- state->active_subscriptions[index]().AddCallback(*this);
- }
- }
- }
- std::shared_ptr<State> state;
- std::size_t index;
- };
-
- struct OuterCallback {
- void operator()(const Result<AsyncGenerator<T>>& maybe_next) {
- bool should_purge = false;
- bool should_continue = false;
- Future<T> error_sink;
- {
- auto guard = state->mutex.Lock();
- if (!maybe_next.ok() || IsIterationEnd(*maybe_next)) {
- state->source_exhausted = true;
- if (!maybe_next.ok() || --state->num_active_subscriptions == 0) {
- state->finished = true;
- should_purge = true;
- }
- if (!maybe_next.ok()) {
- if (state->waiting_jobs.empty()) {
- state->delivered_jobs.push_back(std::make_shared<DeliveredJob>(
- AsyncGenerator<T>(), maybe_next.status(), index));
- } else {
- error_sink = std::move(*state->waiting_jobs.front());
- state->waiting_jobs.pop_front();
- }
- }
- } else {
- state->active_subscriptions[index] = *maybe_next;
- should_continue = true;
- }
- }
- if (error_sink.is_valid()) {
- error_sink.MarkFinished(maybe_next.status());
- }
- if (should_continue) {
- (*maybe_next)().AddCallback(InnerCallback{state, index});
- } else if (should_purge) {
- // At this point state->finished has been marked true so no one else
- // will be interacting with waiting_jobs and we can iterate outside lock
- while (!state->waiting_jobs.empty()) {
- state->waiting_jobs.front()->MarkFinished(IterationTraits<T>::End());
- state->waiting_jobs.pop_front();
- }
- }
- }
- std::shared_ptr<State> state;
- std::size_t index;
- };
-
- std::shared_ptr<State> state_;
-};
-
-/// \brief Creates a generator that takes in a stream of generators and pulls from up to
-/// max_subscriptions at a time
-///
-/// Note: This may deliver items out of sequence. For example, items from the third
-/// AsyncGenerator generated by the source may be emitted before some items from the first
-/// AsyncGenerator generated by the source.
-///
-/// This generator will pull from source async-reentrantly unless max_subscriptions is 1
-/// This generator will not pull from the individual subscriptions reentrantly. Add
-/// readahead to the individual subscriptions if that is desired.
-/// This generator is async-reentrant
-///
-/// This generator may queue up to max_subscriptions instances of T
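-///
-/// Example (a minimal sketch merging two vector-backed sources):
-///
-/// \code
-/// auto outer = MakeVectorGenerator<AsyncGenerator<int>>(
-///     {MakeVectorGenerator(std::vector<int>{1, 2}),
-///      MakeVectorGenerator(std::vector<int>{3, 4})});
-/// auto merged = MakeMergedGenerator(std::move(outer), /*max_subscriptions=*/2);
-/// // May yield 1,3,2,4 or any interleaving that preserves per-source order
-/// \endcode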
-template <typename T>
-AsyncGenerator<T> MakeMergedGenerator(AsyncGenerator<AsyncGenerator<T>> source,
- int max_subscriptions) {
- return MergedGenerator<T>(std::move(source), max_subscriptions);
-}
-
-/// \brief Creates a generator that takes in a stream of generators and pulls from each
-/// one in sequence.
-///
-/// This generator is async-reentrant but will never pull from source reentrantly and
-/// will never pull from any subscription reentrantly.
-///
-/// This generator may queue 1 instance of T
-///
-/// TODO: Could potentially make a bespoke implementation instead of MergedGenerator that
-/// forwards async-reentrant requests instead of buffering them (which is what
-/// MergedGenerator does)
-template <typename T>
-AsyncGenerator<T> MakeConcatenatedGenerator(AsyncGenerator<AsyncGenerator<T>> source) {
- return MergedGenerator<T>(std::move(source), 1);
-}
-
-template <typename T>
-struct Enumerated {
- T value;
- int index;
- bool last;
-};
-
-template <typename T>
-struct IterationTraits<Enumerated<T>> {
- static Enumerated<T> End() { return Enumerated<T>{IterationEnd<T>(), -1, false}; }
- static bool IsEnd(const Enumerated<T>& val) { return val.index < 0; }
-};
-
-/// \see MakeEnumeratedGenerator
-template <typename T>
-class EnumeratingGenerator {
- public:
- EnumeratingGenerator(AsyncGenerator<T> source, T initial_value)
- : state_(std::make_shared<State>(std::move(source), std::move(initial_value))) {}
-
- Future<Enumerated<T>> operator()() {
- if (state_->finished) {
- return AsyncGeneratorEnd<Enumerated<T>>();
- } else {
- auto state = state_;
- return state->source().Then([state](const T& next) {
- auto finished = IsIterationEnd<T>(next);
- auto prev = Enumerated<T>{state->prev_value, state->prev_index, finished};
- state->prev_value = next;
- state->prev_index++;
- state->finished = finished;
- return prev;
- });
- }
- }
-
- private:
- struct State {
- State(AsyncGenerator<T> source, T initial_value)
- : source(std::move(source)), prev_value(std::move(initial_value)), prev_index(0) {
- finished = IsIterationEnd<T>(prev_value);
- }
-
- AsyncGenerator<T> source;
- T prev_value;
- int prev_index;
- bool finished;
- };
-
- std::shared_ptr<State> state_;
-};
-
-/// Wraps items from a source generator with positional information
-///
-/// When used with MakeMergedGenerator and MakeSequencingGenerator this allows items to be
-/// processed in a "first-available" fashion and later resequenced which can reduce the
-/// impact of sources with erratic performance (e.g. a filesystem where some items may
-/// take longer to read than others).
-///
-/// TODO(ARROW-12371) Would require this generator be async-reentrant
-///
-/// \see MakeSequencingGenerator for an example of putting items back in order
-///
-/// This generator is not async-reentrant
-///
-/// This generator buffers one item (so it knows which item is the last item)
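-///
-/// Example (a minimal sketch):
-///
-/// \code
-/// auto enumerated = MakeEnumeratedGenerator(
-///     MakeVectorGenerator(std::vector<int>{7, 8}));
-/// // Yields Enumerated<int>{7, 0, false} then Enumerated<int>{8, 1, true}
-/// \endcode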
-template <typename T>
-AsyncGenerator<Enumerated<T>> MakeEnumeratedGenerator(AsyncGenerator<T> source) {
- return FutureFirstGenerator<Enumerated<T>>(
- source().Then([source](const T& initial_value) -> AsyncGenerator<Enumerated<T>> {
- return EnumeratingGenerator<T>(std::move(source), initial_value);
- }));
-}
-
-/// \see MakeTransferredGenerator
-template <typename T>
-class TransferringGenerator {
- public:
- explicit TransferringGenerator(AsyncGenerator<T> source, internal::Executor* executor)
- : source_(std::move(source)), executor_(executor) {}
-
- Future<T> operator()() { return executor_->Transfer(source_()); }
-
- private:
- AsyncGenerator<T> source_;
- internal::Executor* executor_;
-};
-
-/// \brief Transfers the futures produced by a generator to an underlying executor.
-///
-/// Continuations run on the returned generator's futures will be run on the given
-/// executor if they cannot be run synchronously.
-///
-/// This is often needed to move computation off I/O threads or other external
-/// completion sources and back on to the CPU executor so the I/O thread can
-/// stay busy and focused on I/O
-///
-/// Keep in mind that continuations called on an already completed future will
-/// always be run synchronously and so no transfer will happen in that case.
-///
-/// This generator is async reentrant if the source is
-///
-/// This generator will not queue
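-///
-/// Example (a minimal sketch; `io_gen` is a placeholder for a generator whose
-/// futures complete on an I/O thread):
-///
-/// \code
-/// AsyncGenerator<int> io_gen = MakeVectorGenerator(std::vector<int>{1, 2, 3});
-/// auto cpu_gen = MakeTransferredGenerator(std::move(io_gen),
-///                                         ::arrow::internal::GetCpuThreadPool());
-/// \endcode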
-template <typename T>
-AsyncGenerator<T> MakeTransferredGenerator(AsyncGenerator<T> source,
- internal::Executor* executor) {
- return TransferringGenerator<T>(std::move(source), executor);
-}
-
-/// \see MakeBackgroundGenerator
-template <typename T>
-class BackgroundGenerator {
- public:
- explicit BackgroundGenerator(Iterator<T> it, internal::Executor* io_executor, int max_q,
- int q_restart)
- : state_(std::make_shared<State>(io_executor, std::move(it), max_q, q_restart)),
- cleanup_(std::make_shared<Cleanup>(state_.get())) {}
-
- Future<T> operator()() {
- auto guard = state_->mutex.Lock();
- Future<T> waiting_future;
- if (state_->queue.empty()) {
- if (state_->finished) {
- return AsyncGeneratorEnd<T>();
- } else {
- waiting_future = Future<T>::Make();
- state_->waiting_future = waiting_future;
- }
- } else {
- auto next = Future<T>::MakeFinished(std::move(state_->queue.front()));
- state_->queue.pop();
- if (state_->NeedsRestart()) {
- return state_->RestartTask(state_, std::move(guard), std::move(next));
- }
- return next;
- }
- // This should only trigger the very first time this method is called
- if (state_->NeedsRestart()) {
- return state_->RestartTask(state_, std::move(guard), std::move(waiting_future));
- }
- return waiting_future;
- }
-
- protected:
- static constexpr uint64_t kUnlikelyThreadId{std::numeric_limits<uint64_t>::max()};
-
- struct State {
- State(internal::Executor* io_executor, Iterator<T> it, int max_q, int q_restart)
- : io_executor(io_executor),
- max_q(max_q),
- q_restart(q_restart),
- it(std::move(it)),
- reading(false),
- finished(false),
- should_shutdown(false) {}
-
- void ClearQueue() {
- while (!queue.empty()) {
- queue.pop();
- }
- }
-
- bool TaskIsRunning() const { return task_finished.is_valid(); }
-
- bool NeedsRestart() const {
- return !finished && !reading && static_cast<int>(queue.size()) <= q_restart;
- }
-
- void DoRestartTask(std::shared_ptr<State> state, util::Mutex::Guard guard) {
- // If we get here we are actually going to start a new task so let's create a
- // task_finished future for it
- state->task_finished = Future<>::Make();
- state->reading = true;
- auto spawn_status = io_executor->Spawn(
- [state]() { BackgroundGenerator::WorkerTask(std::move(state)); });
- if (!spawn_status.ok()) {
- // If we can't spawn a new task then send an error to the consumer (either via a
- // waiting future or the queue) and mark ourselves finished
- state->finished = true;
- state->task_finished = Future<>();
- if (waiting_future.has_value()) {
- auto to_deliver = std::move(waiting_future.value());
- waiting_future.reset();
- guard.Unlock();
- to_deliver.MarkFinished(spawn_status);
- } else {
- ClearQueue();
- queue.push(spawn_status);
- }
- }
- }
-
- Future<T> RestartTask(std::shared_ptr<State> state, util::Mutex::Guard guard,
- Future<T> next) {
- if (TaskIsRunning()) {
- // If the task is still cleaning up we need to wait for it to finish before
- // restarting. We also want to block the consumer until we've restarted the
- // reader to avoid multiple restarts
- return task_finished.Then([state, next]() {
- // This may appear dangerous (recursive mutex) but we should be guaranteed the
- // outer guard has been released by this point. We know...
- // * task_finished is not already finished (it would be invalid in that case)
- // * task_finished will not be marked complete until we've given up the mutex
- auto guard_ = state->mutex.Lock();
- state->DoRestartTask(state, std::move(guard_));
- return next;
- });
- }
- // Otherwise we can restart immediately
- DoRestartTask(std::move(state), std::move(guard));
- return next;
- }
-
- internal::Executor* io_executor;
- const int max_q;
- const int q_restart;
- Iterator<T> it;
- std::atomic<uint64_t> worker_thread_id{kUnlikelyThreadId};
-
- // If true, the task is actively pumping items from the queue and does not need a
- // restart
- bool reading;
- // Set to true when a terminal item arrives
- bool finished;
- // Signal to the background task to end early because consumers have given up on it
- bool should_shutdown;
- // If the queue is empty, the consumer will create a waiting future and wait for it
- std::queue<Result<T>> queue;
- util::optional<Future<T>> waiting_future;
- // Every background task is given a future to complete when it is entirely finished
- // processing and ready for the next task to start or for State to be destroyed
- Future<> task_finished;
- util::Mutex mutex;
- };
-
- // Cleanup task that will be run when all consumer references to the generator are lost
- struct Cleanup {
- explicit Cleanup(State* state) : state(state) {}
- ~Cleanup() {
- /// TODO: Once ARROW-13109 is available then we can force consumers to spawn and
- /// there will be no need to perform this check.
- ///
- /// It's a deadlock if we enter cleanup from the worker thread, but it can happen
- /// if the consumer doesn't transfer away
- assert(state->worker_thread_id.load() != ::arrow::internal::GetThreadId());
- Future<> finish_fut;
- {
- auto lock = state->mutex.Lock();
- if (!state->TaskIsRunning()) {
- return;
- }
- // Signal the current task to stop and wait for it to finish
- state->should_shutdown = true;
- finish_fut = state->task_finished;
- }
- // Using future as a condition variable here
- Status st = finish_fut.status();
- ARROW_UNUSED(st);
- }
- State* state;
- };
-
- static void WorkerTask(std::shared_ptr<State> state) {
- state->worker_thread_id.store(::arrow::internal::GetThreadId());
- // We need to capture the state to read while outside the mutex
- bool reading = true;
- while (reading) {
- auto next = state->it.Next();
- // Need to capture state->waiting_future inside the mutex to mark finished outside
- Future<T> waiting_future;
- {
- auto guard = state->mutex.Lock();
-
- if (state->should_shutdown) {
- state->finished = true;
- break;
- }
-
- if (!next.ok() || IsIterationEnd<T>(*next)) {
- // Terminal item. Mark finished to true, send this last item, and quit
- state->finished = true;
- if (!next.ok()) {
- state->ClearQueue();
- }
- }
- // At this point we are going to send an item. Either we will add it to the
- // queue or deliver it to a waiting future.
- if (state->waiting_future.has_value()) {
- waiting_future = std::move(state->waiting_future.value());
- state->waiting_future.reset();
- } else {
- state->queue.push(std::move(next));
- // We just filled up the queue so it is time to quit. We may need to notify
- // a cleanup task so we transition to Quitting
- if (static_cast<int>(state->queue.size()) >= state->max_q) {
- state->reading = false;
- }
- }
- reading = state->reading && !state->finished;
- }
- // This should happen outside the mutex. Presumably there is a
- // transferring generator on the other end that will quickly transfer any
- // callbacks off of this thread so we can continue looping. Still, best not to
- // rely on that
- if (waiting_future.is_valid()) {
- waiting_future.MarkFinished(next);
- }
- }
- // Once we've sent our last item we can notify any waiters that we are done and so
- // either state can be cleaned up or a new background task can be started
- Future<> task_finished;
- {
- auto guard = state->mutex.Lock();
- // After we give up the mutex, state can be safely deleted. We will no longer
- // reference it. We can safely transition to idle now.
- task_finished = state->task_finished;
- state->task_finished = Future<>();
- state->worker_thread_id.store(kUnlikelyThreadId);
- }
- task_finished.MarkFinished();
- }
-
- std::shared_ptr<State> state_;
- // state_ is held by both the generator and the background thread so it won't be cleaned
- // up when all consumer references are relinquished. cleanup_ is only held by the
- // generator so it will be destructed when the last consumer reference is gone. We use
-// this to clean up / stop the background generator in case the consuming end stops
- // listening (e.g. due to a downstream error)
- std::shared_ptr<Cleanup> cleanup_;
-};
-
-constexpr int kDefaultBackgroundMaxQ = 32;
-constexpr int kDefaultBackgroundQRestart = 16;
-
-/// \brief Creates an AsyncGenerator<T> by iterating over an Iterator<T> on a background
-/// thread
-///
-/// The parameter max_q and q_restart control queue size and background thread task
-/// management. If the background task is fast you typically don't want it creating a
-/// thread task for every item. Instead the background thread will run until it fills
-/// up a readahead queue.
-///
-/// Once the queue has filled up the background thread task will terminate (allowing other
-/// I/O tasks to use the thread). Once the queue has been drained enough (specified by
-/// q_restart) then the background thread task will be restarted. If q_restart is too low
-/// then you may exhaust the queue waiting for the background thread task to start running
-/// again. If it is too high then the background thread task will be constantly
-/// stopping and restarting.
-///
-/// The "background thread" is a logical thread and will run as tasks on the io_executor.
-/// This thread may stop and start when the queue fills up but there will only be one
-/// active background thread task at any given time. You MUST transfer away from this
-/// background generator. Otherwise there could be a race condition if a callback on the
-/// background thread deletes the last consumer reference to the background generator. You
-/// can transfer onto the same executor as the background thread; it is only necessary to
-/// create a new thread task, not to switch executors.
-///
-/// This generator is not async-reentrant
-///
-/// This generator will queue up to max_q blocks
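-///
-/// Example (a minimal sketch; `io_executor` is a placeholder I/O executor, and
-/// the result is immediately transferred back to the CPU pool as required above):
-///
-/// \code
-/// Iterator<int> it = MakeVectorIterator(std::vector<int>{1, 2, 3});
-/// ARROW_ASSIGN_OR_RAISE(auto bg_gen,
-///                       MakeBackgroundGenerator(std::move(it), io_executor));
-/// auto gen = MakeTransferredGenerator(std::move(bg_gen),
-///                                     ::arrow::internal::GetCpuThreadPool());
-/// \endcode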
-template <typename T>
-static Result<AsyncGenerator<T>> MakeBackgroundGenerator(
- Iterator<T> iterator, internal::Executor* io_executor,
- int max_q = kDefaultBackgroundMaxQ, int q_restart = kDefaultBackgroundQRestart) {
- if (max_q < q_restart) {
- return Status::Invalid("max_q must be >= q_restart");
- }
- return BackgroundGenerator<T>(std::move(iterator), io_executor, max_q, q_restart);
-}
-
-/// \see MakeGeneratorIterator
-template <typename T>
-class GeneratorIterator {
- public:
- explicit GeneratorIterator(AsyncGenerator<T> source) : source_(std::move(source)) {}
-
- Result<T> Next() { return source_().result(); }
-
- private:
- AsyncGenerator<T> source_;
-};
-
-/// \brief Converts an AsyncGenerator<T> to an Iterator<T> by blocking until each future
-/// is finished
-template <typename T>
-Iterator<T> MakeGeneratorIterator(AsyncGenerator<T> source) {
- return Iterator<T>(GeneratorIterator<T>(std::move(source)));
-}
-
-/// \brief Adds readahead to an iterator using a background thread.
-///
-/// Under the hood this is converting the iterator to a generator using
-/// MakeBackgroundGenerator, adding readahead to the converted generator with
-/// MakeReadaheadGenerator, and then converting back to an iterator using
-/// MakeGeneratorIterator.
-template <typename T>
-Result<Iterator<T>> MakeReadaheadIterator(Iterator<T> it, int readahead_queue_size) {
- ARROW_ASSIGN_OR_RAISE(auto io_executor, internal::ThreadPool::Make(1));
- auto max_q = readahead_queue_size;
- auto q_restart = std::max(1, max_q / 2);
- ARROW_ASSIGN_OR_RAISE(
- auto background_generator,
- MakeBackgroundGenerator(std::move(it), io_executor.get(), max_q, q_restart));
- // Capture io_executor to keep it alive as long as owned_bg_generator is still
- // referenced
- AsyncGenerator<T> owned_bg_generator = [io_executor, background_generator]() {
- return background_generator();
- };
- return MakeGeneratorIterator(std::move(owned_bg_generator));
-}
-
-/// \brief Make a generator that returns a single pre-generated future
-///
-/// This generator is async-reentrant.
-template <typename T>
-std::function<Future<T>()> MakeSingleFutureGenerator(Future<T> future) {
- assert(future.is_valid());
- auto state = std::make_shared<Future<T>>(std::move(future));
- return [state]() -> Future<T> {
- auto fut = std::move(*state);
- if (fut.is_valid()) {
- return fut;
- } else {
- return AsyncGeneratorEnd<T>();
- }
- };
-}
-
-/// \brief Make a generator that immediately ends.
-///
-/// This generator is async-reentrant.
-template <typename T>
-std::function<Future<T>()> MakeEmptyGenerator() {
- return []() -> Future<T> { return AsyncGeneratorEnd<T>(); };
-}
-
-/// \brief Make a generator that always fails with a given error
-///
-/// This generator is async-reentrant.
-template <typename T>
-AsyncGenerator<T> MakeFailingGenerator(Status st) {
- assert(!st.ok());
- auto state = std::make_shared<Status>(std::move(st));
- return [state]() -> Future<T> {
- auto st = std::move(*state);
- if (!st.ok()) {
- return std::move(st);
- } else {
- return AsyncGeneratorEnd<T>();
- }
- };
-}
-
-/// \brief Make a generator that always fails with a given error
-///
-/// This overload allows inferring the return type from the argument.
-template <typename T>
-AsyncGenerator<T> MakeFailingGenerator(const Result<T>& result) {
- return MakeFailingGenerator<T>(result.status());
-}
-
-/// \brief Prepends initial_values onto a generator
-///
-/// This generator is async-reentrant but will buffer requests and will not
-/// pull from following_values async-reentrantly.
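-///
-/// Example (a minimal sketch; yields 1, 2, 3, 4):
-///
-/// \code
-/// auto gen = MakeGeneratorStartsWith<int>(
-///     {1, 2}, MakeVectorGenerator(std::vector<int>{3, 4}));
-/// \endcode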
-template <typename T>
-AsyncGenerator<T> MakeGeneratorStartsWith(std::vector<T> initial_values,
- AsyncGenerator<T> following_values) {
- auto initial_values_vec_gen = MakeVectorGenerator(std::move(initial_values));
- auto gen_gen = MakeVectorGenerator<AsyncGenerator<T>>(
- {std::move(initial_values_vec_gen), std::move(following_values)});
- return MakeConcatenatedGenerator(std::move(gen_gen));
-}
-
-template <typename T>
-struct CancellableGenerator {
- Future<T> operator()() {
- if (stop_token.IsStopRequested()) {
- return stop_token.Poll();
- }
- return source();
- }
-
- AsyncGenerator<T> source;
- StopToken stop_token;
-};
-
-/// \brief Allows an async generator to be cancelled
-///
-/// This generator is async-reentrant
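-///
-/// Example (a minimal sketch; `source` is a placeholder generator and StopSource
-/// comes from arrow/util/cancel.h):
-///
-/// \code
-/// StopSource stop_source;
-/// AsyncGenerator<int> source = MakeVectorGenerator(std::vector<int>{1, 2, 3});
-/// auto gen = MakeCancellable(std::move(source), stop_source.token());
-/// // Later, possibly from another thread:
-/// stop_source.RequestStop();
-/// \endcode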
-template <typename T>
-AsyncGenerator<T> MakeCancellable(AsyncGenerator<T> source, StopToken stop_token) {
- return CancellableGenerator<T>{std::move(source), std::move(stop_token)};
-}
-
-} // namespace arrow
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+
+#include <atomic>
+#include <cassert>
+#include <cstring>
+#include <deque>
+#include <limits>
+#include <queue>
+
+#include "arrow/util/functional.h"
+#include "arrow/util/future.h"
+#include "arrow/util/io_util.h"
+#include "arrow/util/iterator.h"
+#include "arrow/util/mutex.h"
+#include "arrow/util/optional.h"
+#include "arrow/util/queue.h"
+#include "arrow/util/thread_pool.h"
+
+namespace arrow {
+
+// The methods in this file create, modify, and utilize AsyncGenerator which is an
+// iterator of futures. This allows an asynchronous source (like file input) to be run
+// through a pipeline in the same way that iterators can be used to create pipelined
+// workflows.
+//
+// In order to support pipeline parallelism we introduce the concept of asynchronous
+ // reentrancy. This is different from synchronous reentrancy. With synchronous code a
+// function is reentrant if the function can be called again while a previous call to that
+// function is still running. Unless otherwise specified none of these generators are
+// synchronously reentrant. Care should be taken to avoid calling them in such a way (and
+// the utilities Visit/Collect/Await take care to do this).
+//
+// Asynchronous reentrancy on the other hand means the function is called again before the
+// future returned by the function is marked finished (but after the call to get the
+// future returns). Some of these generators are async-reentrant while others (e.g.
+// those that depend on ordered processing like decompression) are not. Read the MakeXYZ
+// function comments to determine which generators support async reentrancy.
+//
+// Note: Generators that are not asynchronously reentrant can still support readahead
+// (\see MakeSerialReadaheadGenerator).
+//
+// Readahead operators, and some other operators, may introduce queueing. Any operators
+// that introduce buffering should detail the amount of buffering they introduce in their
+// MakeXYZ function comments.
+template <typename T>
+using AsyncGenerator = std::function<Future<T>()>;
+
+template <typename T>
+struct IterationTraits<AsyncGenerator<T>> {
+ /// \brief By default, when iterating through a sequence of AsyncGenerator<T>,
+ /// an empty function indicates the end of iteration.
+ static AsyncGenerator<T> End() { return AsyncGenerator<T>(); }
+
+ static bool IsEnd(const AsyncGenerator<T>& val) { return !val; }
+};
+
+template <typename T>
+Future<T> AsyncGeneratorEnd() {
+ return Future<T>::MakeFinished(IterationTraits<T>::End());
+}
+
+/// \brief Applies a visitor function to each item of an async generator,
+/// returning a future that completes when all have been visited
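+///
+/// Example (a minimal sketch; relies on the default IterationTraits<int>, whose
+/// end token is 0, so only non-zero values are yielded):
+///
+/// \code
+/// AsyncGenerator<int> gen = MakeVectorGenerator(std::vector<int>{1, 2, 3});
+/// Future<> visited = VisitAsyncGenerator<int>(std::move(gen), [](const int& v) {
+///   return v > 0 ? Status::OK() : Status::Invalid("unexpected value");
+/// });
+/// visited.Wait();  // blocks until every item has been visited or an error occurs
+/// \endcode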
+template <typename T, typename Visitor>
+Future<> VisitAsyncGenerator(AsyncGenerator<T> generator, Visitor visitor) {
+ struct LoopBody {
+ struct Callback {
+ Result<ControlFlow<>> operator()(const T& next) {
+ if (IsIterationEnd(next)) {
+ return Break();
+ } else {
+ auto visited = visitor(next);
+ if (visited.ok()) {
+ return Continue();
+ } else {
+ return visited;
+ }
+ }
+ }
+
+ Visitor visitor;
+ };
+
+ Future<ControlFlow<>> operator()() {
+ Callback callback{visitor};
+ auto next = generator();
+ return next.Then(std::move(callback));
+ }
+
+ AsyncGenerator<T> generator;
+ Visitor visitor;
+ };
+
+ return Loop(LoopBody{std::move(generator), std::move(visitor)});
+}
+
+/// \brief Waits for an async generator to complete, discarding results.
+template <typename T>
+Future<> DiscardAllFromAsyncGenerator(AsyncGenerator<T> generator) {
+ std::function<Status(T)> visitor = [](const T&) { return Status::OK(); };
+ return VisitAsyncGenerator(generator, visitor);
+}
+
+/// \brief Collects the results of an async generator into a vector
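+///
+/// Example (a minimal sketch; with the default IterationTraits<int> the value 0
+/// acts as the end token, so only non-zero values are safe to yield):
+///
+/// \code
+/// auto gen = MakeVectorGenerator(std::vector<int>{1, 2, 3});
+/// Future<std::vector<int>> collected = CollectAsyncGenerator(std::move(gen));
+/// // collected.result() eventually holds the vector {1, 2, 3}
+/// \endcode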
+template <typename T>
+Future<std::vector<T>> CollectAsyncGenerator(AsyncGenerator<T> generator) {
+ auto vec = std::make_shared<std::vector<T>>();
+ struct LoopBody {
+ Future<ControlFlow<std::vector<T>>> operator()() {
+ auto next = generator_();
+ auto vec = vec_;
+ return next.Then([vec](const T& result) -> Result<ControlFlow<std::vector<T>>> {
+ if (IsIterationEnd(result)) {
+ return Break(*vec);
+ } else {
+ vec->push_back(result);
+ return Continue();
+ }
+ });
+ }
+ AsyncGenerator<T> generator_;
+ std::shared_ptr<std::vector<T>> vec_;
+ };
+ return Loop(LoopBody{std::move(generator), std::move(vec)});
+}
+
+/// \see MakeMappedGenerator
+template <typename T, typename V>
+class MappingGenerator {
+ public:
+ MappingGenerator(AsyncGenerator<T> source, std::function<Future<V>(const T&)> map)
+ : state_(std::make_shared<State>(std::move(source), std::move(map))) {}
+
+ Future<V> operator()() {
+ auto future = Future<V>::Make();
+ bool should_trigger;
+ {
+ auto guard = state_->mutex.Lock();
+ if (state_->finished) {
+ return AsyncGeneratorEnd<V>();
+ }
+ should_trigger = state_->waiting_jobs.empty();
+ state_->waiting_jobs.push_back(future);
+ }
+ if (should_trigger) {
+ state_->source().AddCallback(Callback{state_});
+ }
+ return future;
+ }
+
+ private:
+ struct State {
+ State(AsyncGenerator<T> source, std::function<Future<V>(const T&)> map)
+ : source(std::move(source)),
+ map(std::move(map)),
+ waiting_jobs(),
+ mutex(),
+ finished(false) {}
+
+ void Purge() {
+ // This might be called by an original callback (if the source iterator fails or
+ // ends) or by a mapped callback (if the map function fails or ends prematurely).
+ // Either way it should only be called once and after finished is set so there is no
+ // need to guard access to `waiting_jobs`.
+ while (!waiting_jobs.empty()) {
+ waiting_jobs.front().MarkFinished(IterationTraits<V>::End());
+ waiting_jobs.pop_front();
+ }
+ }
+
+ AsyncGenerator<T> source;
+ std::function<Future<V>(const T&)> map;
+ std::deque<Future<V>> waiting_jobs;
+ util::Mutex mutex;
+ bool finished;
+ };
+
+ struct Callback;
+
+ struct MappedCallback {
+ void operator()(const Result<V>& maybe_next) {
+ bool end = !maybe_next.ok() || IsIterationEnd(*maybe_next);
+ bool should_purge = false;
+ if (end) {
+ {
+ auto guard = state->mutex.Lock();
+ should_purge = !state->finished;
+ state->finished = true;
+ }
+ }
+ sink.MarkFinished(maybe_next);
+ if (should_purge) {
+ state->Purge();
+ }
+ }
+ std::shared_ptr<State> state;
+ Future<V> sink;
+ };
+
+ struct Callback {
+ void operator()(const Result<T>& maybe_next) {
+ Future<V> sink;
+ bool end = !maybe_next.ok() || IsIterationEnd(*maybe_next);
+ bool should_purge = false;
+ bool should_trigger;
+ {
+ auto guard = state->mutex.Lock();
+ if (end) {
+ should_purge = !state->finished;
+ state->finished = true;
+ }
+ sink = state->waiting_jobs.front();
+ state->waiting_jobs.pop_front();
+ should_trigger = !end && !state->waiting_jobs.empty();
+ }
+ if (should_purge) {
+ state->Purge();
+ }
+ if (should_trigger) {
+ state->source().AddCallback(Callback{state});
+ }
+ if (maybe_next.ok()) {
+ const T& val = maybe_next.ValueUnsafe();
+ if (IsIterationEnd(val)) {
+ sink.MarkFinished(IterationTraits<V>::End());
+ } else {
+ Future<V> mapped_fut = state->map(val);
+ mapped_fut.AddCallback(MappedCallback{std::move(state), std::move(sink)});
+ }
+ } else {
+ sink.MarkFinished(maybe_next.status());
+ }
+ }
+
+ std::shared_ptr<State> state;
+ };
+
+ std::shared_ptr<State> state_;
+};
+
+/// \brief Creates a generator that will apply the map function to each element of
+/// source. The map function is not called on the end token.
+///
+/// Note: This function makes a copy of `map` for each item
+/// Note: Errors returned from the `map` function will be propagated
+///
+/// If the source generator is async-reentrant then this generator will be as well
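+///
+/// Example (a minimal sketch mapping ints to their string representation; the
+/// map function may also return Result<V> or Future<V>):
+///
+/// \code
+/// auto source = MakeVectorGenerator(std::vector<int>{1, 2, 3});
+/// AsyncGenerator<std::string> mapped = MakeMappedGenerator(
+///     std::move(source), [](const int& v) { return std::to_string(v); });
+/// \endcode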
+template <typename T, typename MapFn,
+ typename Mapped = detail::result_of_t<MapFn(const T&)>,
+ typename V = typename EnsureFuture<Mapped>::type::ValueType>
+AsyncGenerator<V> MakeMappedGenerator(AsyncGenerator<T> source_generator, MapFn map) {
+ struct MapCallback {
+ MapFn map_;
+
+ Future<V> operator()(const T& val) { return ToFuture(map_(val)); }
+ };
+
+ return MappingGenerator<T, V>(std::move(source_generator), MapCallback{std::move(map)});
+}
+
+/// \see MakeSequencingGenerator
+template <typename T, typename ComesAfter, typename IsNext>
+class SequencingGenerator {
+ public:
+ SequencingGenerator(AsyncGenerator<T> source, ComesAfter compare, IsNext is_next,
+ T initial_value)
+ : state_(std::make_shared<State>(std::move(source), std::move(compare),
+ std::move(is_next), std::move(initial_value))) {}
+
+ Future<T> operator()() {
+ {
+ auto guard = state_->mutex.Lock();
+ // We can send a result immediately if the top of the queue is either an
+ // error or the next item
+ if (!state_->queue.empty() &&
+ (!state_->queue.top().ok() ||
+ state_->is_next(state_->previous_value, *state_->queue.top()))) {
+ auto result = std::move(state_->queue.top());
+ if (result.ok()) {
+ state_->previous_value = *result;
+ }
+ state_->queue.pop();
+ return Future<T>::MakeFinished(result);
+ }
+ if (state_->finished) {
+ return AsyncGeneratorEnd<T>();
+ }
+ // The next item is not in the queue so we will need to wait
+ auto new_waiting_fut = Future<T>::Make();
+ state_->waiting_future = new_waiting_fut;
+ guard.Unlock();
+ state_->source().AddCallback(Callback{state_});
+ return new_waiting_fut;
+ }
+ }
+
+ private:
+ struct WrappedComesAfter {
+ bool operator()(const Result<T>& left, const Result<T>& right) {
+ if (!left.ok() || !right.ok()) {
+ // Should never happen
+ return false;
+ }
+ return compare(*left, *right);
+ }
+ ComesAfter compare;
+ };
+
+ struct State {
+ State(AsyncGenerator<T> source, ComesAfter compare, IsNext is_next, T initial_value)
+ : source(std::move(source)),
+ is_next(std::move(is_next)),
+ previous_value(std::move(initial_value)),
+ waiting_future(),
+ queue(WrappedComesAfter{compare}),
+ finished(false),
+ mutex() {}
+
+ AsyncGenerator<T> source;
+ IsNext is_next;
+ T previous_value;
+ Future<T> waiting_future;
+ std::priority_queue<Result<T>, std::vector<Result<T>>, WrappedComesAfter> queue;
+ bool finished;
+ util::Mutex mutex;
+ };
+
+ class Callback {
+ public:
+ explicit Callback(std::shared_ptr<State> state) : state_(std::move(state)) {}
+
+ void operator()(const Result<T> result) {
+ Future<T> to_deliver;
+ bool finished;
+ {
+ auto guard = state_->mutex.Lock();
+ bool ready_to_deliver = false;
+ if (!result.ok()) {
+ // Clear any cached results
+ while (!state_->queue.empty()) {
+ state_->queue.pop();
+ }
+ ready_to_deliver = true;
+ state_->finished = true;
+ } else if (IsIterationEnd<T>(result.ValueUnsafe())) {
+ ready_to_deliver = state_->queue.empty();
+ state_->finished = true;
+ } else {
+ ready_to_deliver = state_->is_next(state_->previous_value, *result);
+ }
+
+ if (ready_to_deliver && state_->waiting_future.is_valid()) {
+ to_deliver = state_->waiting_future;
+ if (result.ok()) {
+ state_->previous_value = *result;
+ }
+ } else {
+ state_->queue.push(result);
+ }
+ // Capture state_->finished so we can access it outside the mutex
+ finished = state_->finished;
+ }
+ // Must deliver result outside of the mutex
+ if (to_deliver.is_valid()) {
+ to_deliver.MarkFinished(result);
+ } else {
+ // Otherwise, if we didn't get the next item (or a terminal item), we
+ // need to keep looking
+ if (!finished) {
+ state_->source().AddCallback(Callback{state_});
+ }
+ }
+ }
+
+ private:
+ const std::shared_ptr<State> state_;
+ };
+
+ const std::shared_ptr<State> state_;
+};
+
+/// \brief Buffers an AsyncGenerator to return values in sequence order. ComesAfter
+/// and IsNext determine the sequence order.
+///
+/// ComesAfter should be a BinaryPredicate that only returns true if `a` comes after `b`
+///
+/// IsNext should be a BinaryPredicate that returns true, given `a` and `b`, only if
+/// `b` follows immediately after `a`. It should return true given `initial_value` and
+/// `b` if `b` is the first item in the sequence.
+///
+/// This operator will queue unboundedly while waiting for the next item. It is intended
+/// for jittery sources that might scatter an ordered sequence. It is NOT intended to
+/// sort. Using it to try to sort could result in excessive RAM usage. This generator
+/// will queue up to N blocks where N is the max "out of order"ness of the source.
+///
+/// For example, if the source is 1,6,2,5,4,3 it will queue 3 blocks because 3 is 3
+/// blocks beyond where it belongs.
+///
+/// This generator is not async-reentrant but it consists only of a simple log(n)
+/// insertion into a priority queue.
+template <typename T, typename ComesAfter, typename IsNext>
+AsyncGenerator<T> MakeSequencingGenerator(AsyncGenerator<T> source_generator,
+ ComesAfter compare, IsNext is_next,
+ T initial_value) {
+ return SequencingGenerator<T, ComesAfter, IsNext>(
+ std::move(source_generator), std::move(compare), std::move(is_next),
+ std::move(initial_value));
+}
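A minimal sketch, assuming the same includes as the earlier sketch; it re-sequences the out-of-order stream 2,1,3 into 1,2,3:

    arrow::AsyncGenerator<int> jittery =
        arrow::MakeVectorGenerator(std::vector<int>{2, 1, 3});
    auto sequenced = arrow::MakeSequencingGenerator(
        std::move(jittery),
        /*compare=*/[](const int& a, const int& b) { return a > b; },  // a comes after b
        /*is_next=*/[](const int& a, const int& b) { return b == a + 1; },
        /*initial_value=*/0);
    assert(sequenced().result().ValueOrDie() == 1);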
+
+/// \see MakeTransformedGenerator
+template <typename T, typename V>
+class TransformingGenerator {
+ // The transforming generator state will be referenced as an async generator but will
+ // also be referenced via callback to various futures. If the async generator owner
+ // moves it around we need the state to be consistent for future callbacks.
+ struct TransformingGeneratorState
+ : std::enable_shared_from_this<TransformingGeneratorState> {
+ TransformingGeneratorState(AsyncGenerator<T> generator, Transformer<T, V> transformer)
+ : generator_(std::move(generator)),
+ transformer_(std::move(transformer)),
+ last_value_(),
+ finished_() {}
+
+ Future<V> operator()() {
+ while (true) {
+ auto maybe_next_result = Pump();
+ if (!maybe_next_result.ok()) {
+ return Future<V>::MakeFinished(maybe_next_result.status());
+ }
+ auto maybe_next = std::move(maybe_next_result).ValueUnsafe();
+ if (maybe_next.has_value()) {
+ return Future<V>::MakeFinished(*std::move(maybe_next));
+ }
+
+ auto next_fut = generator_();
+ // If finished already, process results immediately inside the loop to avoid
+ // stack overflow
+ if (next_fut.is_finished()) {
+ auto next_result = next_fut.result();
+ if (next_result.ok()) {
+ last_value_ = *next_result;
+ } else {
+ return Future<V>::MakeFinished(next_result.status());
+ }
+ // Otherwise, if not finished immediately, add callback to process results
+ } else {
+ auto self = this->shared_from_this();
+ return next_fut.Then([self](const T& next_result) {
+ self->last_value_ = next_result;
+ return (*self)();
+ });
+ }
+ }
+ }
+
+ // See comment on TransformingIterator::Pump
+ Result<util::optional<V>> Pump() {
+ if (!finished_ && last_value_.has_value()) {
+ ARROW_ASSIGN_OR_RAISE(TransformFlow<V> next, transformer_(*last_value_));
+ if (next.ReadyForNext()) {
+ if (IsIterationEnd(*last_value_)) {
+ finished_ = true;
+ }
+ last_value_.reset();
+ }
+ if (next.Finished()) {
+ finished_ = true;
+ }
+ if (next.HasValue()) {
+ return next.Value();
+ }
+ }
+ if (finished_) {
+ return IterationTraits<V>::End();
+ }
+ return util::nullopt;
+ }
+
+ AsyncGenerator<T> generator_;
+ Transformer<T, V> transformer_;
+ util::optional<T> last_value_;
+ bool finished_;
+ };
+
+ public:
+ explicit TransformingGenerator(AsyncGenerator<T> generator,
+ Transformer<T, V> transformer)
+ : state_(std::make_shared<TransformingGeneratorState>(std::move(generator),
+ std::move(transformer))) {}
+
+ Future<V> operator()() { return (*state_)(); }
+
+ protected:
+ std::shared_ptr<TransformingGeneratorState> state_;
+};
+
+/// \brief Transforms an async generator using a transformer function returning a new
+/// AsyncGenerator
+///
+/// The transform function here behaves exactly the same as the transform function in
+/// MakeTransformedIterator and you can safely use the same transform function to
+/// transform both synchronous and asynchronous streams.
+///
+/// This generator is not async-reentrant
+///
+/// This generator may queue up to 1 instance of T but will not delay
+template <typename T, typename V>
+AsyncGenerator<V> MakeTransformedGenerator(AsyncGenerator<T> generator,
+ Transformer<T, V> transformer) {
+ return TransformingGenerator<T, V>(generator, transformer);
+}
+
+/// \see MakeSerialReadaheadGenerator
+template <typename T>
+class SerialReadaheadGenerator {
+ public:
+ SerialReadaheadGenerator(AsyncGenerator<T> source_generator, int max_readahead)
+ : state_(std::make_shared<State>(std::move(source_generator), max_readahead)) {}
+
+ Future<T> operator()() {
+ if (state_->first_) {
+ // Lazy generator, need to wait for the first ask to prime the pump
+ state_->first_ = false;
+ auto next = state_->source_();
+ return next.Then(Callback{state_}, ErrCallback{state_});
+ }
+
+    // This generator is not async-reentrant. We won't be called until the last
+    // future has finished, so we know there is something in the queue
+ auto finished = state_->finished_.load();
+ if (finished && state_->readahead_queue_.IsEmpty()) {
+ return AsyncGeneratorEnd<T>();
+ }
+
+ std::shared_ptr<Future<T>> next;
+ if (!state_->readahead_queue_.Read(next)) {
+ return Status::UnknownError("Could not read from readahead_queue");
+ }
+
+ auto last_available = state_->spaces_available_.fetch_add(1);
+ if (last_available == 0 && !finished) {
+ // Reader idled out, we need to restart it
+ ARROW_RETURN_NOT_OK(state_->Pump(state_));
+ }
+ return *next;
+ }
+
+ private:
+ struct State {
+ State(AsyncGenerator<T> source, int max_readahead)
+ : first_(true),
+ source_(std::move(source)),
+ finished_(false),
+ // There is one extra "space" for the in-flight request
+ spaces_available_(max_readahead + 1),
+ // The SPSC queue has size-1 "usable" slots so we need to overallocate 1
+ readahead_queue_(max_readahead + 1) {}
+
+ Status Pump(const std::shared_ptr<State>& self) {
+      // Can't do readahead_queue.Write(source().Then(...)) because the callback
+      // might run immediately and add itself to the queue before this future gets
+      // added to the queue, messing up the order.
+ auto next_slot = std::make_shared<Future<T>>();
+ auto written = readahead_queue_.Write(next_slot);
+ if (!written) {
+ return Status::UnknownError("Could not write to readahead_queue");
+ }
+ // If this Pump is being called from a callback it is possible for the source to
+ // poll and read from the queue between the Write and this spot where we fill the
+      // value in. However, it is not possible for the consumer to read the value we
+      // are writing. That is because this callback (the callback for future X) must be
+ // finished before future X is marked complete and this source is not pulled
+ // reentrantly so it will not poll for future X+1 until this callback has completed.
+ *next_slot = source_().Then(Callback{self}, ErrCallback{self});
+ return Status::OK();
+ }
+
+ // Only accessed by the consumer end
+ bool first_;
+ // Accessed by both threads
+ AsyncGenerator<T> source_;
+ std::atomic<bool> finished_;
+    // The queue has a size but it is not atomic. We keep track of how many spaces are
+    // left in the queue here so we know if we've just written the last value (and need
+    // to stop reading ahead) or if we've just read from a full queue (and need to
+    // restart reading ahead)
+ std::atomic<uint32_t> spaces_available_;
+ // Needs to be a queue of shared_ptr and not Future because we set the value of the
+ // future after we add it to the queue
+ util::SpscQueue<std::shared_ptr<Future<T>>> readahead_queue_;
+ };
+
+ struct Callback {
+ Result<T> operator()(const T& next) {
+ if (IsIterationEnd(next)) {
+ state_->finished_.store(true);
+ return next;
+ }
+ auto last_available = state_->spaces_available_.fetch_sub(1);
+ if (last_available > 1) {
+ ARROW_RETURN_NOT_OK(state_->Pump(state_));
+ }
+ return next;
+ }
+
+ std::shared_ptr<State> state_;
+ };
+
+ struct ErrCallback {
+ Result<T> operator()(const Status& st) {
+ state_->finished_.store(true);
+ return st;
+ }
+
+ std::shared_ptr<State> state_;
+ };
+
+ std::shared_ptr<State> state_;
+};
+
+/// \see MakeFromFuture
+template <typename T>
+class FutureFirstGenerator {
+ public:
+ explicit FutureFirstGenerator(Future<AsyncGenerator<T>> future)
+ : state_(std::make_shared<State>(std::move(future))) {}
+
+ Future<T> operator()() {
+ if (state_->source_) {
+ return state_->source_();
+ } else {
+ auto state = state_;
+ return state_->future_.Then([state](const AsyncGenerator<T>& source) {
+ state->source_ = source;
+ return state->source_();
+ });
+ }
+ }
+
+ private:
+ struct State {
+ explicit State(Future<AsyncGenerator<T>> future) : future_(future), source_() {}
+
+ Future<AsyncGenerator<T>> future_;
+ AsyncGenerator<T> source_;
+ };
+
+ std::shared_ptr<State> state_;
+};
+
+/// \brief Transforms a Future<AsyncGenerator<T>> into an AsyncGenerator<T>
+/// that waits for the future to complete as part of the first item.
+///
+/// This generator is not async-reentrant (even if the generator yielded by future is)
+///
+/// This generator does not queue
+template <typename T>
+AsyncGenerator<T> MakeFromFuture(Future<AsyncGenerator<T>> future) {
+ return FutureFirstGenerator<T>(std::move(future));
+}
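A minimal sketch, assuming the same includes as above; the generator is usable before the future resolves, and the first pull waits on it:

    arrow::Future<arrow::AsyncGenerator<int>> gen_fut =
        arrow::Future<arrow::AsyncGenerator<int>>::MakeFinished(
            arrow::MakeVectorGenerator(std::vector<int>{1, 2, 3}));
    arrow::AsyncGenerator<int> gen = arrow::MakeFromFuture(std::move(gen_fut));
    assert(gen().result().ValueOrDie() == 1);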
+
+/// \brief Creates a generator that will pull from the source into a queue. Unlike
+/// MakeReadaheadGenerator this will not pull reentrantly from the source.
+///
+/// The source generator does not need to be async-reentrant
+///
+/// This generator is not async-reentrant (even if the source is)
+///
+/// This generator may queue up to max_readahead additional instances of T
+template <typename T>
+AsyncGenerator<T> MakeSerialReadaheadGenerator(AsyncGenerator<T> source_generator,
+ int max_readahead) {
+ return SerialReadaheadGenerator<T>(std::move(source_generator), max_readahead);
+}
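A minimal sketch, assuming the same includes as above. The source here is a trivial vector generator; in practice the readahead pays off with sources whose futures finish asynchronously:

    arrow::AsyncGenerator<int> source =
        arrow::MakeVectorGenerator(std::vector<int>{1, 2, 3, 4, 5});
    arrow::AsyncGenerator<int> buffered =
        arrow::MakeSerialReadaheadGenerator(std::move(source), /*max_readahead=*/4);
    assert(buffered().result().ValueOrDie() == 1);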
+
+/// \see MakeReadaheadGenerator
+template <typename T>
+class ReadaheadGenerator {
+ public:
+ ReadaheadGenerator(AsyncGenerator<T> source_generator, int max_readahead)
+ : state_(std::make_shared<State>(std::move(source_generator), max_readahead)) {}
+
+ Future<T> AddMarkFinishedContinuation(Future<T> fut) {
+ auto state = state_;
+ return fut.Then(
+ [state](const T& result) -> Result<T> {
+ state->MarkFinishedIfDone(result);
+ return result;
+ },
+ [state](const Status& err) -> Result<T> {
+ state->finished.store(true);
+ return err;
+ });
+ }
+
+ Future<T> operator()() {
+ if (state_->readahead_queue.empty()) {
+ // This is the first request, let's pump the underlying queue
+ for (int i = 0; i < state_->max_readahead; i++) {
+ auto next = state_->source_generator();
+ auto next_after_check = AddMarkFinishedContinuation(std::move(next));
+ state_->readahead_queue.push(std::move(next_after_check));
+ }
+ }
+ // Pop one and add one
+ auto result = state_->readahead_queue.front();
+ state_->readahead_queue.pop();
+ if (state_->finished.load()) {
+ state_->readahead_queue.push(AsyncGeneratorEnd<T>());
+ } else {
+ auto back_of_queue = state_->source_generator();
+ auto back_of_queue_after_check =
+ AddMarkFinishedContinuation(std::move(back_of_queue));
+ state_->readahead_queue.push(std::move(back_of_queue_after_check));
+ }
+ return result;
+ }
+
+ private:
+ struct State {
+ State(AsyncGenerator<T> source_generator, int max_readahead)
+ : source_generator(std::move(source_generator)), max_readahead(max_readahead) {
+ finished.store(false);
+ }
+
+ void MarkFinishedIfDone(const T& next_result) {
+ if (IsIterationEnd(next_result)) {
+ finished.store(true);
+ }
+ }
+
+ AsyncGenerator<T> source_generator;
+ int max_readahead;
+ std::atomic<bool> finished;
+ std::queue<Future<T>> readahead_queue;
+ };
+
+ std::shared_ptr<State> state_;
+};
+
+/// \brief A generator where the producer pushes items on a queue.
+///
+/// No back-pressure is applied, so this generator is mostly useful when
+/// producing the values is neither CPU- nor memory-expensive (e.g. fetching
+/// filesystem metadata).
+///
+/// This generator is not async-reentrant.
+template <typename T>
+class PushGenerator {
+ struct State {
+ util::Mutex mutex;
+ std::deque<Result<T>> result_q;
+ util::optional<Future<T>> consumer_fut;
+ bool finished = false;
+ };
+
+ public:
+ /// Producer API for PushGenerator
+ class Producer {
+ public:
+ explicit Producer(const std::shared_ptr<State>& state) : weak_state_(state) {}
+
+ /// \brief Push a value on the queue
+ ///
+ /// True is returned if the value was pushed, false if the generator is
+ /// already closed or destroyed. If the latter, it is recommended to stop
+ /// producing any further values.
+ bool Push(Result<T> result) {
+ auto state = weak_state_.lock();
+ if (!state) {
+ // Generator was destroyed
+ return false;
+ }
+ auto lock = state->mutex.Lock();
+ if (state->finished) {
+ // Closed early
+ return false;
+ }
+ if (state->consumer_fut.has_value()) {
+ auto fut = std::move(state->consumer_fut.value());
+ state->consumer_fut.reset();
+ lock.Unlock(); // unlock before potentially invoking a callback
+ fut.MarkFinished(std::move(result));
+ } else {
+ state->result_q.push_back(std::move(result));
+ }
+ return true;
+ }
+
+ /// \brief Tell the consumer we have finished producing
+ ///
+ /// It is allowed to call this and later call Push() again ("early close").
+ /// In this case, calls to Push() after the queue is closed are silently
+    /// ignored. This can help when implementing non-trivial cancellation cases.
+ ///
+ /// True is returned on success, false if the generator is already closed
+ /// or destroyed.
+ bool Close() {
+ auto state = weak_state_.lock();
+ if (!state) {
+ // Generator was destroyed
+ return false;
+ }
+ auto lock = state->mutex.Lock();
+ if (state->finished) {
+ // Already closed
+ return false;
+ }
+ state->finished = true;
+ if (state->consumer_fut.has_value()) {
+ auto fut = std::move(state->consumer_fut.value());
+ state->consumer_fut.reset();
+ lock.Unlock(); // unlock before potentially invoking a callback
+ fut.MarkFinished(IterationTraits<T>::End());
+ }
+ return true;
+ }
+
+ /// Return whether the generator was closed or destroyed.
+ bool is_closed() const {
+ auto state = weak_state_.lock();
+ if (!state) {
+ // Generator was destroyed
+ return true;
+ }
+ auto lock = state->mutex.Lock();
+ return state->finished;
+ }
+
+ private:
+ const std::weak_ptr<State> weak_state_;
+ };
+
+ PushGenerator() : state_(std::make_shared<State>()) {}
+
+ /// Read an item from the queue
+ Future<T> operator()() {
+ auto lock = state_->mutex.Lock();
+ assert(!state_->consumer_fut.has_value()); // Non-reentrant
+ if (!state_->result_q.empty()) {
+ auto fut = Future<T>::MakeFinished(std::move(state_->result_q.front()));
+ state_->result_q.pop_front();
+ return fut;
+ }
+ if (state_->finished) {
+ return AsyncGeneratorEnd<T>();
+ }
+ auto fut = Future<T>::Make();
+ state_->consumer_fut = fut;
+ return fut;
+ }
+
+ /// \brief Return producer-side interface
+ ///
+ /// The returned object must be used by the producer to push values on the queue.
+ /// Only a single Producer object should be instantiated.
+ Producer producer() { return Producer{state_}; }
+
+ private:
+ const std::shared_ptr<State> state_;
+};
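A minimal sketch, assuming the same includes as above; one side pushes, the other pulls, with no backpressure:

    arrow::PushGenerator<int> gen;
    auto producer = gen.producer();
    producer.Push(1);
    producer.Push(2);
    producer.Close();
    assert(gen().result().ValueOrDie() == 1);
    assert(gen().result().ValueOrDie() == 2);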
+
+/// \brief Creates a generator that pulls reentrantly from a source
+///
+/// This generator will pull reentrantly from the source, ensuring that max_readahead
+/// requests are active at any given time.
+///
+/// The source generator must be async-reentrant
+///
+/// This generator itself is async-reentrant.
+///
+/// This generator may queue up to max_readahead instances of T
+template <typename T>
+AsyncGenerator<T> MakeReadaheadGenerator(AsyncGenerator<T> source_generator,
+ int max_readahead) {
+ return ReadaheadGenerator<T>(std::move(source_generator), max_readahead);
+}
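A minimal sketch, assuming the same includes as above. The vector generator below is async-reentrant (see MakeVectorGenerator), as this wrapper requires; up to 8 requests stay in flight:

    arrow::AsyncGenerator<int> source =
        arrow::MakeVectorGenerator(std::vector<int>{1, 2, 3});
    arrow::AsyncGenerator<int> ahead =
        arrow::MakeReadaheadGenerator(std::move(source), /*max_readahead=*/8);
    assert(ahead().result().ValueOrDie() == 1);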
+
+/// \brief Creates a generator that will yield finished futures from a vector
+///
+/// This generator is async-reentrant
+template <typename T>
+AsyncGenerator<T> MakeVectorGenerator(std::vector<T> vec) {
+ struct State {
+ explicit State(std::vector<T> vec_) : vec(std::move(vec_)), vec_idx(0) {}
+
+ std::vector<T> vec;
+ std::atomic<std::size_t> vec_idx;
+ };
+
+ auto state = std::make_shared<State>(std::move(vec));
+ return [state]() {
+ auto idx = state->vec_idx.fetch_add(1);
+ if (idx >= state->vec.size()) {
+ // Eagerly return memory
+ state->vec.clear();
+ return AsyncGeneratorEnd<T>();
+ }
+ return Future<T>::MakeFinished(state->vec[idx]);
+ };
+}
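A minimal sketch, assuming the same includes as above. Note that the default end token for int is the default-constructed value (0) in this header's iteration traits, so 0 should not appear as a payload here:

    auto gen = arrow::MakeVectorGenerator(std::vector<int>{1, 2, 3});
    assert(gen().result().ValueOrDie() == 1);
    assert(gen().result().ValueOrDie() == 2);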
+
+/// \see MakeMergedGenerator
+template <typename T>
+class MergedGenerator {
+ public:
+ explicit MergedGenerator(AsyncGenerator<AsyncGenerator<T>> source,
+ int max_subscriptions)
+ : state_(std::make_shared<State>(std::move(source), max_subscriptions)) {}
+
+ Future<T> operator()() {
+ Future<T> waiting_future;
+ std::shared_ptr<DeliveredJob> delivered_job;
+ {
+ auto guard = state_->mutex.Lock();
+ if (!state_->delivered_jobs.empty()) {
+ delivered_job = std::move(state_->delivered_jobs.front());
+ state_->delivered_jobs.pop_front();
+ } else if (state_->finished) {
+ return IterationTraits<T>::End();
+ } else {
+ waiting_future = Future<T>::Make();
+ state_->waiting_jobs.push_back(std::make_shared<Future<T>>(waiting_future));
+ }
+ }
+ if (delivered_job) {
+ // deliverer will be invalid if outer callback encounters an error and delivers a
+ // failed result
+ if (delivered_job->deliverer) {
+ delivered_job->deliverer().AddCallback(
+ InnerCallback{state_, delivered_job->index});
+ }
+ return std::move(delivered_job->value);
+ }
+ if (state_->first) {
+ state_->first = false;
+ for (std::size_t i = 0; i < state_->active_subscriptions.size(); i++) {
+ state_->PullSource().AddCallback(OuterCallback{state_, i});
+ }
+ }
+ return waiting_future;
+ }
+
+ private:
+ struct DeliveredJob {
+ explicit DeliveredJob(AsyncGenerator<T> deliverer_, Result<T> value_,
+ std::size_t index_)
+ : deliverer(deliverer_), value(std::move(value_)), index(index_) {}
+
+ AsyncGenerator<T> deliverer;
+ Result<T> value;
+ std::size_t index;
+ };
+
+ struct State {
+ State(AsyncGenerator<AsyncGenerator<T>> source, int max_subscriptions)
+ : source(std::move(source)),
+ active_subscriptions(max_subscriptions),
+ delivered_jobs(),
+ waiting_jobs(),
+ mutex(),
+ first(true),
+ source_exhausted(false),
+ finished(false),
+ num_active_subscriptions(max_subscriptions) {}
+
+ Future<AsyncGenerator<T>> PullSource() {
+ // Need to guard access to source() so we don't pull sync-reentrantly which
+ // is never valid.
+ auto lock = mutex.Lock();
+ return source();
+ }
+
+ AsyncGenerator<AsyncGenerator<T>> source;
+ // active_subscriptions and delivered_jobs will be bounded by max_subscriptions
+ std::vector<AsyncGenerator<T>> active_subscriptions;
+ std::deque<std::shared_ptr<DeliveredJob>> delivered_jobs;
+ // waiting_jobs is unbounded, reentrant pulls (e.g. AddReadahead) will provide the
+ // backpressure
+ std::deque<std::shared_ptr<Future<T>>> waiting_jobs;
+ util::Mutex mutex;
+ bool first;
+ bool source_exhausted;
+ bool finished;
+ int num_active_subscriptions;
+ };
+
+ struct InnerCallback {
+ void operator()(const Result<T>& maybe_next) {
+ Future<T> sink;
+ bool sub_finished = maybe_next.ok() && IsIterationEnd(*maybe_next);
+ {
+ auto guard = state->mutex.Lock();
+ if (state->finished) {
+ // We've errored out so just ignore this result and don't keep pumping
+ return;
+ }
+ if (!sub_finished) {
+ if (state->waiting_jobs.empty()) {
+ state->delivered_jobs.push_back(std::make_shared<DeliveredJob>(
+ state->active_subscriptions[index], maybe_next, index));
+ } else {
+ sink = std::move(*state->waiting_jobs.front());
+ state->waiting_jobs.pop_front();
+ }
+ }
+ }
+ if (sub_finished) {
+ state->PullSource().AddCallback(OuterCallback{state, index});
+ } else if (sink.is_valid()) {
+ sink.MarkFinished(maybe_next);
+ if (maybe_next.ok()) {
+ state->active_subscriptions[index]().AddCallback(*this);
+ }
+ }
+ }
+ std::shared_ptr<State> state;
+ std::size_t index;
+ };
+
+ struct OuterCallback {
+ void operator()(const Result<AsyncGenerator<T>>& maybe_next) {
+ bool should_purge = false;
+ bool should_continue = false;
+ Future<T> error_sink;
+ {
+ auto guard = state->mutex.Lock();
+ if (!maybe_next.ok() || IsIterationEnd(*maybe_next)) {
+ state->source_exhausted = true;
+ if (!maybe_next.ok() || --state->num_active_subscriptions == 0) {
+ state->finished = true;
+ should_purge = true;
+ }
+ if (!maybe_next.ok()) {
+ if (state->waiting_jobs.empty()) {
+ state->delivered_jobs.push_back(std::make_shared<DeliveredJob>(
+ AsyncGenerator<T>(), maybe_next.status(), index));
+ } else {
+ error_sink = std::move(*state->waiting_jobs.front());
+ state->waiting_jobs.pop_front();
+ }
+ }
+ } else {
+ state->active_subscriptions[index] = *maybe_next;
+ should_continue = true;
+ }
+ }
+ if (error_sink.is_valid()) {
+ error_sink.MarkFinished(maybe_next.status());
+ }
+ if (should_continue) {
+ (*maybe_next)().AddCallback(InnerCallback{state, index});
+ } else if (should_purge) {
+ // At this point state->finished has been marked true so no one else
+ // will be interacting with waiting_jobs and we can iterate outside lock
+ while (!state->waiting_jobs.empty()) {
+ state->waiting_jobs.front()->MarkFinished(IterationTraits<T>::End());
+ state->waiting_jobs.pop_front();
+ }
+ }
+ }
+ std::shared_ptr<State> state;
+ std::size_t index;
+ };
+
+ std::shared_ptr<State> state_;
+};
+
+/// \brief Creates a generator that takes in a stream of generators and pulls from up to
+/// max_subscriptions at a time
+///
+/// Note: This may deliver items out of sequence. For example, items from the third
+/// AsyncGenerator generated by the source may be emitted before some items from the first
+/// AsyncGenerator generated by the source.
+///
+/// This generator will pull from source async-reentrantly unless max_subscriptions is 1
+/// This generator will not pull from the individual subscriptions reentrantly. Add
+/// readahead to the individual subscriptions if that is desired.
+/// This generator is async-reentrant
+///
+/// This generator may queue up to max_subscriptions instances of T
+template <typename T>
+AsyncGenerator<T> MakeMergedGenerator(AsyncGenerator<AsyncGenerator<T>> source,
+ int max_subscriptions) {
+ return MergedGenerator<T>(std::move(source), max_subscriptions);
+}
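A minimal sketch, assuming the same includes as above; items from the two inner generators may interleave in any order:

    std::vector<arrow::AsyncGenerator<int>> inner;
    inner.push_back(arrow::MakeVectorGenerator(std::vector<int>{1, 2}));
    inner.push_back(arrow::MakeVectorGenerator(std::vector<int>{3, 4}));
    auto merged = arrow::MakeMergedGenerator(
        arrow::MakeVectorGenerator(std::move(inner)), /*max_subscriptions=*/2);
    // merged() yields 1..4, possibly interleaved across the two inner streams.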
+
+/// \brief Creates a generator that takes in a stream of generators and pulls from each
+/// one in sequence.
+///
+/// This generator is async-reentrant but will never pull from source reentrantly and
+/// will never pull from any subscription reentrantly.
+///
+/// This generator may queue 1 instance of T
+///
+/// TODO: Could potentially make a bespoke implementation instead of MergedGenerator that
+/// forwards async-reentrant requests instead of buffering them (which is what
+/// MergedGenerator does)
+template <typename T>
+AsyncGenerator<T> MakeConcatenatedGenerator(AsyncGenerator<AsyncGenerator<T>> source) {
+ return MergedGenerator<T>(std::move(source), 1);
+}
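The same shape as the merged sketch above, but strictly in order (1, 2, 3, 4):

    std::vector<arrow::AsyncGenerator<int>> parts;
    parts.push_back(arrow::MakeVectorGenerator(std::vector<int>{1, 2}));
    parts.push_back(arrow::MakeVectorGenerator(std::vector<int>{3, 4}));
    auto concatenated = arrow::MakeConcatenatedGenerator(
        arrow::MakeVectorGenerator(std::move(parts)));
    assert(concatenated().result().ValueOrDie() == 1);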
+
+template <typename T>
+struct Enumerated {
+ T value;
+ int index;
+ bool last;
+};
+
+template <typename T>
+struct IterationTraits<Enumerated<T>> {
+ static Enumerated<T> End() { return Enumerated<T>{IterationEnd<T>(), -1, false}; }
+ static bool IsEnd(const Enumerated<T>& val) { return val.index < 0; }
+};
+
+/// \see MakeEnumeratedGenerator
+template <typename T>
+class EnumeratingGenerator {
+ public:
+ EnumeratingGenerator(AsyncGenerator<T> source, T initial_value)
+ : state_(std::make_shared<State>(std::move(source), std::move(initial_value))) {}
+
+ Future<Enumerated<T>> operator()() {
+ if (state_->finished) {
+ return AsyncGeneratorEnd<Enumerated<T>>();
+ } else {
+ auto state = state_;
+ return state->source().Then([state](const T& next) {
+ auto finished = IsIterationEnd<T>(next);
+ auto prev = Enumerated<T>{state->prev_value, state->prev_index, finished};
+ state->prev_value = next;
+ state->prev_index++;
+ state->finished = finished;
+ return prev;
+ });
+ }
+ }
+
+ private:
+ struct State {
+ State(AsyncGenerator<T> source, T initial_value)
+ : source(std::move(source)), prev_value(std::move(initial_value)), prev_index(0) {
+ finished = IsIterationEnd<T>(prev_value);
+ }
+
+ AsyncGenerator<T> source;
+ T prev_value;
+ int prev_index;
+ bool finished;
+ };
+
+ std::shared_ptr<State> state_;
+};
+
+/// Wraps items from a source generator with positional information
+///
+/// When used with MakeMergedGenerator and MakeSequencingGenerator this allows items to be
+/// processed in a "first-available" fashion and later resequenced which can reduce the
+/// impact of sources with erratic performance (e.g. a filesystem where some items may
+/// take longer to read than others).
+///
+/// TODO(ARROW-12371) Would require this generator to be async-reentrant
+///
+/// \see MakeSequencingGenerator for an example of putting items back in order
+///
+/// This generator is not async-reentrant
+///
+/// This generator buffers one item (so it knows which item is the last item)
+template <typename T>
+AsyncGenerator<Enumerated<T>> MakeEnumeratedGenerator(AsyncGenerator<T> source) {
+ return FutureFirstGenerator<Enumerated<T>>(
+ source().Then([source](const T& initial_value) -> AsyncGenerator<Enumerated<T>> {
+ return EnumeratingGenerator<T>(std::move(source), initial_value);
+ }));
+}
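A minimal sketch, assuming the same includes as above; each item is tagged with its position and a last flag:

    auto numbered = arrow::MakeEnumeratedGenerator(
        arrow::MakeVectorGenerator(std::vector<int>{7, 8, 9}));
    arrow::Enumerated<int> first = numbered().result().ValueOrDie();
    assert(first.value == 7 && first.index == 0 && !first.last);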
+
+/// \see MakeTransferredGenerator
+template <typename T>
+class TransferringGenerator {
+ public:
+ explicit TransferringGenerator(AsyncGenerator<T> source, internal::Executor* executor)
+ : source_(std::move(source)), executor_(executor) {}
+
+ Future<T> operator()() { return executor_->Transfer(source_()); }
+
+ private:
+ AsyncGenerator<T> source_;
+ internal::Executor* executor_;
+};
+
+/// \brief Transfers a future to an underlying executor.
+///
+/// Continuations run on the returned future will be run on the given executor
+/// if they cannot be run synchronously.
+///
+/// This is often needed to move computation off I/O threads or other external
+/// completion sources and back onto the CPU executor so the I/O thread can
+/// stay busy and focused on I/O
+///
+/// Keep in mind that continuations called on an already completed future will
+/// always be run synchronously and so no transfer will happen in that case.
+///
+/// This generator is async-reentrant if the source is
+///
+/// This generator will not queue
+template <typename T>
+AsyncGenerator<T> MakeTransferredGenerator(AsyncGenerator<T> source,
+ internal::Executor* executor) {
+ return TransferringGenerator<T>(std::move(source), executor);
+}
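A minimal sketch, assuming the same includes as above plus arrow/util/thread_pool.h (an assumption; ThreadPool::Make is used the same way later in this file):

    auto cpu_pool = arrow::internal::ThreadPool::Make(/*threads=*/4).ValueOrDie();
    auto transferred = arrow::MakeTransferredGenerator(
        arrow::MakeVectorGenerator(std::vector<int>{1, 2, 3}), cpu_pool.get());
    // Continuations added to transferred()'s futures now run on cpu_pool
    // (unless they can run synchronously).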
+
+/// \see MakeBackgroundGenerator
+template <typename T>
+class BackgroundGenerator {
+ public:
+ explicit BackgroundGenerator(Iterator<T> it, internal::Executor* io_executor, int max_q,
+ int q_restart)
+ : state_(std::make_shared<State>(io_executor, std::move(it), max_q, q_restart)),
+ cleanup_(std::make_shared<Cleanup>(state_.get())) {}
+
+ Future<T> operator()() {
+ auto guard = state_->mutex.Lock();
+ Future<T> waiting_future;
+ if (state_->queue.empty()) {
+ if (state_->finished) {
+ return AsyncGeneratorEnd<T>();
+ } else {
+ waiting_future = Future<T>::Make();
+ state_->waiting_future = waiting_future;
+ }
+ } else {
+ auto next = Future<T>::MakeFinished(std::move(state_->queue.front()));
+ state_->queue.pop();
+ if (state_->NeedsRestart()) {
+ return state_->RestartTask(state_, std::move(guard), std::move(next));
+ }
+ return next;
+ }
+ // This should only trigger the very first time this method is called
+ if (state_->NeedsRestart()) {
+ return state_->RestartTask(state_, std::move(guard), std::move(waiting_future));
+ }
+ return waiting_future;
+ }
+
+ protected:
+ static constexpr uint64_t kUnlikelyThreadId{std::numeric_limits<uint64_t>::max()};
+
+ struct State {
+ State(internal::Executor* io_executor, Iterator<T> it, int max_q, int q_restart)
+ : io_executor(io_executor),
+ max_q(max_q),
+ q_restart(q_restart),
+ it(std::move(it)),
+ reading(false),
+ finished(false),
+ should_shutdown(false) {}
+
+ void ClearQueue() {
+ while (!queue.empty()) {
+ queue.pop();
+ }
+ }
+
+ bool TaskIsRunning() const { return task_finished.is_valid(); }
+
+ bool NeedsRestart() const {
+ return !finished && !reading && static_cast<int>(queue.size()) <= q_restart;
+ }
+
+ void DoRestartTask(std::shared_ptr<State> state, util::Mutex::Guard guard) {
+ // If we get here we are actually going to start a new task so let's create a
+ // task_finished future for it
+ state->task_finished = Future<>::Make();
+ state->reading = true;
+ auto spawn_status = io_executor->Spawn(
+ [state]() { BackgroundGenerator::WorkerTask(std::move(state)); });
+ if (!spawn_status.ok()) {
+ // If we can't spawn a new task then send an error to the consumer (either via a
+ // waiting future or the queue) and mark ourselves finished
+ state->finished = true;
+ state->task_finished = Future<>();
+ if (waiting_future.has_value()) {
+ auto to_deliver = std::move(waiting_future.value());
+ waiting_future.reset();
+ guard.Unlock();
+ to_deliver.MarkFinished(spawn_status);
+ } else {
+ ClearQueue();
+ queue.push(spawn_status);
+ }
+ }
+ }
+
+ Future<T> RestartTask(std::shared_ptr<State> state, util::Mutex::Guard guard,
+ Future<T> next) {
+ if (TaskIsRunning()) {
+ // If the task is still cleaning up we need to wait for it to finish before
+ // restarting. We also want to block the consumer until we've restarted the
+ // reader to avoid multiple restarts
+ return task_finished.Then([state, next]() {
+ // This may appear dangerous (recursive mutex) but we should be guaranteed the
+ // outer guard has been released by this point. We know...
+ // * task_finished is not already finished (it would be invalid in that case)
+ // * task_finished will not be marked complete until we've given up the mutex
+ auto guard_ = state->mutex.Lock();
+ state->DoRestartTask(state, std::move(guard_));
+ return next;
+ });
+ }
+ // Otherwise we can restart immediately
+ DoRestartTask(std::move(state), std::move(guard));
+ return next;
+ }
+
+ internal::Executor* io_executor;
+ const int max_q;
+ const int q_restart;
+ Iterator<T> it;
+ std::atomic<uint64_t> worker_thread_id{kUnlikelyThreadId};
+
+ // If true, the task is actively pumping items from the queue and does not need a
+ // restart
+ bool reading;
+ // Set to true when a terminal item arrives
+ bool finished;
+ // Signal to the background task to end early because consumers have given up on it
+ bool should_shutdown;
+ // If the queue is empty, the consumer will create a waiting future and wait for it
+ std::queue<Result<T>> queue;
+ util::optional<Future<T>> waiting_future;
+ // Every background task is given a future to complete when it is entirely finished
+ // processing and ready for the next task to start or for State to be destroyed
+ Future<> task_finished;
+ util::Mutex mutex;
+ };
+
+ // Cleanup task that will be run when all consumer references to the generator are lost
+ struct Cleanup {
+ explicit Cleanup(State* state) : state(state) {}
+ ~Cleanup() {
+    /// TODO: Once ARROW-13109 is available we can force consumers to spawn and
+    /// there will be no need to perform this check.
+    ///
+    /// It's a deadlock if we enter cleanup from the worker thread, but that can
+    /// happen if the consumer doesn't transfer away
+ assert(state->worker_thread_id.load() != ::arrow::internal::GetThreadId());
+ Future<> finish_fut;
+ {
+ auto lock = state->mutex.Lock();
+ if (!state->TaskIsRunning()) {
+ return;
+ }
+ // Signal the current task to stop and wait for it to finish
+ state->should_shutdown = true;
+ finish_fut = state->task_finished;
+ }
+ // Using future as a condition variable here
+ Status st = finish_fut.status();
+ ARROW_UNUSED(st);
+ }
+ State* state;
+ };
+
+ static void WorkerTask(std::shared_ptr<State> state) {
+ state->worker_thread_id.store(::arrow::internal::GetThreadId());
+ // We need to capture the state to read while outside the mutex
+ bool reading = true;
+ while (reading) {
+ auto next = state->it.Next();
+ // Need to capture state->waiting_future inside the mutex to mark finished outside
+ Future<T> waiting_future;
+ {
+ auto guard = state->mutex.Lock();
+
+ if (state->should_shutdown) {
+ state->finished = true;
+ break;
+ }
+
+ if (!next.ok() || IsIterationEnd<T>(*next)) {
+          // Terminal item. Set finished to true, send this last item, and quit
+ state->finished = true;
+ if (!next.ok()) {
+ state->ClearQueue();
+ }
+ }
+ // At this point we are going to send an item. Either we will add it to the
+ // queue or deliver it to a waiting future.
+ if (state->waiting_future.has_value()) {
+ waiting_future = std::move(state->waiting_future.value());
+ state->waiting_future.reset();
+ } else {
+ state->queue.push(std::move(next));
+ // We just filled up the queue so it is time to quit. We may need to notify
+ // a cleanup task so we transition to Quitting
+ if (static_cast<int>(state->queue.size()) >= state->max_q) {
+ state->reading = false;
+ }
+ }
+ reading = state->reading && !state->finished;
+ }
+ // This should happen outside the mutex. Presumably there is a
+ // transferring generator on the other end that will quickly transfer any
+ // callbacks off of this thread so we can continue looping. Still, best not to
+ // rely on that
+ if (waiting_future.is_valid()) {
+ waiting_future.MarkFinished(next);
+ }
+ }
+ // Once we've sent our last item we can notify any waiters that we are done and so
+ // either state can be cleaned up or a new background task can be started
+ Future<> task_finished;
+ {
+ auto guard = state->mutex.Lock();
+ // After we give up the mutex state can be safely deleted. We will no longer
+ // reference it. We can safely transition to idle now.
+ task_finished = state->task_finished;
+ state->task_finished = Future<>();
+ state->worker_thread_id.store(kUnlikelyThreadId);
+ }
+ task_finished.MarkFinished();
+ }
+
+ std::shared_ptr<State> state_;
+ // state_ is held by both the generator and the background thread so it won't be cleaned
+ // up when all consumer references are relinquished. cleanup_ is only held by the
+ // generator so it will be destructed when the last consumer reference is gone. We use
+ // this to cleanup / stop the background generator in case the consuming end stops
+ // listening (e.g. due to a downstream error)
+ std::shared_ptr<Cleanup> cleanup_;
+};
+
+constexpr int kDefaultBackgroundMaxQ = 32;
+constexpr int kDefaultBackgroundQRestart = 16;
+
+/// \brief Creates an AsyncGenerator<T> by iterating over an Iterator<T> on a background
+/// thread
+///
+/// The parameters max_q and q_restart control queue size and background thread task
+/// management. If the background task is fast you typically don't want it creating a
+/// thread task for every item. Instead the background thread will run until it fills
+/// up a readahead queue.
+///
+/// Once the queue has filled up the background thread task will terminate (allowing other
+/// I/O tasks to use the thread). Once the queue has been drained enough (specified by
+/// q_restart) then the background thread task will be restarted. If q_restart is too low
+/// then you may exhaust the queue waiting for the background thread task to start running
+/// again. If it is too high then it will be constantly stopping and restarting the
+/// background queue task.
+///
+/// The "background thread" is a logical thread and will run as tasks on the io_executor.
+/// This thread may stop and start when the queue fills up but there will only be one
+/// active background thread task at any given time. You MUST transfer away from this
+/// background generator. Otherwise there could be a race condition if a callback on the
+/// background thread deletes the last consumer reference to the background generator. You
+/// can transfer onto the same executor as the background thread; it is only necessary to
+/// create a new thread task, not to switch executors.
+///
+/// This generator is not async-reentrant
+///
+/// This generator will queue up to max_q blocks
+template <typename T>
+static Result<AsyncGenerator<T>> MakeBackgroundGenerator(
+ Iterator<T> iterator, internal::Executor* io_executor,
+ int max_q = kDefaultBackgroundMaxQ, int q_restart = kDefaultBackgroundQRestart) {
+ if (max_q < q_restart) {
+ return Status::Invalid("max_q must be >= q_restart");
+ }
+ return BackgroundGenerator<T>(std::move(iterator), io_executor, max_q, q_restart);
+}
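A minimal sketch, assuming the same includes as above plus arrow/util/iterator.h for MakeVectorIterator and arrow/util/thread_pool.h (both assumptions; neither is shown in this diff). Per the warning above, transfer away before attaching continuations, and keep the pool alive as long as the generator is used:

    auto io_pool = arrow::internal::ThreadPool::Make(/*threads=*/1).ValueOrDie();
    auto bg = arrow::MakeBackgroundGenerator(
                  arrow::MakeVectorIterator(std::vector<int>{1, 2, 3}),
                  io_pool.get())
                  .ValueOrDie();
    // e.g.: bg = arrow::MakeTransferredGenerator(std::move(bg), cpu_executor);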
+
+/// \see MakeGeneratorIterator
+template <typename T>
+class GeneratorIterator {
+ public:
+ explicit GeneratorIterator(AsyncGenerator<T> source) : source_(std::move(source)) {}
+
+ Result<T> Next() { return source_().result(); }
+
+ private:
+ AsyncGenerator<T> source_;
+};
+
+/// \brief Converts an AsyncGenerator<T> to an Iterator<T> by blocking until each future
+/// is finished
+template <typename T>
+Iterator<T> MakeGeneratorIterator(AsyncGenerator<T> source) {
+ return Iterator<T>(GeneratorIterator<T>(std::move(source)));
+}
+
+/// \brief Adds readahead to an iterator using a background thread.
+///
+/// Under the hood this is converting the iterator to a generator using
+/// MakeBackgroundGenerator, adding readahead to the converted generator with
+/// MakeReadaheadGenerator, and then converting back to an iterator using
+/// MakeGeneratorIterator.
+template <typename T>
+Result<Iterator<T>> MakeReadaheadIterator(Iterator<T> it, int readahead_queue_size) {
+ ARROW_ASSIGN_OR_RAISE(auto io_executor, internal::ThreadPool::Make(1));
+ auto max_q = readahead_queue_size;
+ auto q_restart = std::max(1, max_q / 2);
+ ARROW_ASSIGN_OR_RAISE(
+ auto background_generator,
+ MakeBackgroundGenerator(std::move(it), io_executor.get(), max_q, q_restart));
+ // Capture io_executor to keep it alive as long as owned_bg_generator is still
+ // referenced
+ AsyncGenerator<T> owned_bg_generator = [io_executor, background_generator]() {
+ return background_generator();
+ };
+ return MakeGeneratorIterator(std::move(owned_bg_generator));
+}
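A minimal sketch, assuming the same includes as above plus arrow/util/iterator.h for MakeVectorIterator (an assumption):

    auto it = arrow::MakeReadaheadIterator(
                  arrow::MakeVectorIterator(std::vector<int>{1, 2, 3}),
                  /*readahead_queue_size=*/4)
                  .ValueOrDie();
    assert(it.Next().ValueOrDie() == 1);  // blocks until the background thread delivers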
+
+/// \brief Make a generator that returns a single pre-generated future
+///
+/// This generator is async-reentrant.
+template <typename T>
+std::function<Future<T>()> MakeSingleFutureGenerator(Future<T> future) {
+ assert(future.is_valid());
+ auto state = std::make_shared<Future<T>>(std::move(future));
+ return [state]() -> Future<T> {
+ auto fut = std::move(*state);
+ if (fut.is_valid()) {
+ return fut;
+ } else {
+ return AsyncGeneratorEnd<T>();
+ }
+ };
+}
+
+/// \brief Make a generator that immediately ends.
+///
+/// This generator is async-reentrant.
+template <typename T>
+std::function<Future<T>()> MakeEmptyGenerator() {
+ return []() -> Future<T> { return AsyncGeneratorEnd<T>(); };
+}
+
+/// \brief Make a generator that always fails with a given error
+///
+/// This generator is async-reentrant.
+template <typename T>
+AsyncGenerator<T> MakeFailingGenerator(Status st) {
+ assert(!st.ok());
+ auto state = std::make_shared<Status>(std::move(st));
+ return [state]() -> Future<T> {
+ auto st = std::move(*state);
+ if (!st.ok()) {
+ return std::move(st);
+ } else {
+ return AsyncGeneratorEnd<T>();
+ }
+ };
+}
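A minimal sketch, assuming the same includes as above; the first pull yields the error, after which the generator ends:

    auto failing = arrow::MakeFailingGenerator<int>(arrow::Status::Invalid("boom"));
    assert(failing().result().status().IsInvalid());
    assert(arrow::IsIterationEnd(failing().result().ValueOrDie()));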
+
+/// \brief Make a generator that always fails with a given error
+///
+/// This overload allows inferring the return type from the argument.
+template <typename T>
+AsyncGenerator<T> MakeFailingGenerator(const Result<T>& result) {
+ return MakeFailingGenerator<T>(result.status());
+}
+
+/// \brief Prepends initial_values onto a generator
+///
+/// This generator is async-reentrant but will buffer requests and will not
+/// pull from following_values async-reentrantly.
+template <typename T>
+AsyncGenerator<T> MakeGeneratorStartsWith(std::vector<T> initial_values,
+ AsyncGenerator<T> following_values) {
+ auto initial_values_vec_gen = MakeVectorGenerator(std::move(initial_values));
+ auto gen_gen = MakeVectorGenerator<AsyncGenerator<T>>(
+ {std::move(initial_values_vec_gen), std::move(following_values)});
+ return MakeConcatenatedGenerator(std::move(gen_gen));
+}
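A minimal sketch, assuming the same includes as above; 1 and 2 are replayed before the tail stream:

    auto tail = arrow::MakeVectorGenerator(std::vector<int>{3, 4});
    auto gen =
        arrow::MakeGeneratorStartsWith(std::vector<int>{1, 2}, std::move(tail));
    assert(gen().result().ValueOrDie() == 1);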
+
+template <typename T>
+struct CancellableGenerator {
+ Future<T> operator()() {
+ if (stop_token.IsStopRequested()) {
+ return stop_token.Poll();
+ }
+ return source();
+ }
+
+ AsyncGenerator<T> source;
+ StopToken stop_token;
+};
+
+/// \brief Allows an async generator to be cancelled
+///
+/// This generator is async-reentrant
+template <typename T>
+AsyncGenerator<T> MakeCancellable(AsyncGenerator<T> source, StopToken stop_token) {
+ return CancellableGenerator<T>{std::move(source), std::move(stop_token)};
+}
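A minimal sketch, assuming the same includes as above plus arrow/util/cancel.h for StopSource/StopToken (an assumption; they are not shown in this diff):

    arrow::StopSource stop_source;
    auto gen = arrow::MakeCancellable(
        arrow::MakeVectorGenerator(std::vector<int>{1, 2, 3}), stop_source.token());
    assert(gen().result().ValueOrDie() == 1);
    stop_source.RequestStop();
    assert(gen().result().status().IsCancelled());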
+
+} // namespace arrow
diff --git a/contrib/libs/apache/arrow/cpp/src/arrow/util/basic_decimal.cc b/contrib/libs/apache/arrow/cpp/src/arrow/util/basic_decimal.cc
index d6640775c4f..56809f28165 100644
--- a/contrib/libs/apache/arrow/cpp/src/arrow/util/basic_decimal.cc
+++ b/contrib/libs/apache/arrow/cpp/src/arrow/util/basic_decimal.cc
@@ -28,7 +28,7 @@
#include <string>
#include "arrow/util/bit_util.h"
-#include "arrow/util/endian.h"
+#include "arrow/util/endian.h"
#include "arrow/util/int128_internal.h"
#include "arrow/util/int_util_internal.h"
#include "arrow/util/logging.h"
@@ -121,223 +121,223 @@ static const BasicDecimal128 ScaleMultipliersHalf[] = {
BasicDecimal128(271050543121376108LL, 9257742014424809472ULL),
BasicDecimal128(2710505431213761085LL, 343699775700336640ULL)};
-static const BasicDecimal256 ScaleMultipliersDecimal256[] = {
- BasicDecimal256({1ULL, 0ULL, 0ULL, 0ULL}),
- BasicDecimal256({10ULL, 0ULL, 0ULL, 0ULL}),
- BasicDecimal256({100ULL, 0ULL, 0ULL, 0ULL}),
- BasicDecimal256({1000ULL, 0ULL, 0ULL, 0ULL}),
- BasicDecimal256({10000ULL, 0ULL, 0ULL, 0ULL}),
- BasicDecimal256({100000ULL, 0ULL, 0ULL, 0ULL}),
- BasicDecimal256({1000000ULL, 0ULL, 0ULL, 0ULL}),
- BasicDecimal256({10000000ULL, 0ULL, 0ULL, 0ULL}),
- BasicDecimal256({100000000ULL, 0ULL, 0ULL, 0ULL}),
- BasicDecimal256({1000000000ULL, 0ULL, 0ULL, 0ULL}),
- BasicDecimal256({10000000000ULL, 0ULL, 0ULL, 0ULL}),
- BasicDecimal256({100000000000ULL, 0ULL, 0ULL, 0ULL}),
- BasicDecimal256({1000000000000ULL, 0ULL, 0ULL, 0ULL}),
- BasicDecimal256({10000000000000ULL, 0ULL, 0ULL, 0ULL}),
- BasicDecimal256({100000000000000ULL, 0ULL, 0ULL, 0ULL}),
- BasicDecimal256({1000000000000000ULL, 0ULL, 0ULL, 0ULL}),
- BasicDecimal256({10000000000000000ULL, 0ULL, 0ULL, 0ULL}),
- BasicDecimal256({100000000000000000ULL, 0ULL, 0ULL, 0ULL}),
- BasicDecimal256({1000000000000000000ULL, 0ULL, 0ULL, 0ULL}),
- BasicDecimal256({10000000000000000000ULL, 0ULL, 0ULL, 0ULL}),
- BasicDecimal256({7766279631452241920ULL, 5ULL, 0ULL, 0ULL}),
- BasicDecimal256({3875820019684212736ULL, 54ULL, 0ULL, 0ULL}),
- BasicDecimal256({1864712049423024128ULL, 542ULL, 0ULL, 0ULL}),
- BasicDecimal256({200376420520689664ULL, 5421ULL, 0ULL, 0ULL}),
- BasicDecimal256({2003764205206896640ULL, 54210ULL, 0ULL, 0ULL}),
- BasicDecimal256({1590897978359414784ULL, 542101ULL, 0ULL, 0ULL}),
- BasicDecimal256({15908979783594147840ULL, 5421010ULL, 0ULL, 0ULL}),
- BasicDecimal256({11515845246265065472ULL, 54210108ULL, 0ULL, 0ULL}),
- BasicDecimal256({4477988020393345024ULL, 542101086ULL, 0ULL, 0ULL}),
- BasicDecimal256({7886392056514347008ULL, 5421010862ULL, 0ULL, 0ULL}),
- BasicDecimal256({5076944270305263616ULL, 54210108624ULL, 0ULL, 0ULL}),
- BasicDecimal256({13875954555633532928ULL, 542101086242ULL, 0ULL, 0ULL}),
- BasicDecimal256({9632337040368467968ULL, 5421010862427ULL, 0ULL, 0ULL}),
- BasicDecimal256({4089650035136921600ULL, 54210108624275ULL, 0ULL, 0ULL}),
- BasicDecimal256({4003012203950112768ULL, 542101086242752ULL, 0ULL, 0ULL}),
- BasicDecimal256({3136633892082024448ULL, 5421010862427522ULL, 0ULL, 0ULL}),
- BasicDecimal256({12919594847110692864ULL, 54210108624275221ULL, 0ULL, 0ULL}),
- BasicDecimal256({68739955140067328ULL, 542101086242752217ULL, 0ULL, 0ULL}),
- BasicDecimal256({687399551400673280ULL, 5421010862427522170ULL, 0ULL, 0ULL}),
- BasicDecimal256({6873995514006732800ULL, 17316620476856118468ULL, 2ULL, 0ULL}),
- BasicDecimal256({13399722918938673152ULL, 7145508105175220139ULL, 29ULL, 0ULL}),
- BasicDecimal256({4870020673419870208ULL, 16114848830623546549ULL, 293ULL, 0ULL}),
- BasicDecimal256({11806718586779598848ULL, 13574535716559052564ULL, 2938ULL, 0ULL}),
- BasicDecimal256({7386721425538678784ULL, 6618148649623664334ULL, 29387ULL, 0ULL}),
- BasicDecimal256({80237960548581376ULL, 10841254275107988496ULL, 293873ULL, 0ULL}),
- BasicDecimal256({802379605485813760ULL, 16178822382532126880ULL, 2938735ULL, 0ULL}),
- BasicDecimal256({8023796054858137600ULL, 14214271235644855872ULL, 29387358ULL, 0ULL}),
- BasicDecimal256(
- {6450984253743169536ULL, 13015503840481697412ULL, 293873587ULL, 0ULL}),
- BasicDecimal256(
- {9169610316303040512ULL, 1027829888850112811ULL, 2938735877ULL, 0ULL}),
- BasicDecimal256(
- {17909126868192198656ULL, 10278298888501128114ULL, 29387358770ULL, 0ULL}),
- BasicDecimal256(
- {13070572018536022016ULL, 10549268516463523069ULL, 293873587705ULL, 0ULL}),
- BasicDecimal256(
- {1578511669393358848ULL, 13258964796087472617ULL, 2938735877055ULL, 0ULL}),
- BasicDecimal256(
- {15785116693933588480ULL, 3462439444907864858ULL, 29387358770557ULL, 0ULL}),
- BasicDecimal256(
- {10277214349659471872ULL, 16177650375369096972ULL, 293873587705571ULL, 0ULL}),
- BasicDecimal256(
- {10538423128046960640ULL, 14202551164014556797ULL, 2938735877055718ULL, 0ULL}),
- BasicDecimal256(
- {13150510911921848320ULL, 12898303124178706663ULL, 29387358770557187ULL, 0ULL}),
- BasicDecimal256(
- {2377900603251621888ULL, 18302566799529756941ULL, 293873587705571876ULL, 0ULL}),
- BasicDecimal256(
- {5332261958806667264ULL, 17004971331911604867ULL, 2938735877055718769ULL, 0ULL}),
- BasicDecimal256(
- {16429131440647569408ULL, 4029016655730084128ULL, 10940614696847636083ULL, 1ULL}),
- BasicDecimal256({16717361816799281152ULL, 3396678409881738056ULL,
- 17172426599928602752ULL, 15ULL}),
- BasicDecimal256({1152921504606846976ULL, 15520040025107828953ULL,
- 5703569335900062977ULL, 159ULL}),
- BasicDecimal256({11529215046068469760ULL, 7626447661401876602ULL,
- 1695461137871974930ULL, 1593ULL}),
- BasicDecimal256({4611686018427387904ULL, 2477500319180559562ULL,
- 16954611378719749304ULL, 15930ULL}),
- BasicDecimal256({9223372036854775808ULL, 6328259118096044006ULL,
- 3525417123811528497ULL, 159309ULL}),
- BasicDecimal256({0ULL, 7942358959831785217ULL, 16807427164405733357ULL, 1593091ULL}),
- BasicDecimal256({0ULL, 5636613303479645706ULL, 2053574980671369030ULL, 15930919ULL}),
- BasicDecimal256({0ULL, 1025900813667802212ULL, 2089005733004138687ULL, 159309191ULL}),
- BasicDecimal256(
- {0ULL, 10259008136678022120ULL, 2443313256331835254ULL, 1593091911ULL}),
- BasicDecimal256(
- {0ULL, 10356360998232463120ULL, 5986388489608800929ULL, 15930919111ULL}),
- BasicDecimal256(
- {0ULL, 11329889613776873120ULL, 4523652674959354447ULL, 159309191113ULL}),
- BasicDecimal256(
- {0ULL, 2618431695511421504ULL, 8343038602174441244ULL, 1593091911132ULL}),
- BasicDecimal256(
- {0ULL, 7737572881404663424ULL, 9643409726906205977ULL, 15930919111324ULL}),
- BasicDecimal256(
- {0ULL, 3588752519208427776ULL, 4200376900514301694ULL, 159309191113245ULL}),
- BasicDecimal256(
- {0ULL, 17440781118374726144ULL, 5110280857723913709ULL, 1593091911132452ULL}),
- BasicDecimal256(
- {0ULL, 8387114520361296896ULL, 14209320429820033867ULL, 15930919111324522ULL}),
- BasicDecimal256(
- {0ULL, 10084168908774762496ULL, 12965995782233477362ULL, 159309191113245227ULL}),
- BasicDecimal256(
- {0ULL, 8607968719199866880ULL, 532749306367912313ULL, 1593091911132452277ULL})};
-
-static const BasicDecimal256 ScaleMultipliersHalfDecimal256[] = {
- BasicDecimal256({0ULL, 0ULL, 0ULL, 0ULL}),
- BasicDecimal256({5ULL, 0ULL, 0ULL, 0ULL}),
- BasicDecimal256({50ULL, 0ULL, 0ULL, 0ULL}),
- BasicDecimal256({500ULL, 0ULL, 0ULL, 0ULL}),
- BasicDecimal256({5000ULL, 0ULL, 0ULL, 0ULL}),
- BasicDecimal256({50000ULL, 0ULL, 0ULL, 0ULL}),
- BasicDecimal256({500000ULL, 0ULL, 0ULL, 0ULL}),
- BasicDecimal256({5000000ULL, 0ULL, 0ULL, 0ULL}),
- BasicDecimal256({50000000ULL, 0ULL, 0ULL, 0ULL}),
- BasicDecimal256({500000000ULL, 0ULL, 0ULL, 0ULL}),
- BasicDecimal256({5000000000ULL, 0ULL, 0ULL, 0ULL}),
- BasicDecimal256({50000000000ULL, 0ULL, 0ULL, 0ULL}),
- BasicDecimal256({500000000000ULL, 0ULL, 0ULL, 0ULL}),
- BasicDecimal256({5000000000000ULL, 0ULL, 0ULL, 0ULL}),
- BasicDecimal256({50000000000000ULL, 0ULL, 0ULL, 0ULL}),
- BasicDecimal256({500000000000000ULL, 0ULL, 0ULL, 0ULL}),
- BasicDecimal256({5000000000000000ULL, 0ULL, 0ULL, 0ULL}),
- BasicDecimal256({50000000000000000ULL, 0ULL, 0ULL, 0ULL}),
- BasicDecimal256({500000000000000000ULL, 0ULL, 0ULL, 0ULL}),
- BasicDecimal256({5000000000000000000ULL, 0ULL, 0ULL, 0ULL}),
- BasicDecimal256({13106511852580896768ULL, 2ULL, 0ULL, 0ULL}),
- BasicDecimal256({1937910009842106368ULL, 27ULL, 0ULL, 0ULL}),
- BasicDecimal256({932356024711512064ULL, 271ULL, 0ULL, 0ULL}),
- BasicDecimal256({9323560247115120640ULL, 2710ULL, 0ULL, 0ULL}),
- BasicDecimal256({1001882102603448320ULL, 27105ULL, 0ULL, 0ULL}),
- BasicDecimal256({10018821026034483200ULL, 271050ULL, 0ULL, 0ULL}),
- BasicDecimal256({7954489891797073920ULL, 2710505ULL, 0ULL, 0ULL}),
- BasicDecimal256({5757922623132532736ULL, 27105054ULL, 0ULL, 0ULL}),
- BasicDecimal256({2238994010196672512ULL, 271050543ULL, 0ULL, 0ULL}),
- BasicDecimal256({3943196028257173504ULL, 2710505431ULL, 0ULL, 0ULL}),
- BasicDecimal256({2538472135152631808ULL, 27105054312ULL, 0ULL, 0ULL}),
- BasicDecimal256({6937977277816766464ULL, 271050543121ULL, 0ULL, 0ULL}),
- BasicDecimal256({14039540557039009792ULL, 2710505431213ULL, 0ULL, 0ULL}),
- BasicDecimal256({11268197054423236608ULL, 27105054312137ULL, 0ULL, 0ULL}),
- BasicDecimal256({2001506101975056384ULL, 271050543121376ULL, 0ULL, 0ULL}),
- BasicDecimal256({1568316946041012224ULL, 2710505431213761ULL, 0ULL, 0ULL}),
- BasicDecimal256({15683169460410122240ULL, 27105054312137610ULL, 0ULL, 0ULL}),
- BasicDecimal256({9257742014424809472ULL, 271050543121376108ULL, 0ULL, 0ULL}),
- BasicDecimal256({343699775700336640ULL, 2710505431213761085ULL, 0ULL, 0ULL}),
- BasicDecimal256({3436997757003366400ULL, 8658310238428059234ULL, 1ULL, 0ULL}),
- BasicDecimal256({15923233496324112384ULL, 12796126089442385877ULL, 14ULL, 0ULL}),
- BasicDecimal256({11658382373564710912ULL, 17280796452166549082ULL, 146ULL, 0ULL}),
- BasicDecimal256({5903359293389799424ULL, 6787267858279526282ULL, 1469ULL, 0ULL}),
- BasicDecimal256({3693360712769339392ULL, 12532446361666607975ULL, 14693ULL, 0ULL}),
- BasicDecimal256({40118980274290688ULL, 14643999174408770056ULL, 146936ULL, 0ULL}),
- BasicDecimal256({401189802742906880ULL, 17312783228120839248ULL, 1469367ULL, 0ULL}),
- BasicDecimal256({4011898027429068800ULL, 7107135617822427936ULL, 14693679ULL, 0ULL}),
- BasicDecimal256(
- {3225492126871584768ULL, 15731123957095624514ULL, 146936793ULL, 0ULL}),
- BasicDecimal256(
- {13808177195006296064ULL, 9737286981279832213ULL, 1469367938ULL, 0ULL}),
- BasicDecimal256(
- {8954563434096099328ULL, 5139149444250564057ULL, 14693679385ULL, 0ULL}),
- BasicDecimal256(
- {15758658046122786816ULL, 14498006295086537342ULL, 146936793852ULL, 0ULL}),
- BasicDecimal256(
- {10012627871551455232ULL, 15852854434898512116ULL, 1469367938527ULL, 0ULL}),
- BasicDecimal256(
- {7892558346966794240ULL, 10954591759308708237ULL, 14693679385278ULL, 0ULL}),
- BasicDecimal256(
- {5138607174829735936ULL, 17312197224539324294ULL, 146936793852785ULL, 0ULL}),
- BasicDecimal256(
- {14492583600878256128ULL, 7101275582007278398ULL, 1469367938527859ULL, 0ULL}),
- BasicDecimal256(
- {15798627492815699968ULL, 15672523598944129139ULL, 14693679385278593ULL, 0ULL}),
- BasicDecimal256(
- {10412322338480586752ULL, 9151283399764878470ULL, 146936793852785938ULL, 0ULL}),
- BasicDecimal256(
- {11889503016258109440ULL, 17725857702810578241ULL, 1469367938527859384ULL, 0ULL}),
- BasicDecimal256(
- {8214565720323784704ULL, 11237880364719817872ULL, 14693679385278593849ULL, 0ULL}),
- BasicDecimal256(
- {8358680908399640576ULL, 1698339204940869028ULL, 17809585336819077184ULL, 7ULL}),
- BasicDecimal256({9799832789158199296ULL, 16983392049408690284ULL,
- 12075156704804807296ULL, 79ULL}),
- BasicDecimal256({5764607523034234880ULL, 3813223830700938301ULL,
- 10071102605790763273ULL, 796ULL}),
- BasicDecimal256({2305843009213693952ULL, 1238750159590279781ULL,
- 8477305689359874652ULL, 7965ULL}),
- BasicDecimal256({4611686018427387904ULL, 12387501595902797811ULL,
- 10986080598760540056ULL, 79654ULL}),
- BasicDecimal256({9223372036854775808ULL, 13194551516770668416ULL,
- 17627085619057642486ULL, 796545ULL}),
- BasicDecimal256({0ULL, 2818306651739822853ULL, 10250159527190460323ULL, 7965459ULL}),
- BasicDecimal256({0ULL, 9736322443688676914ULL, 10267874903356845151ULL, 79654595ULL}),
- BasicDecimal256(
- {0ULL, 5129504068339011060ULL, 10445028665020693435ULL, 796545955ULL}),
- BasicDecimal256(
- {0ULL, 14401552535971007368ULL, 12216566281659176272ULL, 7965459555ULL}),
- BasicDecimal256(
- {0ULL, 14888316843743212368ULL, 11485198374334453031ULL, 79654595556ULL}),
- BasicDecimal256(
- {0ULL, 1309215847755710752ULL, 4171519301087220622ULL, 796545955566ULL}),
- BasicDecimal256(
- {0ULL, 13092158477557107520ULL, 4821704863453102988ULL, 7965459555662ULL}),
- BasicDecimal256(
- {0ULL, 1794376259604213888ULL, 11323560487111926655ULL, 79654595556622ULL}),
- BasicDecimal256(
- {0ULL, 17943762596042138880ULL, 2555140428861956854ULL, 796545955566226ULL}),
- BasicDecimal256(
- {0ULL, 13416929297035424256ULL, 7104660214910016933ULL, 7965459555662261ULL}),
- BasicDecimal256(
- {0ULL, 5042084454387381248ULL, 15706369927971514489ULL, 79654595556622613ULL}),
- BasicDecimal256(
- {0ULL, 13527356396454709248ULL, 9489746690038731964ULL, 796545955566226138ULL})};
-
+static const BasicDecimal256 ScaleMultipliersDecimal256[] = {
+ BasicDecimal256({1ULL, 0ULL, 0ULL, 0ULL}),
+ BasicDecimal256({10ULL, 0ULL, 0ULL, 0ULL}),
+ BasicDecimal256({100ULL, 0ULL, 0ULL, 0ULL}),
+ BasicDecimal256({1000ULL, 0ULL, 0ULL, 0ULL}),
+ BasicDecimal256({10000ULL, 0ULL, 0ULL, 0ULL}),
+ BasicDecimal256({100000ULL, 0ULL, 0ULL, 0ULL}),
+ BasicDecimal256({1000000ULL, 0ULL, 0ULL, 0ULL}),
+ BasicDecimal256({10000000ULL, 0ULL, 0ULL, 0ULL}),
+ BasicDecimal256({100000000ULL, 0ULL, 0ULL, 0ULL}),
+ BasicDecimal256({1000000000ULL, 0ULL, 0ULL, 0ULL}),
+ BasicDecimal256({10000000000ULL, 0ULL, 0ULL, 0ULL}),
+ BasicDecimal256({100000000000ULL, 0ULL, 0ULL, 0ULL}),
+ BasicDecimal256({1000000000000ULL, 0ULL, 0ULL, 0ULL}),
+ BasicDecimal256({10000000000000ULL, 0ULL, 0ULL, 0ULL}),
+ BasicDecimal256({100000000000000ULL, 0ULL, 0ULL, 0ULL}),
+ BasicDecimal256({1000000000000000ULL, 0ULL, 0ULL, 0ULL}),
+ BasicDecimal256({10000000000000000ULL, 0ULL, 0ULL, 0ULL}),
+ BasicDecimal256({100000000000000000ULL, 0ULL, 0ULL, 0ULL}),
+ BasicDecimal256({1000000000000000000ULL, 0ULL, 0ULL, 0ULL}),
+ BasicDecimal256({10000000000000000000ULL, 0ULL, 0ULL, 0ULL}),
+ BasicDecimal256({7766279631452241920ULL, 5ULL, 0ULL, 0ULL}),
+ BasicDecimal256({3875820019684212736ULL, 54ULL, 0ULL, 0ULL}),
+ BasicDecimal256({1864712049423024128ULL, 542ULL, 0ULL, 0ULL}),
+ BasicDecimal256({200376420520689664ULL, 5421ULL, 0ULL, 0ULL}),
+ BasicDecimal256({2003764205206896640ULL, 54210ULL, 0ULL, 0ULL}),
+ BasicDecimal256({1590897978359414784ULL, 542101ULL, 0ULL, 0ULL}),
+ BasicDecimal256({15908979783594147840ULL, 5421010ULL, 0ULL, 0ULL}),
+ BasicDecimal256({11515845246265065472ULL, 54210108ULL, 0ULL, 0ULL}),
+ BasicDecimal256({4477988020393345024ULL, 542101086ULL, 0ULL, 0ULL}),
+ BasicDecimal256({7886392056514347008ULL, 5421010862ULL, 0ULL, 0ULL}),
+ BasicDecimal256({5076944270305263616ULL, 54210108624ULL, 0ULL, 0ULL}),
+ BasicDecimal256({13875954555633532928ULL, 542101086242ULL, 0ULL, 0ULL}),
+ BasicDecimal256({9632337040368467968ULL, 5421010862427ULL, 0ULL, 0ULL}),
+ BasicDecimal256({4089650035136921600ULL, 54210108624275ULL, 0ULL, 0ULL}),
+ BasicDecimal256({4003012203950112768ULL, 542101086242752ULL, 0ULL, 0ULL}),
+ BasicDecimal256({3136633892082024448ULL, 5421010862427522ULL, 0ULL, 0ULL}),
+ BasicDecimal256({12919594847110692864ULL, 54210108624275221ULL, 0ULL, 0ULL}),
+ BasicDecimal256({68739955140067328ULL, 542101086242752217ULL, 0ULL, 0ULL}),
+ BasicDecimal256({687399551400673280ULL, 5421010862427522170ULL, 0ULL, 0ULL}),
+ BasicDecimal256({6873995514006732800ULL, 17316620476856118468ULL, 2ULL, 0ULL}),
+ BasicDecimal256({13399722918938673152ULL, 7145508105175220139ULL, 29ULL, 0ULL}),
+ BasicDecimal256({4870020673419870208ULL, 16114848830623546549ULL, 293ULL, 0ULL}),
+ BasicDecimal256({11806718586779598848ULL, 13574535716559052564ULL, 2938ULL, 0ULL}),
+ BasicDecimal256({7386721425538678784ULL, 6618148649623664334ULL, 29387ULL, 0ULL}),
+ BasicDecimal256({80237960548581376ULL, 10841254275107988496ULL, 293873ULL, 0ULL}),
+ BasicDecimal256({802379605485813760ULL, 16178822382532126880ULL, 2938735ULL, 0ULL}),
+ BasicDecimal256({8023796054858137600ULL, 14214271235644855872ULL, 29387358ULL, 0ULL}),
+ BasicDecimal256(
+ {6450984253743169536ULL, 13015503840481697412ULL, 293873587ULL, 0ULL}),
+ BasicDecimal256(
+ {9169610316303040512ULL, 1027829888850112811ULL, 2938735877ULL, 0ULL}),
+ BasicDecimal256(
+ {17909126868192198656ULL, 10278298888501128114ULL, 29387358770ULL, 0ULL}),
+ BasicDecimal256(
+ {13070572018536022016ULL, 10549268516463523069ULL, 293873587705ULL, 0ULL}),
+ BasicDecimal256(
+ {1578511669393358848ULL, 13258964796087472617ULL, 2938735877055ULL, 0ULL}),
+ BasicDecimal256(
+ {15785116693933588480ULL, 3462439444907864858ULL, 29387358770557ULL, 0ULL}),
+ BasicDecimal256(
+ {10277214349659471872ULL, 16177650375369096972ULL, 293873587705571ULL, 0ULL}),
+ BasicDecimal256(
+ {10538423128046960640ULL, 14202551164014556797ULL, 2938735877055718ULL, 0ULL}),
+ BasicDecimal256(
+ {13150510911921848320ULL, 12898303124178706663ULL, 29387358770557187ULL, 0ULL}),
+ BasicDecimal256(
+ {2377900603251621888ULL, 18302566799529756941ULL, 293873587705571876ULL, 0ULL}),
+ BasicDecimal256(
+ {5332261958806667264ULL, 17004971331911604867ULL, 2938735877055718769ULL, 0ULL}),
+ BasicDecimal256(
+ {16429131440647569408ULL, 4029016655730084128ULL, 10940614696847636083ULL, 1ULL}),
+ BasicDecimal256({16717361816799281152ULL, 3396678409881738056ULL,
+ 17172426599928602752ULL, 15ULL}),
+ BasicDecimal256({1152921504606846976ULL, 15520040025107828953ULL,
+ 5703569335900062977ULL, 159ULL}),
+ BasicDecimal256({11529215046068469760ULL, 7626447661401876602ULL,
+ 1695461137871974930ULL, 1593ULL}),
+ BasicDecimal256({4611686018427387904ULL, 2477500319180559562ULL,
+ 16954611378719749304ULL, 15930ULL}),
+ BasicDecimal256({9223372036854775808ULL, 6328259118096044006ULL,
+ 3525417123811528497ULL, 159309ULL}),
+ BasicDecimal256({0ULL, 7942358959831785217ULL, 16807427164405733357ULL, 1593091ULL}),
+ BasicDecimal256({0ULL, 5636613303479645706ULL, 2053574980671369030ULL, 15930919ULL}),
+ BasicDecimal256({0ULL, 1025900813667802212ULL, 2089005733004138687ULL, 159309191ULL}),
+ BasicDecimal256(
+ {0ULL, 10259008136678022120ULL, 2443313256331835254ULL, 1593091911ULL}),
+ BasicDecimal256(
+ {0ULL, 10356360998232463120ULL, 5986388489608800929ULL, 15930919111ULL}),
+ BasicDecimal256(
+ {0ULL, 11329889613776873120ULL, 4523652674959354447ULL, 159309191113ULL}),
+ BasicDecimal256(
+ {0ULL, 2618431695511421504ULL, 8343038602174441244ULL, 1593091911132ULL}),
+ BasicDecimal256(
+ {0ULL, 7737572881404663424ULL, 9643409726906205977ULL, 15930919111324ULL}),
+ BasicDecimal256(
+ {0ULL, 3588752519208427776ULL, 4200376900514301694ULL, 159309191113245ULL}),
+ BasicDecimal256(
+ {0ULL, 17440781118374726144ULL, 5110280857723913709ULL, 1593091911132452ULL}),
+ BasicDecimal256(
+ {0ULL, 8387114520361296896ULL, 14209320429820033867ULL, 15930919111324522ULL}),
+ BasicDecimal256(
+ {0ULL, 10084168908774762496ULL, 12965995782233477362ULL, 159309191113245227ULL}),
+ BasicDecimal256(
+ {0ULL, 8607968719199866880ULL, 532749306367912313ULL, 1593091911132452277ULL})};
+
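+// Each entry i stores 10^i / 2, the rounding threshold used by ReduceScaleBy.
+// As a quick check, entry 20 encodes 5 * 10^19:
+// 2 * 2^64 + 13106511852580896768 = 50000000000000000000.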
+static const BasicDecimal256 ScaleMultipliersHalfDecimal256[] = {
+ BasicDecimal256({0ULL, 0ULL, 0ULL, 0ULL}),
+ BasicDecimal256({5ULL, 0ULL, 0ULL, 0ULL}),
+ BasicDecimal256({50ULL, 0ULL, 0ULL, 0ULL}),
+ BasicDecimal256({500ULL, 0ULL, 0ULL, 0ULL}),
+ BasicDecimal256({5000ULL, 0ULL, 0ULL, 0ULL}),
+ BasicDecimal256({50000ULL, 0ULL, 0ULL, 0ULL}),
+ BasicDecimal256({500000ULL, 0ULL, 0ULL, 0ULL}),
+ BasicDecimal256({5000000ULL, 0ULL, 0ULL, 0ULL}),
+ BasicDecimal256({50000000ULL, 0ULL, 0ULL, 0ULL}),
+ BasicDecimal256({500000000ULL, 0ULL, 0ULL, 0ULL}),
+ BasicDecimal256({5000000000ULL, 0ULL, 0ULL, 0ULL}),
+ BasicDecimal256({50000000000ULL, 0ULL, 0ULL, 0ULL}),
+ BasicDecimal256({500000000000ULL, 0ULL, 0ULL, 0ULL}),
+ BasicDecimal256({5000000000000ULL, 0ULL, 0ULL, 0ULL}),
+ BasicDecimal256({50000000000000ULL, 0ULL, 0ULL, 0ULL}),
+ BasicDecimal256({500000000000000ULL, 0ULL, 0ULL, 0ULL}),
+ BasicDecimal256({5000000000000000ULL, 0ULL, 0ULL, 0ULL}),
+ BasicDecimal256({50000000000000000ULL, 0ULL, 0ULL, 0ULL}),
+ BasicDecimal256({500000000000000000ULL, 0ULL, 0ULL, 0ULL}),
+ BasicDecimal256({5000000000000000000ULL, 0ULL, 0ULL, 0ULL}),
+ BasicDecimal256({13106511852580896768ULL, 2ULL, 0ULL, 0ULL}),
+ BasicDecimal256({1937910009842106368ULL, 27ULL, 0ULL, 0ULL}),
+ BasicDecimal256({932356024711512064ULL, 271ULL, 0ULL, 0ULL}),
+ BasicDecimal256({9323560247115120640ULL, 2710ULL, 0ULL, 0ULL}),
+ BasicDecimal256({1001882102603448320ULL, 27105ULL, 0ULL, 0ULL}),
+ BasicDecimal256({10018821026034483200ULL, 271050ULL, 0ULL, 0ULL}),
+ BasicDecimal256({7954489891797073920ULL, 2710505ULL, 0ULL, 0ULL}),
+ BasicDecimal256({5757922623132532736ULL, 27105054ULL, 0ULL, 0ULL}),
+ BasicDecimal256({2238994010196672512ULL, 271050543ULL, 0ULL, 0ULL}),
+ BasicDecimal256({3943196028257173504ULL, 2710505431ULL, 0ULL, 0ULL}),
+ BasicDecimal256({2538472135152631808ULL, 27105054312ULL, 0ULL, 0ULL}),
+ BasicDecimal256({6937977277816766464ULL, 271050543121ULL, 0ULL, 0ULL}),
+ BasicDecimal256({14039540557039009792ULL, 2710505431213ULL, 0ULL, 0ULL}),
+ BasicDecimal256({11268197054423236608ULL, 27105054312137ULL, 0ULL, 0ULL}),
+ BasicDecimal256({2001506101975056384ULL, 271050543121376ULL, 0ULL, 0ULL}),
+ BasicDecimal256({1568316946041012224ULL, 2710505431213761ULL, 0ULL, 0ULL}),
+ BasicDecimal256({15683169460410122240ULL, 27105054312137610ULL, 0ULL, 0ULL}),
+ BasicDecimal256({9257742014424809472ULL, 271050543121376108ULL, 0ULL, 0ULL}),
+ BasicDecimal256({343699775700336640ULL, 2710505431213761085ULL, 0ULL, 0ULL}),
+ BasicDecimal256({3436997757003366400ULL, 8658310238428059234ULL, 1ULL, 0ULL}),
+ BasicDecimal256({15923233496324112384ULL, 12796126089442385877ULL, 14ULL, 0ULL}),
+ BasicDecimal256({11658382373564710912ULL, 17280796452166549082ULL, 146ULL, 0ULL}),
+ BasicDecimal256({5903359293389799424ULL, 6787267858279526282ULL, 1469ULL, 0ULL}),
+ BasicDecimal256({3693360712769339392ULL, 12532446361666607975ULL, 14693ULL, 0ULL}),
+ BasicDecimal256({40118980274290688ULL, 14643999174408770056ULL, 146936ULL, 0ULL}),
+ BasicDecimal256({401189802742906880ULL, 17312783228120839248ULL, 1469367ULL, 0ULL}),
+ BasicDecimal256({4011898027429068800ULL, 7107135617822427936ULL, 14693679ULL, 0ULL}),
+ BasicDecimal256(
+ {3225492126871584768ULL, 15731123957095624514ULL, 146936793ULL, 0ULL}),
+ BasicDecimal256(
+ {13808177195006296064ULL, 9737286981279832213ULL, 1469367938ULL, 0ULL}),
+ BasicDecimal256(
+ {8954563434096099328ULL, 5139149444250564057ULL, 14693679385ULL, 0ULL}),
+ BasicDecimal256(
+ {15758658046122786816ULL, 14498006295086537342ULL, 146936793852ULL, 0ULL}),
+ BasicDecimal256(
+ {10012627871551455232ULL, 15852854434898512116ULL, 1469367938527ULL, 0ULL}),
+ BasicDecimal256(
+ {7892558346966794240ULL, 10954591759308708237ULL, 14693679385278ULL, 0ULL}),
+ BasicDecimal256(
+ {5138607174829735936ULL, 17312197224539324294ULL, 146936793852785ULL, 0ULL}),
+ BasicDecimal256(
+ {14492583600878256128ULL, 7101275582007278398ULL, 1469367938527859ULL, 0ULL}),
+ BasicDecimal256(
+ {15798627492815699968ULL, 15672523598944129139ULL, 14693679385278593ULL, 0ULL}),
+ BasicDecimal256(
+ {10412322338480586752ULL, 9151283399764878470ULL, 146936793852785938ULL, 0ULL}),
+ BasicDecimal256(
+ {11889503016258109440ULL, 17725857702810578241ULL, 1469367938527859384ULL, 0ULL}),
+ BasicDecimal256(
+ {8214565720323784704ULL, 11237880364719817872ULL, 14693679385278593849ULL, 0ULL}),
+ BasicDecimal256(
+ {8358680908399640576ULL, 1698339204940869028ULL, 17809585336819077184ULL, 7ULL}),
+ BasicDecimal256({9799832789158199296ULL, 16983392049408690284ULL,
+ 12075156704804807296ULL, 79ULL}),
+ BasicDecimal256({5764607523034234880ULL, 3813223830700938301ULL,
+ 10071102605790763273ULL, 796ULL}),
+ BasicDecimal256({2305843009213693952ULL, 1238750159590279781ULL,
+ 8477305689359874652ULL, 7965ULL}),
+ BasicDecimal256({4611686018427387904ULL, 12387501595902797811ULL,
+ 10986080598760540056ULL, 79654ULL}),
+ BasicDecimal256({9223372036854775808ULL, 13194551516770668416ULL,
+ 17627085619057642486ULL, 796545ULL}),
+ BasicDecimal256({0ULL, 2818306651739822853ULL, 10250159527190460323ULL, 7965459ULL}),
+ BasicDecimal256({0ULL, 9736322443688676914ULL, 10267874903356845151ULL, 79654595ULL}),
+ BasicDecimal256(
+ {0ULL, 5129504068339011060ULL, 10445028665020693435ULL, 796545955ULL}),
+ BasicDecimal256(
+ {0ULL, 14401552535971007368ULL, 12216566281659176272ULL, 7965459555ULL}),
+ BasicDecimal256(
+ {0ULL, 14888316843743212368ULL, 11485198374334453031ULL, 79654595556ULL}),
+ BasicDecimal256(
+ {0ULL, 1309215847755710752ULL, 4171519301087220622ULL, 796545955566ULL}),
+ BasicDecimal256(
+ {0ULL, 13092158477557107520ULL, 4821704863453102988ULL, 7965459555662ULL}),
+ BasicDecimal256(
+ {0ULL, 1794376259604213888ULL, 11323560487111926655ULL, 79654595556622ULL}),
+ BasicDecimal256(
+ {0ULL, 17943762596042138880ULL, 2555140428861956854ULL, 796545955566226ULL}),
+ BasicDecimal256(
+ {0ULL, 13416929297035424256ULL, 7104660214910016933ULL, 7965459555662261ULL}),
+ BasicDecimal256(
+ {0ULL, 5042084454387381248ULL, 15706369927971514489ULL, 79654595556622613ULL}),
+ BasicDecimal256(
+ {0ULL, 13527356396454709248ULL, 9489746690038731964ULL, 796545955566226138ULL})};
+
#ifdef ARROW_USE_NATIVE_INT128
static constexpr uint64_t kInt64Mask = 0xFFFFFFFFFFFFFFFF;
#else
-static constexpr uint64_t kInt32Mask = 0xFFFFFFFF;
+static constexpr uint64_t kInt32Mask = 0xFFFFFFFF;
#endif
// same as ScaleMultipliers[38] - 1
@@ -468,127 +468,127 @@ BasicDecimal128& BasicDecimal128::operator>>=(uint32_t bits) {
namespace {
-// Convenience wrapper type over 128 bit unsigned integers. We opt not to
-// replace the uint128_t type in int128_internal.h because it would require
-// significantly more implementation work to be done. This class merely
-// provides the minimum necessary set of functions to perform 128+ bit
-// multiplication operations when there may or may not be native support.
+// Convenience wrapper type over 128 bit unsigned integers. We opt not to
+// replace the uint128_t type in int128_internal.h because it would require
+// significantly more implementation work to be done. This class merely
+// provides the minimum necessary set of functions to perform 128+ bit
+// multiplication operations when there may or may not be native support.
#ifdef ARROW_USE_NATIVE_INT128
-struct uint128_t {
- uint128_t() {}
- uint128_t(uint64_t hi, uint64_t lo) : val_((static_cast<__uint128_t>(hi) << 64) | lo) {}
- explicit uint128_t(const BasicDecimal128& decimal) {
- val_ = (static_cast<__uint128_t>(decimal.high_bits()) << 64) | decimal.low_bits();
- }
-
- explicit uint128_t(uint64_t value) : val_(value) {}
-
- uint64_t hi() { return val_ >> 64; }
- uint64_t lo() { return val_ & kInt64Mask; }
-
- uint128_t& operator+=(const uint128_t& other) {
- val_ += other.val_;
- return *this;
- }
-
- uint128_t& operator*=(const uint128_t& other) {
- val_ *= other.val_;
- return *this;
- }
-
- __uint128_t val_;
-};
-
+struct uint128_t {
+ uint128_t() {}
+ uint128_t(uint64_t hi, uint64_t lo) : val_((static_cast<__uint128_t>(hi) << 64) | lo) {}
+ explicit uint128_t(const BasicDecimal128& decimal) {
+ val_ = (static_cast<__uint128_t>(decimal.high_bits()) << 64) | decimal.low_bits();
+ }
+
+ explicit uint128_t(uint64_t value) : val_(value) {}
+
+ uint64_t hi() { return val_ >> 64; }
+ uint64_t lo() { return val_ & kInt64Mask; }
+
+ uint128_t& operator+=(const uint128_t& other) {
+ val_ += other.val_;
+ return *this;
+ }
+
+ uint128_t& operator*=(const uint128_t& other) {
+ val_ *= other.val_;
+ return *this;
+ }
+
+ __uint128_t val_;
+};
+
#else
-// Multiply two 64 bit word components into a 128 bit result, with high bits
-// stored in hi and low bits in lo.
-inline void ExtendAndMultiply(uint64_t x, uint64_t y, uint64_t* hi, uint64_t* lo) {
- // Perform multiplication on two 64 bit words x and y into a 128 bit result
+// Multiply two 64 bit word components into a 128 bit result, with high bits
+// stored in hi and low bits in lo.
+inline void ExtendAndMultiply(uint64_t x, uint64_t y, uint64_t* hi, uint64_t* lo) {
+ // Perform multiplication on two 64 bit words x and y into a 128 bit result
// by splitting up x and y into 32 bit high/low bit components,
// allowing us to represent the multiplication as
// x * y = x_lo * y_lo + x_hi * y_lo * 2^32 + y_hi * x_lo * 2^32
- // + x_hi * y_hi * 2^64
+ // + x_hi * y_hi * 2^64
//
- // Now, consider the final output as lo_lo || lo_hi || hi_lo || hi_hi
+ // Now, consider the final output as lo_lo || lo_hi || hi_lo || hi_hi
// Therefore,
// lo_lo is (x_lo * y_lo)_lo,
// lo_hi is ((x_lo * y_lo)_hi + (x_hi * y_lo)_lo + (x_lo * y_hi)_lo)_lo,
// hi_lo is ((x_hi * y_hi)_lo + (x_hi * y_lo)_hi + (x_lo * y_hi)_hi)_hi,
// hi_hi is (x_hi * y_hi)_hi
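+ // Worked example: x = (1ULL << 32) + 3 and y = (1ULL << 32) + 5 give
+ // t = 15, u = 5 and v = 8, so *hi = 1 and *lo = (8ULL << 32) + 15,
+ // matching x * y = 2^64 + 8 * 2^32 + 15.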
- const uint64_t x_lo = x & kInt32Mask;
- const uint64_t y_lo = y & kInt32Mask;
+ const uint64_t x_lo = x & kInt32Mask;
+ const uint64_t y_lo = y & kInt32Mask;
const uint64_t x_hi = x >> 32;
const uint64_t y_hi = y >> 32;
const uint64_t t = x_lo * y_lo;
- const uint64_t t_lo = t & kInt32Mask;
+ const uint64_t t_lo = t & kInt32Mask;
const uint64_t t_hi = t >> 32;
const uint64_t u = x_hi * y_lo + t_hi;
- const uint64_t u_lo = u & kInt32Mask;
+ const uint64_t u_lo = u & kInt32Mask;
const uint64_t u_hi = u >> 32;
const uint64_t v = x_lo * y_hi + u_lo;
const uint64_t v_hi = v >> 32;
*hi = x_hi * y_hi + u_hi + v_hi;
- *lo = (v << 32) + t_lo;
-}
-
-struct uint128_t {
- uint128_t() {}
- uint128_t(uint64_t hi, uint64_t lo) : hi_(hi), lo_(lo) {}
- explicit uint128_t(const BasicDecimal128& decimal) {
- hi_ = decimal.high_bits();
- lo_ = decimal.low_bits();
- }
-
- uint64_t hi() const { return hi_; }
- uint64_t lo() const { return lo_; }
-
- uint128_t& operator+=(const uint128_t& other) {
- // To deduce the carry bit, we perform "65 bit" addition on the low bits and
- // see if the resulting high bit is 1. This is accomplished by shifting the
- // low bits to the right by 1 (chopping off the lowest bit), then adding 1 if the
- // result of adding the two chopped bits would have produced a carry.
- uint64_t carry = (((lo_ & other.lo_) & 1) + (lo_ >> 1) + (other.lo_ >> 1)) >> 63;
- hi_ += other.hi_ + carry;
- lo_ += other.lo_;
- return *this;
- }
-
- uint128_t& operator*=(const uint128_t& other) {
- uint128_t r;
- ExtendAndMultiply(lo_, other.lo_, &r.hi_, &r.lo_);
- r.hi_ += (hi_ * other.lo_) + (lo_ * other.hi_);
- *this = r;
- return *this;
- }
-
- uint64_t hi_;
- uint64_t lo_;
-};
+ *lo = (v << 32) + t_lo;
+}
+
+struct uint128_t {
+ uint128_t() {}
+ uint128_t(uint64_t hi, uint64_t lo) : hi_(hi), lo_(lo) {}
+ explicit uint128_t(const BasicDecimal128& decimal) {
+ hi_ = decimal.high_bits();
+ lo_ = decimal.low_bits();
+ }
+
+ uint64_t hi() const { return hi_; }
+ uint64_t lo() const { return lo_; }
+
+ uint128_t& operator+=(const uint128_t& other) {
+ // To deduce the carry bit, we perform "65 bit" addition on the low bits and
+ // see if the resulting high bit is 1. This is accomplished by shifting the
+ // low bits to the right by 1 (chopping off the lowest bit), then adding 1 if the
+ // result of adding the two chopped bits would have produced a carry.
+ uint64_t carry = (((lo_ & other.lo_) & 1) + (lo_ >> 1) + (other.lo_ >> 1)) >> 63;
+ hi_ += other.hi_ + carry;
+ lo_ += other.lo_;
+ return *this;
+ }
+
+ uint128_t& operator*=(const uint128_t& other) {
+ uint128_t r;
+ ExtendAndMultiply(lo_, other.lo_, &r.hi_, &r.lo_);
+ r.hi_ += (hi_ * other.lo_) + (lo_ * other.hi_);
+ *this = r;
+ return *this;
+ }
+
+ uint64_t hi_;
+ uint64_t lo_;
+};
#endif
-// Multiplies two N * 64 bit unsigned integer types, represented by a uint64_t
-// array into a same sized output. Elements in the array should be in
-// little endian order, and output will be the same. Overflow in multiplication
-// will result in the lower N * 64 bits of the result being set.
-template <int N>
-inline void MultiplyUnsignedArray(const std::array<uint64_t, N>& lh,
- const std::array<uint64_t, N>& rh,
- std::array<uint64_t, N>* result) {
- for (int j = 0; j < N; ++j) {
- uint64_t carry = 0;
- for (int i = 0; i < N - j; ++i) {
- uint128_t tmp(lh[i]);
- tmp *= uint128_t(rh[j]);
- tmp += uint128_t((*result)[i + j]);
- tmp += uint128_t(carry);
- (*result)[i + j] = tmp.lo();
- carry = tmp.hi();
- }
- }
+// Multiplies two N * 64 bit unsigned integer types, represented by a uint64_t
+// array into a same sized output. Elements in the array should be in
+// little endian order, and output will be the same. Overflow in multiplication
+// will result in the lower N * 64 bits of the result being set.
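+// For example, with N = 2 and *result zeroed (as callers here do), the loops
+// compute result[0] = lo(lh[0] * rh[0]) and result[1] = lo(lh[0] * rh[1] +
+// lh[1] * rh[0] + hi(lh[0] * rh[0])); the lh[1] * rh[1] term would land at
+// index 2 and is dropped, which is the truncation described above.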
+template <int N>
+inline void MultiplyUnsignedArray(const std::array<uint64_t, N>& lh,
+ const std::array<uint64_t, N>& rh,
+ std::array<uint64_t, N>* result) {
+ for (int j = 0; j < N; ++j) {
+ uint64_t carry = 0;
+ for (int i = 0; i < N - j; ++i) {
+ uint128_t tmp(lh[i]);
+ tmp *= uint128_t(rh[j]);
+ tmp += uint128_t((*result)[i + j]);
+ tmp += uint128_t(carry);
+ (*result)[i + j] = tmp.lo();
+ carry = tmp.hi();
+ }
+ }
}
} // namespace
@@ -599,62 +599,62 @@ BasicDecimal128& BasicDecimal128::operator*=(const BasicDecimal128& right) {
const bool negate = Sign() != right.Sign();
BasicDecimal128 x = BasicDecimal128::Abs(*this);
BasicDecimal128 y = BasicDecimal128::Abs(right);
- uint128_t r(x);
- r *= uint128_t{y};
- high_bits_ = r.hi();
- low_bits_ = r.lo();
+ uint128_t r(x);
+ r *= uint128_t{y};
+ high_bits_ = r.hi();
+ low_bits_ = r.lo();
if (negate) {
Negate();
}
return *this;
}
-/// Expands the given little endian array of uint64_t into a big endian array of
-/// uint32_t. The value of the input array is expected to be non-negative. Leading
-/// zeros in the input are not copied into result_array.
-/// \param value_array a little endian array to represent the value
-/// \param result_array a big endian array of length N*2 to set with the value
-/// \result the output length of the array
-template <size_t N>
-static int64_t FillInArray(const std::array<uint64_t, N>& value_array,
- uint32_t* result_array) {
- int64_t next_index = 0;
- // 1st loop to find the first non-zero value in the input
- int64_t i = N - 1;
- for (; i >= 0; i--) {
- if (value_array[i] != 0) {
- if (value_array[i] <= std::numeric_limits<uint32_t>::max()) {
- result_array[next_index++] = static_cast<uint32_t>(value_array[i]);
- i--;
- }
- break;
- }
- }
- // 2nd loop to fill in the rest of the array.
- for (int64_t j = i; j >= 0; j--) {
- result_array[next_index++] = static_cast<uint32_t>(value_array[j] >> 32);
- result_array[next_index++] = static_cast<uint32_t>(value_array[j]);
- }
- return next_index;
-}
-
-/// Expands the given value into a big endian array of ints so that we can work on
-/// it. The array will be converted to an absolute value and the was_negative
+/// Expands the given little endian array of uint64_t into a big endian array of
+/// uint32_t. The value of the input array is expected to be non-negative. Leading
+/// zeros in the input are not copied into result_array.
+/// \param value_array a little endian array to represent the value
+/// \param result_array a big endian array of length N*2 to set with the value
+/// \result the output length of the array
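+/// e.g. FillInArray<2>({0x0000000500000006ULL, 0ULL}, out) skips the zero high
+/// limb and writes out[0] = 5, out[1] = 6, returning 2.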
+template <size_t N>
+static int64_t FillInArray(const std::array<uint64_t, N>& value_array,
+ uint32_t* result_array) {
+ int64_t next_index = 0;
+ // 1st loop to find the first non-zero value in the input
+ int64_t i = N - 1;
+ for (; i >= 0; i--) {
+ if (value_array[i] != 0) {
+ if (value_array[i] <= std::numeric_limits<uint32_t>::max()) {
+ result_array[next_index++] = static_cast<uint32_t>(value_array[i]);
+ i--;
+ }
+ break;
+ }
+ }
+ // 2nd loop to fill in the rest of the array.
+ for (int64_t j = i; j >= 0; j--) {
+ result_array[next_index++] = static_cast<uint32_t>(value_array[j] >> 32);
+ result_array[next_index++] = static_cast<uint32_t>(value_array[j]);
+ }
+ return next_index;
+}
+
+/// Expands the given value into a big endian array of ints so that we can work on
+/// it. The array will be converted to an absolute value and the was_negative
/// flag will be set appropriately. The array will remove leading zeros from
/// the value.
-/// \param array a big endian array of length 4 to set with the value
+/// \param array a big endian array of length 4 to set with the value
/// \param was_negative a flag for whether the value was originally negative
/// \result the output length of the array
static int64_t FillInArray(const BasicDecimal128& value, uint32_t* array,
bool& was_negative) {
- BasicDecimal128 abs_value = BasicDecimal128::Abs(value);
- was_negative = value.high_bits() < 0;
- uint64_t high = static_cast<uint64_t>(abs_value.high_bits());
- uint64_t low = abs_value.low_bits();
-
- // FillInArray(std::array<uint64_t, N>& value_array, uint32_t* result_array) is not
- // called here as the following code has better performance, to avoid regression on
- // BasicDecimal128 Division.
+ BasicDecimal128 abs_value = BasicDecimal128::Abs(value);
+ was_negative = value.high_bits() < 0;
+ uint64_t high = static_cast<uint64_t>(abs_value.high_bits());
+ uint64_t low = abs_value.low_bits();
+
+ // FillInArray(std::array<uint64_t, N>& value_array, uint32_t* result_array) is not
+ // called here as the following code has better performance, to avoid regression on
+ // BasicDecimal128 Division.
if (high != 0) {
if (high > std::numeric_limits<uint32_t>::max()) {
array[0] = static_cast<uint32_t>(high >> 32);
@@ -670,7 +670,7 @@ static int64_t FillInArray(const BasicDecimal128& value, uint32_t* array,
return 3;
}
- if (low > std::numeric_limits<uint32_t>::max()) {
+ if (low > std::numeric_limits<uint32_t>::max()) {
array[0] = static_cast<uint32_t>(low >> 32);
array[1] = static_cast<uint32_t>(low);
return 2;
@@ -684,24 +684,24 @@ static int64_t FillInArray(const BasicDecimal128& value, uint32_t* array,
return 1;
}
-/// Expands the given value into a big endian array of ints so that we can work on
-/// it. The array will be converted to an absolute value and the was_negative
-/// flag will be set appropriately. The array will remove leading zeros from
-/// the value.
-/// \param array a big endian array of length 8 to set with the value
-/// \param was_negative a flag for whether the value was originally negative
-/// \result the output length of the array
-static int64_t FillInArray(const BasicDecimal256& value, uint32_t* array,
- bool& was_negative) {
- BasicDecimal256 positive_value = value;
- was_negative = false;
- if (positive_value.IsNegative()) {
- positive_value.Negate();
- was_negative = true;
- }
- return FillInArray<4>(positive_value.little_endian_array(), array);
-}
-
+/// Expands the given value into a big endian array of ints so that we can work on
+/// it. The array will be converted to an absolute value and the was_negative
+/// flag will be set appropriately. The array will remove leading zeros from
+/// the value.
+/// \param array a big endian array of length 8 to set with the value
+/// \param was_negative a flag for whether the value was originally negative
+/// \result the output length of the array
+static int64_t FillInArray(const BasicDecimal256& value, uint32_t* array,
+ bool& was_negative) {
+ BasicDecimal256 positive_value = value;
+ was_negative = false;
+ if (positive_value.IsNegative()) {
+ positive_value.Negate();
+ was_negative = true;
+ }
+ return FillInArray<4>(positive_value.little_endian_array(), array);
+}
+
/// Shift the number in the array left by bits positions.
/// \param array the number to shift, must have length elements
/// \param length the number of entries in the array
@@ -719,7 +719,7 @@ static void ShiftArrayLeft(uint32_t* array, int64_t length, int64_t bits) {
/// \param array the number to shift, must have length elements
/// \param length the number of entries in the array
/// \param bits the number of bits to shift (0 <= bits < 32)
-static inline void ShiftArrayRight(uint32_t* array, int64_t length, int64_t bits) {
+static inline void ShiftArrayRight(uint32_t* array, int64_t length, int64_t bits) {
if (length > 0 && bits != 0) {
for (int64_t i = length - 1; i > 0; --i) {
array[i] = (array[i] >> bits) | (array[i - 1] << (32 - bits));
@@ -730,10 +730,10 @@ static inline void ShiftArrayRight(uint32_t* array, int64_t length, int64_t bits
/// \brief Fix the signs of the result and remainder at the end of the division based on
/// the signs of the dividend and divisor.
-template <class DecimalClass>
-static inline void FixDivisionSigns(DecimalClass* result, DecimalClass* remainder,
- bool dividend_was_negative,
- bool divisor_was_negative) {
+template <class DecimalClass>
+static inline void FixDivisionSigns(DecimalClass* result, DecimalClass* remainder,
+ bool dividend_was_negative,
+ bool divisor_was_negative) {
if (dividend_was_negative != divisor_was_negative) {
result->Negate();
}
@@ -743,65 +743,65 @@ static inline void FixDivisionSigns(DecimalClass* result, DecimalClass* remainde
}
}
-/// \brief Build a little endian array of uint64_t from a big endian array of uint32_t.
-template <size_t N>
-static DecimalStatus BuildFromArray(std::array<uint64_t, N>* result_array,
- const uint32_t* array, int64_t length) {
- for (int64_t i = length - 2 * N - 1; i >= 0; i--) {
- if (array[i] != 0) {
+/// \brief Build a little endian array of uint64_t from a big endian array of uint32_t.
+template <size_t N>
+static DecimalStatus BuildFromArray(std::array<uint64_t, N>* result_array,
+ const uint32_t* array, int64_t length) {
+ for (int64_t i = length - 2 * N - 1; i >= 0; i--) {
+ if (array[i] != 0) {
return DecimalStatus::kOverflow;
- }
+ }
+ }
+ int64_t next_index = length - 1;
+ size_t i = 0;
+ for (; i < N && next_index >= 0; i++) {
+ uint64_t lower_bits = array[next_index--];
+ (*result_array)[i] =
+ (next_index < 0)
+ ? lower_bits
+ : ((static_cast<uint64_t>(array[next_index--]) << 32) + lower_bits);
+ }
+ for (; i < N; i++) {
+ (*result_array)[i] = 0;
}
- int64_t next_index = length - 1;
- size_t i = 0;
- for (; i < N && next_index >= 0; i++) {
- uint64_t lower_bits = array[next_index--];
- (*result_array)[i] =
- (next_index < 0)
- ? lower_bits
- : ((static_cast<uint64_t>(array[next_index--]) << 32) + lower_bits);
- }
- for (; i < N; i++) {
- (*result_array)[i] = 0;
- }
- return DecimalStatus::kSuccess;
-}
-
-/// \brief Build a BasicDecimal128 from a big endian array of uint32_t.
-static DecimalStatus BuildFromArray(BasicDecimal128* value, const uint32_t* array,
- int64_t length) {
- std::array<uint64_t, 2> result_array;
- auto status = BuildFromArray(&result_array, array, length);
- if (status != DecimalStatus::kSuccess) {
- return status;
- }
- *value = {static_cast<int64_t>(result_array[1]), result_array[0]};
return DecimalStatus::kSuccess;
}
-/// \brief Build a BasicDecimal256 from a big endian array of uint32_t.
-static DecimalStatus BuildFromArray(BasicDecimal256* value, const uint32_t* array,
- int64_t length) {
- std::array<uint64_t, 4> result_array;
- auto status = BuildFromArray(&result_array, array, length);
- if (status != DecimalStatus::kSuccess) {
- return status;
- }
- *value = result_array;
- return DecimalStatus::kSuccess;
-}
-
+/// \brief Build a BasicDecimal128 from a big endian array of uint32_t.
+static DecimalStatus BuildFromArray(BasicDecimal128* value, const uint32_t* array,
+ int64_t length) {
+ std::array<uint64_t, 2> result_array;
+ auto status = BuildFromArray(&result_array, array, length);
+ if (status != DecimalStatus::kSuccess) {
+ return status;
+ }
+ *value = {static_cast<int64_t>(result_array[1]), result_array[0]};
+ return DecimalStatus::kSuccess;
+}
+
+/// \brief Build a BasicDecimal256 from a big endian array of uint32_t.
+static DecimalStatus BuildFromArray(BasicDecimal256* value, const uint32_t* array,
+ int64_t length) {
+ std::array<uint64_t, 4> result_array;
+ auto status = BuildFromArray(&result_array, array, length);
+ if (status != DecimalStatus::kSuccess) {
+ return status;
+ }
+ *value = result_array;
+ return DecimalStatus::kSuccess;
+}
+
/// \brief Do a division where the divisor fits into a single 32 bit value.
-template <class DecimalClass>
-static inline DecimalStatus SingleDivide(const uint32_t* dividend,
- int64_t dividend_length, uint32_t divisor,
- DecimalClass* remainder,
- bool dividend_was_negative,
- bool divisor_was_negative,
- DecimalClass* result) {
+template <class DecimalClass>
+static inline DecimalStatus SingleDivide(const uint32_t* dividend,
+ int64_t dividend_length, uint32_t divisor,
+ DecimalClass* remainder,
+ bool dividend_was_negative,
+ bool divisor_was_negative,
+ DecimalClass* result) {
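+ // Schoolbook long division in base 2^32. For example, dividend digits
+ // {1, 0} (the value 2^32) divided by 3 produce quotient digits
+ // {0, 1431655765} and remainder 1, since 2^32 == 3 * 1431655765 + 1.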
uint64_t r = 0;
- constexpr int64_t kDecimalArrayLength = DecimalClass::bit_width / sizeof(uint32_t) + 1;
- uint32_t result_array[kDecimalArrayLength];
+ constexpr int64_t kDecimalArrayLength = DecimalClass::bit_width / sizeof(uint32_t) + 1;
+ uint32_t result_array[kDecimalArrayLength];
for (int64_t j = 0; j < dividend_length; j++) {
r <<= 32;
r += dividend[j];
@@ -818,27 +818,27 @@ static inline DecimalStatus SingleDivide(const uint32_t* dividend,
return DecimalStatus::kSuccess;
}
-/// \brief Do a decimal division with remainder.
-template <class DecimalClass>
-static inline DecimalStatus DecimalDivide(const DecimalClass& dividend,
- const DecimalClass& divisor,
- DecimalClass* result, DecimalClass* remainder) {
- constexpr int64_t kDecimalArrayLength = DecimalClass::bit_width / sizeof(uint32_t);
+/// \brief Do a decimal division with remainder.
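+/// The quotient rounds toward zero and the remainder keeps the dividend's
+/// sign, e.g. dividing -21 by 5 yields -4 with remainder -1.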
+template <class DecimalClass>
+static inline DecimalStatus DecimalDivide(const DecimalClass& dividend,
+ const DecimalClass& divisor,
+ DecimalClass* result, DecimalClass* remainder) {
+ constexpr int64_t kDecimalArrayLength = DecimalClass::bit_width / sizeof(uint32_t);
// Split the dividend and divisor into integer pieces so that we can
// work on them.
- uint32_t dividend_array[kDecimalArrayLength + 1];
- uint32_t divisor_array[kDecimalArrayLength];
+ uint32_t dividend_array[kDecimalArrayLength + 1];
+ uint32_t divisor_array[kDecimalArrayLength];
bool dividend_was_negative;
bool divisor_was_negative;
// leave an extra zero before the dividend
dividend_array[0] = 0;
int64_t dividend_length =
- FillInArray(dividend, dividend_array + 1, dividend_was_negative) + 1;
+ FillInArray(dividend, dividend_array + 1, dividend_was_negative) + 1;
int64_t divisor_length = FillInArray(divisor, divisor_array, divisor_was_negative);
// Handle some of the easy cases.
if (dividend_length <= divisor_length) {
- *remainder = dividend;
+ *remainder = dividend;
*result = 0;
return DecimalStatus::kSuccess;
}
@@ -853,8 +853,8 @@ static inline DecimalStatus DecimalDivide(const DecimalClass& dividend,
}
int64_t result_length = dividend_length - divisor_length;
- uint32_t result_array[kDecimalArrayLength];
- DCHECK_LE(result_length, kDecimalArrayLength);
+ uint32_t result_array[kDecimalArrayLength];
+ DCHECK_LE(result_length, kDecimalArrayLength);
// Normalize by shifting both by a multiple of 2 so that
// the digit guessing is better. The requirement is that
@@ -933,12 +933,12 @@ static inline DecimalStatus DecimalDivide(const DecimalClass& dividend,
return DecimalStatus::kSuccess;
}
-DecimalStatus BasicDecimal128::Divide(const BasicDecimal128& divisor,
- BasicDecimal128* result,
- BasicDecimal128* remainder) const {
- return DecimalDivide(*this, divisor, result, remainder);
-}
-
+DecimalStatus BasicDecimal128::Divide(const BasicDecimal128& divisor,
+ BasicDecimal128* result,
+ BasicDecimal128* remainder) const {
+ return DecimalDivide(*this, divisor, result, remainder);
+}
+
bool operator==(const BasicDecimal128& left, const BasicDecimal128& right) {
return left.high_bits() == right.high_bits() && left.low_bits() == right.low_bits();
}
@@ -1008,13 +1008,13 @@ BasicDecimal128 operator%(const BasicDecimal128& left, const BasicDecimal128& ri
return remainder;
}
-template <class DecimalClass>
-static bool RescaleWouldCauseDataLoss(const DecimalClass& value, int32_t delta_scale,
- const DecimalClass& multiplier,
- DecimalClass* result) {
+template <class DecimalClass>
+static bool RescaleWouldCauseDataLoss(const DecimalClass& value, int32_t delta_scale,
+ const DecimalClass& multiplier,
+ DecimalClass* result) {
if (delta_scale < 0) {
DCHECK_NE(multiplier, 0);
- DecimalClass remainder;
+ DecimalClass remainder;
auto status = value.Divide(multiplier, result, &remainder);
DCHECK_EQ(status, DecimalStatus::kSuccess);
return remainder != 0;
@@ -1024,23 +1024,23 @@ static bool RescaleWouldCauseDataLoss(const DecimalClass& value, int32_t delta_s
return (value < 0) ? *result > value : *result < value;
}
-template <class DecimalClass>
-DecimalStatus DecimalRescale(const DecimalClass& value, int32_t original_scale,
- int32_t new_scale, DecimalClass* out) {
+template <class DecimalClass>
+DecimalStatus DecimalRescale(const DecimalClass& value, int32_t original_scale,
+ int32_t new_scale, DecimalClass* out) {
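+ // For example, rescaling 123 from scale 0 to scale 2 multiplies by 10^2 and
+ // yields 12300, while rescaling 123 from scale 2 to scale 0 divides by 10^2
+ // and fails, since truncating away the remainder 23 would lose data.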
DCHECK_NE(out, nullptr);
if (original_scale == new_scale) {
- *out = value;
+ *out = value;
return DecimalStatus::kSuccess;
}
const int32_t delta_scale = new_scale - original_scale;
const int32_t abs_delta_scale = std::abs(delta_scale);
- DecimalClass multiplier = DecimalClass::GetScaleMultiplier(abs_delta_scale);
+ DecimalClass multiplier = DecimalClass::GetScaleMultiplier(abs_delta_scale);
const bool rescale_would_cause_data_loss =
- RescaleWouldCauseDataLoss(value, delta_scale, multiplier, out);
+ RescaleWouldCauseDataLoss(value, delta_scale, multiplier, out);
// Fail if we overflow or truncate
if (ARROW_PREDICT_FALSE(rescale_would_cause_data_loss)) {
@@ -1050,11 +1050,11 @@ DecimalStatus DecimalRescale(const DecimalClass& value, int32_t original_scale,
return DecimalStatus::kSuccess;
}
-DecimalStatus BasicDecimal128::Rescale(int32_t original_scale, int32_t new_scale,
- BasicDecimal128* out) const {
- return DecimalRescale(*this, original_scale, new_scale, out);
-}
-
+DecimalStatus BasicDecimal128::Rescale(int32_t original_scale, int32_t new_scale,
+ BasicDecimal128* out) const {
+ return DecimalRescale(*this, original_scale, new_scale, out);
+}
+
void BasicDecimal128::GetWholeAndFraction(int scale, BasicDecimal128* whole,
BasicDecimal128* fraction) const {
DCHECK_GE(scale, 0);
@@ -1117,228 +1117,228 @@ int32_t BasicDecimal128::CountLeadingBinaryZeros() const {
}
}
-#if ARROW_LITTLE_ENDIAN
-BasicDecimal256::BasicDecimal256(const uint8_t* bytes)
- : little_endian_array_(
- std::array<uint64_t, 4>({reinterpret_cast<const uint64_t*>(bytes)[0],
- reinterpret_cast<const uint64_t*>(bytes)[1],
- reinterpret_cast<const uint64_t*>(bytes)[2],
- reinterpret_cast<const uint64_t*>(bytes)[3]})) {}
-#else
-BasicDecimal256::BasicDecimal256(const uint8_t* bytes)
- : little_endian_array_(
- std::array<uint64_t, 4>({reinterpret_cast<const uint64_t*>(bytes)[3],
- reinterpret_cast<const uint64_t*>(bytes)[2],
- reinterpret_cast<const uint64_t*>(bytes)[1],
- reinterpret_cast<const uint64_t*>(bytes)[0]})) {}
-#endif
-
-BasicDecimal256& BasicDecimal256::Negate() {
- uint64_t carry = 1;
- for (uint64_t& elem : little_endian_array_) {
- elem = ~elem + carry;
- carry &= (elem == 0);
- }
- return *this;
-}
-
-BasicDecimal256& BasicDecimal256::Abs() { return *this < 0 ? Negate() : *this; }
-
-BasicDecimal256 BasicDecimal256::Abs(const BasicDecimal256& in) {
- BasicDecimal256 result(in);
- return result.Abs();
-}
-
-BasicDecimal256& BasicDecimal256::operator+=(const BasicDecimal256& right) {
- uint64_t carry = 0;
- for (size_t i = 0; i < little_endian_array_.size(); i++) {
- const uint64_t right_value = right.little_endian_array_[i];
- uint64_t sum = right_value + carry;
- carry = 0;
- if (sum < right_value) {
- carry += 1;
- }
- sum += little_endian_array_[i];
- if (sum < little_endian_array_[i]) {
- carry += 1;
- }
- little_endian_array_[i] = sum;
- }
- return *this;
-}
-
-BasicDecimal256& BasicDecimal256::operator-=(const BasicDecimal256& right) {
- *this += -right;
- return *this;
-}
-
-BasicDecimal256& BasicDecimal256::operator<<=(uint32_t bits) {
- if (bits == 0) {
- return *this;
- }
- int cross_word_shift = bits / 64;
- if (static_cast<size_t>(cross_word_shift) >= little_endian_array_.size()) {
- little_endian_array_ = {0, 0, 0, 0};
- return *this;
- }
- uint32_t in_word_shift = bits % 64;
- for (int i = static_cast<int>(little_endian_array_.size() - 1); i >= cross_word_shift;
- i--) {
- // Account for shifts larger than 64 bits
- little_endian_array_[i] = little_endian_array_[i - cross_word_shift];
- little_endian_array_[i] <<= in_word_shift;
- if (in_word_shift != 0 && i >= cross_word_shift + 1) {
- little_endian_array_[i] |=
- little_endian_array_[i - (cross_word_shift + 1)] >> (64 - in_word_shift);
- }
- }
- for (int i = cross_word_shift - 1; i >= 0; i--) {
- little_endian_array_[i] = 0;
- }
- return *this;
-}
-
-std::array<uint8_t, 32> BasicDecimal256::ToBytes() const {
- std::array<uint8_t, 32> out{{0}};
- ToBytes(out.data());
- return out;
-}
-
-void BasicDecimal256::ToBytes(uint8_t* out) const {
- DCHECK_NE(out, nullptr);
-#if ARROW_LITTLE_ENDIAN
- reinterpret_cast<int64_t*>(out)[0] = little_endian_array_[0];
- reinterpret_cast<int64_t*>(out)[1] = little_endian_array_[1];
- reinterpret_cast<int64_t*>(out)[2] = little_endian_array_[2];
- reinterpret_cast<int64_t*>(out)[3] = little_endian_array_[3];
-#else
- reinterpret_cast<int64_t*>(out)[0] = little_endian_array_[3];
- reinterpret_cast<int64_t*>(out)[1] = little_endian_array_[2];
- reinterpret_cast<int64_t*>(out)[2] = little_endian_array_[1];
- reinterpret_cast<int64_t*>(out)[3] = little_endian_array_[0];
-#endif
-}
-
-BasicDecimal256& BasicDecimal256::operator*=(const BasicDecimal256& right) {
- // Since the max value of BasicDecimal256 is supposed to be 1e76 - 1 and the
- // min its negation, taking the absolute values here should always be safe.
- const bool negate = Sign() != right.Sign();
- BasicDecimal256 x = BasicDecimal256::Abs(*this);
- BasicDecimal256 y = BasicDecimal256::Abs(right);
-
- uint128_t r_hi;
- uint128_t r_lo;
- std::array<uint64_t, 4> res{0, 0, 0, 0};
- MultiplyUnsignedArray<4>(x.little_endian_array_, y.little_endian_array_, &res);
- little_endian_array_ = res;
- if (negate) {
- Negate();
- }
- return *this;
-}
-
-DecimalStatus BasicDecimal256::Divide(const BasicDecimal256& divisor,
- BasicDecimal256* result,
- BasicDecimal256* remainder) const {
- return DecimalDivide(*this, divisor, result, remainder);
-}
-
-DecimalStatus BasicDecimal256::Rescale(int32_t original_scale, int32_t new_scale,
- BasicDecimal256* out) const {
- return DecimalRescale(*this, original_scale, new_scale, out);
-}
-
-BasicDecimal256 BasicDecimal256::IncreaseScaleBy(int32_t increase_by) const {
- DCHECK_GE(increase_by, 0);
- DCHECK_LE(increase_by, 76);
-
- return (*this) * ScaleMultipliersDecimal256[increase_by];
-}
-
-BasicDecimal256 BasicDecimal256::ReduceScaleBy(int32_t reduce_by, bool round) const {
- DCHECK_GE(reduce_by, 0);
- DCHECK_LE(reduce_by, 76);
-
- if (reduce_by == 0) {
- return *this;
- }
-
- BasicDecimal256 divisor(ScaleMultipliersDecimal256[reduce_by]);
- BasicDecimal256 result;
- BasicDecimal256 remainder;
- auto s = Divide(divisor, &result, &remainder);
- DCHECK_EQ(s, DecimalStatus::kSuccess);
- if (round) {
- auto divisor_half = ScaleMultipliersHalfDecimal256[reduce_by];
- if (remainder.Abs() >= divisor_half) {
- if (result > 0) {
- result += 1;
- } else {
- result -= 1;
- }
- }
- }
- return result;
-}
-
-bool BasicDecimal256::FitsInPrecision(int32_t precision) const {
- DCHECK_GT(precision, 0);
- DCHECK_LE(precision, 76);
- return BasicDecimal256::Abs(*this) < ScaleMultipliersDecimal256[precision];
-}
-
-const BasicDecimal256& BasicDecimal256::GetScaleMultiplier(int32_t scale) {
- DCHECK_GE(scale, 0);
- DCHECK_LE(scale, 76);
-
- return ScaleMultipliersDecimal256[scale];
-}
-
-BasicDecimal256 operator*(const BasicDecimal256& left, const BasicDecimal256& right) {
- BasicDecimal256 result = left;
- result *= right;
- return result;
-}
-
-bool operator<(const BasicDecimal256& left, const BasicDecimal256& right) {
- const std::array<uint64_t, 4>& lhs = left.little_endian_array();
- const std::array<uint64_t, 4>& rhs = right.little_endian_array();
- return lhs[3] != rhs[3]
- ? static_cast<int64_t>(lhs[3]) < static_cast<int64_t>(rhs[3])
- : lhs[2] != rhs[2] ? lhs[2] < rhs[2]
- : lhs[1] != rhs[1] ? lhs[1] < rhs[1] : lhs[0] < rhs[0];
-}
-
-BasicDecimal256 operator-(const BasicDecimal256& operand) {
- BasicDecimal256 result(operand);
- return result.Negate();
-}
-
-BasicDecimal256 operator~(const BasicDecimal256& operand) {
- const std::array<uint64_t, 4>& arr = operand.little_endian_array();
- BasicDecimal256 result({~arr[0], ~arr[1], ~arr[2], ~arr[3]});
- return result;
-}
-
-BasicDecimal256& BasicDecimal256::operator/=(const BasicDecimal256& right) {
- BasicDecimal256 remainder;
- auto s = Divide(right, this, &remainder);
- DCHECK_EQ(s, DecimalStatus::kSuccess);
- return *this;
-}
-
-BasicDecimal256 operator+(const BasicDecimal256& left, const BasicDecimal256& right) {
- BasicDecimal256 sum = left;
- sum += right;
- return sum;
-}
-
-BasicDecimal256 operator/(const BasicDecimal256& left, const BasicDecimal256& right) {
- BasicDecimal256 remainder;
- BasicDecimal256 result;
- auto s = left.Divide(right, &result, &remainder);
- DCHECK_EQ(s, DecimalStatus::kSuccess);
- return result;
-}
-
+#if ARROW_LITTLE_ENDIAN
+BasicDecimal256::BasicDecimal256(const uint8_t* bytes)
+ : little_endian_array_(
+ std::array<uint64_t, 4>({reinterpret_cast<const uint64_t*>(bytes)[0],
+ reinterpret_cast<const uint64_t*>(bytes)[1],
+ reinterpret_cast<const uint64_t*>(bytes)[2],
+ reinterpret_cast<const uint64_t*>(bytes)[3]})) {}
+#else
+BasicDecimal256::BasicDecimal256(const uint8_t* bytes)
+ : little_endian_array_(
+ std::array<uint64_t, 4>({reinterpret_cast<const uint64_t*>(bytes)[3],
+ reinterpret_cast<const uint64_t*>(bytes)[2],
+ reinterpret_cast<const uint64_t*>(bytes)[1],
+ reinterpret_cast<const uint64_t*>(bytes)[0]})) {}
+#endif
+
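+// Two's complement negation with a rippling carry: e.g. negating {1, 0, 0, 0}
+// yields all-ones limbs (-1), and negating zero leaves zero because the carry
+// survives only while each complemented limb wraps around to 0.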
+BasicDecimal256& BasicDecimal256::Negate() {
+ uint64_t carry = 1;
+ for (uint64_t& elem : little_endian_array_) {
+ elem = ~elem + carry;
+ carry &= (elem == 0);
+ }
+ return *this;
+}
+
+BasicDecimal256& BasicDecimal256::Abs() { return *this < 0 ? Negate() : *this; }
+
+BasicDecimal256 BasicDecimal256::Abs(const BasicDecimal256& in) {
+ BasicDecimal256 result(in);
+ return result.Abs();
+}
+
+BasicDecimal256& BasicDecimal256::operator+=(const BasicDecimal256& right) {
+ uint64_t carry = 0;
+ for (size_t i = 0; i < little_endian_array_.size(); i++) {
+ const uint64_t right_value = right.little_endian_array_[i];
+ uint64_t sum = right_value + carry;
+ carry = 0;
+ if (sum < right_value) {
+ carry += 1;
+ }
+ sum += little_endian_array_[i];
+ if (sum < little_endian_array_[i]) {
+ carry += 1;
+ }
+ little_endian_array_[i] = sum;
+ }
+ return *this;
+}
+
+BasicDecimal256& BasicDecimal256::operator-=(const BasicDecimal256& right) {
+ *this += -right;
+ return *this;
+}
+
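+// For example, shifting {1, 0, 0, 0} left by 64 bits moves the value one whole
+// limb up to {0, 1, 0, 0}; shifting by 256 bits or more clears the value.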
+BasicDecimal256& BasicDecimal256::operator<<=(uint32_t bits) {
+ if (bits == 0) {
+ return *this;
+ }
+ int cross_word_shift = bits / 64;
+ if (static_cast<size_t>(cross_word_shift) >= little_endian_array_.size()) {
+ little_endian_array_ = {0, 0, 0, 0};
+ return *this;
+ }
+ uint32_t in_word_shift = bits % 64;
+ for (int i = static_cast<int>(little_endian_array_.size() - 1); i >= cross_word_shift;
+ i--) {
+ // Account for shifts larger than 64 bits
+ little_endian_array_[i] = little_endian_array_[i - cross_word_shift];
+ little_endian_array_[i] <<= in_word_shift;
+ if (in_word_shift != 0 && i >= cross_word_shift + 1) {
+ little_endian_array_[i] |=
+ little_endian_array_[i - (cross_word_shift + 1)] >> (64 - in_word_shift);
+ }
+ }
+ for (int i = cross_word_shift - 1; i >= 0; i--) {
+ little_endian_array_[i] = 0;
+ }
+ return *this;
+}
+
+std::array<uint8_t, 32> BasicDecimal256::ToBytes() const {
+ std::array<uint8_t, 32> out{{0}};
+ ToBytes(out.data());
+ return out;
+}
+
+void BasicDecimal256::ToBytes(uint8_t* out) const {
+ DCHECK_NE(out, nullptr);
+#if ARROW_LITTLE_ENDIAN
+ reinterpret_cast<int64_t*>(out)[0] = little_endian_array_[0];
+ reinterpret_cast<int64_t*>(out)[1] = little_endian_array_[1];
+ reinterpret_cast<int64_t*>(out)[2] = little_endian_array_[2];
+ reinterpret_cast<int64_t*>(out)[3] = little_endian_array_[3];
+#else
+ reinterpret_cast<int64_t*>(out)[0] = little_endian_array_[3];
+ reinterpret_cast<int64_t*>(out)[1] = little_endian_array_[2];
+ reinterpret_cast<int64_t*>(out)[2] = little_endian_array_[1];
+ reinterpret_cast<int64_t*>(out)[3] = little_endian_array_[0];
+#endif
+}
+
+BasicDecimal256& BasicDecimal256::operator*=(const BasicDecimal256& right) {
+ // Since the max value of BasicDecimal256 is supposed to be 1e76 - 1 and the
+ // min its negation, taking the absolute values here should always be safe.
+ const bool negate = Sign() != right.Sign();
+ BasicDecimal256 x = BasicDecimal256::Abs(*this);
+ BasicDecimal256 y = BasicDecimal256::Abs(right);
+
+ uint128_t r_hi;
+ uint128_t r_lo;
+ std::array<uint64_t, 4> res{0, 0, 0, 0};
+ MultiplyUnsignedArray<4>(x.little_endian_array_, y.little_endian_array_, &res);
+ little_endian_array_ = res;
+ if (negate) {
+ Negate();
+ }
+ return *this;
+}
+
+DecimalStatus BasicDecimal256::Divide(const BasicDecimal256& divisor,
+ BasicDecimal256* result,
+ BasicDecimal256* remainder) const {
+ return DecimalDivide(*this, divisor, result, remainder);
+}
+
+DecimalStatus BasicDecimal256::Rescale(int32_t original_scale, int32_t new_scale,
+ BasicDecimal256* out) const {
+ return DecimalRescale(*this, original_scale, new_scale, out);
+}
+
+BasicDecimal256 BasicDecimal256::IncreaseScaleBy(int32_t increase_by) const {
+ DCHECK_GE(increase_by, 0);
+ DCHECK_LE(increase_by, 76);
+
+ return (*this) * ScaleMultipliersDecimal256[increase_by];
+}
+
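+// For example, BasicDecimal256(12345).ReduceScaleBy(2) divides by 10^2 giving
+// 123 with remainder 45; |45| < 50 so no rounding occurs, while 12355 rounds
+// away from zero to 124 because its remainder 55 >= 50.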
+BasicDecimal256 BasicDecimal256::ReduceScaleBy(int32_t reduce_by, bool round) const {
+ DCHECK_GE(reduce_by, 0);
+ DCHECK_LE(reduce_by, 76);
+
+ if (reduce_by == 0) {
+ return *this;
+ }
+
+ BasicDecimal256 divisor(ScaleMultipliersDecimal256[reduce_by]);
+ BasicDecimal256 result;
+ BasicDecimal256 remainder;
+ auto s = Divide(divisor, &result, &remainder);
+ DCHECK_EQ(s, DecimalStatus::kSuccess);
+ if (round) {
+ auto divisor_half = ScaleMultipliersHalfDecimal256[reduce_by];
+ if (remainder.Abs() >= divisor_half) {
+ if (result > 0) {
+ result += 1;
+ } else {
+ result -= 1;
+ }
+ }
+ }
+ return result;
+}
+
+bool BasicDecimal256::FitsInPrecision(int32_t precision) const {
+ DCHECK_GT(precision, 0);
+ DCHECK_LE(precision, 76);
+ return BasicDecimal256::Abs(*this) < ScaleMultipliersDecimal256[precision];
+}
+
+const BasicDecimal256& BasicDecimal256::GetScaleMultiplier(int32_t scale) {
+ DCHECK_GE(scale, 0);
+ DCHECK_LE(scale, 76);
+
+ return ScaleMultipliersDecimal256[scale];
+}
+
+BasicDecimal256 operator*(const BasicDecimal256& left, const BasicDecimal256& right) {
+ BasicDecimal256 result = left;
+ result *= right;
+ return result;
+}
+
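+// Compare limb-wise from the most significant end; the top limb is compared as
+// signed so that, e.g., BasicDecimal256(-1) (all-ones limbs) orders below
+// BasicDecimal256(0), while the lower limbs compare as unsigned.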
+bool operator<(const BasicDecimal256& left, const BasicDecimal256& right) {
+ const std::array<uint64_t, 4>& lhs = left.little_endian_array();
+ const std::array<uint64_t, 4>& rhs = right.little_endian_array();
+ return lhs[3] != rhs[3]
+ ? static_cast<int64_t>(lhs[3]) < static_cast<int64_t>(rhs[3])
+ : lhs[2] != rhs[2] ? lhs[2] < rhs[2]
+ : lhs[1] != rhs[1] ? lhs[1] < rhs[1] : lhs[0] < rhs[0];
+}
+
+BasicDecimal256 operator-(const BasicDecimal256& operand) {
+ BasicDecimal256 result(operand);
+ return result.Negate();
+}
+
+BasicDecimal256 operator~(const BasicDecimal256& operand) {
+ const std::array<uint64_t, 4>& arr = operand.little_endian_array();
+ BasicDecimal256 result({~arr[0], ~arr[1], ~arr[2], ~arr[3]});
+ return result;
+}
+
+BasicDecimal256& BasicDecimal256::operator/=(const BasicDecimal256& right) {
+ BasicDecimal256 remainder;
+ auto s = Divide(right, this, &remainder);
+ DCHECK_EQ(s, DecimalStatus::kSuccess);
+ return *this;
+}
+
+BasicDecimal256 operator+(const BasicDecimal256& left, const BasicDecimal256& right) {
+ BasicDecimal256 sum = left;
+ sum += right;
+ return sum;
+}
+
+BasicDecimal256 operator/(const BasicDecimal256& left, const BasicDecimal256& right) {
+ BasicDecimal256 remainder;
+ BasicDecimal256 result;
+ auto s = left.Divide(right, &result, &remainder);
+ DCHECK_EQ(s, DecimalStatus::kSuccess);
+ return result;
+}
+
} // namespace arrow
diff --git a/contrib/libs/apache/arrow/cpp/src/arrow/util/basic_decimal.h b/contrib/libs/apache/arrow/cpp/src/arrow/util/basic_decimal.h
index a8f61c73c87..acc8ea4930f 100644
--- a/contrib/libs/apache/arrow/cpp/src/arrow/util/basic_decimal.h
+++ b/contrib/libs/apache/arrow/cpp/src/arrow/util/basic_decimal.h
@@ -42,8 +42,8 @@ enum class DecimalStatus {
/// streams and boost.
class ARROW_EXPORT BasicDecimal128 {
public:
- static constexpr int bit_width = 128;
-
+ static constexpr int bit_width = 128;
+
/// \brief Create a BasicDecimal128 from the two's complement representation.
constexpr BasicDecimal128(int64_t high, uint64_t low) noexcept
: low_bits_(low), high_bits_(high) {}
@@ -111,10 +111,10 @@ class ARROW_EXPORT BasicDecimal128 {
BasicDecimal128& operator>>=(uint32_t bits);
/// \brief Get the high bits of the two's complement representation of the number.
- inline constexpr int64_t high_bits() const { return high_bits_; }
+ inline constexpr int64_t high_bits() const { return high_bits_; }
/// \brief Get the low bits of the two's complement representation of the number.
- inline constexpr uint64_t low_bits() const { return low_bits_; }
+ inline constexpr uint64_t low_bits() const { return low_bits_; }
/// \brief Return the raw bytes of the value in native-endian byte order.
std::array<uint8_t, 16> ToBytes() const;
@@ -180,163 +180,163 @@ ARROW_EXPORT BasicDecimal128 operator/(const BasicDecimal128& left,
ARROW_EXPORT BasicDecimal128 operator%(const BasicDecimal128& left,
const BasicDecimal128& right);
-class ARROW_EXPORT BasicDecimal256 {
- private:
- // Due to a bug in clang, we have to declare the extend method prior to its
- // usage.
- template <typename T>
- inline static constexpr uint64_t extend(T low_bits) noexcept {
- return low_bits >= T() ? uint64_t{0} : ~uint64_t{0};
- }
-
- public:
- static constexpr int bit_width = 256;
-
- /// \brief Create a BasicDecimal256 from the two's complement representation.
- constexpr BasicDecimal256(const std::array<uint64_t, 4>& little_endian_array) noexcept
- : little_endian_array_(little_endian_array) {}
-
- /// \brief Empty constructor creates a BasicDecimal256 with a value of 0.
- constexpr BasicDecimal256() noexcept : little_endian_array_({0, 0, 0, 0}) {}
-
- /// \brief Convert any integer value into a BasicDecimal256.
- template <typename T,
- typename = typename std::enable_if<
- std::is_integral<T>::value && (sizeof(T) <= sizeof(uint64_t)), T>::type>
- constexpr BasicDecimal256(T value) noexcept
- : little_endian_array_({static_cast<uint64_t>(value), extend(value), extend(value),
- extend(value)}) {}
-
- constexpr BasicDecimal256(const BasicDecimal128& value) noexcept
- : little_endian_array_({value.low_bits(), static_cast<uint64_t>(value.high_bits()),
- extend(value.high_bits()), extend(value.high_bits())}) {}
-
- /// \brief Create a BasicDecimal256 from an array of bytes. Bytes are assumed to be in
- /// native-endian byte order.
- explicit BasicDecimal256(const uint8_t* bytes);
-
- /// \brief Negate the current value (in-place)
- BasicDecimal256& Negate();
-
- /// \brief Absolute value (in-place)
- BasicDecimal256& Abs();
-
- /// \brief Absolute value
- static BasicDecimal256 Abs(const BasicDecimal256& left);
-
- /// \brief Add a number to this one. The result is truncated to 256 bits.
- BasicDecimal256& operator+=(const BasicDecimal256& right);
-
- /// \brief Subtract a number from this one. The result is truncated to 256 bits.
- BasicDecimal256& operator-=(const BasicDecimal256& right);
-
- /// \brief Get the bits of the two's complement representation of the number. The 4
- /// elements are in little endian order. The bits within each uint64_t element are in
- /// native endian order. For example,
- /// BasicDecimal256(123).little_endian_array() = {123, 0, 0, 0};
- /// BasicDecimal256(-2).little_endian_array() = {0xFF...FE, 0xFF...FF, 0xFF...FF,
- /// 0xFF...FF}.
- inline const std::array<uint64_t, 4>& little_endian_array() const {
- return little_endian_array_;
- }
-
- /// \brief Get the lowest bits of the two's complement representation of the number.
- inline constexpr uint64_t low_bits() const { return little_endian_array_[0]; }
-
- /// \brief Return the raw bytes of the value in native-endian byte order.
- std::array<uint8_t, 32> ToBytes() const;
- void ToBytes(uint8_t* out) const;
-
- /// \brief Scale multiplier for given scale value.
- static const BasicDecimal256& GetScaleMultiplier(int32_t scale);
-
- /// \brief Convert BasicDecimal256 from one scale to another
- DecimalStatus Rescale(int32_t original_scale, int32_t new_scale,
- BasicDecimal256* out) const;
-
- /// \brief Scale up.
- BasicDecimal256 IncreaseScaleBy(int32_t increase_by) const;
-
- /// \brief Scale down.
- /// - If 'round' is true, the right-most digits are dropped and the result value is
- /// rounded up (+1 for positive, -1 for negative) based on the value of the
- /// dropped digits (>= 10^reduce_by / 2).
- /// - If 'round' is false, the right-most digits are simply dropped.
- BasicDecimal256 ReduceScaleBy(int32_t reduce_by, bool round = true) const;
-
- /// \brief Whether this number fits in the given precision
- ///
- /// Return true if the number of significant digits is less or equal to `precision`.
- bool FitsInPrecision(int32_t precision) const;
-
- inline int64_t Sign() const {
- return 1 | (static_cast<int64_t>(little_endian_array_[3]) >> 63);
- }
-
- inline int64_t IsNegative() const {
- return static_cast<int64_t>(little_endian_array_[3]) < 0;
- }
-
- /// \brief Multiply this number by another number. The result is truncated to 256 bits.
- BasicDecimal256& operator*=(const BasicDecimal256& right);
-
- /// Divide this number by right and return the result.
- ///
- /// This operation is not destructive.
- /// The answer rounds to zero. Signs work like:
- /// 21 / 5 -> 4, 1
- /// -21 / 5 -> -4, -1
- /// 21 / -5 -> -4, 1
- /// -21 / -5 -> 4, -1
- /// \param[in] divisor the number to divide by
- /// \param[out] result the quotient
- /// \param[out] remainder the remainder after the division
- DecimalStatus Divide(const BasicDecimal256& divisor, BasicDecimal256* result,
- BasicDecimal256* remainder) const;
-
- /// \brief Shift left by the given number of bits.
- BasicDecimal256& operator<<=(uint32_t bits);
-
- /// \brief In-place division.
- BasicDecimal256& operator/=(const BasicDecimal256& right);
-
- private:
- std::array<uint64_t, 4> little_endian_array_;
-};
-
-ARROW_EXPORT inline bool operator==(const BasicDecimal256& left,
- const BasicDecimal256& right) {
- return left.little_endian_array() == right.little_endian_array();
-}
-
-ARROW_EXPORT inline bool operator!=(const BasicDecimal256& left,
- const BasicDecimal256& right) {
- return left.little_endian_array() != right.little_endian_array();
-}
-
-ARROW_EXPORT bool operator<(const BasicDecimal256& left, const BasicDecimal256& right);
-
-ARROW_EXPORT inline bool operator<=(const BasicDecimal256& left,
- const BasicDecimal256& right) {
- return !operator<(right, left);
-}
-
-ARROW_EXPORT inline bool operator>(const BasicDecimal256& left,
- const BasicDecimal256& right) {
- return operator<(right, left);
-}
-
-ARROW_EXPORT inline bool operator>=(const BasicDecimal256& left,
- const BasicDecimal256& right) {
- return !operator<(left, right);
-}
-
-ARROW_EXPORT BasicDecimal256 operator-(const BasicDecimal256& operand);
-ARROW_EXPORT BasicDecimal256 operator~(const BasicDecimal256& operand);
-ARROW_EXPORT BasicDecimal256 operator+(const BasicDecimal256& left,
- const BasicDecimal256& right);
-ARROW_EXPORT BasicDecimal256 operator*(const BasicDecimal256& left,
- const BasicDecimal256& right);
-ARROW_EXPORT BasicDecimal256 operator/(const BasicDecimal256& left,
- const BasicDecimal256& right);
+class ARROW_EXPORT BasicDecimal256 {
+ private:
+ // Due to a bug in clang, we have to declare the extend method prior to its
+ // usage.
+ template <typename T>
+ inline static constexpr uint64_t extend(T low_bits) noexcept {
+ return low_bits >= T() ? uint64_t{0} : ~uint64_t{0};
+ }
+
+ public:
+ static constexpr int bit_width = 256;
+
+ /// \brief Create a BasicDecimal256 from the two's complement representation.
+ constexpr BasicDecimal256(const std::array<uint64_t, 4>& little_endian_array) noexcept
+ : little_endian_array_(little_endian_array) {}
+
+ /// \brief Default constructor creates a BasicDecimal256 with a value of 0.
+ constexpr BasicDecimal256() noexcept : little_endian_array_({0, 0, 0, 0}) {}
+
+ /// \brief Convert any integer value into a BasicDecimal256.
+ template <typename T,
+ typename = typename std::enable_if<
+ std::is_integral<T>::value && (sizeof(T) <= sizeof(uint64_t)), T>::type>
+ constexpr BasicDecimal256(T value) noexcept
+ : little_endian_array_({static_cast<uint64_t>(value), extend(value), extend(value),
+ extend(value)}) {}
+
+ constexpr BasicDecimal256(const BasicDecimal128& value) noexcept
+ : little_endian_array_({value.low_bits(), static_cast<uint64_t>(value.high_bits()),
+ extend(value.high_bits()), extend(value.high_bits())}) {}
+
+ /// \brief Create a BasicDecimal256 from an array of bytes. Bytes are assumed to be in
+ /// native-endian byte order.
+ explicit BasicDecimal256(const uint8_t* bytes);
+
+ /// \brief Negate the current value (in-place)
+ BasicDecimal256& Negate();
+
+ /// \brief Absolute value (in-place)
+ BasicDecimal256& Abs();
+
+ /// \brief Absolute value
+ static BasicDecimal256 Abs(const BasicDecimal256& left);
+
+ /// \brief Add a number to this one. The result is truncated to 256 bits.
+ BasicDecimal256& operator+=(const BasicDecimal256& right);
+
+ /// \brief Subtract a number from this one. The result is truncated to 256 bits.
+ BasicDecimal256& operator-=(const BasicDecimal256& right);
+
+ /// \brief Get the bits of the two's complement representation of the number. The 4
+ /// elements are in little endian order. The bits within each uint64_t element are in
+ /// native endian order. For example,
+ /// BasicDecimal256(123).little_endian_array() = {123, 0, 0, 0};
+ /// BasicDecimal256(-2).little_endian_array() = {0xFF...FE, 0xFF...FF, 0xFF...FF,
+ /// 0xFF...FF}.
+ inline const std::array<uint64_t, 4>& little_endian_array() const {
+ return little_endian_array_;
+ }
+
+ /// \brief Get the lowest bits of the two's complement representation of the number.
+ inline constexpr uint64_t low_bits() const { return little_endian_array_[0]; }
+
+ /// \brief Return the raw bytes of the value in native-endian byte order.
+ std::array<uint8_t, 32> ToBytes() const;
+ void ToBytes(uint8_t* out) const;
+
+ /// \brief Scale multiplier for given scale value.
+ static const BasicDecimal256& GetScaleMultiplier(int32_t scale);
+
+ /// \brief Convert BasicDecimal256 from one scale to another
+ DecimalStatus Rescale(int32_t original_scale, int32_t new_scale,
+ BasicDecimal256* out) const;
+
+ /// \brief Scale up.
+ BasicDecimal256 IncreaseScaleBy(int32_t increase_by) const;
+
+ /// \brief Scale down.
+ /// - If 'round' is true, the right-most digits are dropped and the result value is
+ ///   rounded away from zero (+1 for positive, -1 for negative) when the value of
+ ///   the dropped digits is >= 10^reduce_by / 2.
+ /// - If 'round' is false, the right-most digits are simply dropped.
+ BasicDecimal256 ReduceScaleBy(int32_t reduce_by, bool round = true) const;
+
+ /// \brief Whether this number fits in the given precision
+ ///
+ /// Return true if the number of significant digits is less than or equal to `precision`.
+ bool FitsInPrecision(int32_t precision) const;
+
+ inline int64_t Sign() const {
+ return 1 | (static_cast<int64_t>(little_endian_array_[3]) >> 63);
+ }
+
+ inline int64_t IsNegative() const {
+ return static_cast<int64_t>(little_endian_array_[3]) < 0;
+ }
+
+ /// \brief Multiply this number by another number. The result is truncated to 256 bits.
+ BasicDecimal256& operator*=(const BasicDecimal256& right);
+
+ /// Divide this number by the given divisor and return the result.
+ ///
+ /// This operation is not destructive.
+ /// The quotient rounds toward zero. Signs work like:
+ /// 21 / 5 -> 4, 1
+ /// -21 / 5 -> -4, -1
+ /// 21 / -5 -> -4, 1
+ /// -21 / -5 -> 4, -1
+ /// \param[in] divisor the number to divide by
+ /// \param[out] result the quotient
+ /// \param[out] remainder the remainder after the division
+ DecimalStatus Divide(const BasicDecimal256& divisor, BasicDecimal256* result,
+ BasicDecimal256* remainder) const;
+
+ /// \brief Shift left by the given number of bits.
+ BasicDecimal256& operator<<=(uint32_t bits);
+
+ /// \brief In-place division.
+ BasicDecimal256& operator/=(const BasicDecimal256& right);
+
+ private:
+ std::array<uint64_t, 4> little_endian_array_;
+};
+
+ARROW_EXPORT inline bool operator==(const BasicDecimal256& left,
+ const BasicDecimal256& right) {
+ return left.little_endian_array() == right.little_endian_array();
+}
+
+ARROW_EXPORT inline bool operator!=(const BasicDecimal256& left,
+ const BasicDecimal256& right) {
+ return left.little_endian_array() != right.little_endian_array();
+}
+
+ARROW_EXPORT bool operator<(const BasicDecimal256& left, const BasicDecimal256& right);
+
+ARROW_EXPORT inline bool operator<=(const BasicDecimal256& left,
+ const BasicDecimal256& right) {
+ return !operator<(right, left);
+}
+
+ARROW_EXPORT inline bool operator>(const BasicDecimal256& left,
+ const BasicDecimal256& right) {
+ return operator<(right, left);
+}
+
+ARROW_EXPORT inline bool operator>=(const BasicDecimal256& left,
+ const BasicDecimal256& right) {
+ return !operator<(left, right);
+}
+
+ARROW_EXPORT BasicDecimal256 operator-(const BasicDecimal256& operand);
+ARROW_EXPORT BasicDecimal256 operator~(const BasicDecimal256& operand);
+ARROW_EXPORT BasicDecimal256 operator+(const BasicDecimal256& left,
+ const BasicDecimal256& right);
+ARROW_EXPORT BasicDecimal256 operator*(const BasicDecimal256& left,
+ const BasicDecimal256& right);
+ARROW_EXPORT BasicDecimal256 operator/(const BasicDecimal256& left,
+ const BasicDecimal256& right);
} // namespace arrow
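The division contract documented on BasicDecimal256::Divide above (quotient truncated toward zero, remainder taking the dividend's sign) is the same rule C++ built-in integer division follows, so the sign table can be verified with a minimal standalone sketch in plain int64_t arithmetic; this illustrates the documented semantics rather than the Arrow API itself:

    #include <cassert>
    #include <cstdint>

    int main() {
      struct Case { int64_t n, d, q, r; };
      // The four sign combinations from the Divide doc comment: 21 / 5 -> 4, 1 etc.
      const Case cases[] = {{21, 5, 4, 1}, {-21, 5, -4, -1}, {21, -5, -4, 1}, {-21, -5, 4, -1}};
      for (const Case& c : cases) {
        assert(c.n / c.d == c.q);        // built-in division truncates toward zero
        assert(c.n % c.d == c.r);        // remainder keeps the dividend's sign
        assert(c.q * c.d + c.r == c.n);  // invariant tying quotient and remainder together
      }
      return 0;
    }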
diff --git a/contrib/libs/apache/arrow/cpp/src/arrow/util/bit_block_counter.cc b/contrib/libs/apache/arrow/cpp/src/arrow/util/bit_block_counter.cc
index c7c97676f7c..c67cedc4a06 100644
--- a/contrib/libs/apache/arrow/cpp/src/arrow/util/bit_block_counter.cc
+++ b/contrib/libs/apache/arrow/cpp/src/arrow/util/bit_block_counter.cc
@@ -27,7 +27,7 @@
namespace arrow {
namespace internal {
-BitBlockCount BitBlockCounter::GetBlockSlow(int64_t block_size) noexcept {
+BitBlockCount BitBlockCounter::GetBlockSlow(int64_t block_size) noexcept {
const int16_t run_length = static_cast<int16_t>(std::min(bits_remaining_, block_size));
int16_t popcount = static_cast<int16_t>(CountSetBits(bitmap_, offset_, run_length));
bits_remaining_ -= run_length;
@@ -37,11 +37,11 @@ BitBlockCount BitBlockCounter::GetBlockSlow(int64_t block_size) noexcept {
return {run_length, popcount};
}
-// Prevent pointer arithmetic on nullptr, which is undefined behavior even if the pointer
-// is never dereferenced.
-inline const uint8_t* EnsureNotNull(const uint8_t* ptr) {
- static const uint8_t byte{};
- return ptr == nullptr ? &byte : ptr;
+// Prevent pointer arithmetic on nullptr, which is undefined behavior even if the pointer
+// is never dereferenced.
+inline const uint8_t* EnsureNotNull(const uint8_t* ptr) {
+ static const uint8_t byte{};
+ return ptr == nullptr ? &byte : ptr;
}
OptionalBitBlockCounter::OptionalBitBlockCounter(const uint8_t* validity_bitmap,
@@ -49,7 +49,7 @@ OptionalBitBlockCounter::OptionalBitBlockCounter(const uint8_t* validity_bitmap,
: has_bitmap_(validity_bitmap != nullptr),
position_(0),
length_(length),
- counter_(EnsureNotNull(validity_bitmap), offset, length) {}
+ counter_(EnsureNotNull(validity_bitmap), offset, length) {}
OptionalBitBlockCounter::OptionalBitBlockCounter(
const std::shared_ptr<Buffer>& validity_bitmap, int64_t offset, int64_t length)
@@ -64,10 +64,10 @@ OptionalBinaryBitBlockCounter::OptionalBinaryBitBlockCounter(const uint8_t* left
: has_bitmap_(HasBitmapFromBitmaps(left_bitmap != nullptr, right_bitmap != nullptr)),
position_(0),
length_(length),
- unary_counter_(EnsureNotNull(left_bitmap != nullptr ? left_bitmap : right_bitmap),
+ unary_counter_(EnsureNotNull(left_bitmap != nullptr ? left_bitmap : right_bitmap),
left_bitmap != nullptr ? left_offset : right_offset, length),
- binary_counter_(EnsureNotNull(left_bitmap), left_offset,
- EnsureNotNull(right_bitmap), right_offset, length) {}
+ binary_counter_(EnsureNotNull(left_bitmap), left_offset,
+ EnsureNotNull(right_bitmap), right_offset, length) {}
OptionalBinaryBitBlockCounter::OptionalBinaryBitBlockCounter(
const std::shared_ptr<Buffer>& left_bitmap, int64_t left_offset,
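The EnsureNotNull helper in the hunk above exists because pointer arithmetic on nullptr is undefined behavior even when the result is never dereferenced. A self-contained sketch of the same pattern, with the helper copied from the diff for illustration rather than taken from a public Arrow header:

    #include <cstdint>

    // Substitute a static dummy byte for nullptr so that later pointer
    // arithmetic on the bitmap stays well defined; the dummy is never read
    // because callers consult has_bitmap_ first.
    inline const uint8_t* EnsureNotNull(const uint8_t* ptr) {
      static const uint8_t byte{};
      return ptr == nullptr ? &byte : ptr;
    }

    int main() {
      const uint8_t* bitmap = nullptr;
      const uint8_t* safe = EnsureNotNull(bitmap);
      const uint8_t* positioned = safe + 0;  // well defined even though bitmap was null
      (void)positioned;
      return 0;
    }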
diff --git a/contrib/libs/apache/arrow/cpp/src/arrow/util/bit_block_counter.h b/contrib/libs/apache/arrow/cpp/src/arrow/util/bit_block_counter.h
index 5a14031cf0e..63036af52a4 100644
--- a/contrib/libs/apache/arrow/cpp/src/arrow/util/bit_block_counter.h
+++ b/contrib/libs/apache/arrow/cpp/src/arrow/util/bit_block_counter.h
@@ -25,26 +25,26 @@
#include "arrow/buffer.h"
#include "arrow/status.h"
#include "arrow/util/bit_util.h"
-#include "arrow/util/endian.h"
+#include "arrow/util/endian.h"
#include "arrow/util/macros.h"
-#include "arrow/util/ubsan.h"
+#include "arrow/util/ubsan.h"
#include "arrow/util/visibility.h"
namespace arrow {
namespace internal {
namespace detail {
-inline uint64_t LoadWord(const uint8_t* bytes) {
- return BitUtil::ToLittleEndian(util::SafeLoadAs<uint64_t>(bytes));
-}
-
-inline uint64_t ShiftWord(uint64_t current, uint64_t next, int64_t shift) {
- if (shift == 0) {
- return current;
- }
- return (current >> shift) | (next << (64 - shift));
-}
-
+inline uint64_t LoadWord(const uint8_t* bytes) {
+ return BitUtil::ToLittleEndian(util::SafeLoadAs<uint64_t>(bytes));
+}
+
+inline uint64_t ShiftWord(uint64_t current, uint64_t next, int64_t shift) {
+ if (shift == 0) {
+ return current;
+ }
+ return (current >> shift) | (next << (64 - shift));
+}
+
// These templates are here to help with unit tests
template <typename T>
@@ -58,16 +58,16 @@ struct BitBlockAnd<bool> {
};
template <typename T>
-struct BitBlockAndNot {
- static T Call(T left, T right) { return left & ~right; }
-};
-
-template <>
-struct BitBlockAndNot<bool> {
- static bool Call(bool left, bool right) { return left && !right; }
-};
-
-template <typename T>
+struct BitBlockAndNot {
+ static T Call(T left, T right) { return left & ~right; }
+};
+
+template <>
+struct BitBlockAndNot<bool> {
+ static bool Call(bool left, bool right) { return left && !right; }
+};
+
+template <typename T>
struct BitBlockOr {
static T Call(T left, T right) { return left | right; }
};
@@ -120,82 +120,82 @@ class ARROW_EXPORT BitBlockCounter {
/// block will have a length less than 256 if the bitmap length is not a
/// multiple of 256, and will return 0-length blocks in subsequent
/// invocations.
- BitBlockCount NextFourWords() {
- using detail::LoadWord;
- using detail::ShiftWord;
-
- if (!bits_remaining_) {
- return {0, 0};
- }
- int64_t total_popcount = 0;
- if (offset_ == 0) {
- if (bits_remaining_ < kFourWordsBits) {
- return GetBlockSlow(kFourWordsBits);
- }
- total_popcount += BitUtil::PopCount(LoadWord(bitmap_));
- total_popcount += BitUtil::PopCount(LoadWord(bitmap_ + 8));
- total_popcount += BitUtil::PopCount(LoadWord(bitmap_ + 16));
- total_popcount += BitUtil::PopCount(LoadWord(bitmap_ + 24));
- } else {
- // When the offset is > 0, we need there to be a word beyond the last
- // aligned word in the bitmap for the bit shifting logic.
- if (bits_remaining_ < 5 * kFourWordsBits - offset_) {
- return GetBlockSlow(kFourWordsBits);
- }
- auto current = LoadWord(bitmap_);
- auto next = LoadWord(bitmap_ + 8);
- total_popcount += BitUtil::PopCount(ShiftWord(current, next, offset_));
- current = next;
- next = LoadWord(bitmap_ + 16);
- total_popcount += BitUtil::PopCount(ShiftWord(current, next, offset_));
- current = next;
- next = LoadWord(bitmap_ + 24);
- total_popcount += BitUtil::PopCount(ShiftWord(current, next, offset_));
- current = next;
- next = LoadWord(bitmap_ + 32);
- total_popcount += BitUtil::PopCount(ShiftWord(current, next, offset_));
- }
- bitmap_ += BitUtil::BytesForBits(kFourWordsBits);
- bits_remaining_ -= kFourWordsBits;
- return {256, static_cast<int16_t>(total_popcount)};
- }
-
+ BitBlockCount NextFourWords() {
+ using detail::LoadWord;
+ using detail::ShiftWord;
+
+ if (!bits_remaining_) {
+ return {0, 0};
+ }
+ int64_t total_popcount = 0;
+ if (offset_ == 0) {
+ if (bits_remaining_ < kFourWordsBits) {
+ return GetBlockSlow(kFourWordsBits);
+ }
+ total_popcount += BitUtil::PopCount(LoadWord(bitmap_));
+ total_popcount += BitUtil::PopCount(LoadWord(bitmap_ + 8));
+ total_popcount += BitUtil::PopCount(LoadWord(bitmap_ + 16));
+ total_popcount += BitUtil::PopCount(LoadWord(bitmap_ + 24));
+ } else {
+ // When the offset is > 0, we need there to be a word beyond the last
+ // aligned word in the bitmap for the bit shifting logic.
+ if (bits_remaining_ < 5 * kFourWordsBits - offset_) {
+ return GetBlockSlow(kFourWordsBits);
+ }
+ auto current = LoadWord(bitmap_);
+ auto next = LoadWord(bitmap_ + 8);
+ total_popcount += BitUtil::PopCount(ShiftWord(current, next, offset_));
+ current = next;
+ next = LoadWord(bitmap_ + 16);
+ total_popcount += BitUtil::PopCount(ShiftWord(current, next, offset_));
+ current = next;
+ next = LoadWord(bitmap_ + 24);
+ total_popcount += BitUtil::PopCount(ShiftWord(current, next, offset_));
+ current = next;
+ next = LoadWord(bitmap_ + 32);
+ total_popcount += BitUtil::PopCount(ShiftWord(current, next, offset_));
+ }
+ bitmap_ += BitUtil::BytesForBits(kFourWordsBits);
+ bits_remaining_ -= kFourWordsBits;
+ return {256, static_cast<int16_t>(total_popcount)};
+ }
+
/// \brief Return the next run of available bits, usually 64. The returned
/// pair contains the size of run and the number of true values. The last
/// block will have a length less than 64 if the bitmap length is not a
/// multiple of 64, and will return 0-length blocks in subsequent
/// invocations.
- BitBlockCount NextWord() {
- using detail::LoadWord;
- using detail::ShiftWord;
-
- if (!bits_remaining_) {
- return {0, 0};
- }
- int64_t popcount = 0;
- if (offset_ == 0) {
- if (bits_remaining_ < kWordBits) {
- return GetBlockSlow(kWordBits);
- }
- popcount = BitUtil::PopCount(LoadWord(bitmap_));
- } else {
- // When the offset is > 0, we need there to be a word beyond the last
- // aligned word in the bitmap for the bit shifting logic.
- if (bits_remaining_ < 2 * kWordBits - offset_) {
- return GetBlockSlow(kWordBits);
- }
- popcount =
- BitUtil::PopCount(ShiftWord(LoadWord(bitmap_), LoadWord(bitmap_ + 8), offset_));
- }
- bitmap_ += kWordBits / 8;
- bits_remaining_ -= kWordBits;
- return {64, static_cast<int16_t>(popcount)};
- }
-
+ BitBlockCount NextWord() {
+ using detail::LoadWord;
+ using detail::ShiftWord;
+
+ if (!bits_remaining_) {
+ return {0, 0};
+ }
+ int64_t popcount = 0;
+ if (offset_ == 0) {
+ if (bits_remaining_ < kWordBits) {
+ return GetBlockSlow(kWordBits);
+ }
+ popcount = BitUtil::PopCount(LoadWord(bitmap_));
+ } else {
+ // When the offset is > 0, we need there to be a word beyond the last
+ // aligned word in the bitmap for the bit shifting logic.
+ if (bits_remaining_ < 2 * kWordBits - offset_) {
+ return GetBlockSlow(kWordBits);
+ }
+ popcount =
+ BitUtil::PopCount(ShiftWord(LoadWord(bitmap_), LoadWord(bitmap_ + 8), offset_));
+ }
+ bitmap_ += kWordBits / 8;
+ bits_remaining_ -= kWordBits;
+ return {64, static_cast<int16_t>(popcount)};
+ }
+
private:
/// \brief Return block with the requested size when doing word-wise
/// computation is not possible due to inadequate bits remaining.
- BitBlockCount GetBlockSlow(int64_t block_size) noexcept;
+ BitBlockCount GetBlockSlow(int64_t block_size) noexcept;
const uint8_t* bitmap_;
int64_t bits_remaining_;
@@ -274,67 +274,67 @@ class ARROW_EXPORT BinaryBitBlockCounter {
/// the number of true values. The last block will have a length less than 64
/// if the bitmap length is not a multiple of 64, and will return 0-length
/// blocks in subsequent invocations.
- BitBlockCount NextAndWord() { return NextWord<detail::BitBlockAnd>(); }
+ BitBlockCount NextAndWord() { return NextWord<detail::BitBlockAnd>(); }
+
+ /// \brief Computes "x & ~y" block for each available run of bits.
+ BitBlockCount NextAndNotWord() { return NextWord<detail::BitBlockAndNot>(); }
- /// \brief Computes "x & ~y" block for each available run of bits.
- BitBlockCount NextAndNotWord() { return NextWord<detail::BitBlockAndNot>(); }
-
/// \brief Computes "x | y" block for each available run of bits.
- BitBlockCount NextOrWord() { return NextWord<detail::BitBlockOr>(); }
+ BitBlockCount NextOrWord() { return NextWord<detail::BitBlockOr>(); }
/// \brief Computes "x | ~y" block for each available run of bits.
- BitBlockCount NextOrNotWord() { return NextWord<detail::BitBlockOrNot>(); }
+ BitBlockCount NextOrNotWord() { return NextWord<detail::BitBlockOrNot>(); }
private:
template <template <typename T> class Op>
- BitBlockCount NextWord() {
- using detail::LoadWord;
- using detail::ShiftWord;
-
- if (!bits_remaining_) {
- return {0, 0};
- }
- // When the offset is > 0, we need there to be a word beyond the last aligned
- // word in the bitmap for the bit shifting logic.
- constexpr int64_t kWordBits = BitBlockCounter::kWordBits;
- const int64_t bits_required_to_use_words =
- std::max(left_offset_ == 0 ? 64 : 64 + (64 - left_offset_),
- right_offset_ == 0 ? 64 : 64 + (64 - right_offset_));
- if (bits_remaining_ < bits_required_to_use_words) {
- const int16_t run_length =
- static_cast<int16_t>(std::min(bits_remaining_, kWordBits));
- int16_t popcount = 0;
- for (int64_t i = 0; i < run_length; ++i) {
- if (Op<bool>::Call(BitUtil::GetBit(left_bitmap_, left_offset_ + i),
- BitUtil::GetBit(right_bitmap_, right_offset_ + i))) {
- ++popcount;
- }
- }
- // This code path should trigger _at most_ 2 times. In the "two times"
- // case, the first time the run length will be a multiple of 8.
- left_bitmap_ += run_length / 8;
- right_bitmap_ += run_length / 8;
- bits_remaining_ -= run_length;
- return {run_length, popcount};
- }
-
- int64_t popcount = 0;
- if (left_offset_ == 0 && right_offset_ == 0) {
- popcount = BitUtil::PopCount(
- Op<uint64_t>::Call(LoadWord(left_bitmap_), LoadWord(right_bitmap_)));
- } else {
- auto left_word =
- ShiftWord(LoadWord(left_bitmap_), LoadWord(left_bitmap_ + 8), left_offset_);
- auto right_word =
- ShiftWord(LoadWord(right_bitmap_), LoadWord(right_bitmap_ + 8), right_offset_);
- popcount = BitUtil::PopCount(Op<uint64_t>::Call(left_word, right_word));
- }
- left_bitmap_ += kWordBits / 8;
- right_bitmap_ += kWordBits / 8;
- bits_remaining_ -= kWordBits;
- return {64, static_cast<int16_t>(popcount)};
- }
-
+ BitBlockCount NextWord() {
+ using detail::LoadWord;
+ using detail::ShiftWord;
+
+ if (!bits_remaining_) {
+ return {0, 0};
+ }
+ // When the offset is > 0, we need there to be a word beyond the last aligned
+ // word in the bitmap for the bit shifting logic.
+ constexpr int64_t kWordBits = BitBlockCounter::kWordBits;
+ const int64_t bits_required_to_use_words =
+ std::max(left_offset_ == 0 ? 64 : 64 + (64 - left_offset_),
+ right_offset_ == 0 ? 64 : 64 + (64 - right_offset_));
+ if (bits_remaining_ < bits_required_to_use_words) {
+ const int16_t run_length =
+ static_cast<int16_t>(std::min(bits_remaining_, kWordBits));
+ int16_t popcount = 0;
+ for (int64_t i = 0; i < run_length; ++i) {
+ if (Op<bool>::Call(BitUtil::GetBit(left_bitmap_, left_offset_ + i),
+ BitUtil::GetBit(right_bitmap_, right_offset_ + i))) {
+ ++popcount;
+ }
+ }
+ // This code path should trigger _at most_ 2 times. In the "two times"
+ // case, the first time the run length will be a multiple of 8.
+ left_bitmap_ += run_length / 8;
+ right_bitmap_ += run_length / 8;
+ bits_remaining_ -= run_length;
+ return {run_length, popcount};
+ }
+
+ int64_t popcount = 0;
+ if (left_offset_ == 0 && right_offset_ == 0) {
+ popcount = BitUtil::PopCount(
+ Op<uint64_t>::Call(LoadWord(left_bitmap_), LoadWord(right_bitmap_)));
+ } else {
+ auto left_word =
+ ShiftWord(LoadWord(left_bitmap_), LoadWord(left_bitmap_ + 8), left_offset_);
+ auto right_word =
+ ShiftWord(LoadWord(right_bitmap_), LoadWord(right_bitmap_ + 8), right_offset_);
+ popcount = BitUtil::PopCount(Op<uint64_t>::Call(left_word, right_word));
+ }
+ left_bitmap_ += kWordBits / 8;
+ right_bitmap_ += kWordBits / 8;
+ bits_remaining_ -= kWordBits;
+ return {64, static_cast<int16_t>(popcount)};
+ }
+
const uint8_t* left_bitmap_;
int64_t left_offset_;
const uint8_t* right_bitmap_;
@@ -379,30 +379,30 @@ class ARROW_EXPORT OptionalBinaryBitBlockCounter {
}
}
- BitBlockCount NextOrNotBlock() {
- static constexpr int64_t kMaxBlockSize = std::numeric_limits<int16_t>::max();
- switch (has_bitmap_) {
- case HasBitmap::BOTH: {
- BitBlockCount block = binary_counter_.NextOrNotWord();
- position_ += block.length;
- return block;
- }
- case HasBitmap::ONE: {
- BitBlockCount block = unary_counter_.NextWord();
- position_ += block.length;
- return block;
- }
- case HasBitmap::NONE:
- default: {
- const int16_t block_size =
- static_cast<int16_t>(std::min(kMaxBlockSize, length_ - position_));
- position_ += block_size;
- // All values are non-null
- return {block_size, block_size};
- }
- }
- }
-
+ BitBlockCount NextOrNotBlock() {
+ static constexpr int64_t kMaxBlockSize = std::numeric_limits<int16_t>::max();
+ switch (has_bitmap_) {
+ case HasBitmap::BOTH: {
+ BitBlockCount block = binary_counter_.NextOrNotWord();
+ position_ += block.length;
+ return block;
+ }
+ case HasBitmap::ONE: {
+ BitBlockCount block = unary_counter_.NextWord();
+ position_ += block.length;
+ return block;
+ }
+ case HasBitmap::NONE:
+ default: {
+ const int16_t block_size =
+ static_cast<int16_t>(std::min(kMaxBlockSize, length_ - position_));
+ position_ += block_size;
+ // All values are non-null
+ return {block_size, block_size};
+ }
+ }
+ }
+
private:
enum class HasBitmap : int { BOTH, ONE, NONE };
@@ -427,9 +427,9 @@ class ARROW_EXPORT OptionalBinaryBitBlockCounter {
// Functional-style bit block visitors.
template <typename VisitNotNull, typename VisitNull>
-static Status VisitBitBlocks(const std::shared_ptr<Buffer>& bitmap_buf, int64_t offset,
- int64_t length, VisitNotNull&& visit_not_null,
- VisitNull&& visit_null) {
+static Status VisitBitBlocks(const std::shared_ptr<Buffer>& bitmap_buf, int64_t offset,
+ int64_t length, VisitNotNull&& visit_not_null,
+ VisitNull&& visit_null) {
const uint8_t* bitmap = NULLPTR;
if (bitmap_buf != NULLPTR) {
bitmap = bitmap_buf->data();
@@ -460,9 +460,9 @@ static Status VisitBitBlocks(const std::shared_ptr<Buffer>& bitmap_buf, int64_t
}
template <typename VisitNotNull, typename VisitNull>
-static void VisitBitBlocksVoid(const std::shared_ptr<Buffer>& bitmap_buf, int64_t offset,
- int64_t length, VisitNotNull&& visit_not_null,
- VisitNull&& visit_null) {
+static void VisitBitBlocksVoid(const std::shared_ptr<Buffer>& bitmap_buf, int64_t offset,
+ int64_t length, VisitNotNull&& visit_not_null,
+ VisitNull&& visit_null) {
const uint8_t* bitmap = NULLPTR;
if (bitmap_buf != NULLPTR) {
bitmap = bitmap_buf->data();
@@ -492,11 +492,11 @@ static void VisitBitBlocksVoid(const std::shared_ptr<Buffer>& bitmap_buf, int64_
}
template <typename VisitNotNull, typename VisitNull>
-static void VisitTwoBitBlocksVoid(const std::shared_ptr<Buffer>& left_bitmap_buf,
- int64_t left_offset,
- const std::shared_ptr<Buffer>& right_bitmap_buf,
- int64_t right_offset, int64_t length,
- VisitNotNull&& visit_not_null, VisitNull&& visit_null) {
+static void VisitTwoBitBlocksVoid(const std::shared_ptr<Buffer>& left_bitmap_buf,
+ int64_t left_offset,
+ const std::shared_ptr<Buffer>& right_bitmap_buf,
+ int64_t right_offset, int64_t length,
+ VisitNotNull&& visit_not_null, VisitNull&& visit_null) {
if (left_bitmap_buf == NULLPTR || right_bitmap_buf == NULLPTR) {
// At most one bitmap is present
if (left_bitmap_buf == NULLPTR) {
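As a usage sketch for the counters defined in this header: callers loop on NextWord() until a zero-length BitBlockCount comes back, here simply totalling the popcounts. The include path and namespace follow the diff; treat the function as an illustration, not canonical Arrow usage:

    #include <cstdint>

    #include "arrow/util/bit_block_counter.h"  // assumed include path, per the diff

    // Total the set bits of a bitmap by consuming 64-bit blocks until the
    // counter reports a zero-length block.
    int64_t CountSetBitsViaBlocks(const uint8_t* bitmap, int64_t offset, int64_t length) {
      arrow::internal::BitBlockCounter counter(bitmap, offset, length);
      int64_t total = 0;
      while (true) {
        const arrow::internal::BitBlockCount block = counter.NextWord();
        if (block.length == 0) break;  // bitmap exhausted
        total += block.popcount;
      }
      return total;
    }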
diff --git a/contrib/libs/apache/arrow/cpp/src/arrow/util/bit_run_reader.cc b/contrib/libs/apache/arrow/cpp/src/arrow/util/bit_run_reader.cc
index 1114ec61f19..eda6088eb32 100644
--- a/contrib/libs/apache/arrow/cpp/src/arrow/util/bit_run_reader.cc
+++ b/contrib/libs/apache/arrow/cpp/src/arrow/util/bit_run_reader.cc
@@ -45,7 +45,7 @@ BitRunReader::BitRunReader(const uint8_t* bitmap, int64_t start_offset, int64_t
// Prepare for inversion in NextRun.
// Clear out any preceding bits.
- word_ = word_ & ~BitUtil::LeastSignificantBitMask(position_);
+ word_ = word_ & ~BitUtil::LeastSignificantBitMask(position_);
}
#endif
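For reference, LeastSignificantBitMask(n) in the hunk above yields a word whose n lowest bits are set, so AND-ing with its complement clears the bits that precede the current position. A standalone sketch with the mask helper reimplemented locally (the n >= 64 guard is this sketch's own addition, not part of the header):

    #include <cassert>
    #include <cstdint>

    // Local stand-in for BitUtil::LeastSignificantBitMask: the n lowest bits set.
    constexpr uint64_t LeastSignificantBitMask(int64_t n) {
      return n >= 64 ? ~uint64_t{0} : (uint64_t{1} << n) - 1;
    }

    int main() {
      uint64_t word = 0xFF;                 // bits 0..7 set
      word &= ~LeastSignificantBitMask(4);  // clear the 4 bits before the position
      assert(word == 0xF0);
      return 0;
    }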
diff --git a/contrib/libs/apache/arrow/cpp/src/arrow/util/bit_run_reader.h b/contrib/libs/apache/arrow/cpp/src/arrow/util/bit_run_reader.h
index 10155687a20..3e196628477 100644
--- a/contrib/libs/apache/arrow/cpp/src/arrow/util/bit_run_reader.h
+++ b/contrib/libs/apache/arrow/cpp/src/arrow/util/bit_run_reader.h
@@ -17,14 +17,14 @@
#pragma once
-#include <cassert>
+#include <cassert>
#include <cstdint>
#include <cstring>
#include <string>
#include "arrow/util/bit_util.h"
#include "arrow/util/bitmap_reader.h"
-#include "arrow/util/endian.h"
+#include "arrow/util/endian.h"
#include "arrow/util/macros.h"
#include "arrow/util/visibility.h"
@@ -42,14 +42,14 @@ struct BitRun {
}
};
-inline bool operator==(const BitRun& lhs, const BitRun& rhs) {
+inline bool operator==(const BitRun& lhs, const BitRun& rhs) {
return lhs.length == rhs.length && lhs.set == rhs.set;
}
-inline bool operator!=(const BitRun& lhs, const BitRun& rhs) {
- return lhs.length != rhs.length || lhs.set != rhs.set;
-}
-
+inline bool operator!=(const BitRun& lhs, const BitRun& rhs) {
+ return lhs.length != rhs.length || lhs.set != rhs.set;
+}
+
class BitRunReaderLinear {
public:
BitRunReaderLinear(const uint8_t* bitmap, int64_t start_offset, int64_t length)
@@ -70,7 +70,7 @@ class BitRunReaderLinear {
};
#if ARROW_LITTLE_ENDIAN
-/// A convenience class for counting the number of contiguous set/unset bits
+/// A convenience class for counting the number of contiguous set/unset bits
/// in a bitmap.
class ARROW_EXPORT BitRunReader {
public:
@@ -102,7 +102,7 @@ class ARROW_EXPORT BitRunReader {
int64_t start_bit_offset = start_position & 63;
// Invert the word for proper use of CountTrailingZeros and
    // clear bits so CountTrailingZeros can do its magic.
- word_ = ~word_ & ~BitUtil::LeastSignificantBitMask(start_bit_offset);
+ word_ = ~word_ & ~BitUtil::LeastSignificantBitMask(start_bit_offset);
// Go forward until the next change from unset to set.
int64_t new_bits = BitUtil::CountTrailingZeros(word_) - start_bit_offset;
@@ -151,7 +151,7 @@ class ARROW_EXPORT BitRunReader {
}
// Two cases:
- // 1. For unset, CountTrailingZeros works naturally so we don't
+ // 1. For unset, CountTrailingZeros works naturally so we don't
// invert the word.
// 2. Otherwise invert so we can use CountTrailingZeros.
if (current_run_bit_set_) {
@@ -168,348 +168,348 @@ class ARROW_EXPORT BitRunReader {
using BitRunReader = BitRunReaderLinear;
#endif
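A brief usage sketch for the reader above: NextRun() alternates between set and unset runs and signals exhaustion with a zero-length run. Include path and namespace follow the diff; the function name is illustrative:

    #include <cstdint>

    #include "arrow/util/bit_run_reader.h"  // assumed include path, per the diff

    // Sum the lengths of set and unset runs separately.
    void TallyRuns(const uint8_t* bitmap, int64_t offset, int64_t length,
                   int64_t* set_total, int64_t* unset_total) {
      arrow::internal::BitRunReader reader(bitmap, offset, length);
      *set_total = 0;
      *unset_total = 0;
      while (true) {
        const arrow::internal::BitRun run = reader.NextRun();
        if (run.length == 0) break;  // bitmap exhausted
        (run.set ? *set_total : *unset_total) += run.length;
      }
    }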
-struct SetBitRun {
- int64_t position;
- int64_t length;
-
- bool AtEnd() const { return length == 0; }
-
- std::string ToString() const {
- return std::string("{pos=") + std::to_string(position) +
- ", len=" + std::to_string(length) + "}";
- }
-
- bool operator==(const SetBitRun& other) const {
- return position == other.position && length == other.length;
- }
- bool operator!=(const SetBitRun& other) const {
- return position != other.position || length != other.length;
- }
-};
-
-template <bool Reverse>
-class BaseSetBitRunReader {
- public:
- /// \brief Constructs a new SetBitRunReader.
- ///
- /// \param[in] bitmap source data
- /// \param[in] start_offset bit offset into the source data
- /// \param[in] length number of bits to read
- ARROW_NOINLINE
- BaseSetBitRunReader(const uint8_t* bitmap, int64_t start_offset, int64_t length)
- : bitmap_(bitmap),
- length_(length),
- remaining_(length_),
- current_word_(0),
- current_num_bits_(0) {
- if (Reverse) {
- bitmap_ += (start_offset + length) / 8;
- const int8_t end_bit_offset = static_cast<int8_t>((start_offset + length) % 8);
- if (length > 0 && end_bit_offset) {
- // Get LSBs from last byte
- ++bitmap_;
- current_num_bits_ =
- std::min(static_cast<int32_t>(length), static_cast<int32_t>(end_bit_offset));
- current_word_ = LoadPartialWord(8 - end_bit_offset, current_num_bits_);
- }
- } else {
- bitmap_ += start_offset / 8;
- const int8_t bit_offset = static_cast<int8_t>(start_offset % 8);
- if (length > 0 && bit_offset) {
- // Get MSBs from first byte
- current_num_bits_ =
- std::min(static_cast<int32_t>(length), static_cast<int32_t>(8 - bit_offset));
- current_word_ = LoadPartialWord(bit_offset, current_num_bits_);
- }
- }
- }
-
- ARROW_NOINLINE
- SetBitRun NextRun() {
- int64_t pos = 0;
- int64_t len = 0;
- if (current_num_bits_) {
- const auto run = FindCurrentRun();
- assert(remaining_ >= 0);
- if (run.length && current_num_bits_) {
- // The run ends in current_word_
- return AdjustRun(run);
- }
- pos = run.position;
- len = run.length;
- }
- if (!len) {
- // We didn't get any ones in current_word_, so we can skip any zeros
- // in the following words
- SkipNextZeros();
- if (remaining_ == 0) {
- return {0, 0};
- }
- assert(current_num_bits_);
- pos = position();
- } else if (!current_num_bits_) {
- if (ARROW_PREDICT_TRUE(remaining_ >= 64)) {
- current_word_ = LoadFullWord();
- current_num_bits_ = 64;
- } else if (remaining_ > 0) {
- current_word_ = LoadPartialWord(/*bit_offset=*/0, remaining_);
- current_num_bits_ = static_cast<int32_t>(remaining_);
- } else {
- // No bits remaining, perhaps we found a run?
- return AdjustRun({pos, len});
- }
- // If current word starts with a zero, we got a full run
- if (!(current_word_ & kFirstBit)) {
- return AdjustRun({pos, len});
- }
- }
- // Current word should now start with a set bit
- len += CountNextOnes();
- return AdjustRun({pos, len});
- }
-
- protected:
- int64_t position() const {
- if (Reverse) {
- return remaining_;
- } else {
- return length_ - remaining_;
- }
- }
-
- SetBitRun AdjustRun(SetBitRun run) {
- if (Reverse) {
- assert(run.position >= run.length);
- run.position -= run.length;
- }
- return run;
- }
-
- uint64_t LoadFullWord() {
- uint64_t word;
- if (Reverse) {
- bitmap_ -= 8;
- }
- memcpy(&word, bitmap_, 8);
- if (!Reverse) {
- bitmap_ += 8;
- }
- return BitUtil::ToLittleEndian(word);
- }
-
- uint64_t LoadPartialWord(int8_t bit_offset, int64_t num_bits) {
- assert(num_bits > 0);
- uint64_t word = 0;
- const int64_t num_bytes = BitUtil::BytesForBits(num_bits);
- if (Reverse) {
- // Read in the most significant bytes of the word
- bitmap_ -= num_bytes;
- memcpy(reinterpret_cast<char*>(&word) + 8 - num_bytes, bitmap_, num_bytes);
- // XXX MostSignificantBitmask
- return (BitUtil::ToLittleEndian(word) << bit_offset) &
- ~BitUtil::LeastSignificantBitMask(64 - num_bits);
- } else {
- memcpy(&word, bitmap_, num_bytes);
- bitmap_ += num_bytes;
- return (BitUtil::ToLittleEndian(word) >> bit_offset) &
- BitUtil::LeastSignificantBitMask(num_bits);
- }
- }
-
- void SkipNextZeros() {
- assert(current_num_bits_ == 0);
- while (ARROW_PREDICT_TRUE(remaining_ >= 64)) {
- current_word_ = LoadFullWord();
- const auto num_zeros = CountFirstZeros(current_word_);
- if (num_zeros < 64) {
- // Run of zeros ends here
- current_word_ = ConsumeBits(current_word_, num_zeros);
- current_num_bits_ = 64 - num_zeros;
- remaining_ -= num_zeros;
- assert(remaining_ >= 0);
- assert(current_num_bits_ >= 0);
- return;
- }
- remaining_ -= 64;
- }
- // Run of zeros continues in last bitmap word
- if (remaining_ > 0) {
- current_word_ = LoadPartialWord(/*bit_offset=*/0, remaining_);
- current_num_bits_ = static_cast<int32_t>(remaining_);
- const auto num_zeros =
- std::min<int32_t>(current_num_bits_, CountFirstZeros(current_word_));
- current_word_ = ConsumeBits(current_word_, num_zeros);
- current_num_bits_ -= num_zeros;
- remaining_ -= num_zeros;
- assert(remaining_ >= 0);
- assert(current_num_bits_ >= 0);
- }
- }
-
- int64_t CountNextOnes() {
- assert(current_word_ & kFirstBit);
-
- int64_t len;
- if (~current_word_) {
- const auto num_ones = CountFirstZeros(~current_word_);
- assert(num_ones <= current_num_bits_);
- assert(num_ones <= remaining_);
- remaining_ -= num_ones;
- current_word_ = ConsumeBits(current_word_, num_ones);
- current_num_bits_ -= num_ones;
- if (current_num_bits_) {
- // Run of ones ends here
- return num_ones;
- }
- len = num_ones;
- } else {
- // current_word_ is all ones
- remaining_ -= 64;
- current_num_bits_ = 0;
- len = 64;
- }
-
- while (ARROW_PREDICT_TRUE(remaining_ >= 64)) {
- current_word_ = LoadFullWord();
- const auto num_ones = CountFirstZeros(~current_word_);
- len += num_ones;
- remaining_ -= num_ones;
- if (num_ones < 64) {
- // Run of ones ends here
- current_word_ = ConsumeBits(current_word_, num_ones);
- current_num_bits_ = 64 - num_ones;
- return len;
- }
- }
- // Run of ones continues in last bitmap word
- if (remaining_ > 0) {
- current_word_ = LoadPartialWord(/*bit_offset=*/0, remaining_);
- current_num_bits_ = static_cast<int32_t>(remaining_);
- const auto num_ones = CountFirstZeros(~current_word_);
- assert(num_ones <= current_num_bits_);
- assert(num_ones <= remaining_);
- current_word_ = ConsumeBits(current_word_, num_ones);
- current_num_bits_ -= num_ones;
- remaining_ -= num_ones;
- len += num_ones;
- }
- return len;
- }
-
- SetBitRun FindCurrentRun() {
- // Skip any pending zeros
- const auto num_zeros = CountFirstZeros(current_word_);
- if (num_zeros >= current_num_bits_) {
- remaining_ -= current_num_bits_;
- current_word_ = 0;
- current_num_bits_ = 0;
- return {0, 0};
- }
- assert(num_zeros <= remaining_);
- current_word_ = ConsumeBits(current_word_, num_zeros);
- current_num_bits_ -= num_zeros;
- remaining_ -= num_zeros;
- const int64_t pos = position();
- // Count any ones
- const auto num_ones = CountFirstZeros(~current_word_);
- assert(num_ones <= current_num_bits_);
- assert(num_ones <= remaining_);
- current_word_ = ConsumeBits(current_word_, num_ones);
- current_num_bits_ -= num_ones;
- remaining_ -= num_ones;
- return {pos, num_ones};
- }
-
- inline int CountFirstZeros(uint64_t word);
- inline uint64_t ConsumeBits(uint64_t word, int32_t num_bits);
-
- const uint8_t* bitmap_;
- const int64_t length_;
- int64_t remaining_;
- uint64_t current_word_;
- int32_t current_num_bits_;
-
- static constexpr uint64_t kFirstBit = Reverse ? 0x8000000000000000ULL : 1;
-};
-
-template <>
-inline int BaseSetBitRunReader<false>::CountFirstZeros(uint64_t word) {
- return BitUtil::CountTrailingZeros(word);
-}
-
-template <>
-inline int BaseSetBitRunReader<true>::CountFirstZeros(uint64_t word) {
- return BitUtil::CountLeadingZeros(word);
-}
-
-template <>
-inline uint64_t BaseSetBitRunReader<false>::ConsumeBits(uint64_t word, int32_t num_bits) {
- return word >> num_bits;
-}
-
-template <>
-inline uint64_t BaseSetBitRunReader<true>::ConsumeBits(uint64_t word, int32_t num_bits) {
- return word << num_bits;
-}
-
-using SetBitRunReader = BaseSetBitRunReader</*Reverse=*/false>;
-using ReverseSetBitRunReader = BaseSetBitRunReader</*Reverse=*/true>;
-
-// Functional-style bit run visitors.
-
-// XXX: Try to make this function small so the compiler can inline and optimize
-// the `visit` function, which is normally a hot loop with vectorizable code.
-// - don't inline SetBitRunReader constructor, it doesn't hurt performance
-// - un-inline NextRun hurts 'many null' cases a bit, but improves normal cases
-template <typename Visit>
-inline Status VisitSetBitRuns(const uint8_t* bitmap, int64_t offset, int64_t length,
- Visit&& visit) {
- if (bitmap == NULLPTR) {
- // Assuming all set (as in a null bitmap)
- return visit(static_cast<int64_t>(0), static_cast<int64_t>(length));
- }
- SetBitRunReader reader(bitmap, offset, length);
- while (true) {
- const auto run = reader.NextRun();
- if (run.length == 0) {
- break;
- }
- ARROW_RETURN_NOT_OK(visit(run.position, run.length));
- }
- return Status::OK();
-}
-
-template <typename Visit>
-inline void VisitSetBitRunsVoid(const uint8_t* bitmap, int64_t offset, int64_t length,
- Visit&& visit) {
- if (bitmap == NULLPTR) {
- // Assuming all set (as in a null bitmap)
- visit(static_cast<int64_t>(0), static_cast<int64_t>(length));
- return;
- }
- SetBitRunReader reader(bitmap, offset, length);
- while (true) {
- const auto run = reader.NextRun();
- if (run.length == 0) {
- break;
- }
- visit(run.position, run.length);
- }
-}
-
-template <typename Visit>
-inline Status VisitSetBitRuns(const std::shared_ptr<Buffer>& bitmap, int64_t offset,
- int64_t length, Visit&& visit) {
- return VisitSetBitRuns(bitmap ? bitmap->data() : NULLPTR, offset, length,
- std::forward<Visit>(visit));
-}
-
-template <typename Visit>
-inline void VisitSetBitRunsVoid(const std::shared_ptr<Buffer>& bitmap, int64_t offset,
- int64_t length, Visit&& visit) {
- VisitSetBitRunsVoid(bitmap ? bitmap->data() : NULLPTR, offset, length,
- std::forward<Visit>(visit));
-}
-
+struct SetBitRun {
+ int64_t position;
+ int64_t length;
+
+ bool AtEnd() const { return length == 0; }
+
+ std::string ToString() const {
+ return std::string("{pos=") + std::to_string(position) +
+ ", len=" + std::to_string(length) + "}";
+ }
+
+ bool operator==(const SetBitRun& other) const {
+ return position == other.position && length == other.length;
+ }
+ bool operator!=(const SetBitRun& other) const {
+ return position != other.position || length != other.length;
+ }
+};
+
+template <bool Reverse>
+class BaseSetBitRunReader {
+ public:
+ /// \brief Constructs a new SetBitRunReader.
+ ///
+ /// \param[in] bitmap source data
+ /// \param[in] start_offset bit offset into the source data
+ /// \param[in] length number of bits to read
+ ARROW_NOINLINE
+ BaseSetBitRunReader(const uint8_t* bitmap, int64_t start_offset, int64_t length)
+ : bitmap_(bitmap),
+ length_(length),
+ remaining_(length_),
+ current_word_(0),
+ current_num_bits_(0) {
+ if (Reverse) {
+ bitmap_ += (start_offset + length) / 8;
+ const int8_t end_bit_offset = static_cast<int8_t>((start_offset + length) % 8);
+ if (length > 0 && end_bit_offset) {
+ // Get LSBs from last byte
+ ++bitmap_;
+ current_num_bits_ =
+ std::min(static_cast<int32_t>(length), static_cast<int32_t>(end_bit_offset));
+ current_word_ = LoadPartialWord(8 - end_bit_offset, current_num_bits_);
+ }
+ } else {
+ bitmap_ += start_offset / 8;
+ const int8_t bit_offset = static_cast<int8_t>(start_offset % 8);
+ if (length > 0 && bit_offset) {
+ // Get MSBs from first byte
+ current_num_bits_ =
+ std::min(static_cast<int32_t>(length), static_cast<int32_t>(8 - bit_offset));
+ current_word_ = LoadPartialWord(bit_offset, current_num_bits_);
+ }
+ }
+ }
+
+ ARROW_NOINLINE
+ SetBitRun NextRun() {
+ int64_t pos = 0;
+ int64_t len = 0;
+ if (current_num_bits_) {
+ const auto run = FindCurrentRun();
+ assert(remaining_ >= 0);
+ if (run.length && current_num_bits_) {
+ // The run ends in current_word_
+ return AdjustRun(run);
+ }
+ pos = run.position;
+ len = run.length;
+ }
+ if (!len) {
+ // We didn't get any ones in current_word_, so we can skip any zeros
+ // in the following words
+ SkipNextZeros();
+ if (remaining_ == 0) {
+ return {0, 0};
+ }
+ assert(current_num_bits_);
+ pos = position();
+ } else if (!current_num_bits_) {
+ if (ARROW_PREDICT_TRUE(remaining_ >= 64)) {
+ current_word_ = LoadFullWord();
+ current_num_bits_ = 64;
+ } else if (remaining_ > 0) {
+ current_word_ = LoadPartialWord(/*bit_offset=*/0, remaining_);
+ current_num_bits_ = static_cast<int32_t>(remaining_);
+ } else {
+ // No bits remaining, perhaps we found a run?
+ return AdjustRun({pos, len});
+ }
+ // If current word starts with a zero, we got a full run
+ if (!(current_word_ & kFirstBit)) {
+ return AdjustRun({pos, len});
+ }
+ }
+ // Current word should now start with a set bit
+ len += CountNextOnes();
+ return AdjustRun({pos, len});
+ }
+
+ protected:
+ int64_t position() const {
+ if (Reverse) {
+ return remaining_;
+ } else {
+ return length_ - remaining_;
+ }
+ }
+
+ SetBitRun AdjustRun(SetBitRun run) {
+ if (Reverse) {
+ assert(run.position >= run.length);
+ run.position -= run.length;
+ }
+ return run;
+ }
+
+ uint64_t LoadFullWord() {
+ uint64_t word;
+ if (Reverse) {
+ bitmap_ -= 8;
+ }
+ memcpy(&word, bitmap_, 8);
+ if (!Reverse) {
+ bitmap_ += 8;
+ }
+ return BitUtil::ToLittleEndian(word);
+ }
+
+ uint64_t LoadPartialWord(int8_t bit_offset, int64_t num_bits) {
+ assert(num_bits > 0);
+ uint64_t word = 0;
+ const int64_t num_bytes = BitUtil::BytesForBits(num_bits);
+ if (Reverse) {
+ // Read in the most significant bytes of the word
+ bitmap_ -= num_bytes;
+ memcpy(reinterpret_cast<char*>(&word) + 8 - num_bytes, bitmap_, num_bytes);
+ // XXX MostSignificantBitmask
+ return (BitUtil::ToLittleEndian(word) << bit_offset) &
+ ~BitUtil::LeastSignificantBitMask(64 - num_bits);
+ } else {
+ memcpy(&word, bitmap_, num_bytes);
+ bitmap_ += num_bytes;
+ return (BitUtil::ToLittleEndian(word) >> bit_offset) &
+ BitUtil::LeastSignificantBitMask(num_bits);
+ }
+ }
+
+ void SkipNextZeros() {
+ assert(current_num_bits_ == 0);
+ while (ARROW_PREDICT_TRUE(remaining_ >= 64)) {
+ current_word_ = LoadFullWord();
+ const auto num_zeros = CountFirstZeros(current_word_);
+ if (num_zeros < 64) {
+ // Run of zeros ends here
+ current_word_ = ConsumeBits(current_word_, num_zeros);
+ current_num_bits_ = 64 - num_zeros;
+ remaining_ -= num_zeros;
+ assert(remaining_ >= 0);
+ assert(current_num_bits_ >= 0);
+ return;
+ }
+ remaining_ -= 64;
+ }
+ // Run of zeros continues in last bitmap word
+ if (remaining_ > 0) {
+ current_word_ = LoadPartialWord(/*bit_offset=*/0, remaining_);
+ current_num_bits_ = static_cast<int32_t>(remaining_);
+ const auto num_zeros =
+ std::min<int32_t>(current_num_bits_, CountFirstZeros(current_word_));
+ current_word_ = ConsumeBits(current_word_, num_zeros);
+ current_num_bits_ -= num_zeros;
+ remaining_ -= num_zeros;
+ assert(remaining_ >= 0);
+ assert(current_num_bits_ >= 0);
+ }
+ }
+
+ int64_t CountNextOnes() {
+ assert(current_word_ & kFirstBit);
+
+ int64_t len;
+ if (~current_word_) {
+ const auto num_ones = CountFirstZeros(~current_word_);
+ assert(num_ones <= current_num_bits_);
+ assert(num_ones <= remaining_);
+ remaining_ -= num_ones;
+ current_word_ = ConsumeBits(current_word_, num_ones);
+ current_num_bits_ -= num_ones;
+ if (current_num_bits_) {
+ // Run of ones ends here
+ return num_ones;
+ }
+ len = num_ones;
+ } else {
+ // current_word_ is all ones
+ remaining_ -= 64;
+ current_num_bits_ = 0;
+ len = 64;
+ }
+
+ while (ARROW_PREDICT_TRUE(remaining_ >= 64)) {
+ current_word_ = LoadFullWord();
+ const auto num_ones = CountFirstZeros(~current_word_);
+ len += num_ones;
+ remaining_ -= num_ones;
+ if (num_ones < 64) {
+ // Run of ones ends here
+ current_word_ = ConsumeBits(current_word_, num_ones);
+ current_num_bits_ = 64 - num_ones;
+ return len;
+ }
+ }
+ // Run of ones continues in last bitmap word
+ if (remaining_ > 0) {
+ current_word_ = LoadPartialWord(/*bit_offset=*/0, remaining_);
+ current_num_bits_ = static_cast<int32_t>(remaining_);
+ const auto num_ones = CountFirstZeros(~current_word_);
+ assert(num_ones <= current_num_bits_);
+ assert(num_ones <= remaining_);
+ current_word_ = ConsumeBits(current_word_, num_ones);
+ current_num_bits_ -= num_ones;
+ remaining_ -= num_ones;
+ len += num_ones;
+ }
+ return len;
+ }
+
+ SetBitRun FindCurrentRun() {
+ // Skip any pending zeros
+ const auto num_zeros = CountFirstZeros(current_word_);
+ if (num_zeros >= current_num_bits_) {
+ remaining_ -= current_num_bits_;
+ current_word_ = 0;
+ current_num_bits_ = 0;
+ return {0, 0};
+ }
+ assert(num_zeros <= remaining_);
+ current_word_ = ConsumeBits(current_word_, num_zeros);
+ current_num_bits_ -= num_zeros;
+ remaining_ -= num_zeros;
+ const int64_t pos = position();
+ // Count any ones
+ const auto num_ones = CountFirstZeros(~current_word_);
+ assert(num_ones <= current_num_bits_);
+ assert(num_ones <= remaining_);
+ current_word_ = ConsumeBits(current_word_, num_ones);
+ current_num_bits_ -= num_ones;
+ remaining_ -= num_ones;
+ return {pos, num_ones};
+ }
+
+ inline int CountFirstZeros(uint64_t word);
+ inline uint64_t ConsumeBits(uint64_t word, int32_t num_bits);
+
+ const uint8_t* bitmap_;
+ const int64_t length_;
+ int64_t remaining_;
+ uint64_t current_word_;
+ int32_t current_num_bits_;
+
+ static constexpr uint64_t kFirstBit = Reverse ? 0x8000000000000000ULL : 1;
+};
+
+template <>
+inline int BaseSetBitRunReader<false>::CountFirstZeros(uint64_t word) {
+ return BitUtil::CountTrailingZeros(word);
+}
+
+template <>
+inline int BaseSetBitRunReader<true>::CountFirstZeros(uint64_t word) {
+ return BitUtil::CountLeadingZeros(word);
+}
+
+template <>
+inline uint64_t BaseSetBitRunReader<false>::ConsumeBits(uint64_t word, int32_t num_bits) {
+ return word >> num_bits;
+}
+
+template <>
+inline uint64_t BaseSetBitRunReader<true>::ConsumeBits(uint64_t word, int32_t num_bits) {
+ return word << num_bits;
+}
+
+using SetBitRunReader = BaseSetBitRunReader</*Reverse=*/false>;
+using ReverseSetBitRunReader = BaseSetBitRunReader</*Reverse=*/true>;
+
+// Functional-style bit run visitors.
+
+// XXX: Try to make this function small so the compiler can inline and optimize
+// the `visit` function, which is normally a hot loop with vectorizable code.
+// - don't inline SetBitRunReader constructor, it doesn't hurt performance
+// - un-inline NextRun hurts 'many null' cases a bit, but improves normal cases
+template <typename Visit>
+inline Status VisitSetBitRuns(const uint8_t* bitmap, int64_t offset, int64_t length,
+ Visit&& visit) {
+ if (bitmap == NULLPTR) {
+ // Assuming all set (as in a null bitmap)
+ return visit(static_cast<int64_t>(0), static_cast<int64_t>(length));
+ }
+ SetBitRunReader reader(bitmap, offset, length);
+ while (true) {
+ const auto run = reader.NextRun();
+ if (run.length == 0) {
+ break;
+ }
+ ARROW_RETURN_NOT_OK(visit(run.position, run.length));
+ }
+ return Status::OK();
+}
+
+template <typename Visit>
+inline void VisitSetBitRunsVoid(const uint8_t* bitmap, int64_t offset, int64_t length,
+ Visit&& visit) {
+ if (bitmap == NULLPTR) {
+ // Assuming all set (as in a null bitmap)
+ visit(static_cast<int64_t>(0), static_cast<int64_t>(length));
+ return;
+ }
+ SetBitRunReader reader(bitmap, offset, length);
+ while (true) {
+ const auto run = reader.NextRun();
+ if (run.length == 0) {
+ break;
+ }
+ visit(run.position, run.length);
+ }
+}
+
+template <typename Visit>
+inline Status VisitSetBitRuns(const std::shared_ptr<Buffer>& bitmap, int64_t offset,
+ int64_t length, Visit&& visit) {
+ return VisitSetBitRuns(bitmap ? bitmap->data() : NULLPTR, offset, length,
+ std::forward<Visit>(visit));
+}
+
+template <typename Visit>
+inline void VisitSetBitRunsVoid(const std::shared_ptr<Buffer>& bitmap, int64_t offset,
+ int64_t length, Visit&& visit) {
+ VisitSetBitRunsVoid(bitmap ? bitmap->data() : NULLPTR, offset, length,
+ std::forward<Visit>(visit));
+}
+
} // namespace internal
} // namespace arrow
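To round out the functional-style visitors that close this header, here is a sketch that collects set-bit runs into a vector using the VisitSetBitRunsVoid overload shown above (include path assumed; function name illustrative):

    #include <cstdint>
    #include <utility>
    #include <vector>

    #include "arrow/util/bit_run_reader.h"  // assumed include path, per the diff

    // Collect every run of set bits as (position, length) pairs. A null bitmap
    // is treated as all-set, matching the documented visitor behavior.
    std::vector<std::pair<int64_t, int64_t>> CollectSetRuns(const uint8_t* bitmap,
                                                            int64_t offset,
                                                            int64_t length) {
      std::vector<std::pair<int64_t, int64_t>> runs;
      arrow::internal::VisitSetBitRunsVoid(
          bitmap, offset, length, [&runs](int64_t position, int64_t run_length) {
            runs.emplace_back(position, run_length);
          });
      return runs;
    }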
diff --git a/contrib/libs/apache/arrow/cpp/src/arrow/util/bit_stream_utils.h b/contrib/libs/apache/arrow/cpp/src/arrow/util/bit_stream_utils.h
index cdd3683557c..b9e695dfcb0 100644
--- a/contrib/libs/apache/arrow/cpp/src/arrow/util/bit_stream_utils.h
+++ b/contrib/libs/apache/arrow/cpp/src/arrow/util/bit_stream_utils.h
@@ -1,433 +1,433 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-// From Apache Impala (incubating) as of 2016-01-29
-
-#pragma once
-
-#include <string.h>
-#include <algorithm>
-#include <cstdint>
-
-#include "arrow/util/bit_util.h"
-#include "arrow/util/bpacking.h"
-#include "arrow/util/logging.h"
-#include "arrow/util/macros.h"
-#include "arrow/util/ubsan.h"
-
-namespace arrow {
-namespace BitUtil {
-
-/// Utility class to write bit/byte streams. This class can write data either
-/// bit packed or byte aligned (and a single stream may mix both).
-/// This class does not allocate memory.
-class BitWriter {
- public:
- /// buffer: buffer to write bits to. Buffer should be preallocated with
- /// 'buffer_len' bytes.
- BitWriter(uint8_t* buffer, int buffer_len) : buffer_(buffer), max_bytes_(buffer_len) {
- Clear();
- }
-
- void Clear() {
- buffered_values_ = 0;
- byte_offset_ = 0;
- bit_offset_ = 0;
- }
-
- /// The number of bytes written so far, including the current byte (i.e. it may
- /// include a fraction of a byte). Includes buffered values.
- int bytes_written() const {
- return byte_offset_ + static_cast<int>(BitUtil::BytesForBits(bit_offset_));
- }
- uint8_t* buffer() const { return buffer_; }
- int buffer_len() const { return max_bytes_; }
-
- /// Writes a value to buffered_values_, flushing to buffer_ if necessary. This is bit
- /// packed. Returns false if there was not enough space. num_bits must be <= 32.
- bool PutValue(uint64_t v, int num_bits);
-
- /// Writes v to the next aligned byte using num_bytes. If T is larger than
- /// num_bytes, the extra high-order bytes will be ignored. Returns false if
- /// there was not enough space.
- /// Assume v is stored in buffer_ in little-endian format
- template <typename T>
- bool PutAligned(T v, int num_bytes);
-
- /// Write a Vlq encoded int to the buffer. Returns false if there was not enough
- /// room. The value is written byte aligned.
- /// For more details on vlq:
- /// en.wikipedia.org/wiki/Variable-length_quantity
- bool PutVlqInt(uint32_t v);
-
- // Writes a zigzag encoded int.
- bool PutZigZagVlqInt(int32_t v);
-
- /// Get a pointer to the next aligned byte and advance the underlying buffer
- /// by num_bytes.
- /// Returns NULL if there was not enough space.
- uint8_t* GetNextBytePtr(int num_bytes = 1);
-
- /// Flushes all buffered values to the buffer. Call this when done writing to
- /// the buffer. If 'align' is true, buffered_values_ is reset and any future
- /// writes will be written to the next byte boundary.
- void Flush(bool align = false);
-
- private:
- uint8_t* buffer_;
- int max_bytes_;
-
- /// Bit-packed values are initially written to this variable before being memcpy'd to
- /// buffer_. This is faster than writing values byte by byte directly to buffer_.
- uint64_t buffered_values_;
-
- int byte_offset_; // Offset in buffer_
- int bit_offset_; // Offset in buffered_values_
-};
-
-/// Utility class to read bit/byte streams. This class can read bits or bytes
-/// that are either byte aligned or not. It also has utilities to read multiple
-/// bytes in one read (e.g. encoded int).
-class BitReader {
- public:
- /// 'buffer' is the buffer to read from. The buffer's length is 'buffer_len'.
- BitReader(const uint8_t* buffer, int buffer_len)
- : buffer_(buffer), max_bytes_(buffer_len), byte_offset_(0), bit_offset_(0) {
- int num_bytes = std::min(8, max_bytes_ - byte_offset_);
- memcpy(&buffered_values_, buffer_ + byte_offset_, num_bytes);
- buffered_values_ = arrow::BitUtil::FromLittleEndian(buffered_values_);
- }
-
- BitReader()
- : buffer_(NULL),
- max_bytes_(0),
- buffered_values_(0),
- byte_offset_(0),
- bit_offset_(0) {}
-
- void Reset(const uint8_t* buffer, int buffer_len) {
- buffer_ = buffer;
- max_bytes_ = buffer_len;
- byte_offset_ = 0;
- bit_offset_ = 0;
- int num_bytes = std::min(8, max_bytes_ - byte_offset_);
- memcpy(&buffered_values_, buffer_ + byte_offset_, num_bytes);
- buffered_values_ = arrow::BitUtil::FromLittleEndian(buffered_values_);
- }
-
- /// Gets the next value from the buffer. Returns true if 'v' could be read or false if
- /// there are not enough bytes left. num_bits must be <= 32.
- template <typename T>
- bool GetValue(int num_bits, T* v);
-
- /// Get a number of values from the buffer. Return the number of values actually read.
- template <typename T>
- int GetBatch(int num_bits, T* v, int batch_size);
-
- /// Reads a 'num_bytes'-sized value from the buffer and stores it in 'v'. T
- /// needs to be a little-endian native type and big enough to store
- /// 'num_bytes'. The value is assumed to be byte-aligned so the stream will
- /// be advanced to the start of the next byte before 'v' is read. Returns
- /// false if there are not enough bytes left.
- /// Assume v was stored in buffer_ in little-endian format
- template <typename T>
- bool GetAligned(int num_bytes, T* v);
-
- /// Reads a vlq encoded int from the stream. The encoded int must start at
- /// the beginning of a byte. Return false if there were not enough bytes in
- /// the buffer.
- bool GetVlqInt(uint32_t* v);
-
- // Reads a zigzag encoded int into `v`.
- bool GetZigZagVlqInt(int32_t* v);
-
- /// Returns the number of bytes left in the stream, not including the current
- /// byte (i.e., there may be an additional fraction of a byte).
- int bytes_left() {
- return max_bytes_ -
- (byte_offset_ + static_cast<int>(BitUtil::BytesForBits(bit_offset_)));
- }
-
- /// Maximum byte length of a vlq encoded int
- static constexpr int kMaxVlqByteLength = 5;
-
- private:
- const uint8_t* buffer_;
- int max_bytes_;
-
- /// Bytes are memcpy'd from buffer_ and values are read from this variable. This is
- /// faster than reading values byte by byte directly from buffer_.
- uint64_t buffered_values_;
-
- int byte_offset_; // Offset in buffer_
- int bit_offset_; // Offset in buffered_values_
-};
-
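Before the inline implementations that follow, a round-trip sketch of the two classes just declared: bit-pack values with BitWriter, flush, then decode with BitReader. Buffer size and function name are illustrative; the calls mirror the declarations above:

    #include <cstdint>

    #include "arrow/util/bit_stream_utils.h"  // assumed include path, per the diff

    // Pack eight 3-bit values and read them back. Flush(/*align=*/true) pushes
    // the partially filled word into the buffer before the reader consumes it.
    bool RoundTrip3BitValues() {
      uint8_t buffer[16] = {};
      arrow::BitUtil::BitWriter writer(buffer, static_cast<int>(sizeof(buffer)));
      for (uint64_t v = 0; v < 8; ++v) {
        if (!writer.PutValue(v, /*num_bits=*/3)) return false;  // out of space
      }
      writer.Flush(/*align=*/true);

      arrow::BitUtil::BitReader reader(buffer, static_cast<int>(sizeof(buffer)));
      for (uint32_t expected = 0; expected < 8; ++expected) {
        uint32_t v = 0;
        if (!reader.GetValue(/*num_bits=*/3, &v) || v != expected) return false;
      }
      return true;
    }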
-inline bool BitWriter::PutValue(uint64_t v, int num_bits) {
- // TODO: revisit this limit if necessary (can be raised to 64 by fixing some edge cases)
- DCHECK_LE(num_bits, 32);
- DCHECK_EQ(v >> num_bits, 0) << "v = " << v << ", num_bits = " << num_bits;
-
- if (ARROW_PREDICT_FALSE(byte_offset_ * 8 + bit_offset_ + num_bits > max_bytes_ * 8))
- return false;
-
- buffered_values_ |= v << bit_offset_;
- bit_offset_ += num_bits;
-
- if (ARROW_PREDICT_FALSE(bit_offset_ >= 64)) {
- // Flush buffered_values_ and write out bits of v that did not fit
- buffered_values_ = arrow::BitUtil::ToLittleEndian(buffered_values_);
- memcpy(buffer_ + byte_offset_, &buffered_values_, 8);
- buffered_values_ = 0;
- byte_offset_ += 8;
- bit_offset_ -= 64;
- buffered_values_ = v >> (num_bits - bit_offset_);
- }
- DCHECK_LT(bit_offset_, 64);
- return true;
-}
-
-inline void BitWriter::Flush(bool align) {
- int num_bytes = static_cast<int>(BitUtil::BytesForBits(bit_offset_));
- DCHECK_LE(byte_offset_ + num_bytes, max_bytes_);
- auto buffered_values = arrow::BitUtil::ToLittleEndian(buffered_values_);
- memcpy(buffer_ + byte_offset_, &buffered_values, num_bytes);
-
- if (align) {
- buffered_values_ = 0;
- byte_offset_ += num_bytes;
- bit_offset_ = 0;
- }
-}
-
-inline uint8_t* BitWriter::GetNextBytePtr(int num_bytes) {
- Flush(/* align */ true);
- DCHECK_LE(byte_offset_, max_bytes_);
- if (byte_offset_ + num_bytes > max_bytes_) return NULL;
- uint8_t* ptr = buffer_ + byte_offset_;
- byte_offset_ += num_bytes;
- return ptr;
-}
-
-template <typename T>
-inline bool BitWriter::PutAligned(T val, int num_bytes) {
- uint8_t* ptr = GetNextBytePtr(num_bytes);
- if (ptr == NULL) return false;
- val = arrow::BitUtil::ToLittleEndian(val);
- memcpy(ptr, &val, num_bytes);
- return true;
-}
-
-namespace detail {
-
-template <typename T>
-inline void GetValue_(int num_bits, T* v, int max_bytes, const uint8_t* buffer,
- int* bit_offset, int* byte_offset, uint64_t* buffered_values) {
-#ifdef _MSC_VER
-#pragma warning(push)
-#pragma warning(disable : 4800)
-#endif
- *v = static_cast<T>(BitUtil::TrailingBits(*buffered_values, *bit_offset + num_bits) >>
- *bit_offset);
-#ifdef _MSC_VER
-#pragma warning(pop)
-#endif
- *bit_offset += num_bits;
- if (*bit_offset >= 64) {
- *byte_offset += 8;
- *bit_offset -= 64;
-
- int bytes_remaining = max_bytes - *byte_offset;
- if (ARROW_PREDICT_TRUE(bytes_remaining >= 8)) {
- memcpy(buffered_values, buffer + *byte_offset, 8);
- } else {
- memcpy(buffered_values, buffer + *byte_offset, bytes_remaining);
- }
- *buffered_values = arrow::BitUtil::FromLittleEndian(*buffered_values);
-#ifdef _MSC_VER
-#pragma warning(push)
-#pragma warning(disable : 4800 4805)
-#endif
- // Read bits of v that crossed into new buffered_values_
- *v = *v | static_cast<T>(BitUtil::TrailingBits(*buffered_values, *bit_offset)
- << (num_bits - *bit_offset));
-#ifdef _MSC_VER
-#pragma warning(pop)
-#endif
- DCHECK_LE(*bit_offset, 64);
- }
-}
-
-} // namespace detail
-
-template <typename T>
-inline bool BitReader::GetValue(int num_bits, T* v) {
- return GetBatch(num_bits, v, 1) == 1;
-}
-
-template <typename T>
-inline int BitReader::GetBatch(int num_bits, T* v, int batch_size) {
- DCHECK(buffer_ != NULL);
- // TODO: revisit this limit if necessary
- DCHECK_LE(num_bits, 32);
- DCHECK_LE(num_bits, static_cast<int>(sizeof(T) * 8));
-
- int bit_offset = bit_offset_;
- int byte_offset = byte_offset_;
- uint64_t buffered_values = buffered_values_;
- int max_bytes = max_bytes_;
- const uint8_t* buffer = buffer_;
-
- uint64_t needed_bits = num_bits * batch_size;
- constexpr uint64_t kBitsPerByte = 8;
- uint64_t remaining_bits = (max_bytes - byte_offset) * kBitsPerByte - bit_offset;
- if (remaining_bits < needed_bits) {
- batch_size = static_cast<int>(remaining_bits) / num_bits;
- }
-
- int i = 0;
- if (ARROW_PREDICT_FALSE(bit_offset != 0)) {
- for (; i < batch_size && bit_offset != 0; ++i) {
- detail::GetValue_(num_bits, &v[i], max_bytes, buffer, &bit_offset, &byte_offset,
- &buffered_values);
- }
- }
-
- if (sizeof(T) == 4) {
- int num_unpacked =
- internal::unpack32(reinterpret_cast<const uint32_t*>(buffer + byte_offset),
- reinterpret_cast<uint32_t*>(v + i), batch_size - i, num_bits);
- i += num_unpacked;
- byte_offset += num_unpacked * num_bits / 8;
- } else {
- const int buffer_size = 1024;
- uint32_t unpack_buffer[buffer_size];
- while (i < batch_size) {
- int unpack_size = std::min(buffer_size, batch_size - i);
- int num_unpacked =
- internal::unpack32(reinterpret_cast<const uint32_t*>(buffer + byte_offset),
- unpack_buffer, unpack_size, num_bits);
- if (num_unpacked == 0) {
- break;
- }
- for (int k = 0; k < num_unpacked; ++k) {
-#ifdef _MSC_VER
-#pragma warning(push)
-#pragma warning(disable : 4800)
-#endif
- v[i + k] = static_cast<T>(unpack_buffer[k]);
-#ifdef _MSC_VER
-#pragma warning(pop)
-#endif
- }
- i += num_unpacked;
- byte_offset += num_unpacked * num_bits / 8;
- }
- }
-
- int bytes_remaining = max_bytes - byte_offset;
- if (bytes_remaining >= 8) {
- memcpy(&buffered_values, buffer + byte_offset, 8);
- } else {
- memcpy(&buffered_values, buffer + byte_offset, bytes_remaining);
- }
- buffered_values = arrow::BitUtil::FromLittleEndian(buffered_values);
-
- for (; i < batch_size; ++i) {
- detail::GetValue_(num_bits, &v[i], max_bytes, buffer, &bit_offset, &byte_offset,
- &buffered_values);
- }
-
- bit_offset_ = bit_offset;
- byte_offset_ = byte_offset;
- buffered_values_ = buffered_values;
-
- return batch_size;
-}
-
-template <typename T>
-inline bool BitReader::GetAligned(int num_bytes, T* v) {
- if (ARROW_PREDICT_FALSE(num_bytes > static_cast<int>(sizeof(T)))) {
- return false;
- }
-
- int bytes_read = static_cast<int>(BitUtil::BytesForBits(bit_offset_));
- if (ARROW_PREDICT_FALSE(byte_offset_ + bytes_read + num_bytes > max_bytes_)) {
- return false;
- }
-
- // Advance byte_offset to next unread byte and read num_bytes
- byte_offset_ += bytes_read;
- memcpy(v, buffer_ + byte_offset_, num_bytes);
- *v = arrow::BitUtil::FromLittleEndian(*v);
- byte_offset_ += num_bytes;
-
- // Reset buffered_values_
- bit_offset_ = 0;
- int bytes_remaining = max_bytes_ - byte_offset_;
- if (ARROW_PREDICT_TRUE(bytes_remaining >= 8)) {
- memcpy(&buffered_values_, buffer_ + byte_offset_, 8);
- } else {
- memcpy(&buffered_values_, buffer_ + byte_offset_, bytes_remaining);
- }
- buffered_values_ = arrow::BitUtil::FromLittleEndian(buffered_values_);
- return true;
-}
-
-inline bool BitWriter::PutVlqInt(uint32_t v) {
- bool result = true;
- while ((v & 0xFFFFFF80UL) != 0UL) {
- result &= PutAligned<uint8_t>(static_cast<uint8_t>((v & 0x7F) | 0x80), 1);
- v >>= 7;
- }
- result &= PutAligned<uint8_t>(static_cast<uint8_t>(v & 0x7F), 1);
- return result;
-}
-
-inline bool BitReader::GetVlqInt(uint32_t* v) {
- uint32_t tmp = 0;
-
- for (int i = 0; i < kMaxVlqByteLength; i++) {
- uint8_t byte = 0;
- if (ARROW_PREDICT_FALSE(!GetAligned<uint8_t>(1, &byte))) {
- return false;
- }
- tmp |= static_cast<uint32_t>(byte & 0x7F) << (7 * i);
-
- if ((byte & 0x80) == 0) {
- *v = tmp;
- return true;
- }
- }
-
- return false;
-}
-
-inline bool BitWriter::PutZigZagVlqInt(int32_t v) {
- auto u_v = ::arrow::util::SafeCopy<uint32_t>(v);
- return PutVlqInt((u_v << 1) ^ (u_v >> 31));
-}
-
-inline bool BitReader::GetZigZagVlqInt(int32_t* v) {
- uint32_t u;
- if (!GetVlqInt(&u)) return false;
- *v = ::arrow::util::SafeCopy<int32_t>((u >> 1) ^ (u << 31));
- return true;
-}
-
-} // namespace BitUtil
-} // namespace arrow
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+// From Apache Impala (incubating) as of 2016-01-29
+
+#pragma once
+
+#include <string.h>
+#include <algorithm>
+#include <cstdint>
+
+#include "arrow/util/bit_util.h"
+#include "arrow/util/bpacking.h"
+#include "arrow/util/logging.h"
+#include "arrow/util/macros.h"
+#include "arrow/util/ubsan.h"
+
+namespace arrow {
+namespace BitUtil {
+
+/// Utility class to write bit/byte streams. This class can write data either
+/// bit packed or byte aligned (and a single stream may mix both).
+/// This class does not allocate memory.
+class BitWriter {
+ public:
+ /// buffer: buffer to write bits to. Buffer should be preallocated with
+ /// 'buffer_len' bytes.
+ BitWriter(uint8_t* buffer, int buffer_len) : buffer_(buffer), max_bytes_(buffer_len) {
+ Clear();
+ }
+
+ void Clear() {
+ buffered_values_ = 0;
+ byte_offset_ = 0;
+ bit_offset_ = 0;
+ }
+
+  /// The number of bytes written so far, including the current byte (i.e., this may
+  /// include a fraction of a byte). Includes buffered values.
+ int bytes_written() const {
+ return byte_offset_ + static_cast<int>(BitUtil::BytesForBits(bit_offset_));
+ }
+ uint8_t* buffer() const { return buffer_; }
+ int buffer_len() const { return max_bytes_; }
+
+ /// Writes a value to buffered_values_, flushing to buffer_ if necessary. This is bit
+ /// packed. Returns false if there was not enough space. num_bits must be <= 32.
+ bool PutValue(uint64_t v, int num_bits);
+
+ /// Writes v to the next aligned byte using num_bytes. If T is larger than
+ /// num_bytes, the extra high-order bytes will be ignored. Returns false if
+ /// there was not enough space.
+  /// The value is written to buffer_ in little-endian byte order.
+ template <typename T>
+ bool PutAligned(T v, int num_bytes);
+
+  /// Writes a VLQ-encoded int to the buffer. Returns false if there was not enough
+  /// room. The value is written byte aligned.
+  /// For more details on VLQ:
+  /// en.wikipedia.org/wiki/Variable-length_quantity
+ bool PutVlqInt(uint32_t v);
+
+  // Writes a zigzag-encoded int.
+ bool PutZigZagVlqInt(int32_t v);
+
+ /// Get a pointer to the next aligned byte and advance the underlying buffer
+ /// by num_bytes.
+ /// Returns NULL if there was not enough space.
+ uint8_t* GetNextBytePtr(int num_bytes = 1);
+
+ /// Flushes all buffered values to the buffer. Call this when done writing to
+ /// the buffer. If 'align' is true, buffered_values_ is reset and any future
+ /// writes will be written to the next byte boundary.
+ void Flush(bool align = false);
+
+ private:
+ uint8_t* buffer_;
+ int max_bytes_;
+
+ /// Bit-packed values are initially written to this variable before being memcpy'd to
+ /// buffer_. This is faster than writing values byte by byte directly to buffer_.
+ uint64_t buffered_values_;
+
+ int byte_offset_; // Offset in buffer_
+ int bit_offset_; // Offset in buffered_values_
+};
+
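
To make the packing order concrete, here is a minimal standalone sketch of the
scheme PutValue implements: values are OR'd into a 64-bit accumulator at the
running bit offset, then copied out in little-endian byte order. It deliberately
avoids this header, so every name in it is local to the sketch.

#include <cstdint>
#include <cstdio>
#include <cstring>

int main() {
  uint64_t buffered = 0;  // plays the role of buffered_values_
  int bit_offset = 0;
  for (uint64_t v : {5, 1, 7}) {  // three 3-bit values
    buffered |= v << bit_offset;
    bit_offset += 3;
  }
  uint8_t out[8];
  // The real Flush byte-swaps via BitUtil::ToLittleEndian first (a no-op on
  // little-endian hosts); this sketch simply assumes a little-endian host.
  std::memcpy(out, &buffered, sizeof(buffered));
  std::printf("%02x %02x\n", out[0], out[1]);  // prints "cd 01"
  // out[0] = 0b11001101: bits 0-2 = 101 (5), bits 3-5 = 001 (1), bits 6-7 = 11
  // out[1] = 0b00000001: bit 8 carries the high bit of 7
  return 0;
}
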
+/// Utility class to read bit/byte streams. This class can read bits or bytes that
+/// are either byte aligned or not. It also has utilities to read multiple bytes
+/// in one read (e.g., an encoded int).
+class BitReader {
+ public:
+ /// 'buffer' is the buffer to read from. The buffer's length is 'buffer_len'.
+ BitReader(const uint8_t* buffer, int buffer_len)
+ : buffer_(buffer), max_bytes_(buffer_len), byte_offset_(0), bit_offset_(0) {
+ int num_bytes = std::min(8, max_bytes_ - byte_offset_);
+ memcpy(&buffered_values_, buffer_ + byte_offset_, num_bytes);
+ buffered_values_ = arrow::BitUtil::FromLittleEndian(buffered_values_);
+ }
+
+ BitReader()
+ : buffer_(NULL),
+ max_bytes_(0),
+ buffered_values_(0),
+ byte_offset_(0),
+ bit_offset_(0) {}
+
+ void Reset(const uint8_t* buffer, int buffer_len) {
+ buffer_ = buffer;
+ max_bytes_ = buffer_len;
+ byte_offset_ = 0;
+ bit_offset_ = 0;
+ int num_bytes = std::min(8, max_bytes_ - byte_offset_);
+ memcpy(&buffered_values_, buffer_ + byte_offset_, num_bytes);
+ buffered_values_ = arrow::BitUtil::FromLittleEndian(buffered_values_);
+ }
+
+ /// Gets the next value from the buffer. Returns true if 'v' could be read or false if
+ /// there are not enough bytes left. num_bits must be <= 32.
+ template <typename T>
+ bool GetValue(int num_bits, T* v);
+
+  /// Gets a batch of values from the buffer. Returns the number of values actually read.
+ template <typename T>
+ int GetBatch(int num_bits, T* v, int batch_size);
+
+ /// Reads a 'num_bytes'-sized value from the buffer and stores it in 'v'. T
+ /// needs to be a little-endian native type and big enough to store
+ /// 'num_bytes'. The value is assumed to be byte-aligned so the stream will
+ /// be advanced to the start of the next byte before 'v' is read. Returns
+ /// false if there are not enough bytes left.
+  /// The value is assumed to be stored in buffer_ in little-endian byte order.
+ template <typename T>
+ bool GetAligned(int num_bytes, T* v);
+
+  /// Reads a VLQ-encoded int from the stream. The encoded int must start at
+  /// the beginning of a byte. Returns false if there were not enough bytes in
+  /// the buffer.
+ bool GetVlqInt(uint32_t* v);
+
+  // Reads a zigzag-encoded int into v.
+ bool GetZigZagVlqInt(int32_t* v);
+
+ /// Returns the number of bytes left in the stream, not including the current
+ /// byte (i.e., there may be an additional fraction of a byte).
+ int bytes_left() {
+ return max_bytes_ -
+ (byte_offset_ + static_cast<int>(BitUtil::BytesForBits(bit_offset_)));
+ }
+
+  /// Maximum byte length of a VLQ-encoded int
+ static constexpr int kMaxVlqByteLength = 5;
+
+ private:
+ const uint8_t* buffer_;
+ int max_bytes_;
+
+ /// Bytes are memcpy'd from buffer_ and values are read from this variable. This is
+ /// faster than reading values byte by byte directly from buffer_.
+ uint64_t buffered_values_;
+
+ int byte_offset_; // Offset in buffer_
+ int bit_offset_; // Offset in buffered_values_
+};
+
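
A plausible end-to-end round trip through the two classes. This is a sketch:
the include path is an assumption (the header's own filename is not visible in
this hunk), and error handling is reduced to asserts.

#include <cassert>
#include <cstdint>

#include "arrow/util/bit_stream_utils.h"  // assumed location of these classes

void RoundTrip3BitValues() {
  uint8_t buf[16] = {0};
  arrow::BitUtil::BitWriter writer(buf, sizeof(buf));
  for (uint64_t v = 0; v < 8; ++v) {
    assert(writer.PutValue(v, /*num_bits=*/3));  // 8 values * 3 bits = 24 bits
  }
  writer.Flush();

  arrow::BitUtil::BitReader reader(buf, sizeof(buf));
  for (uint32_t expected = 0; expected < 8; ++expected) {
    uint32_t v = 0;
    assert(reader.GetValue(/*num_bits=*/3, &v));
    assert(v == expected);
  }
}
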
+inline bool BitWriter::PutValue(uint64_t v, int num_bits) {
+ // TODO: revisit this limit if necessary (can be raised to 64 by fixing some edge cases)
+ DCHECK_LE(num_bits, 32);
+ DCHECK_EQ(v >> num_bits, 0) << "v = " << v << ", num_bits = " << num_bits;
+
+ if (ARROW_PREDICT_FALSE(byte_offset_ * 8 + bit_offset_ + num_bits > max_bytes_ * 8))
+ return false;
+
+ buffered_values_ |= v << bit_offset_;
+ bit_offset_ += num_bits;
+
+ if (ARROW_PREDICT_FALSE(bit_offset_ >= 64)) {
+ // Flush buffered_values_ and write out bits of v that did not fit
+ buffered_values_ = arrow::BitUtil::ToLittleEndian(buffered_values_);
+ memcpy(buffer_ + byte_offset_, &buffered_values_, 8);
+ buffered_values_ = 0;
+ byte_offset_ += 8;
+ bit_offset_ -= 64;
+ buffered_values_ = v >> (num_bits - bit_offset_);
+ }
+ DCHECK_LT(bit_offset_, 64);
+ return true;
+}
+
+inline void BitWriter::Flush(bool align) {
+ int num_bytes = static_cast<int>(BitUtil::BytesForBits(bit_offset_));
+ DCHECK_LE(byte_offset_ + num_bytes, max_bytes_);
+ auto buffered_values = arrow::BitUtil::ToLittleEndian(buffered_values_);
+ memcpy(buffer_ + byte_offset_, &buffered_values, num_bytes);
+
+ if (align) {
+ buffered_values_ = 0;
+ byte_offset_ += num_bytes;
+ bit_offset_ = 0;
+ }
+}
+
+inline uint8_t* BitWriter::GetNextBytePtr(int num_bytes) {
+ Flush(/* align */ true);
+ DCHECK_LE(byte_offset_, max_bytes_);
+ if (byte_offset_ + num_bytes > max_bytes_) return NULL;
+ uint8_t* ptr = buffer_ + byte_offset_;
+ byte_offset_ += num_bytes;
+ return ptr;
+}
+
+template <typename T>
+inline bool BitWriter::PutAligned(T val, int num_bytes) {
+ uint8_t* ptr = GetNextBytePtr(num_bytes);
+ if (ptr == NULL) return false;
+ val = arrow::BitUtil::ToLittleEndian(val);
+ memcpy(ptr, &val, num_bytes);
+ return true;
+}
+
+namespace detail {
+
+template <typename T>
+inline void GetValue_(int num_bits, T* v, int max_bytes, const uint8_t* buffer,
+ int* bit_offset, int* byte_offset, uint64_t* buffered_values) {
+#ifdef _MSC_VER
+#pragma warning(push)
+#pragma warning(disable : 4800)
+#endif
+ *v = static_cast<T>(BitUtil::TrailingBits(*buffered_values, *bit_offset + num_bits) >>
+ *bit_offset);
+#ifdef _MSC_VER
+#pragma warning(pop)
+#endif
+ *bit_offset += num_bits;
+ if (*bit_offset >= 64) {
+ *byte_offset += 8;
+ *bit_offset -= 64;
+
+ int bytes_remaining = max_bytes - *byte_offset;
+ if (ARROW_PREDICT_TRUE(bytes_remaining >= 8)) {
+ memcpy(buffered_values, buffer + *byte_offset, 8);
+ } else {
+ memcpy(buffered_values, buffer + *byte_offset, bytes_remaining);
+ }
+ *buffered_values = arrow::BitUtil::FromLittleEndian(*buffered_values);
+#ifdef _MSC_VER
+#pragma warning(push)
+#pragma warning(disable : 4800 4805)
+#endif
+ // Read bits of v that crossed into new buffered_values_
+ *v = *v | static_cast<T>(BitUtil::TrailingBits(*buffered_values, *bit_offset)
+ << (num_bits - *bit_offset));
+#ifdef _MSC_VER
+#pragma warning(pop)
+#endif
+ DCHECK_LE(*bit_offset, 64);
+ }
+}
+
+} // namespace detail
+
+template <typename T>
+inline bool BitReader::GetValue(int num_bits, T* v) {
+ return GetBatch(num_bits, v, 1) == 1;
+}
+
+template <typename T>
+inline int BitReader::GetBatch(int num_bits, T* v, int batch_size) {
+ DCHECK(buffer_ != NULL);
+ // TODO: revisit this limit if necessary
+ DCHECK_LE(num_bits, 32);
+ DCHECK_LE(num_bits, static_cast<int>(sizeof(T) * 8));
+
+ int bit_offset = bit_offset_;
+ int byte_offset = byte_offset_;
+ uint64_t buffered_values = buffered_values_;
+ int max_bytes = max_bytes_;
+ const uint8_t* buffer = buffer_;
+
+ uint64_t needed_bits = num_bits * batch_size;
+ constexpr uint64_t kBitsPerByte = 8;
+ uint64_t remaining_bits = (max_bytes - byte_offset) * kBitsPerByte - bit_offset;
+ if (remaining_bits < needed_bits) {
+ batch_size = static_cast<int>(remaining_bits) / num_bits;
+ }
+
+ int i = 0;
+ if (ARROW_PREDICT_FALSE(bit_offset != 0)) {
+ for (; i < batch_size && bit_offset != 0; ++i) {
+ detail::GetValue_(num_bits, &v[i], max_bytes, buffer, &bit_offset, &byte_offset,
+ &buffered_values);
+ }
+ }
+
+ if (sizeof(T) == 4) {
+ int num_unpacked =
+ internal::unpack32(reinterpret_cast<const uint32_t*>(buffer + byte_offset),
+ reinterpret_cast<uint32_t*>(v + i), batch_size - i, num_bits);
+ i += num_unpacked;
+ byte_offset += num_unpacked * num_bits / 8;
+ } else {
+ const int buffer_size = 1024;
+ uint32_t unpack_buffer[buffer_size];
+ while (i < batch_size) {
+ int unpack_size = std::min(buffer_size, batch_size - i);
+ int num_unpacked =
+ internal::unpack32(reinterpret_cast<const uint32_t*>(buffer + byte_offset),
+ unpack_buffer, unpack_size, num_bits);
+ if (num_unpacked == 0) {
+ break;
+ }
+ for (int k = 0; k < num_unpacked; ++k) {
+#ifdef _MSC_VER
+#pragma warning(push)
+#pragma warning(disable : 4800)
+#endif
+ v[i + k] = static_cast<T>(unpack_buffer[k]);
+#ifdef _MSC_VER
+#pragma warning(pop)
+#endif
+ }
+ i += num_unpacked;
+ byte_offset += num_unpacked * num_bits / 8;
+ }
+ }
+
+ int bytes_remaining = max_bytes - byte_offset;
+ if (bytes_remaining >= 8) {
+ memcpy(&buffered_values, buffer + byte_offset, 8);
+ } else {
+ memcpy(&buffered_values, buffer + byte_offset, bytes_remaining);
+ }
+ buffered_values = arrow::BitUtil::FromLittleEndian(buffered_values);
+
+ for (; i < batch_size; ++i) {
+ detail::GetValue_(num_bits, &v[i], max_bytes, buffer, &bit_offset, &byte_offset,
+ &buffered_values);
+ }
+
+ bit_offset_ = bit_offset;
+ byte_offset_ = byte_offset;
+ buffered_values_ = buffered_values;
+
+ return batch_size;
+}
+
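
Continuing the sketch above: GetBatch is the bulk path. After the unaligned
prologue it unpacks directly through internal::unpack32, so reading a block in
one call is much cheaper than a loop of GetValue calls.

uint8_t buf[64] = {0};
arrow::BitUtil::BitWriter writer(buf, sizeof(buf));
for (uint64_t v = 0; v < 32; ++v) writer.PutValue(v, /*num_bits=*/5);
writer.Flush();

uint32_t values[32];
arrow::BitUtil::BitReader reader(buf, sizeof(buf));
int n = reader.GetBatch(/*num_bits=*/5, values, /*batch_size=*/32);
// n == 32 and values[k] == k; a shorter buffer would clamp n rather than fail
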
+template <typename T>
+inline bool BitReader::GetAligned(int num_bytes, T* v) {
+ if (ARROW_PREDICT_FALSE(num_bytes > static_cast<int>(sizeof(T)))) {
+ return false;
+ }
+
+ int bytes_read = static_cast<int>(BitUtil::BytesForBits(bit_offset_));
+ if (ARROW_PREDICT_FALSE(byte_offset_ + bytes_read + num_bytes > max_bytes_)) {
+ return false;
+ }
+
+ // Advance byte_offset to next unread byte and read num_bytes
+ byte_offset_ += bytes_read;
+ memcpy(v, buffer_ + byte_offset_, num_bytes);
+ *v = arrow::BitUtil::FromLittleEndian(*v);
+ byte_offset_ += num_bytes;
+
+ // Reset buffered_values_
+ bit_offset_ = 0;
+ int bytes_remaining = max_bytes_ - byte_offset_;
+ if (ARROW_PREDICT_TRUE(bytes_remaining >= 8)) {
+ memcpy(&buffered_values_, buffer_ + byte_offset_, 8);
+ } else {
+ memcpy(&buffered_values_, buffer_ + byte_offset_, bytes_remaining);
+ }
+ buffered_values_ = arrow::BitUtil::FromLittleEndian(buffered_values_);
+ return true;
+}
+
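
PutAligned and GetAligned round-trip through the byte-aligned path; note how an
in-flight bit-packed value forces padding up to the next byte boundary. Another
fragment of the same sketch:

uint8_t buf[8] = {0};
arrow::BitUtil::BitWriter writer(buf, sizeof(buf));
writer.PutValue(1, /*num_bits=*/3);  // leaves the stream mid-byte
writer.PutAligned<uint32_t>(0xABCD, /*num_bytes=*/2);  // pads, then 2 bytes LE
writer.Flush();

arrow::BitUtil::BitReader reader(buf, sizeof(buf));
uint32_t bits = 0;
uint32_t aligned = 0;  // zero-init matters: GetAligned overwrites only num_bytes
reader.GetValue(3, &bits);                 // bits == 1
reader.GetAligned<uint32_t>(2, &aligned);  // skips to byte 1; aligned == 0xABCD
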
+inline bool BitWriter::PutVlqInt(uint32_t v) {
+ bool result = true;
+ while ((v & 0xFFFFFF80UL) != 0UL) {
+ result &= PutAligned<uint8_t>(static_cast<uint8_t>((v & 0x7F) | 0x80), 1);
+ v >>= 7;
+ }
+ result &= PutAligned<uint8_t>(static_cast<uint8_t>(v & 0x7F), 1);
+ return result;
+}
+
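
A worked trace for v = 300 (binary 1'0010'1100): the loop body runs once,
because 300 has bits above the low seven, and the final byte carries the rest.

byte 0: (300 & 0x7F) | 0x80 = 0xAC   continuation bit set; v >>= 7 leaves 2
byte 1:    2 & 0x7F        = 0x02    continuation bit clear, encoding ends
encoded stream: AC 02
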
+inline bool BitReader::GetVlqInt(uint32_t* v) {
+ uint32_t tmp = 0;
+
+ for (int i = 0; i < kMaxVlqByteLength; i++) {
+ uint8_t byte = 0;
+ if (ARROW_PREDICT_FALSE(!GetAligned<uint8_t>(1, &byte))) {
+ return false;
+ }
+ tmp |= static_cast<uint32_t>(byte & 0x7F) << (7 * i);
+
+ if ((byte & 0x80) == 0) {
+ *v = tmp;
+ return true;
+ }
+ }
+
+ return false;
+}
+
+inline bool BitWriter::PutZigZagVlqInt(int32_t v) {
+ auto u_v = ::arrow::util::SafeCopy<uint32_t>(v);
+ return PutVlqInt((u_v << 1) ^ (u_v >> 31));
+}
+
+inline bool BitReader::GetZigZagVlqInt(int32_t* v) {
+ uint32_t u;
+ if (!GetVlqInt(&u)) return false;
+ *v = ::arrow::util::SafeCopy<int32_t>((u >> 1) ^ (u << 31));
+ return true;
+}
+
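
The two zigzag helpers are exact inverses: the encoder folds the sign bit into
the least-significant bit of the unsigned code (in effect a 1-bit left rotation
of the 32-bit value) and the decoder rotates it back. A round-trip fragment of
the earlier sketch:

uint8_t buf[16] = {0};
arrow::BitUtil::BitWriter writer(buf, sizeof(buf));
writer.PutZigZagVlqInt(-3);
writer.Flush();

arrow::BitUtil::BitReader reader(buf, sizeof(buf));
int32_t v = 0;
reader.GetZigZagVlqInt(&v);  // v == -3 again
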
+} // namespace BitUtil
+} // namespace arrow
diff --git a/contrib/libs/apache/arrow/cpp/src/arrow/util/bit_util.cc b/contrib/libs/apache/arrow/cpp/src/arrow/util/bit_util.cc
index 1b123f4153a..ee4bcde7713 100644
--- a/contrib/libs/apache/arrow/cpp/src/arrow/util/bit_util.cc
+++ b/contrib/libs/apache/arrow/cpp/src/arrow/util/bit_util.cc
@@ -20,8 +20,8 @@
#include <cstdint>
#include <cstring>
-#include "arrow/util/logging.h"
-
+#include "arrow/util/logging.h"
+
namespace arrow {
namespace BitUtil {
@@ -69,59 +69,59 @@ void SetBitsTo(uint8_t* bits, int64_t start_offset, int64_t length, bool bits_ar
bits[bytes_end - 1] |= static_cast<uint8_t>(fill_byte & ~last_byte_mask);
}
-template <bool value>
-void SetBitmapImpl(uint8_t* data, int64_t offset, int64_t length) {
- // offset length
- // data |<------------->|
- // |--------|...|--------|...|--------|
- // |<--->| |<--->|
- // pro epi
- if (ARROW_PREDICT_FALSE(length == 0)) {
- return;
- }
-
- constexpr uint8_t set_byte = value ? UINT8_MAX : 0;
-
- auto prologue = static_cast<int32_t>(BitUtil::RoundUp(offset, 8) - offset);
- DCHECK_LT(prologue, 8);
-
- if (length < prologue) { // special case where a mask is required
- // offset length
- // data |<->|
- // |--------|...|--------|...
- // mask --> |111|
- // |<---->|
- // pro
- uint8_t mask = BitUtil::kPrecedingBitmask[8 - prologue] ^
- BitUtil::kPrecedingBitmask[8 - prologue + length];
- data[offset / 8] = value ? data[offset / 8] | mask : data[offset / 8] & ~mask;
- return;
- }
-
- // align to a byte boundary
- data[offset / 8] = BitUtil::SpliceWord(8 - prologue, data[offset / 8], set_byte);
- offset += prologue;
- length -= prologue;
-
- // set values per byte
- DCHECK_EQ(offset % 8, 0);
- std::memset(data + offset / 8, set_byte, length / 8);
- offset += BitUtil::RoundDown(length, 8);
- length -= BitUtil::RoundDown(length, 8);
-
- // clean up
- DCHECK_LT(length, 8);
- data[offset / 8] =
- BitUtil::SpliceWord(static_cast<int32_t>(length), set_byte, data[offset / 8]);
-}
-
-void SetBitmap(uint8_t* data, int64_t offset, int64_t length) {
- SetBitmapImpl<true>(data, offset, length);
-}
-
-void ClearBitmap(uint8_t* data, int64_t offset, int64_t length) {
- SetBitmapImpl<false>(data, offset, length);
-}
-
+template <bool value>
+void SetBitmapImpl(uint8_t* data, int64_t offset, int64_t length) {
+ // offset length
+ // data |<------------->|
+ // |--------|...|--------|...|--------|
+ // |<--->| |<--->|
+ // pro epi
+ if (ARROW_PREDICT_FALSE(length == 0)) {
+ return;
+ }
+
+ constexpr uint8_t set_byte = value ? UINT8_MAX : 0;
+
+ auto prologue = static_cast<int32_t>(BitUtil::RoundUp(offset, 8) - offset);
+ DCHECK_LT(prologue, 8);
+
+ if (length < prologue) { // special case where a mask is required
+ // offset length
+ // data |<->|
+ // |--------|...|--------|...
+ // mask --> |111|
+ // |<---->|
+ // pro
+ uint8_t mask = BitUtil::kPrecedingBitmask[8 - prologue] ^
+ BitUtil::kPrecedingBitmask[8 - prologue + length];
+ data[offset / 8] = value ? data[offset / 8] | mask : data[offset / 8] & ~mask;
+ return;
+ }
+
+ // align to a byte boundary
+ data[offset / 8] = BitUtil::SpliceWord(8 - prologue, data[offset / 8], set_byte);
+ offset += prologue;
+ length -= prologue;
+
+ // set values per byte
+ DCHECK_EQ(offset % 8, 0);
+ std::memset(data + offset / 8, set_byte, length / 8);
+ offset += BitUtil::RoundDown(length, 8);
+ length -= BitUtil::RoundDown(length, 8);
+
+ // clean up
+ DCHECK_LT(length, 8);
+ data[offset / 8] =
+ BitUtil::SpliceWord(static_cast<int32_t>(length), set_byte, data[offset / 8]);
+}
+
+void SetBitmap(uint8_t* data, int64_t offset, int64_t length) {
+ SetBitmapImpl<true>(data, offset, length);
+}
+
+void ClearBitmap(uint8_t* data, int64_t offset, int64_t length) {
+ SetBitmapImpl<false>(data, offset, length);
+}
+
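
A small worked case of the prologue / whole-byte / epilogue split, using the
functions as declared in bit_util.h:

uint8_t bits[2] = {0, 0};
arrow::BitUtil::SetBitmap(bits, /*offset=*/3, /*length=*/7);
// The prologue sets bits 3-7 of byte 0, the epilogue bits 0-1 of byte 1:
// bits[0] == 0xF8 (0b11111000), bits[1] == 0x03 (0b00000011)
arrow::BitUtil::ClearBitmap(bits, /*offset=*/4, /*length=*/2);
// Short masked case (length < prologue): bits[0] == 0xC8 (0b11001000)
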
} // namespace BitUtil
} // namespace arrow
diff --git a/contrib/libs/apache/arrow/cpp/src/arrow/util/bit_util.h b/contrib/libs/apache/arrow/cpp/src/arrow/util/bit_util.h
index 216cf9fba88..c306ce7821b 100644
--- a/contrib/libs/apache/arrow/cpp/src/arrow/util/bit_util.h
+++ b/contrib/libs/apache/arrow/cpp/src/arrow/util/bit_util.h
@@ -112,7 +112,7 @@ constexpr bool IsMultipleOf8(int64_t n) { return (n & 7) == 0; }
// Returns a mask for the bit_index lower order bits.
// Only valid for bit_index in the range [0, 64).
-constexpr uint64_t LeastSignificantBitMask(int64_t bit_index) {
+constexpr uint64_t LeastSignificantBitMask(int64_t bit_index) {
return (static_cast<uint64_t>(1) << bit_index) - 1;
}
@@ -290,14 +290,14 @@ static constexpr uint8_t kPrecedingWrappingBitmask[] = {255, 1, 3, 7, 15, 31, 63
// the bitwise complement version of kPrecedingBitmask
static constexpr uint8_t kTrailingBitmask[] = {255, 254, 252, 248, 240, 224, 192, 128};
-static constexpr bool GetBit(const uint8_t* bits, uint64_t i) {
+static constexpr bool GetBit(const uint8_t* bits, uint64_t i) {
return (bits[i >> 3] >> (i & 0x07)) & 1;
}
// Gets the i-th bit from a byte. Should only be used with i <= 7.
-static constexpr bool GetBitFromByte(uint8_t byte, uint8_t i) {
- return byte & kBitmask[i];
-}
+static constexpr bool GetBitFromByte(uint8_t byte, uint8_t i) {
+ return byte & kBitmask[i];
+}
static inline void ClearBit(uint8_t* bits, int64_t i) {
bits[i / 8] &= kFlippedBitmask[i % 8];
@@ -318,37 +318,37 @@ static inline void SetBitTo(uint8_t* bits, int64_t i, bool bit_is_set) {
ARROW_EXPORT
void SetBitsTo(uint8_t* bits, int64_t start_offset, int64_t length, bool bits_are_set);
-/// \brief Sets all bits in the bitmap to true
-ARROW_EXPORT
-void SetBitmap(uint8_t* data, int64_t offset, int64_t length);
-
-/// \brief Clears all bits in the bitmap (set to false)
-ARROW_EXPORT
-void ClearBitmap(uint8_t* data, int64_t offset, int64_t length);
-
-/// Returns a mask with lower i bits set to 1. If i >= sizeof(Word)*8, all-ones will be
-/// returned
-/// ex: PrecedingWordBitmask<uint8_t>(4) == 0x0f (see the static_asserts below)
-/// ref: https://stackoverflow.com/a/59523400
-template <typename Word>
-constexpr Word PrecedingWordBitmask(unsigned int const i) {
- return (static_cast<Word>(i < sizeof(Word) * 8) << (i & (sizeof(Word) * 8 - 1))) - 1;
-}
-static_assert(PrecedingWordBitmask<uint8_t>(0) == 0x00, "");
-static_assert(PrecedingWordBitmask<uint8_t>(4) == 0x0f, "");
-static_assert(PrecedingWordBitmask<uint8_t>(8) == 0xff, "");
-static_assert(PrecedingWordBitmask<uint16_t>(8) == 0x00ff, "");
-
-/// \brief Create a word with low `n` bits from `low` and high `sizeof(Word)-n` bits
-/// from `high`.
-/// Word ret
-/// for (i = 0; i < sizeof(Word)*8; i++){
-/// ret[i] = i < n ? low[i] : high[i];
-/// }
-template <typename Word>
-constexpr Word SpliceWord(int n, Word low, Word high) {
- return (high & ~PrecedingWordBitmask<Word>(n)) | (low & PrecedingWordBitmask<Word>(n));
-}
-
+/// \brief Sets all bits in the bitmap to true
+ARROW_EXPORT
+void SetBitmap(uint8_t* data, int64_t offset, int64_t length);
+
+/// \brief Clears all bits in the bitmap (set to false)
+ARROW_EXPORT
+void ClearBitmap(uint8_t* data, int64_t offset, int64_t length);
+
+/// Returns a mask with lower i bits set to 1. If i >= sizeof(Word)*8, all-ones will be
+/// returned
+/// ex: PrecedingWordBitmask<uint8_t>(4) == 0x0f (see the static_asserts below)
+/// ref: https://stackoverflow.com/a/59523400
+template <typename Word>
+constexpr Word PrecedingWordBitmask(unsigned int const i) {
+ return (static_cast<Word>(i < sizeof(Word) * 8) << (i & (sizeof(Word) * 8 - 1))) - 1;
+}
+static_assert(PrecedingWordBitmask<uint8_t>(0) == 0x00, "");
+static_assert(PrecedingWordBitmask<uint8_t>(4) == 0x0f, "");
+static_assert(PrecedingWordBitmask<uint8_t>(8) == 0xff, "");
+static_assert(PrecedingWordBitmask<uint16_t>(8) == 0x00ff, "");
+
+/// \brief Create a word with low `n` bits from `low` and high `sizeof(Word)-n` bits
+/// from `high`.
+/// Word ret
+/// for (i = 0; i < sizeof(Word)*8; i++){
+/// ret[i] = i < n ? low[i] : high[i];
+/// }
+template <typename Word>
+constexpr Word SpliceWord(int n, Word low, Word high) {
+ return (high & ~PrecedingWordBitmask<Word>(n)) | (low & PrecedingWordBitmask<Word>(n));
+}
+
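
In the same spirit as the PrecedingWordBitmask checks above, a few compile-time
examples for SpliceWord (they hold if placed inside this namespace):

static_assert(SpliceWord<uint8_t>(0, /*low=*/0xAA, /*high=*/0x55) == 0x55, "");
static_assert(SpliceWord<uint8_t>(3, /*low=*/0x05, /*high=*/0xF0) == 0xF5, "");
static_assert(SpliceWord<uint8_t>(8, /*low=*/0xAA, /*high=*/0x55) == 0xAA, "");
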
} // namespace BitUtil
} // namespace arrow
diff --git a/contrib/libs/apache/arrow/cpp/src/arrow/util/bitmap.cc b/contrib/libs/apache/arrow/cpp/src/arrow/util/bitmap.cc
index bd389138316..33d1dee1957 100644
--- a/contrib/libs/apache/arrow/cpp/src/arrow/util/bitmap.cc
+++ b/contrib/libs/apache/arrow/cpp/src/arrow/util/bitmap.cc
@@ -46,16 +46,16 @@ std::string Bitmap::Diff(const Bitmap& other) const {
return ToArray()->Diff(*other.ToArray());
}
-void Bitmap::CopyFrom(const Bitmap& other) {
- ::arrow::internal::CopyBitmap(other.buffer_->data(), other.offset_, other.length_,
- buffer_->mutable_data(), offset_);
-}
-
-void Bitmap::CopyFromInverted(const Bitmap& other) {
- ::arrow::internal::InvertBitmap(other.buffer_->data(), other.offset_, other.length_,
- buffer_->mutable_data(), offset_);
-}
-
+void Bitmap::CopyFrom(const Bitmap& other) {
+ ::arrow::internal::CopyBitmap(other.buffer_->data(), other.offset_, other.length_,
+ buffer_->mutable_data(), offset_);
+}
+
+void Bitmap::CopyFromInverted(const Bitmap& other) {
+ ::arrow::internal::InvertBitmap(other.buffer_->data(), other.offset_, other.length_,
+ buffer_->mutable_data(), offset_);
+}
+
bool Bitmap::Equals(const Bitmap& other) const {
if (length_ != other.length_) {
return false;
diff --git a/contrib/libs/apache/arrow/cpp/src/arrow/util/bitmap.h b/contrib/libs/apache/arrow/cpp/src/arrow/util/bitmap.h
index 13e7c5dc00a..141f863c0b8 100644
--- a/contrib/libs/apache/arrow/cpp/src/arrow/util/bitmap.h
+++ b/contrib/libs/apache/arrow/cpp/src/arrow/util/bitmap.h
@@ -29,11 +29,11 @@
#include "arrow/buffer.h"
#include "arrow/util/bit_util.h"
-#include "arrow/util/bitmap_ops.h"
-#include "arrow/util/bitmap_reader.h"
-#include "arrow/util/bitmap_writer.h"
+#include "arrow/util/bitmap_ops.h"
+#include "arrow/util/bitmap_reader.h"
+#include "arrow/util/bitmap_writer.h"
#include "arrow/util/compare.h"
-#include "arrow/util/endian.h"
+#include "arrow/util/endian.h"
#include "arrow/util/functional.h"
#include "arrow/util/string_builder.h"
#include "arrow/util/string_view.h"
@@ -90,13 +90,13 @@ class ARROW_EXPORT Bitmap : public util::ToStringOstreamable<Bitmap>,
BitUtil::SetBitTo(buffer_->mutable_data(), i + offset_, v);
}
- void SetBitsTo(bool v) {
- BitUtil::SetBitsTo(buffer_->mutable_data(), offset_, length_, v);
- }
-
- void CopyFrom(const Bitmap& other);
- void CopyFromInverted(const Bitmap& other);
-
+ void SetBitsTo(bool v) {
+ BitUtil::SetBitsTo(buffer_->mutable_data(), offset_, length_, v);
+ }
+
+ void CopyFrom(const Bitmap& other);
+ void CopyFromInverted(const Bitmap& other);
+
/// \brief Visit bits from each bitmap as bitset<N>
///
/// All bitmaps must have identical length.
@@ -112,21 +112,21 @@ class ARROW_EXPORT Bitmap : public util::ToStringOstreamable<Bitmap>,
}
}
- /// \brief Visit bits from each bitmap as bitset<N>
- ///
- /// All bitmaps must have identical length.
- template <size_t N, typename Visitor>
- static void VisitBits(const std::array<Bitmap, N>& bitmaps, Visitor&& visitor) {
- int64_t bit_length = BitLength(bitmaps);
- std::bitset<N> bits;
- for (int64_t bit_i = 0; bit_i < bit_length; ++bit_i) {
- for (size_t i = 0; i < N; ++i) {
- bits[i] = bitmaps[i].GetBit(bit_i);
- }
- visitor(bits);
- }
- }
-
+ /// \brief Visit bits from each bitmap as bitset<N>
+ ///
+ /// All bitmaps must have identical length.
+ template <size_t N, typename Visitor>
+ static void VisitBits(const std::array<Bitmap, N>& bitmaps, Visitor&& visitor) {
+ int64_t bit_length = BitLength(bitmaps);
+ std::bitset<N> bits;
+ for (int64_t bit_i = 0; bit_i < bit_length; ++bit_i) {
+ for (size_t i = 0; i < N; ++i) {
+ bits[i] = bitmaps[i].GetBit(bit_i);
+ }
+ visitor(bits);
+ }
+ }
+
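
For instance, counting the positions where two validity bitmaps are both set
could look like the fragment below. It is a sketch: the arrow::internal
namespace and the Buffer-based constructor are assumptions implied but not
shown by this hunk, and buffer_a / buffer_b stand for two
std::shared_ptr<Buffer> values covering at least 100 bits each.

std::array<arrow::internal::Bitmap, 2> bitmaps = {
    arrow::internal::Bitmap(buffer_a, /*offset=*/0, /*length=*/100),
    arrow::internal::Bitmap(buffer_b, /*offset=*/0, /*length=*/100)};
int64_t both_set = 0;
arrow::internal::Bitmap::VisitBits(bitmaps, [&](std::bitset<2> bits) {
  both_set += (bits[0] && bits[1]) ? 1 : 0;
});
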
/// \brief Visit words of bits from each bitmap as array<Word, N>
///
/// All bitmaps must have identical length. The first bit in a visited bitmap
@@ -135,14 +135,14 @@ class ARROW_EXPORT Bitmap : public util::ToStringOstreamable<Bitmap>,
/// returned.
///
/// TODO(bkietz) allow for early termination
- // NOTE: this function is efficient on 3+ sufficiently large bitmaps.
- // It also has a large prolog / epilog overhead and should be used
- // carefully in other cases.
- // For 2 bitmaps or less, and/or smaller bitmaps, see also VisitTwoBitBlocksVoid
- // and BitmapUInt64Reader.
+ // NOTE: this function is efficient on 3+ sufficiently large bitmaps.
+ // It also has a large prolog / epilog overhead and should be used
+ // carefully in other cases.
+ // For 2 bitmaps or less, and/or smaller bitmaps, see also VisitTwoBitBlocksVoid
+ // and BitmapUInt64Reader.
template <size_t N, typename Visitor,
- typename Word = typename std::decay<
- internal::call_traits::argument_type<0, Visitor&&>>::type::value_type>
+ typename Word = typename std::decay<
+ internal::call_traits::argument_type<0, Visitor&&>>::type::value_type>
static int64_t VisitWords(const Bitmap (&bitmaps_arg)[N], Visitor&& visitor) {
constexpr int64_t kBitWidth = sizeof(Word) * 8;
@@ -243,132 +243,132 @@ class ARROW_EXPORT Bitmap : public util::ToStringOstreamable<Bitmap>,
return min_offset;
}
- template <size_t N, size_t M, typename ReaderT, typename WriterT, typename Visitor,
- typename Word = typename std::decay<
- internal::call_traits::argument_type<0, Visitor&&>>::type::value_type>
- static void RunVisitWordsAndWriteLoop(int64_t bit_length,
- std::array<ReaderT, N>& readers,
- std::array<WriterT, M>& writers,
- Visitor&& visitor) {
- constexpr int64_t kBitWidth = sizeof(Word) * 8;
-
- std::array<Word, N> visited_words;
- std::array<Word, M> output_words;
-
-    // every reader will have the same number of words, since the bitmaps all have the same length
- // TODO($JIRA) this will be inefficient in some cases. When there are offsets beyond
- // Word boundary, every Word would have to be created from 2 adjoining Words
- auto n_words = readers[0].words();
- bit_length -= n_words * kBitWidth;
- while (n_words--) {
- // first collect all words to visited_words array
- for (size_t i = 0; i < N; i++) {
- visited_words[i] = readers[i].NextWord();
- }
- visitor(visited_words, &output_words);
- for (size_t i = 0; i < M; i++) {
- writers[i].PutNextWord(output_words[i]);
- }
- }
-
-    // every reader will have the same number of trailing bytes, for the same reason as above
-    // the trailing portion can be more than one word! (ref: BitmapWordReader constructor)
-    // remaining full / partial words still need to be written
-
- if (bit_length) {
- // convert the word visitor lambda to a byte_visitor
- auto byte_visitor = [&](const std::array<uint8_t, N>& in,
- std::array<uint8_t, M>* out) {
- std::array<Word, N> in_words;
- std::array<Word, M> out_words;
- std::copy(in.begin(), in.end(), in_words.begin());
- visitor(in_words, &out_words);
- for (size_t i = 0; i < M; i++) {
- out->at(i) = static_cast<uint8_t>(out_words[i]);
- }
- };
-
- std::array<uint8_t, N> visited_bytes;
- std::array<uint8_t, M> output_bytes;
- int n_bytes = readers[0].trailing_bytes();
- while (n_bytes--) {
- visited_bytes.fill(0);
- output_bytes.fill(0);
- int valid_bits;
- for (size_t i = 0; i < N; i++) {
- visited_bytes[i] = readers[i].NextTrailingByte(valid_bits);
- }
- byte_visitor(visited_bytes, &output_bytes);
- for (size_t i = 0; i < M; i++) {
- writers[i].PutNextTrailingByte(output_bytes[i], valid_bits);
- }
- }
- }
- }
-
- /// \brief Visit words of bits from each input bitmap as array<Word, N> and collects
- /// outputs to an array<Word, M>, to be written into the output bitmaps accordingly.
- ///
- /// All bitmaps must have identical length. The first bit in a visited bitmap
- /// may be offset within the first visited word, but words will otherwise contain
- /// densely packed bits loaded from the bitmap. That offset within the first word is
- /// returned.
- /// Visitor is expected to have the following signature
- /// [](const std::array<Word, N>& in_words, std::array<Word, M>* out_words){...}
- ///
- // NOTE: this function is efficient on 3+ sufficiently large bitmaps.
- // It also has a large prolog / epilog overhead and should be used
- // carefully in other cases.
- // For 2 bitmaps or less, and/or smaller bitmaps, see also VisitTwoBitBlocksVoid
- // and BitmapUInt64Reader.
- template <size_t N, size_t M, typename Visitor,
- typename Word = typename std::decay<
- internal::call_traits::argument_type<0, Visitor&&>>::type::value_type>
- static void VisitWordsAndWrite(const std::array<Bitmap, N>& bitmaps_arg,
- std::array<Bitmap, M>* out_bitmaps_arg,
- Visitor&& visitor) {
- int64_t bit_length = BitLength(bitmaps_arg);
- assert(bit_length == BitLength(*out_bitmaps_arg));
-
- // if both input and output bitmaps have no byte offset, then use special template
- if (std::all_of(bitmaps_arg.begin(), bitmaps_arg.end(),
- [](const Bitmap& b) { return b.offset_ % 8 == 0; }) &&
- std::all_of(out_bitmaps_arg->begin(), out_bitmaps_arg->end(),
- [](const Bitmap& b) { return b.offset_ % 8 == 0; })) {
- std::array<BitmapWordReader<Word, /*may_have_byte_offset=*/false>, N> readers;
- for (size_t i = 0; i < N; ++i) {
- const Bitmap& in_bitmap = bitmaps_arg[i];
- readers[i] = BitmapWordReader<Word, /*may_have_byte_offset=*/false>(
- in_bitmap.buffer_->data(), in_bitmap.offset_, in_bitmap.length_);
- }
-
- std::array<BitmapWordWriter<Word, /*may_have_byte_offset=*/false>, M> writers;
- for (size_t i = 0; i < M; ++i) {
- const Bitmap& out_bitmap = out_bitmaps_arg->at(i);
- writers[i] = BitmapWordWriter<Word, /*may_have_byte_offset=*/false>(
- out_bitmap.buffer_->mutable_data(), out_bitmap.offset_, out_bitmap.length_);
- }
-
- RunVisitWordsAndWriteLoop(bit_length, readers, writers, visitor);
- } else {
- std::array<BitmapWordReader<Word>, N> readers;
- for (size_t i = 0; i < N; ++i) {
- const Bitmap& in_bitmap = bitmaps_arg[i];
- readers[i] = BitmapWordReader<Word>(in_bitmap.buffer_->data(), in_bitmap.offset_,
- in_bitmap.length_);
- }
-
- std::array<BitmapWordWriter<Word>, M> writers;
- for (size_t i = 0; i < M; ++i) {
- const Bitmap& out_bitmap = out_bitmaps_arg->at(i);
- writers[i] = BitmapWordWriter<Word>(out_bitmap.buffer_->mutable_data(),
- out_bitmap.offset_, out_bitmap.length_);
- }
-
- RunVisitWordsAndWriteLoop(bit_length, readers, writers, visitor);
- }
- }
-
+ template <size_t N, size_t M, typename ReaderT, typename WriterT, typename Visitor,
+ typename Word = typename std::decay<
+ internal::call_traits::argument_type<0, Visitor&&>>::type::value_type>
+ static void RunVisitWordsAndWriteLoop(int64_t bit_length,
+ std::array<ReaderT, N>& readers,
+ std::array<WriterT, M>& writers,
+ Visitor&& visitor) {
+ constexpr int64_t kBitWidth = sizeof(Word) * 8;
+
+ std::array<Word, N> visited_words;
+ std::array<Word, M> output_words;
+
+    // every reader will have the same number of words, since the bitmaps all have the same length
+ // TODO($JIRA) this will be inefficient in some cases. When there are offsets beyond
+ // Word boundary, every Word would have to be created from 2 adjoining Words
+ auto n_words = readers[0].words();
+ bit_length -= n_words * kBitWidth;
+ while (n_words--) {
+ // first collect all words to visited_words array
+ for (size_t i = 0; i < N; i++) {
+ visited_words[i] = readers[i].NextWord();
+ }
+ visitor(visited_words, &output_words);
+ for (size_t i = 0; i < M; i++) {
+ writers[i].PutNextWord(output_words[i]);
+ }
+ }
+
+    // every reader will have the same number of trailing bytes, for the same reason as above
+    // the trailing portion can be more than one word! (ref: BitmapWordReader constructor)
+    // remaining full / partial words still need to be written
+
+ if (bit_length) {
+ // convert the word visitor lambda to a byte_visitor
+ auto byte_visitor = [&](const std::array<uint8_t, N>& in,
+ std::array<uint8_t, M>* out) {
+ std::array<Word, N> in_words;
+ std::array<Word, M> out_words;
+ std::copy(in.begin(), in.end(), in_words.begin());
+ visitor(in_words, &out_words);
+ for (size_t i = 0; i < M; i++) {
+ out->at(i) = static_cast<uint8_t>(out_words[i]);
+ }
+ };
+
+ std::array<uint8_t, N> visited_bytes;
+ std::array<uint8_t, M> output_bytes;
+ int n_bytes = readers[0].trailing_bytes();
+ while (n_bytes--) {
+ visited_bytes.fill(0);
+ output_bytes.fill(0);
+ int valid_bits;
+ for (size_t i = 0; i < N; i++) {
+ visited_bytes[i] = readers[i].NextTrailingByte(valid_bits);
+ }
+ byte_visitor(visited_bytes, &output_bytes);
+ for (size_t i = 0; i < M; i++) {
+ writers[i].PutNextTrailingByte(output_bytes[i], valid_bits);
+ }
+ }
+ }
+ }
+
+ /// \brief Visit words of bits from each input bitmap as array<Word, N> and collects
+ /// outputs to an array<Word, M>, to be written into the output bitmaps accordingly.
+ ///
+ /// All bitmaps must have identical length. The first bit in a visited bitmap
+ /// may be offset within the first visited word, but words will otherwise contain
+ /// densely packed bits loaded from the bitmap. That offset within the first word is
+ /// returned.
+ /// Visitor is expected to have the following signature
+ /// [](const std::array<Word, N>& in_words, std::array<Word, M>* out_words){...}
+ ///
+ // NOTE: this function is efficient on 3+ sufficiently large bitmaps.
+ // It also has a large prolog / epilog overhead and should be used
+ // carefully in other cases.
+ // For 2 bitmaps or less, and/or smaller bitmaps, see also VisitTwoBitBlocksVoid
+ // and BitmapUInt64Reader.
+ template <size_t N, size_t M, typename Visitor,
+ typename Word = typename std::decay<
+ internal::call_traits::argument_type<0, Visitor&&>>::type::value_type>
+ static void VisitWordsAndWrite(const std::array<Bitmap, N>& bitmaps_arg,
+ std::array<Bitmap, M>* out_bitmaps_arg,
+ Visitor&& visitor) {
+ int64_t bit_length = BitLength(bitmaps_arg);
+ assert(bit_length == BitLength(*out_bitmaps_arg));
+
+ // if both input and output bitmaps have no byte offset, then use special template
+ if (std::all_of(bitmaps_arg.begin(), bitmaps_arg.end(),
+ [](const Bitmap& b) { return b.offset_ % 8 == 0; }) &&
+ std::all_of(out_bitmaps_arg->begin(), out_bitmaps_arg->end(),
+ [](const Bitmap& b) { return b.offset_ % 8 == 0; })) {
+ std::array<BitmapWordReader<Word, /*may_have_byte_offset=*/false>, N> readers;
+ for (size_t i = 0; i < N; ++i) {
+ const Bitmap& in_bitmap = bitmaps_arg[i];
+ readers[i] = BitmapWordReader<Word, /*may_have_byte_offset=*/false>(
+ in_bitmap.buffer_->data(), in_bitmap.offset_, in_bitmap.length_);
+ }
+
+ std::array<BitmapWordWriter<Word, /*may_have_byte_offset=*/false>, M> writers;
+ for (size_t i = 0; i < M; ++i) {
+ const Bitmap& out_bitmap = out_bitmaps_arg->at(i);
+ writers[i] = BitmapWordWriter<Word, /*may_have_byte_offset=*/false>(
+ out_bitmap.buffer_->mutable_data(), out_bitmap.offset_, out_bitmap.length_);
+ }
+
+ RunVisitWordsAndWriteLoop(bit_length, readers, writers, visitor);
+ } else {
+ std::array<BitmapWordReader<Word>, N> readers;
+ for (size_t i = 0; i < N; ++i) {
+ const Bitmap& in_bitmap = bitmaps_arg[i];
+ readers[i] = BitmapWordReader<Word>(in_bitmap.buffer_->data(), in_bitmap.offset_,
+ in_bitmap.length_);
+ }
+
+ std::array<BitmapWordWriter<Word>, M> writers;
+ for (size_t i = 0; i < M; ++i) {
+ const Bitmap& out_bitmap = out_bitmaps_arg->at(i);
+ writers[i] = BitmapWordWriter<Word>(out_bitmap.buffer_->mutable_data(),
+ out_bitmap.offset_, out_bitmap.length_);
+ }
+
+ RunVisitWordsAndWriteLoop(bit_length, readers, writers, visitor);
+ }
+ }
+
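
As a usage sketch (same assumptions as the VisitBits fragment above, with
bitmap_a, bitmap_b and bitmap_out standing for pre-built Bitmap instances of
equal length), an AND-NOT of two inputs into one output via the word visitor:

std::array<arrow::internal::Bitmap, 2> in = {bitmap_a, bitmap_b};
std::array<arrow::internal::Bitmap, 1> out = {bitmap_out};
arrow::internal::Bitmap::VisitWordsAndWrite(
    in, &out,
    [](const std::array<uint64_t, 2>& in_words, std::array<uint64_t, 1>* out_words) {
      out_words->at(0) = in_words[0] & ~in_words[1];
    });
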
const std::shared_ptr<Buffer>& buffer() const { return buffer_; }
/// offset of first bit relative to buffer().data()
@@ -445,14 +445,14 @@ class ARROW_EXPORT Bitmap : public util::ToStringOstreamable<Bitmap>,
/// assert bitmaps have identical length and return that length
static int64_t BitLength(const Bitmap* bitmaps, size_t N);
- template <size_t N>
- static int64_t BitLength(const std::array<Bitmap, N>& bitmaps) {
- for (size_t i = 1; i < N; ++i) {
- assert(bitmaps[i].length() == bitmaps[0].length());
- }
- return bitmaps[0].length();
- }
-
+ template <size_t N>
+ static int64_t BitLength(const std::array<Bitmap, N>& bitmaps) {
+ for (size_t i = 1; i < N; ++i) {
+ assert(bitmaps[i].length() == bitmaps[0].length());
+ }
+ return bitmaps[0].length();
+ }
+
std::shared_ptr<Buffer> buffer_;
int64_t offset_ = 0, length_ = 0;
};
diff --git a/contrib/libs/apache/arrow/cpp/src/arrow/util/bitmap_generate.h b/contrib/libs/apache/arrow/cpp/src/arrow/util/bitmap_generate.h
index 68a9016d8a0..129fa913231 100644
--- a/contrib/libs/apache/arrow/cpp/src/arrow/util/bitmap_generate.h
+++ b/contrib/libs/apache/arrow/cpp/src/arrow/util/bitmap_generate.h
@@ -62,9 +62,9 @@ void GenerateBits(uint8_t* bitmap, int64_t start_offset, int64_t length, Generat
template <class Generator>
void GenerateBitsUnrolled(uint8_t* bitmap, int64_t start_offset, int64_t length,
Generator&& g) {
- static_assert(std::is_same<typename std::result_of<Generator && ()>::type, bool>::value,
- "Functor passed to GenerateBitsUnrolled must return bool");
-
+ static_assert(std::is_same<typename std::result_of<Generator && ()>::type, bool>::value,
+ "Functor passed to GenerateBitsUnrolled must return bool");
+
if (length == 0) {
return;
}
@@ -77,7 +77,7 @@ void GenerateBitsUnrolled(uint8_t* bitmap, int64_t start_offset, int64_t length,
if (bit_mask != 0x01) {
current_byte = *cur & BitUtil::kPrecedingBitmask[start_bit_offset];
while (bit_mask != 0 && remaining > 0) {
- current_byte |= g() * bit_mask;
+ current_byte |= g() * bit_mask;
bit_mask = static_cast<uint8_t>(bit_mask << 1);
--remaining;
}
@@ -85,14 +85,14 @@ void GenerateBitsUnrolled(uint8_t* bitmap, int64_t start_offset, int64_t length,
}
int64_t remaining_bytes = remaining / 8;
- uint8_t out_results[8];
+ uint8_t out_results[8];
while (remaining_bytes-- > 0) {
- for (int i = 0; i < 8; ++i) {
- out_results[i] = g();
- }
- *cur++ = (out_results[0] | out_results[1] << 1 | out_results[2] << 2 |
- out_results[3] << 3 | out_results[4] << 4 | out_results[5] << 5 |
- out_results[6] << 6 | out_results[7] << 7);
+ for (int i = 0; i < 8; ++i) {
+ out_results[i] = g();
+ }
+ *cur++ = (out_results[0] | out_results[1] << 1 | out_results[2] << 2 |
+ out_results[3] << 3 | out_results[4] << 4 | out_results[5] << 5 |
+ out_results[6] << 6 | out_results[7] << 7);
}
int64_t remaining_bits = remaining % 8;
@@ -100,7 +100,7 @@ void GenerateBitsUnrolled(uint8_t* bitmap, int64_t start_offset, int64_t length,
current_byte = 0;
bit_mask = 0x01;
while (remaining_bits-- > 0) {
- current_byte |= g() * bit_mask;
+ current_byte |= g() * bit_mask;
bit_mask = static_cast<uint8_t>(bit_mask << 1);
}
*cur++ = current_byte;
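
A usage sketch for the unrolled generator (the arrow::internal namespace is an
assumption; the enclosing namespace is not visible in this hunk). The functor
must return bool, per the static_assert earlier in this file:

uint8_t bitmap[4] = {0};
int i = 0;
arrow::internal::GenerateBitsUnrolled(
    bitmap, /*start_offset=*/0, /*length=*/32,
    [&]() -> bool { return (i++ % 2) == 0; });
// Every even bit is set, so each byte ends up 0x55 (0b01010101).
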
diff --git a/contrib/libs/apache/arrow/cpp/src/arrow/util/bitmap_ops.cc b/contrib/libs/apache/arrow/cpp/src/arrow/util/bitmap_ops.cc
index afbad3f8aba..63c8b008f4a 100644
--- a/contrib/libs/apache/arrow/cpp/src/arrow/util/bitmap_ops.cc
+++ b/contrib/libs/apache/arrow/cpp/src/arrow/util/bitmap_ops.cc
@@ -172,7 +172,7 @@ Result<std::shared_ptr<Buffer>> CopyBitmap(MemoryPool* pool, const uint8_t* data
}
Result<std::shared_ptr<Buffer>> InvertBitmap(MemoryPool* pool, const uint8_t* data,
- int64_t offset, int64_t length) {
+ int64_t offset, int64_t length) {
return TransferBitmap<TransferMode::Invert>(pool, data, offset, length);
}
@@ -215,26 +215,26 @@ bool BitmapEquals(const uint8_t* left, int64_t left_offset, const uint8_t* right
return true;
}
-bool OptionalBitmapEquals(const uint8_t* left, int64_t left_offset, const uint8_t* right,
- int64_t right_offset, int64_t length) {
- if (left == nullptr && right == nullptr) {
- return true;
- } else if (left != nullptr && right != nullptr) {
- return BitmapEquals(left, left_offset, right, right_offset, length);
- } else if (left != nullptr) {
- return CountSetBits(left, left_offset, length) == length;
- } else {
- return CountSetBits(right, right_offset, length) == length;
- }
-}
-
-bool OptionalBitmapEquals(const std::shared_ptr<Buffer>& left, int64_t left_offset,
- const std::shared_ptr<Buffer>& right, int64_t right_offset,
- int64_t length) {
- return OptionalBitmapEquals(left ? left->data() : nullptr, left_offset,
- right ? right->data() : nullptr, right_offset, length);
-}
-
+bool OptionalBitmapEquals(const uint8_t* left, int64_t left_offset, const uint8_t* right,
+ int64_t right_offset, int64_t length) {
+ if (left == nullptr && right == nullptr) {
+ return true;
+ } else if (left != nullptr && right != nullptr) {
+ return BitmapEquals(left, left_offset, right, right_offset, length);
+ } else if (left != nullptr) {
+ return CountSetBits(left, left_offset, length) == length;
+ } else {
+ return CountSetBits(right, right_offset, length) == length;
+ }
+}
+
+bool OptionalBitmapEquals(const std::shared_ptr<Buffer>& left, int64_t left_offset,
+ const std::shared_ptr<Buffer>& right, int64_t right_offset,
+ int64_t length) {
+ return OptionalBitmapEquals(left ? left->data() : nullptr, left_offset,
+ right ? right->data() : nullptr, right_offset, length);
+}
+
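
The null-means-all-valid convention in action (a sketch):

uint8_t all_set = 0xFF;
uint8_t holes = 0xF0;
arrow::internal::OptionalBitmapEquals(nullptr, 0, &all_set, 0, 8);  // true
arrow::internal::OptionalBitmapEquals(nullptr, 0, &holes, 0, 8);    // false: four bits unset
arrow::internal::OptionalBitmapEquals(nullptr, 0, nullptr, 0, 8);   // true
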
namespace {
template <template <typename> class BitOp>
@@ -346,42 +346,42 @@ void BitmapXor(const uint8_t* left, int64_t left_offset, const uint8_t* right,
BitmapOp<std::bit_xor>(left, left_offset, right, right_offset, length, out_offset, out);
}
-template <typename T>
-struct AndNotOp {
- constexpr T operator()(const T& l, const T& r) const { return l & ~r; }
-};
-
-Result<std::shared_ptr<Buffer>> BitmapAndNot(MemoryPool* pool, const uint8_t* left,
- int64_t left_offset, const uint8_t* right,
- int64_t right_offset, int64_t length,
- int64_t out_offset) {
- return BitmapOp<AndNotOp>(pool, left, left_offset, right, right_offset, length,
- out_offset);
-}
-
-void BitmapAndNot(const uint8_t* left, int64_t left_offset, const uint8_t* right,
- int64_t right_offset, int64_t length, int64_t out_offset,
- uint8_t* out) {
- BitmapOp<AndNotOp>(left, left_offset, right, right_offset, length, out_offset, out);
-}
-
-template <typename T>
-struct OrNotOp {
- constexpr T operator()(const T& l, const T& r) const { return l | ~r; }
-};
-
-Result<std::shared_ptr<Buffer>> BitmapOrNot(MemoryPool* pool, const uint8_t* left,
- int64_t left_offset, const uint8_t* right,
- int64_t right_offset, int64_t length,
- int64_t out_offset) {
- return BitmapOp<OrNotOp>(pool, left, left_offset, right, right_offset, length,
- out_offset);
-}
-
-void BitmapOrNot(const uint8_t* left, int64_t left_offset, const uint8_t* right,
- int64_t right_offset, int64_t length, int64_t out_offset, uint8_t* out) {
- BitmapOp<OrNotOp>(left, left_offset, right, right_offset, length, out_offset, out);
-}
-
+template <typename T>
+struct AndNotOp {
+ constexpr T operator()(const T& l, const T& r) const { return l & ~r; }
+};
+
+Result<std::shared_ptr<Buffer>> BitmapAndNot(MemoryPool* pool, const uint8_t* left,
+ int64_t left_offset, const uint8_t* right,
+ int64_t right_offset, int64_t length,
+ int64_t out_offset) {
+ return BitmapOp<AndNotOp>(pool, left, left_offset, right, right_offset, length,
+ out_offset);
+}
+
+void BitmapAndNot(const uint8_t* left, int64_t left_offset, const uint8_t* right,
+ int64_t right_offset, int64_t length, int64_t out_offset,
+ uint8_t* out) {
+ BitmapOp<AndNotOp>(left, left_offset, right, right_offset, length, out_offset, out);
+}
+
+template <typename T>
+struct OrNotOp {
+ constexpr T operator()(const T& l, const T& r) const { return l | ~r; }
+};
+
+Result<std::shared_ptr<Buffer>> BitmapOrNot(MemoryPool* pool, const uint8_t* left,
+ int64_t left_offset, const uint8_t* right,
+ int64_t right_offset, int64_t length,
+ int64_t out_offset) {
+ return BitmapOp<OrNotOp>(pool, left, left_offset, right, right_offset, length,
+ out_offset);
+}
+
+void BitmapOrNot(const uint8_t* left, int64_t left_offset, const uint8_t* right,
+ int64_t right_offset, int64_t length, int64_t out_offset, uint8_t* out) {
+ BitmapOp<OrNotOp>(left, left_offset, right, right_offset, length, out_offset, out);
+}
+
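
And the preallocated-output variant of BitmapAndNot, on a single byte (a
sketch):

uint8_t a = 0b10101010;
uint8_t b = 0b11001100;
uint8_t out = 0;
arrow::internal::BitmapAndNot(&a, 0, &b, 0, /*length=*/8, /*out_offset=*/0, &out);
// out == (a & ~b) == 0b00100010
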
} // namespace internal
} // namespace arrow
diff --git a/contrib/libs/apache/arrow/cpp/src/arrow/util/bitmap_ops.h b/contrib/libs/apache/arrow/cpp/src/arrow/util/bitmap_ops.h
index ad9990459de..40a7797a239 100644
--- a/contrib/libs/apache/arrow/cpp/src/arrow/util/bitmap_ops.h
+++ b/contrib/libs/apache/arrow/cpp/src/arrow/util/bitmap_ops.h
@@ -96,17 +96,17 @@ ARROW_EXPORT
bool BitmapEquals(const uint8_t* left, int64_t left_offset, const uint8_t* right,
int64_t right_offset, int64_t length);
-// Same as BitmapEquals, but considers a NULL bitmap pointer the same as an
-// all-ones bitmap.
-ARROW_EXPORT
-bool OptionalBitmapEquals(const uint8_t* left, int64_t left_offset, const uint8_t* right,
- int64_t right_offset, int64_t length);
-
-ARROW_EXPORT
-bool OptionalBitmapEquals(const std::shared_ptr<Buffer>& left, int64_t left_offset,
- const std::shared_ptr<Buffer>& right, int64_t right_offset,
- int64_t length);
-
+// Same as BitmapEquals, but considers a NULL bitmap pointer the same as an
+// all-ones bitmap.
+ARROW_EXPORT
+bool OptionalBitmapEquals(const uint8_t* left, int64_t left_offset, const uint8_t* right,
+ int64_t right_offset, int64_t length);
+
+ARROW_EXPORT
+bool OptionalBitmapEquals(const std::shared_ptr<Buffer>& left, int64_t left_offset,
+ const std::shared_ptr<Buffer>& right, int64_t right_offset,
+ int64_t length);
+
/// \brief Do a "bitmap and" on right and left buffers starting at
/// their respective bit-offsets for the given bit-length and put
/// the results in out_buffer starting at the given bit-offset.
@@ -164,43 +164,43 @@ ARROW_EXPORT
void BitmapXor(const uint8_t* left, int64_t left_offset, const uint8_t* right,
int64_t right_offset, int64_t length, int64_t out_offset, uint8_t* out);
-/// \brief Do a "bitmap and not" on right and left buffers starting at
-/// their respective bit-offsets for the given bit-length and put
-/// the results in out_buffer starting at the given bit-offset.
-///
-/// out_buffer will be allocated and initialized to zeros using pool before
-/// the operation.
-ARROW_EXPORT
-Result<std::shared_ptr<Buffer>> BitmapAndNot(MemoryPool* pool, const uint8_t* left,
- int64_t left_offset, const uint8_t* right,
- int64_t right_offset, int64_t length,
- int64_t out_offset);
-
-/// \brief Do a "bitmap and not" on right and left buffers starting at
-/// their respective bit-offsets for the given bit-length and put
-/// the results in out starting at the given bit-offset.
-ARROW_EXPORT
-void BitmapAndNot(const uint8_t* left, int64_t left_offset, const uint8_t* right,
- int64_t right_offset, int64_t length, int64_t out_offset, uint8_t* out);
-
-/// \brief Do a "bitmap or not" on right and left buffers starting at
-/// their respective bit-offsets for the given bit-length and put
-/// the results in out_buffer starting at the given bit-offset.
-///
-/// out_buffer will be allocated and initialized to zeros using pool before
-/// the operation.
-ARROW_EXPORT
-Result<std::shared_ptr<Buffer>> BitmapOrNot(MemoryPool* pool, const uint8_t* left,
- int64_t left_offset, const uint8_t* right,
- int64_t right_offset, int64_t length,
- int64_t out_offset);
-
-/// \brief Do a "bitmap or not" on right and left buffers starting at
-/// their respective bit-offsets for the given bit-length and put
-/// the results in out starting at the given bit-offset.
-ARROW_EXPORT
-void BitmapOrNot(const uint8_t* left, int64_t left_offset, const uint8_t* right,
- int64_t right_offset, int64_t length, int64_t out_offset, uint8_t* out);
-
+/// \brief Do a "bitmap and not" on right and left buffers starting at
+/// their respective bit-offsets for the given bit-length and put
+/// the results in out_buffer starting at the given bit-offset.
+///
+/// out_buffer will be allocated and initialized to zeros using pool before
+/// the operation.
+ARROW_EXPORT
+Result<std::shared_ptr<Buffer>> BitmapAndNot(MemoryPool* pool, const uint8_t* left,
+ int64_t left_offset, const uint8_t* right,
+ int64_t right_offset, int64_t length,
+ int64_t out_offset);
+
+/// \brief Do a "bitmap and not" on right and left buffers starting at
+/// their respective bit-offsets for the given bit-length and put
+/// the results in out starting at the given bit-offset.
+ARROW_EXPORT
+void BitmapAndNot(const uint8_t* left, int64_t left_offset, const uint8_t* right,
+ int64_t right_offset, int64_t length, int64_t out_offset, uint8_t* out);
+
+/// \brief Do a "bitmap or not" on right and left buffers starting at
+/// their respective bit-offsets for the given bit-length and put
+/// the results in out_buffer starting at the given bit-offset.
+///
+/// out_buffer will be allocated and initialized to zeros using pool before
+/// the operation.
+ARROW_EXPORT
+Result<std::shared_ptr<Buffer>> BitmapOrNot(MemoryPool* pool, const uint8_t* left,
+ int64_t left_offset, const uint8_t* right,
+ int64_t right_offset, int64_t length,
+ int64_t out_offset);
+
+/// \brief Do a "bitmap or not" on right and left buffers starting at
+/// their respective bit-offsets for the given bit-length and put
+/// the results in out starting at the given bit-offset.
+ARROW_EXPORT
+void BitmapOrNot(const uint8_t* left, int64_t left_offset, const uint8_t* right,
+ int64_t right_offset, int64_t length, int64_t out_offset, uint8_t* out);
+
} // namespace internal
} // namespace arrow
diff --git a/contrib/libs/apache/arrow/cpp/src/arrow/util/bitmap_reader.h b/contrib/libs/apache/arrow/cpp/src/arrow/util/bitmap_reader.h
index c0f08ff249c..7c43747fafb 100644
--- a/contrib/libs/apache/arrow/cpp/src/arrow/util/bitmap_reader.h
+++ b/contrib/libs/apache/arrow/cpp/src/arrow/util/bitmap_reader.h
@@ -22,7 +22,7 @@
#include "arrow/buffer.h"
#include "arrow/util/bit_util.h"
-#include "arrow/util/endian.h"
+#include "arrow/util/endian.h"
#include "arrow/util/macros.h"
namespace arrow {
@@ -70,190 +70,190 @@ class BitmapReader {
int64_t bit_offset_;
};
-// XXX Cannot name it BitmapWordReader because the name is already used
-// in bitmap_ops.cc
-
-class BitmapUInt64Reader {
- public:
- BitmapUInt64Reader(const uint8_t* bitmap, int64_t start_offset, int64_t length)
- : bitmap_(bitmap + start_offset / 8),
- num_carry_bits_(8 - start_offset % 8),
- length_(length),
- remaining_length_(length_) {
- if (length_ > 0) {
- // Load carry bits from the first byte's MSBs
- if (length_ >= num_carry_bits_) {
- carry_bits_ =
- LoadPartialWord(static_cast<int8_t>(8 - num_carry_bits_), num_carry_bits_);
- } else {
- carry_bits_ = LoadPartialWord(static_cast<int8_t>(8 - num_carry_bits_), length_);
- }
- }
- }
-
- uint64_t NextWord() {
- if (ARROW_PREDICT_TRUE(remaining_length_ >= 64 + num_carry_bits_)) {
- // We can load a full word
- uint64_t next_word = LoadFullWord();
- // Carry bits come first, then the (64 - num_carry_bits_) LSBs from next_word
- uint64_t word = carry_bits_ | (next_word << num_carry_bits_);
- carry_bits_ = next_word >> (64 - num_carry_bits_);
- remaining_length_ -= 64;
- return word;
- } else if (remaining_length_ > num_carry_bits_) {
- // We can load a partial word
- uint64_t next_word =
- LoadPartialWord(/*bit_offset=*/0, remaining_length_ - num_carry_bits_);
- uint64_t word = carry_bits_ | (next_word << num_carry_bits_);
- carry_bits_ = next_word >> (64 - num_carry_bits_);
- remaining_length_ = std::max<int64_t>(remaining_length_ - 64, 0);
- return word;
- } else {
- remaining_length_ = 0;
- return carry_bits_;
- }
- }
-
- int64_t position() const { return length_ - remaining_length_; }
-
- int64_t length() const { return length_; }
-
- private:
- uint64_t LoadFullWord() {
- uint64_t word;
- memcpy(&word, bitmap_, 8);
- bitmap_ += 8;
- return BitUtil::ToLittleEndian(word);
- }
-
- uint64_t LoadPartialWord(int8_t bit_offset, int64_t num_bits) {
- uint64_t word = 0;
- const int64_t num_bytes = BitUtil::BytesForBits(num_bits);
- memcpy(&word, bitmap_, num_bytes);
- bitmap_ += num_bytes;
- return (BitUtil::ToLittleEndian(word) >> bit_offset) &
- BitUtil::LeastSignificantBitMask(num_bits);
- }
-
- const uint8_t* bitmap_;
- const int64_t num_carry_bits_; // in [1, 8]
- const int64_t length_;
- int64_t remaining_length_;
- uint64_t carry_bits_;
-};
-
-// BitmapWordReader here is faster than BitmapUInt64Reader (in bitmap_reader.h)
-// on sufficiently large inputs. However, it has a larger prolog / epilog overhead
-// and should probably not be used for small bitmaps.
-
-template <typename Word, bool may_have_byte_offset = true>
-class BitmapWordReader {
- public:
- BitmapWordReader() = default;
- BitmapWordReader(const uint8_t* bitmap, int64_t offset, int64_t length)
- : offset_(static_cast<int64_t>(may_have_byte_offset) * (offset % 8)),
- bitmap_(bitmap + offset / 8),
- bitmap_end_(bitmap_ + BitUtil::BytesForBits(offset_ + length)) {
- // decrement word count by one as we may touch two adjacent words in one iteration
- nwords_ = length / (sizeof(Word) * 8) - 1;
- if (nwords_ < 0) {
- nwords_ = 0;
- }
- trailing_bits_ = static_cast<int>(length - nwords_ * sizeof(Word) * 8);
- trailing_bytes_ = static_cast<int>(BitUtil::BytesForBits(trailing_bits_));
-
- if (nwords_ > 0) {
- current_word_ = load<Word>(bitmap_);
- } else if (length > 0) {
- current_byte_ = load<uint8_t>(bitmap_);
- }
- }
-
- Word NextWord() {
- bitmap_ += sizeof(Word);
- const Word next_word = load<Word>(bitmap_);
- Word word = current_word_;
- if (may_have_byte_offset && offset_) {
- // combine two adjacent words into one word
- // |<------ next ----->|<---- current ---->|
- // +-------------+-----+-------------+-----+
- // | --- | A | B | --- |
- // +-------------+-----+-------------+-----+
- // | | offset
- // v v
- // +-----+-------------+
- // | A | B |
- // +-----+-------------+
- // |<------ word ----->|
- word >>= offset_;
- word |= next_word << (sizeof(Word) * 8 - offset_);
- }
- current_word_ = next_word;
- return word;
- }
-
- uint8_t NextTrailingByte(int& valid_bits) {
- uint8_t byte;
- assert(trailing_bits_ > 0);
-
- if (trailing_bits_ <= 8) {
- // last byte
- valid_bits = trailing_bits_;
- trailing_bits_ = 0;
- byte = 0;
- internal::BitmapReader reader(bitmap_, offset_, valid_bits);
- for (int i = 0; i < valid_bits; ++i) {
- byte >>= 1;
- if (reader.IsSet()) {
- byte |= 0x80;
- }
- reader.Next();
- }
- byte >>= (8 - valid_bits);
- } else {
- ++bitmap_;
- const uint8_t next_byte = load<uint8_t>(bitmap_);
- byte = current_byte_;
- if (may_have_byte_offset && offset_) {
- byte >>= offset_;
- byte |= next_byte << (8 - offset_);
- }
- current_byte_ = next_byte;
- trailing_bits_ -= 8;
- trailing_bytes_--;
- valid_bits = 8;
- }
- return byte;
- }
-
- int64_t words() const { return nwords_; }
- int trailing_bytes() const { return trailing_bytes_; }
-
- private:
- int64_t offset_;
- const uint8_t* bitmap_;
-
- const uint8_t* bitmap_end_;
- int64_t nwords_;
- int trailing_bits_;
- int trailing_bytes_;
- union {
- Word current_word_;
- struct {
-#if ARROW_LITTLE_ENDIAN == 0
- uint8_t padding_bytes_[sizeof(Word) - 1];
-#endif
- uint8_t current_byte_;
- };
- };
-
- template <typename DType>
- DType load(const uint8_t* bitmap) {
- assert(bitmap + sizeof(DType) <= bitmap_end_);
- return BitUtil::ToLittleEndian(util::SafeLoadAs<DType>(bitmap));
- }
-};
-
+// XXX Cannot name it BitmapWordReader because the name is already used
+// in bitmap_ops.cc
+
+class BitmapUInt64Reader {
+ public:
+ BitmapUInt64Reader(const uint8_t* bitmap, int64_t start_offset, int64_t length)
+ : bitmap_(bitmap + start_offset / 8),
+ num_carry_bits_(8 - start_offset % 8),
+ length_(length),
+ remaining_length_(length_) {
+ if (length_ > 0) {
+ // Load carry bits from the first byte's MSBs
+ if (length_ >= num_carry_bits_) {
+ carry_bits_ =
+ LoadPartialWord(static_cast<int8_t>(8 - num_carry_bits_), num_carry_bits_);
+ } else {
+ carry_bits_ = LoadPartialWord(static_cast<int8_t>(8 - num_carry_bits_), length_);
+ }
+ }
+ }
+
+ uint64_t NextWord() {
+ if (ARROW_PREDICT_TRUE(remaining_length_ >= 64 + num_carry_bits_)) {
+ // We can load a full word
+ uint64_t next_word = LoadFullWord();
+ // Carry bits come first, then the (64 - num_carry_bits_) LSBs from next_word
+ uint64_t word = carry_bits_ | (next_word << num_carry_bits_);
+ carry_bits_ = next_word >> (64 - num_carry_bits_);
+ remaining_length_ -= 64;
+ return word;
+ } else if (remaining_length_ > num_carry_bits_) {
+ // We can load a partial word
+ uint64_t next_word =
+ LoadPartialWord(/*bit_offset=*/0, remaining_length_ - num_carry_bits_);
+ uint64_t word = carry_bits_ | (next_word << num_carry_bits_);
+ carry_bits_ = next_word >> (64 - num_carry_bits_);
+ remaining_length_ = std::max<int64_t>(remaining_length_ - 64, 0);
+ return word;
+ } else {
+ remaining_length_ = 0;
+ return carry_bits_;
+ }
+ }
+
+ int64_t position() const { return length_ - remaining_length_; }
+
+ int64_t length() const { return length_; }
+
+ private:
+ uint64_t LoadFullWord() {
+ uint64_t word;
+ memcpy(&word, bitmap_, 8);
+ bitmap_ += 8;
+ return BitUtil::ToLittleEndian(word);
+ }
+
+ uint64_t LoadPartialWord(int8_t bit_offset, int64_t num_bits) {
+ uint64_t word = 0;
+ const int64_t num_bytes = BitUtil::BytesForBits(num_bits);
+ memcpy(&word, bitmap_, num_bytes);
+ bitmap_ += num_bytes;
+ return (BitUtil::ToLittleEndian(word) >> bit_offset) &
+ BitUtil::LeastSignificantBitMask(num_bits);
+ }
+
+ const uint8_t* bitmap_;
+ const int64_t num_carry_bits_; // in [1, 8]
+ const int64_t length_;
+ int64_t remaining_length_;
+ uint64_t carry_bits_;
+};
+
+// BitmapWordReader here is faster than BitmapUInt64Reader (in bitmap_reader.h)
+// on sufficiently large inputs. However, it has a larger prolog / epilog overhead
+// and should probably not be used for small bitmaps.
+
+template <typename Word, bool may_have_byte_offset = true>
+class BitmapWordReader {
+ public:
+ BitmapWordReader() = default;
+ BitmapWordReader(const uint8_t* bitmap, int64_t offset, int64_t length)
+ : offset_(static_cast<int64_t>(may_have_byte_offset) * (offset % 8)),
+ bitmap_(bitmap + offset / 8),
+ bitmap_end_(bitmap_ + BitUtil::BytesForBits(offset_ + length)) {
+ // decrement word count by one as we may touch two adjacent words in one iteration
+ nwords_ = length / (sizeof(Word) * 8) - 1;
+ if (nwords_ < 0) {
+ nwords_ = 0;
+ }
+ trailing_bits_ = static_cast<int>(length - nwords_ * sizeof(Word) * 8);
+ trailing_bytes_ = static_cast<int>(BitUtil::BytesForBits(trailing_bits_));
+
+ if (nwords_ > 0) {
+ current_word_ = load<Word>(bitmap_);
+ } else if (length > 0) {
+ current_byte_ = load<uint8_t>(bitmap_);
+ }
+ }
+
+ Word NextWord() {
+ bitmap_ += sizeof(Word);
+ const Word next_word = load<Word>(bitmap_);
+ Word word = current_word_;
+ if (may_have_byte_offset && offset_) {
+ // combine two adjacent words into one word
+ // |<------ next ----->|<---- current ---->|
+ // +-------------+-----+-------------+-----+
+ // | --- | A | B | --- |
+ // +-------------+-----+-------------+-----+
+ // | | offset
+ // v v
+ // +-----+-------------+
+ // | A | B |
+ // +-----+-------------+
+ // |<------ word ----->|
+ word >>= offset_;
+ word |= next_word << (sizeof(Word) * 8 - offset_);
+ }
+ current_word_ = next_word;
+ return word;
+ }
+
+ uint8_t NextTrailingByte(int& valid_bits) {
+ uint8_t byte;
+ assert(trailing_bits_ > 0);
+
+ if (trailing_bits_ <= 8) {
+ // last byte
+ valid_bits = trailing_bits_;
+ trailing_bits_ = 0;
+ byte = 0;
+ internal::BitmapReader reader(bitmap_, offset_, valid_bits);
+ for (int i = 0; i < valid_bits; ++i) {
+ byte >>= 1;
+ if (reader.IsSet()) {
+ byte |= 0x80;
+ }
+ reader.Next();
+ }
+ byte >>= (8 - valid_bits);
+ } else {
+ ++bitmap_;
+ const uint8_t next_byte = load<uint8_t>(bitmap_);
+ byte = current_byte_;
+ if (may_have_byte_offset && offset_) {
+ byte >>= offset_;
+ byte |= next_byte << (8 - offset_);
+ }
+ current_byte_ = next_byte;
+ trailing_bits_ -= 8;
+ trailing_bytes_--;
+ valid_bits = 8;
+ }
+ return byte;
+ }
+
+ int64_t words() const { return nwords_; }
+ int trailing_bytes() const { return trailing_bytes_; }
+
+ private:
+ int64_t offset_;
+ const uint8_t* bitmap_;
+
+ const uint8_t* bitmap_end_;
+ int64_t nwords_;
+ int trailing_bits_;
+ int trailing_bytes_;
+ union {
+ Word current_word_;
+ struct {
+#if ARROW_LITTLE_ENDIAN == 0
+ uint8_t padding_bytes_[sizeof(Word) - 1];
+#endif
+ uint8_t current_byte_;
+ };
+ };
+
+ template <typename DType>
+ DType load(const uint8_t* bitmap) {
+ assert(bitmap + sizeof(DType) <= bitmap_end_);
+ return BitUtil::ToLittleEndian(util::SafeLoadAs<DType>(bitmap));
+ }
+};
+
/// \brief Index into a possibly non-existent bitmap
struct OptionalBitIndexer {
const uint8_t* bitmap;
@@ -263,7 +263,7 @@ struct OptionalBitIndexer {
: bitmap(buffer == NULLPTR ? NULLPTR : buffer->data()), offset(offset) {}
bool operator[](int64_t i) const {
- return bitmap == NULLPTR || BitUtil::GetBit(bitmap, offset + i);
+ return bitmap == NULLPTR || BitUtil::GetBit(bitmap, offset + i);
}
};
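// Editor's note: a short sketch (not part of the diff) of the intended use of
// the BitmapUInt64Reader restored above: consume a bitmap 64 bits at a time
// from an arbitrary starting bit-offset. The final partial word is masked to
// zero past `length`, so popcounting whole words is safe. Assumes
// BitUtil::PopCount from arrow/util/bit_util.h.

#include <cstdint>
#include <iostream>

#include "arrow/util/bit_util.h"
#include "arrow/util/bitmap_reader.h"

int64_t CountSetBits(const uint8_t* bitmap, int64_t offset, int64_t length) {
  arrow::internal::BitmapUInt64Reader reader(bitmap, offset, length);
  int64_t count = 0;
  while (reader.position() < reader.length()) {
    count += arrow::BitUtil::PopCount(reader.NextWord());
  }
  return count;
}

int main() {
  const uint8_t bitmap[] = {0xFF, 0x0F, 0x00, 0xF0};
  // Bits 4..27: expected count is 8 (4 from 0xFF, 4 from 0x0F).
  std::cout << CountSetBits(bitmap, /*offset=*/4, /*length=*/24) << "\n";
  return 0;
}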
diff --git a/contrib/libs/apache/arrow/cpp/src/arrow/util/bitmap_visit.h b/contrib/libs/apache/arrow/cpp/src/arrow/util/bitmap_visit.h
index dc495d1135b..8a16993e052 100644
--- a/contrib/libs/apache/arrow/cpp/src/arrow/util/bitmap_visit.h
+++ b/contrib/libs/apache/arrow/cpp/src/arrow/util/bitmap_visit.h
@@ -1,88 +1,88 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-#pragma once
-
-#include <cstdint>
-
-#include "arrow/util/bit_util.h"
-#include "arrow/util/bitmap_reader.h"
-
-namespace arrow {
-namespace internal {
-
-// A function that visits each bit in a bitmap and calls a visitor function with a
-// boolean representation of that bit. This is intended to be analogous to
-// GenerateBits.
-template <class Visitor>
-void VisitBits(const uint8_t* bitmap, int64_t start_offset, int64_t length,
- Visitor&& visit) {
- BitmapReader reader(bitmap, start_offset, length);
- for (int64_t index = 0; index < length; ++index) {
- visit(reader.IsSet());
- reader.Next();
- }
-}
-
-// Like VisitBits(), but unrolls its main loop for better performance.
-template <class Visitor>
-void VisitBitsUnrolled(const uint8_t* bitmap, int64_t start_offset, int64_t length,
- Visitor&& visit) {
- if (length == 0) {
- return;
- }
-
- // Start by visiting any bits preceding the first full byte.
- int64_t num_bits_before_full_bytes =
- BitUtil::RoundUpToMultipleOf8(start_offset) - start_offset;
- // Truncate num_bits_before_full_bytes if it is greater than length.
- if (num_bits_before_full_bytes > length) {
- num_bits_before_full_bytes = length;
- }
- // Use the non-loop-unrolled VisitBits since we don't want to add branches.
- VisitBits<Visitor>(bitmap, start_offset, num_bits_before_full_bytes, visit);
-
- // Shift the start pointer to the first full byte and compute the
- // number of full bytes to be read.
- const uint8_t* first_full_byte = bitmap + BitUtil::CeilDiv(start_offset, 8);
- const int64_t num_full_bytes = (length - num_bits_before_full_bytes) / 8;
-
- // Iterate over each full byte of the input bitmap and call the visitor in
- // a loop-unrolled manner.
- for (int64_t byte_index = 0; byte_index < num_full_bytes; ++byte_index) {
- // Get the current bit-packed byte value from the bitmap.
- const uint8_t byte = *(first_full_byte + byte_index);
-
- // Execute the visitor function on each bit of the current byte.
- visit(BitUtil::GetBitFromByte(byte, 0));
- visit(BitUtil::GetBitFromByte(byte, 1));
- visit(BitUtil::GetBitFromByte(byte, 2));
- visit(BitUtil::GetBitFromByte(byte, 3));
- visit(BitUtil::GetBitFromByte(byte, 4));
- visit(BitUtil::GetBitFromByte(byte, 5));
- visit(BitUtil::GetBitFromByte(byte, 6));
- visit(BitUtil::GetBitFromByte(byte, 7));
- }
-
- // Visit any leftover bits in the last byte.
- const int64_t num_bits_after_full_bytes = (length - num_bits_before_full_bytes) % 8;
- VisitBits<Visitor>(first_full_byte + num_full_bytes, 0, num_bits_after_full_bytes,
- visit);
-}
-
-} // namespace internal
-} // namespace arrow
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+
+#include <cstdint>
+
+#include "arrow/util/bit_util.h"
+#include "arrow/util/bitmap_reader.h"
+
+namespace arrow {
+namespace internal {
+
+// A function that visits each bit in a bitmap and calls a visitor function with a
+// boolean representation of that bit. This is intended to be analogous to
+// GenerateBits.
+template <class Visitor>
+void VisitBits(const uint8_t* bitmap, int64_t start_offset, int64_t length,
+ Visitor&& visit) {
+ BitmapReader reader(bitmap, start_offset, length);
+ for (int64_t index = 0; index < length; ++index) {
+ visit(reader.IsSet());
+ reader.Next();
+ }
+}
+
+// Like VisitBits(), but unrolls its main loop for better performance.
+template <class Visitor>
+void VisitBitsUnrolled(const uint8_t* bitmap, int64_t start_offset, int64_t length,
+ Visitor&& visit) {
+ if (length == 0) {
+ return;
+ }
+
+ // Start by visiting any bits preceding the first full byte.
+ int64_t num_bits_before_full_bytes =
+ BitUtil::RoundUpToMultipleOf8(start_offset) - start_offset;
+ // Truncate num_bits_before_full_bytes if it is greater than length.
+ if (num_bits_before_full_bytes > length) {
+ num_bits_before_full_bytes = length;
+ }
+ // Use the non-loop-unrolled VisitBits since we don't want to add branches.
+ VisitBits<Visitor>(bitmap, start_offset, num_bits_before_full_bytes, visit);
+
+ // Shift the start pointer to the first full byte and compute the
+ // number of full bytes to be read.
+ const uint8_t* first_full_byte = bitmap + BitUtil::CeilDiv(start_offset, 8);
+ const int64_t num_full_bytes = (length - num_bits_before_full_bytes) / 8;
+
+ // Iterate over each full byte of the input bitmap and call the visitor in
+ // a loop-unrolled manner.
+ for (int64_t byte_index = 0; byte_index < num_full_bytes; ++byte_index) {
+ // Get the current bit-packed byte value from the bitmap.
+ const uint8_t byte = *(first_full_byte + byte_index);
+
+ // Execute the visitor function on each bit of the current byte.
+ visit(BitUtil::GetBitFromByte(byte, 0));
+ visit(BitUtil::GetBitFromByte(byte, 1));
+ visit(BitUtil::GetBitFromByte(byte, 2));
+ visit(BitUtil::GetBitFromByte(byte, 3));
+ visit(BitUtil::GetBitFromByte(byte, 4));
+ visit(BitUtil::GetBitFromByte(byte, 5));
+ visit(BitUtil::GetBitFromByte(byte, 6));
+ visit(BitUtil::GetBitFromByte(byte, 7));
+ }
+
+ // Visit any leftover bits in the last byte.
+ const int64_t num_bits_after_full_bytes = (length - num_bits_before_full_bytes) % 8;
+ VisitBits<Visitor>(first_full_byte + num_full_bytes, 0, num_bits_after_full_bytes,
+ visit);
+}
+
+} // namespace internal
+} // namespace arrow
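// Editor's note: a minimal sketch (not part of the diff) of the VisitBits API
// restored above; VisitBitsUnrolled is a drop-in replacement that pays off on
// larger bitmaps.

#include <cstdint>
#include <iostream>
#include <vector>

#include "arrow/util/bitmap_visit.h"

int main() {
  const uint8_t bitmap[] = {0xB2};  // 0b10110010, bits packed LSB-first
  std::vector<bool> bits;
  arrow::internal::VisitBits(bitmap, /*start_offset=*/1, /*length=*/6,
                             [&](bool bit) { bits.push_back(bit); });
  for (bool b : bits) std::cout << b;  // prints 100110 (bits 1..6)
  std::cout << "\n";
  return 0;
}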
diff --git a/contrib/libs/apache/arrow/cpp/src/arrow/util/bitmap_writer.h b/contrib/libs/apache/arrow/cpp/src/arrow/util/bitmap_writer.h
index 096cfc8655a..d5c6d909df0 100644
--- a/contrib/libs/apache/arrow/cpp/src/arrow/util/bitmap_writer.h
+++ b/contrib/libs/apache/arrow/cpp/src/arrow/util/bitmap_writer.h
@@ -21,7 +21,7 @@
#include <cstring>
#include "arrow/util/bit_util.h"
-#include "arrow/util/endian.h"
+#include "arrow/util/endian.h"
#include "arrow/util/macros.h"
namespace arrow {
@@ -180,106 +180,106 @@ class FirstTimeBitmapWriter {
int64_t byte_offset_;
};
-template <typename Word, bool may_have_byte_offset = true>
-class BitmapWordWriter {
- public:
- BitmapWordWriter() = default;
- BitmapWordWriter(uint8_t* bitmap, int64_t offset, int64_t length)
- : offset_(static_cast<int64_t>(may_have_byte_offset) * (offset % 8)),
- bitmap_(bitmap + offset / 8),
- bitmap_end_(bitmap_ + BitUtil::BytesForBits(offset_ + length)),
- mask_((1U << offset_) - 1) {
- if (offset_) {
- if (length >= static_cast<int>(sizeof(Word) * 8)) {
- current_word_ = load<Word>(bitmap_);
- } else if (length > 0) {
- current_byte_ = load<uint8_t>(bitmap_);
- }
- }
- }
-
- void PutNextWord(Word word) {
- if (may_have_byte_offset && offset_) {
- // split one word into two adjacent words, don't touch unused bits
- // |<------ word ----->|
- // +-----+-------------+
- // | A | B |
- // +-----+-------------+
- // | |
- // v v offset
- // +-------------+-----+-------------+-----+
- // | --- | A | B | --- |
- // +-------------+-----+-------------+-----+
- // |<------ next ----->|<---- current ---->|
- word = (word << offset_) | (word >> (sizeof(Word) * 8 - offset_));
- Word next_word = load<Word>(bitmap_ + sizeof(Word));
- current_word_ = (current_word_ & mask_) | (word & ~mask_);
- next_word = (next_word & ~mask_) | (word & mask_);
- store<Word>(bitmap_, current_word_);
- store<Word>(bitmap_ + sizeof(Word), next_word);
- current_word_ = next_word;
- } else {
- store<Word>(bitmap_, word);
- }
- bitmap_ += sizeof(Word);
- }
-
- void PutNextTrailingByte(uint8_t byte, int valid_bits) {
- if (valid_bits == 8) {
- if (may_have_byte_offset && offset_) {
- byte = (byte << offset_) | (byte >> (8 - offset_));
- uint8_t next_byte = load<uint8_t>(bitmap_ + 1);
- current_byte_ = (current_byte_ & mask_) | (byte & ~mask_);
- next_byte = (next_byte & ~mask_) | (byte & mask_);
- store<uint8_t>(bitmap_, current_byte_);
- store<uint8_t>(bitmap_ + 1, next_byte);
- current_byte_ = next_byte;
- } else {
- store<uint8_t>(bitmap_, byte);
- }
- ++bitmap_;
- } else {
- assert(valid_bits > 0);
- assert(valid_bits < 8);
- assert(bitmap_ + BitUtil::BytesForBits(offset_ + valid_bits) <= bitmap_end_);
- internal::BitmapWriter writer(bitmap_, offset_, valid_bits);
- for (int i = 0; i < valid_bits; ++i) {
- (byte & 0x01) ? writer.Set() : writer.Clear();
- writer.Next();
- byte >>= 1;
- }
- writer.Finish();
- }
- }
-
- private:
- int64_t offset_;
- uint8_t* bitmap_;
-
- const uint8_t* bitmap_end_;
- uint64_t mask_;
- union {
- Word current_word_;
- struct {
-#if ARROW_LITTLE_ENDIAN == 0
- uint8_t padding_bytes_[sizeof(Word) - 1];
-#endif
- uint8_t current_byte_;
- };
- };
-
- template <typename DType>
- DType load(const uint8_t* bitmap) {
- assert(bitmap + sizeof(DType) <= bitmap_end_);
- return BitUtil::ToLittleEndian(util::SafeLoadAs<DType>(bitmap));
- }
-
- template <typename DType>
- void store(uint8_t* bitmap, DType data) {
- assert(bitmap + sizeof(DType) <= bitmap_end_);
- util::SafeStore(bitmap, BitUtil::FromLittleEndian(data));
- }
-};
-
+template <typename Word, bool may_have_byte_offset = true>
+class BitmapWordWriter {
+ public:
+ BitmapWordWriter() = default;
+ BitmapWordWriter(uint8_t* bitmap, int64_t offset, int64_t length)
+ : offset_(static_cast<int64_t>(may_have_byte_offset) * (offset % 8)),
+ bitmap_(bitmap + offset / 8),
+ bitmap_end_(bitmap_ + BitUtil::BytesForBits(offset_ + length)),
+ mask_((1U << offset_) - 1) {
+ if (offset_) {
+ if (length >= static_cast<int>(sizeof(Word) * 8)) {
+ current_word_ = load<Word>(bitmap_);
+ } else if (length > 0) {
+ current_byte_ = load<uint8_t>(bitmap_);
+ }
+ }
+ }
+
+ void PutNextWord(Word word) {
+ if (may_have_byte_offset && offset_) {
+ // split one word into two adjacent words, don't touch unused bits
+ // |<------ word ----->|
+ // +-----+-------------+
+ // | A | B |
+ // +-----+-------------+
+ // | |
+ // v v offset
+ // +-------------+-----+-------------+-----+
+ // | --- | A | B | --- |
+ // +-------------+-----+-------------+-----+
+ // |<------ next ----->|<---- current ---->|
+ word = (word << offset_) | (word >> (sizeof(Word) * 8 - offset_));
+ Word next_word = load<Word>(bitmap_ + sizeof(Word));
+ current_word_ = (current_word_ & mask_) | (word & ~mask_);
+ next_word = (next_word & ~mask_) | (word & mask_);
+ store<Word>(bitmap_, current_word_);
+ store<Word>(bitmap_ + sizeof(Word), next_word);
+ current_word_ = next_word;
+ } else {
+ store<Word>(bitmap_, word);
+ }
+ bitmap_ += sizeof(Word);
+ }
+
+ void PutNextTrailingByte(uint8_t byte, int valid_bits) {
+ if (valid_bits == 8) {
+ if (may_have_byte_offset && offset_) {
+ byte = (byte << offset_) | (byte >> (8 - offset_));
+ uint8_t next_byte = load<uint8_t>(bitmap_ + 1);
+ current_byte_ = (current_byte_ & mask_) | (byte & ~mask_);
+ next_byte = (next_byte & ~mask_) | (byte & mask_);
+ store<uint8_t>(bitmap_, current_byte_);
+ store<uint8_t>(bitmap_ + 1, next_byte);
+ current_byte_ = next_byte;
+ } else {
+ store<uint8_t>(bitmap_, byte);
+ }
+ ++bitmap_;
+ } else {
+ assert(valid_bits > 0);
+ assert(valid_bits < 8);
+ assert(bitmap_ + BitUtil::BytesForBits(offset_ + valid_bits) <= bitmap_end_);
+ internal::BitmapWriter writer(bitmap_, offset_, valid_bits);
+ for (int i = 0; i < valid_bits; ++i) {
+ (byte & 0x01) ? writer.Set() : writer.Clear();
+ writer.Next();
+ byte >>= 1;
+ }
+ writer.Finish();
+ }
+ }
+
+ private:
+ int64_t offset_;
+ uint8_t* bitmap_;
+
+ const uint8_t* bitmap_end_;
+ uint64_t mask_;
+ union {
+ Word current_word_;
+ struct {
+#if ARROW_LITTLE_ENDIAN == 0
+ uint8_t padding_bytes_[sizeof(Word) - 1];
+#endif
+ uint8_t current_byte_;
+ };
+ };
+
+ template <typename DType>
+ DType load(const uint8_t* bitmap) {
+ assert(bitmap + sizeof(DType) <= bitmap_end_);
+ return BitUtil::ToLittleEndian(util::SafeLoadAs<DType>(bitmap));
+ }
+
+ template <typename DType>
+ void store(uint8_t* bitmap, DType data) {
+ assert(bitmap + sizeof(DType) <= bitmap_end_);
+ util::SafeStore(bitmap, BitUtil::FromLittleEndian(data));
+ }
+};
+
} // namespace internal
} // namespace arrow
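// Editor's note: a sketch (not part of the diff) pairing BitmapWordReader
// (bitmap_reader.h) with the BitmapWordWriter restored above to copy a bit
// range between unaligned offsets, the usage pattern these classes are built
// for: a word loop over the full 64-bit words, then a trailing-byte loop.

#include <cstdint>
#include <iostream>

#include "arrow/util/bitmap_reader.h"
#include "arrow/util/bitmap_writer.h"

void CopyBits(const uint8_t* src, int64_t src_offset, uint8_t* dest,
              int64_t dest_offset, int64_t length) {
  arrow::internal::BitmapWordReader<uint64_t> reader(src, src_offset, length);
  arrow::internal::BitmapWordWriter<uint64_t> writer(dest, dest_offset, length);
  for (int64_t i = 0; i < reader.words(); ++i) {
    writer.PutNextWord(reader.NextWord());
  }
  for (int i = 0; i < reader.trailing_bytes(); ++i) {
    int valid_bits = 0;
    const uint8_t byte = reader.NextTrailingByte(valid_bits);
    writer.PutNextTrailingByte(byte, valid_bits);
  }
}

int main() {
  const uint8_t src[4] = {0xDE, 0xAD, 0xBE, 0xEF};
  uint8_t dest[4] = {0, 0, 0, 0};
  // Copy 24 bits from bit 3 of src into dest starting at bit 5.
  CopyBits(src, /*src_offset=*/3, dest, /*dest_offset=*/5, /*length=*/24);
  std::cout << std::hex << static_cast<int>(dest[0]) << "\n";  // 60
  return 0;
}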
diff --git a/contrib/libs/apache/arrow/cpp/src/arrow/util/bpacking.cc b/contrib/libs/apache/arrow/cpp/src/arrow/util/bpacking.cc
index 538b7382e43..d9cafd602a2 100644
--- a/contrib/libs/apache/arrow/cpp/src/arrow/util/bpacking.cc
+++ b/contrib/libs/apache/arrow/cpp/src/arrow/util/bpacking.cc
@@ -27,9 +27,9 @@
#if defined(ARROW_HAVE_RUNTIME_AVX512)
#error #include "arrow/util/bpacking_avx512.h"
#endif
-#if defined(ARROW_HAVE_NEON)
-#error #include "arrow/util/bpacking_neon.h"
-#endif
+#if defined(ARROW_HAVE_NEON)
+#error #include "arrow/util/bpacking_neon.h"
+#endif
namespace arrow {
namespace internal {
@@ -166,12 +166,12 @@ struct Unpack32DynamicFunction {
} // namespace
int unpack32(const uint32_t* in, uint32_t* out, int batch_size, int num_bits) {
-#if defined(ARROW_HAVE_NEON)
- return unpack32_neon(in, out, batch_size, num_bits);
-#else
+#if defined(ARROW_HAVE_NEON)
+ return unpack32_neon(in, out, batch_size, num_bits);
+#else
static DynamicDispatch<Unpack32DynamicFunction> dispatch;
return dispatch.func(in, out, batch_size, num_bits);
-#endif
+#endif
}
} // namespace internal
diff --git a/contrib/libs/apache/arrow/cpp/src/arrow/util/bpacking.h b/contrib/libs/apache/arrow/cpp/src/arrow/util/bpacking.h
index 7f4ca3e384c..e5a4dbbed89 100644
--- a/contrib/libs/apache/arrow/cpp/src/arrow/util/bpacking.h
+++ b/contrib/libs/apache/arrow/cpp/src/arrow/util/bpacking.h
@@ -17,7 +17,7 @@
#pragma once
-#include "arrow/util/endian.h"
+#include "arrow/util/endian.h"
#include "arrow/util/visibility.h"
#include <stdint.h>
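// Editor's note: a small sketch (not part of the diff) of the unpack32 entry
// point declared in bpacking.h, assuming the LSB-first bit packing used by
// Parquet: 32 four-bit values occupy four 32-bit words and are expanded to
// one uint32_t per value.

#include <cstdint>
#include <iostream>

#include "arrow/util/bpacking.h"

int main() {
  // Each word packs eight 4-bit values from the low bits up, so
  // 0x76543210 holds the values 0, 1, 2, ..., 7.
  const uint32_t packed[4] = {0x76543210, 0xFEDCBA98,
                              0x76543210, 0xFEDCBA98};
  uint32_t out[32];
  arrow::internal::unpack32(packed, out, /*batch_size=*/32, /*num_bits=*/4);
  for (int i = 0; i < 8; ++i) std::cout << out[i] << " ";  // 0 1 2 ... 7
  std::cout << "\n";
  return 0;
}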
diff --git a/contrib/libs/apache/arrow/cpp/src/arrow/util/byte_stream_split.h b/contrib/libs/apache/arrow/cpp/src/arrow/util/byte_stream_split.h
index 53627aee18a..28dcce52bb8 100644
--- a/contrib/libs/apache/arrow/cpp/src/arrow/util/byte_stream_split.h
+++ b/contrib/libs/apache/arrow/cpp/src/arrow/util/byte_stream_split.h
@@ -1,626 +1,626 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-#pragma once
-
-#include "arrow/util/simd.h"
-#include "arrow/util/ubsan.h"
-
-#include <stdint.h>
-#include <algorithm>
-
-#ifdef ARROW_HAVE_SSE4_2
-// Enable SIMD for the ByteStreamSplit encoder/decoder
-#define ARROW_HAVE_SIMD_SPLIT
-#endif // ARROW_HAVE_SSE4_2
-
-namespace arrow {
-namespace util {
-namespace internal {
-
-#if defined(ARROW_HAVE_SSE4_2)
-template <typename T>
-void ByteStreamSplitDecodeSse2(const uint8_t* data, int64_t num_values, int64_t stride,
- T* out) {
- constexpr size_t kNumStreams = sizeof(T);
- static_assert(kNumStreams == 4U || kNumStreams == 8U, "Invalid number of streams.");
- constexpr size_t kNumStreamsLog2 = (kNumStreams == 8U ? 3U : 2U);
-
- const int64_t size = num_values * sizeof(T);
- constexpr int64_t kBlockSize = sizeof(__m128i) * kNumStreams;
- const int64_t num_blocks = size / kBlockSize;
- uint8_t* output_data = reinterpret_cast<uint8_t*>(out);
-
- // First handle suffix.
- // This helps catch cases where the SIMD-based processing overflows into
- // the suffix, since a test would then almost surely fail.
- const int64_t num_processed_elements = (num_blocks * kBlockSize) / kNumStreams;
- for (int64_t i = num_processed_elements; i < num_values; ++i) {
- uint8_t gathered_byte_data[kNumStreams];
- for (size_t b = 0; b < kNumStreams; ++b) {
- const size_t byte_index = b * stride + i;
- gathered_byte_data[b] = data[byte_index];
- }
- out[i] = arrow::util::SafeLoadAs<T>(&gathered_byte_data[0]);
- }
-
- // The blocks get processed hierarchically using the unpack intrinsics.
- // Example with four streams:
- // Stage 1: AAAA BBBB CCCC DDDD
- // Stage 2: ACAC ACAC BDBD BDBD
- // Stage 3: ABCD ABCD ABCD ABCD
- __m128i stage[kNumStreamsLog2 + 1U][kNumStreams];
- constexpr size_t kNumStreamsHalf = kNumStreams / 2U;
-
- for (int64_t i = 0; i < num_blocks; ++i) {
- for (size_t j = 0; j < kNumStreams; ++j) {
- stage[0][j] = _mm_loadu_si128(
- reinterpret_cast<const __m128i*>(&data[i * sizeof(__m128i) + j * stride]));
- }
- for (size_t step = 0; step < kNumStreamsLog2; ++step) {
- for (size_t j = 0; j < kNumStreamsHalf; ++j) {
- stage[step + 1U][j * 2] =
- _mm_unpacklo_epi8(stage[step][j], stage[step][kNumStreamsHalf + j]);
- stage[step + 1U][j * 2 + 1U] =
- _mm_unpackhi_epi8(stage[step][j], stage[step][kNumStreamsHalf + j]);
- }
- }
- for (size_t j = 0; j < kNumStreams; ++j) {
- _mm_storeu_si128(reinterpret_cast<__m128i*>(
- &output_data[(i * kNumStreams + j) * sizeof(__m128i)]),
- stage[kNumStreamsLog2][j]);
- }
- }
-}
-
-template <typename T>
-void ByteStreamSplitEncodeSse2(const uint8_t* raw_values, const size_t num_values,
- uint8_t* output_buffer_raw) {
- constexpr size_t kNumStreams = sizeof(T);
- static_assert(kNumStreams == 4U || kNumStreams == 8U, "Invalid number of streams.");
- __m128i stage[3][kNumStreams];
- __m128i final_result[kNumStreams];
-
- const size_t size = num_values * sizeof(T);
- constexpr size_t kBlockSize = sizeof(__m128i) * kNumStreams;
- const size_t num_blocks = size / kBlockSize;
- const __m128i* raw_values_sse = reinterpret_cast<const __m128i*>(raw_values);
- __m128i* output_buffer_streams[kNumStreams];
- for (size_t i = 0; i < kNumStreams; ++i) {
- output_buffer_streams[i] =
- reinterpret_cast<__m128i*>(&output_buffer_raw[num_values * i]);
- }
-
- // First handle suffix.
- const size_t num_processed_elements = (num_blocks * kBlockSize) / sizeof(T);
- for (size_t i = num_processed_elements; i < num_values; ++i) {
- for (size_t j = 0U; j < kNumStreams; ++j) {
- const uint8_t byte_in_value = raw_values[i * kNumStreams + j];
- output_buffer_raw[j * num_values + i] = byte_in_value;
- }
- }
- // The current shuffling algorithm diverges for float and double types but the compiler
- // should be able to remove the branch since only one path is taken for each template
- // instantiation.
- // Example run for floats:
- // Step 0, copy:
- // 0: ABCD ABCD ABCD ABCD 1: ABCD ABCD ABCD ABCD ...
- // Step 1: _mm_unpacklo_epi8 and _mm_unpackhi_epi8:
- //   0: AABB CCDD AABB CCDD 1: AABB CCDD AABB CCDD ...
- // Step 2: _mm_unpacklo_epi8 and _mm_unpackhi_epi8:
- //   0: AAAA BBBB CCCC DDDD 1: AAAA BBBB CCCC DDDD ...
- // Step 3: _mm_unpacklo_epi8 and _mm_unpackhi_epi8:
- //   0: AAAA AAAA BBBB BBBB 1: CCCC CCCC DDDD DDDD ...
- // Step 4: _mm_unpacklo_epi64 and _mm_unpackhi_epi64:
- // 0: AAAA AAAA AAAA AAAA 1: BBBB BBBB BBBB BBBB ...
- for (size_t block_index = 0; block_index < num_blocks; ++block_index) {
- // First copy the data to stage 0.
- for (size_t i = 0; i < kNumStreams; ++i) {
- stage[0][i] = _mm_loadu_si128(&raw_values_sse[block_index * kNumStreams + i]);
- }
-
- // The shuffling of bytes is performed through the unpack intrinsics.
- // In my measurements this gives better performance then an implementation
- // which uses the shuffle intrinsics.
- for (size_t stage_lvl = 0; stage_lvl < 2U; ++stage_lvl) {
- for (size_t i = 0; i < kNumStreams / 2U; ++i) {
- stage[stage_lvl + 1][i * 2] =
- _mm_unpacklo_epi8(stage[stage_lvl][i * 2], stage[stage_lvl][i * 2 + 1]);
- stage[stage_lvl + 1][i * 2 + 1] =
- _mm_unpackhi_epi8(stage[stage_lvl][i * 2], stage[stage_lvl][i * 2 + 1]);
- }
- }
- if (kNumStreams == 8U) {
- // This is the path for double.
- __m128i tmp[8];
- for (size_t i = 0; i < 4; ++i) {
- tmp[i * 2] = _mm_unpacklo_epi32(stage[2][i], stage[2][i + 4]);
- tmp[i * 2 + 1] = _mm_unpackhi_epi32(stage[2][i], stage[2][i + 4]);
- }
-
- for (size_t i = 0; i < 4; ++i) {
- final_result[i * 2] = _mm_unpacklo_epi32(tmp[i], tmp[i + 4]);
- final_result[i * 2 + 1] = _mm_unpackhi_epi32(tmp[i], tmp[i + 4]);
- }
- } else {
- // this is the path for float.
- __m128i tmp[4];
- for (size_t i = 0; i < 2; ++i) {
- tmp[i * 2] = _mm_unpacklo_epi8(stage[2][i * 2], stage[2][i * 2 + 1]);
- tmp[i * 2 + 1] = _mm_unpackhi_epi8(stage[2][i * 2], stage[2][i * 2 + 1]);
- }
- for (size_t i = 0; i < 2; ++i) {
- final_result[i * 2] = _mm_unpacklo_epi64(tmp[i], tmp[i + 2]);
- final_result[i * 2 + 1] = _mm_unpackhi_epi64(tmp[i], tmp[i + 2]);
- }
- }
- for (size_t i = 0; i < kNumStreams; ++i) {
- _mm_storeu_si128(&output_buffer_streams[i][block_index], final_result[i]);
- }
- }
-}
-#endif // ARROW_HAVE_SSE4_2
-
-#if defined(ARROW_HAVE_AVX2)
-template <typename T>
-void ByteStreamSplitDecodeAvx2(const uint8_t* data, int64_t num_values, int64_t stride,
- T* out) {
- constexpr size_t kNumStreams = sizeof(T);
- static_assert(kNumStreams == 4U || kNumStreams == 8U, "Invalid number of streams.");
- constexpr size_t kNumStreamsLog2 = (kNumStreams == 8U ? 3U : 2U);
-
- const int64_t size = num_values * sizeof(T);
- constexpr int64_t kBlockSize = sizeof(__m256i) * kNumStreams;
- if (size < kBlockSize) // Back to SSE for small size
- return ByteStreamSplitDecodeSse2(data, num_values, stride, out);
- const int64_t num_blocks = size / kBlockSize;
- uint8_t* output_data = reinterpret_cast<uint8_t*>(out);
-
- // First handle suffix.
- const int64_t num_processed_elements = (num_blocks * kBlockSize) / kNumStreams;
- for (int64_t i = num_processed_elements; i < num_values; ++i) {
- uint8_t gathered_byte_data[kNumStreams];
- for (size_t b = 0; b < kNumStreams; ++b) {
- const size_t byte_index = b * stride + i;
- gathered_byte_data[b] = data[byte_index];
- }
- out[i] = arrow::util::SafeLoadAs<T>(&gathered_byte_data[0]);
- }
-
- // Processed hierarchically using unpack intrinsics, then permute intrinsics.
- __m256i stage[kNumStreamsLog2 + 1U][kNumStreams];
- __m256i final_result[kNumStreams];
- constexpr size_t kNumStreamsHalf = kNumStreams / 2U;
-
- for (int64_t i = 0; i < num_blocks; ++i) {
- for (size_t j = 0; j < kNumStreams; ++j) {
- stage[0][j] = _mm256_loadu_si256(
- reinterpret_cast<const __m256i*>(&data[i * sizeof(__m256i) + j * stride]));
- }
-
- for (size_t step = 0; step < kNumStreamsLog2; ++step) {
- for (size_t j = 0; j < kNumStreamsHalf; ++j) {
- stage[step + 1U][j * 2] =
- _mm256_unpacklo_epi8(stage[step][j], stage[step][kNumStreamsHalf + j]);
- stage[step + 1U][j * 2 + 1U] =
- _mm256_unpackhi_epi8(stage[step][j], stage[step][kNumStreamsHalf + j]);
- }
- }
-
- if (kNumStreams == 8U) {
- // path for double, 128i index:
- // {0x00, 0x08}, {0x01, 0x09}, {0x02, 0x0A}, {0x03, 0x0B},
- // {0x04, 0x0C}, {0x05, 0x0D}, {0x06, 0x0E}, {0x07, 0x0F},
- final_result[0] = _mm256_permute2x128_si256(stage[kNumStreamsLog2][0],
- stage[kNumStreamsLog2][1], 0b00100000);
- final_result[1] = _mm256_permute2x128_si256(stage[kNumStreamsLog2][2],
- stage[kNumStreamsLog2][3], 0b00100000);
- final_result[2] = _mm256_permute2x128_si256(stage[kNumStreamsLog2][4],
- stage[kNumStreamsLog2][5], 0b00100000);
- final_result[3] = _mm256_permute2x128_si256(stage[kNumStreamsLog2][6],
- stage[kNumStreamsLog2][7], 0b00100000);
- final_result[4] = _mm256_permute2x128_si256(stage[kNumStreamsLog2][0],
- stage[kNumStreamsLog2][1], 0b00110001);
- final_result[5] = _mm256_permute2x128_si256(stage[kNumStreamsLog2][2],
- stage[kNumStreamsLog2][3], 0b00110001);
- final_result[6] = _mm256_permute2x128_si256(stage[kNumStreamsLog2][4],
- stage[kNumStreamsLog2][5], 0b00110001);
- final_result[7] = _mm256_permute2x128_si256(stage[kNumStreamsLog2][6],
- stage[kNumStreamsLog2][7], 0b00110001);
- } else {
- // path for float, 128i index:
- // {0x00, 0x04}, {0x01, 0x05}, {0x02, 0x06}, {0x03, 0x07}
- final_result[0] = _mm256_permute2x128_si256(stage[kNumStreamsLog2][0],
- stage[kNumStreamsLog2][1], 0b00100000);
- final_result[1] = _mm256_permute2x128_si256(stage[kNumStreamsLog2][2],
- stage[kNumStreamsLog2][3], 0b00100000);
- final_result[2] = _mm256_permute2x128_si256(stage[kNumStreamsLog2][0],
- stage[kNumStreamsLog2][1], 0b00110001);
- final_result[3] = _mm256_permute2x128_si256(stage[kNumStreamsLog2][2],
- stage[kNumStreamsLog2][3], 0b00110001);
- }
-
- for (size_t j = 0; j < kNumStreams; ++j) {
- _mm256_storeu_si256(reinterpret_cast<__m256i*>(
- &output_data[(i * kNumStreams + j) * sizeof(__m256i)]),
- final_result[j]);
- }
- }
-}
-
-template <typename T>
-void ByteStreamSplitEncodeAvx2(const uint8_t* raw_values, const size_t num_values,
- uint8_t* output_buffer_raw) {
- constexpr size_t kNumStreams = sizeof(T);
- static_assert(kNumStreams == 4U || kNumStreams == 8U, "Invalid number of streams.");
- if (kNumStreams == 8U) // Back to SSE, currently no path for double.
- return ByteStreamSplitEncodeSse2<T>(raw_values, num_values, output_buffer_raw);
-
- const size_t size = num_values * sizeof(T);
- constexpr size_t kBlockSize = sizeof(__m256i) * kNumStreams;
- if (size < kBlockSize) // Back to SSE for small size
- return ByteStreamSplitEncodeSse2<T>(raw_values, num_values, output_buffer_raw);
- const size_t num_blocks = size / kBlockSize;
- const __m256i* raw_values_simd = reinterpret_cast<const __m256i*>(raw_values);
- __m256i* output_buffer_streams[kNumStreams];
-
- for (size_t i = 0; i < kNumStreams; ++i) {
- output_buffer_streams[i] =
- reinterpret_cast<__m256i*>(&output_buffer_raw[num_values * i]);
- }
-
- // First handle suffix.
- const size_t num_processed_elements = (num_blocks * kBlockSize) / sizeof(T);
- for (size_t i = num_processed_elements; i < num_values; ++i) {
- for (size_t j = 0U; j < kNumStreams; ++j) {
- const uint8_t byte_in_value = raw_values[i * kNumStreams + j];
- output_buffer_raw[j * num_values + i] = byte_in_value;
- }
- }
-
- // Path for float.
- // 1. Processed hierarchically to 32i blocks using the unpack intrinsics.
- // 2. Pack 128i block using _mm256_permutevar8x32_epi32.
- // 3. Pack final 256i block with _mm256_permute2x128_si256.
- constexpr size_t kNumUnpack = 3U;
- __m256i stage[kNumUnpack + 1][kNumStreams];
- static const __m256i kPermuteMask =
- _mm256_set_epi32(0x07, 0x03, 0x06, 0x02, 0x05, 0x01, 0x04, 0x00);
- __m256i permute[kNumStreams];
- __m256i final_result[kNumStreams];
-
- for (size_t block_index = 0; block_index < num_blocks; ++block_index) {
- for (size_t i = 0; i < kNumStreams; ++i) {
- stage[0][i] = _mm256_loadu_si256(&raw_values_simd[block_index * kNumStreams + i]);
- }
-
- for (size_t stage_lvl = 0; stage_lvl < kNumUnpack; ++stage_lvl) {
- for (size_t i = 0; i < kNumStreams / 2U; ++i) {
- stage[stage_lvl + 1][i * 2] =
- _mm256_unpacklo_epi8(stage[stage_lvl][i * 2], stage[stage_lvl][i * 2 + 1]);
- stage[stage_lvl + 1][i * 2 + 1] =
- _mm256_unpackhi_epi8(stage[stage_lvl][i * 2], stage[stage_lvl][i * 2 + 1]);
- }
- }
-
- for (size_t i = 0; i < kNumStreams; ++i) {
- permute[i] = _mm256_permutevar8x32_epi32(stage[kNumUnpack][i], kPermuteMask);
- }
-
- final_result[0] = _mm256_permute2x128_si256(permute[0], permute[2], 0b00100000);
- final_result[1] = _mm256_permute2x128_si256(permute[0], permute[2], 0b00110001);
- final_result[2] = _mm256_permute2x128_si256(permute[1], permute[3], 0b00100000);
- final_result[3] = _mm256_permute2x128_si256(permute[1], permute[3], 0b00110001);
-
- for (size_t i = 0; i < kNumStreams; ++i) {
- _mm256_storeu_si256(&output_buffer_streams[i][block_index], final_result[i]);
- }
- }
-}
-#endif // ARROW_HAVE_AVX2
-
-#if defined(ARROW_HAVE_AVX512)
-template <typename T>
-void ByteStreamSplitDecodeAvx512(const uint8_t* data, int64_t num_values, int64_t stride,
- T* out) {
- constexpr size_t kNumStreams = sizeof(T);
- static_assert(kNumStreams == 4U || kNumStreams == 8U, "Invalid number of streams.");
- constexpr size_t kNumStreamsLog2 = (kNumStreams == 8U ? 3U : 2U);
-
- const int64_t size = num_values * sizeof(T);
- constexpr int64_t kBlockSize = sizeof(__m512i) * kNumStreams;
- if (size < kBlockSize) // Back to AVX2 for small size
- return ByteStreamSplitDecodeAvx2(data, num_values, stride, out);
- const int64_t num_blocks = size / kBlockSize;
- uint8_t* output_data = reinterpret_cast<uint8_t*>(out);
-
- // First handle suffix.
- const int64_t num_processed_elements = (num_blocks * kBlockSize) / kNumStreams;
- for (int64_t i = num_processed_elements; i < num_values; ++i) {
- uint8_t gathered_byte_data[kNumStreams];
- for (size_t b = 0; b < kNumStreams; ++b) {
- const size_t byte_index = b * stride + i;
- gathered_byte_data[b] = data[byte_index];
- }
- out[i] = arrow::util::SafeLoadAs<T>(&gathered_byte_data[0]);
- }
-
- // Processed hierarchically using the unpack, then two shuffles.
- __m512i stage[kNumStreamsLog2 + 1U][kNumStreams];
- __m512i shuffle[kNumStreams];
- __m512i final_result[kNumStreams];
- constexpr size_t kNumStreamsHalf = kNumStreams / 2U;
-
- for (int64_t i = 0; i < num_blocks; ++i) {
- for (size_t j = 0; j < kNumStreams; ++j) {
- stage[0][j] = _mm512_loadu_si512(
- reinterpret_cast<const __m512i*>(&data[i * sizeof(__m512i) + j * stride]));
- }
-
- for (size_t step = 0; step < kNumStreamsLog2; ++step) {
- for (size_t j = 0; j < kNumStreamsHalf; ++j) {
- stage[step + 1U][j * 2] =
- _mm512_unpacklo_epi8(stage[step][j], stage[step][kNumStreamsHalf + j]);
- stage[step + 1U][j * 2 + 1U] =
- _mm512_unpackhi_epi8(stage[step][j], stage[step][kNumStreamsHalf + j]);
- }
- }
-
- if (kNumStreams == 8U) {
- // path for double, 128i index:
- // {0x00, 0x04, 0x08, 0x0C}, {0x10, 0x14, 0x18, 0x1C},
- // {0x01, 0x05, 0x09, 0x0D}, {0x11, 0x15, 0x19, 0x1D},
- // {0x02, 0x06, 0x0A, 0x0E}, {0x12, 0x16, 0x1A, 0x1E},
- // {0x03, 0x07, 0x0B, 0x0F}, {0x13, 0x17, 0x1B, 0x1F},
- shuffle[0] = _mm512_shuffle_i32x4(stage[kNumStreamsLog2][0],
- stage[kNumStreamsLog2][1], 0b01000100);
- shuffle[1] = _mm512_shuffle_i32x4(stage[kNumStreamsLog2][2],
- stage[kNumStreamsLog2][3], 0b01000100);
- shuffle[2] = _mm512_shuffle_i32x4(stage[kNumStreamsLog2][4],
- stage[kNumStreamsLog2][5], 0b01000100);
- shuffle[3] = _mm512_shuffle_i32x4(stage[kNumStreamsLog2][6],
- stage[kNumStreamsLog2][7], 0b01000100);
- shuffle[4] = _mm512_shuffle_i32x4(stage[kNumStreamsLog2][0],
- stage[kNumStreamsLog2][1], 0b11101110);
- shuffle[5] = _mm512_shuffle_i32x4(stage[kNumStreamsLog2][2],
- stage[kNumStreamsLog2][3], 0b11101110);
- shuffle[6] = _mm512_shuffle_i32x4(stage[kNumStreamsLog2][4],
- stage[kNumStreamsLog2][5], 0b11101110);
- shuffle[7] = _mm512_shuffle_i32x4(stage[kNumStreamsLog2][6],
- stage[kNumStreamsLog2][7], 0b11101110);
-
- final_result[0] = _mm512_shuffle_i32x4(shuffle[0], shuffle[1], 0b10001000);
- final_result[1] = _mm512_shuffle_i32x4(shuffle[2], shuffle[3], 0b10001000);
- final_result[2] = _mm512_shuffle_i32x4(shuffle[0], shuffle[1], 0b11011101);
- final_result[3] = _mm512_shuffle_i32x4(shuffle[2], shuffle[3], 0b11011101);
- final_result[4] = _mm512_shuffle_i32x4(shuffle[4], shuffle[5], 0b10001000);
- final_result[5] = _mm512_shuffle_i32x4(shuffle[6], shuffle[7], 0b10001000);
- final_result[6] = _mm512_shuffle_i32x4(shuffle[4], shuffle[5], 0b11011101);
- final_result[7] = _mm512_shuffle_i32x4(shuffle[6], shuffle[7], 0b11011101);
- } else {
- // path for float, 128i index:
- // {0x00, 0x04, 0x08, 0x0C}, {0x01, 0x05, 0x09, 0x0D}
- // {0x02, 0x06, 0x0A, 0x0E}, {0x03, 0x07, 0x0B, 0x0F},
- shuffle[0] = _mm512_shuffle_i32x4(stage[kNumStreamsLog2][0],
- stage[kNumStreamsLog2][1], 0b01000100);
- shuffle[1] = _mm512_shuffle_i32x4(stage[kNumStreamsLog2][2],
- stage[kNumStreamsLog2][3], 0b01000100);
- shuffle[2] = _mm512_shuffle_i32x4(stage[kNumStreamsLog2][0],
- stage[kNumStreamsLog2][1], 0b11101110);
- shuffle[3] = _mm512_shuffle_i32x4(stage[kNumStreamsLog2][2],
- stage[kNumStreamsLog2][3], 0b11101110);
-
- final_result[0] = _mm512_shuffle_i32x4(shuffle[0], shuffle[1], 0b10001000);
- final_result[1] = _mm512_shuffle_i32x4(shuffle[0], shuffle[1], 0b11011101);
- final_result[2] = _mm512_shuffle_i32x4(shuffle[2], shuffle[3], 0b10001000);
- final_result[3] = _mm512_shuffle_i32x4(shuffle[2], shuffle[3], 0b11011101);
- }
-
- for (size_t j = 0; j < kNumStreams; ++j) {
- _mm512_storeu_si512(reinterpret_cast<__m512i*>(
- &output_data[(i * kNumStreams + j) * sizeof(__m512i)]),
- final_result[j]);
- }
- }
-}
-
-template <typename T>
-void ByteStreamSplitEncodeAvx512(const uint8_t* raw_values, const size_t num_values,
- uint8_t* output_buffer_raw) {
- constexpr size_t kNumStreams = sizeof(T);
- static_assert(kNumStreams == 4U || kNumStreams == 8U, "Invalid number of streams.");
- const size_t size = num_values * sizeof(T);
- constexpr size_t kBlockSize = sizeof(__m512i) * kNumStreams;
- if (size < kBlockSize) // Back to AVX2 for small size
- return ByteStreamSplitEncodeAvx2<T>(raw_values, num_values, output_buffer_raw);
-
- const size_t num_blocks = size / kBlockSize;
- const __m512i* raw_values_simd = reinterpret_cast<const __m512i*>(raw_values);
- __m512i* output_buffer_streams[kNumStreams];
- for (size_t i = 0; i < kNumStreams; ++i) {
- output_buffer_streams[i] =
- reinterpret_cast<__m512i*>(&output_buffer_raw[num_values * i]);
- }
-
- // First handle suffix.
- const size_t num_processed_elements = (num_blocks * kBlockSize) / sizeof(T);
- for (size_t i = num_processed_elements; i < num_values; ++i) {
- for (size_t j = 0U; j < kNumStreams; ++j) {
- const uint8_t byte_in_value = raw_values[i * kNumStreams + j];
- output_buffer_raw[j * num_values + i] = byte_in_value;
- }
- }
-
- constexpr size_t KNumUnpack = (kNumStreams == 8U) ? 2U : 3U;
- __m512i final_result[kNumStreams];
- __m512i unpack[KNumUnpack + 1][kNumStreams];
- __m512i permutex[kNumStreams];
- __m512i permutex_mask;
- if (kNumStreams == 8U) {
- // Use _mm512_set_epi32 since some older gcc versions lack _mm512_set_epi16.
- permutex_mask = _mm512_set_epi32(0x001F0017, 0x000F0007, 0x001E0016, 0x000E0006,
- 0x001D0015, 0x000D0005, 0x001C0014, 0x000C0004,
- 0x001B0013, 0x000B0003, 0x001A0012, 0x000A0002,
- 0x00190011, 0x00090001, 0x00180010, 0x00080000);
- } else {
- permutex_mask = _mm512_set_epi32(0x0F, 0x0B, 0x07, 0x03, 0x0E, 0x0A, 0x06, 0x02, 0x0D,
- 0x09, 0x05, 0x01, 0x0C, 0x08, 0x04, 0x00);
- }
-
- for (size_t block_index = 0; block_index < num_blocks; ++block_index) {
- for (size_t i = 0; i < kNumStreams; ++i) {
- unpack[0][i] = _mm512_loadu_si512(&raw_values_simd[block_index * kNumStreams + i]);
- }
-
- for (size_t unpack_lvl = 0; unpack_lvl < KNumUnpack; ++unpack_lvl) {
- for (size_t i = 0; i < kNumStreams / 2U; ++i) {
- unpack[unpack_lvl + 1][i * 2] = _mm512_unpacklo_epi8(
- unpack[unpack_lvl][i * 2], unpack[unpack_lvl][i * 2 + 1]);
- unpack[unpack_lvl + 1][i * 2 + 1] = _mm512_unpackhi_epi8(
- unpack[unpack_lvl][i * 2], unpack[unpack_lvl][i * 2 + 1]);
- }
- }
-
- if (kNumStreams == 8U) {
- // path for double
- // 1. unpack to epi16 block
- // 2. permutexvar_epi16 to 128i block
- // 3. shuffle 128i to final 512i target, index:
- // {0x00, 0x04, 0x08, 0x0C}, {0x10, 0x14, 0x18, 0x1C},
- // {0x01, 0x05, 0x09, 0x0D}, {0x11, 0x15, 0x19, 0x1D},
- // {0x02, 0x06, 0x0A, 0x0E}, {0x12, 0x16, 0x1A, 0x1E},
- // {0x03, 0x07, 0x0B, 0x0F}, {0x13, 0x17, 0x1B, 0x1F},
- for (size_t i = 0; i < kNumStreams; ++i)
- permutex[i] = _mm512_permutexvar_epi16(permutex_mask, unpack[KNumUnpack][i]);
-
- __m512i shuffle[kNumStreams];
- shuffle[0] = _mm512_shuffle_i32x4(permutex[0], permutex[2], 0b01000100);
- shuffle[1] = _mm512_shuffle_i32x4(permutex[4], permutex[6], 0b01000100);
- shuffle[2] = _mm512_shuffle_i32x4(permutex[0], permutex[2], 0b11101110);
- shuffle[3] = _mm512_shuffle_i32x4(permutex[4], permutex[6], 0b11101110);
- shuffle[4] = _mm512_shuffle_i32x4(permutex[1], permutex[3], 0b01000100);
- shuffle[5] = _mm512_shuffle_i32x4(permutex[5], permutex[7], 0b01000100);
- shuffle[6] = _mm512_shuffle_i32x4(permutex[1], permutex[3], 0b11101110);
- shuffle[7] = _mm512_shuffle_i32x4(permutex[5], permutex[7], 0b11101110);
-
- final_result[0] = _mm512_shuffle_i32x4(shuffle[0], shuffle[1], 0b10001000);
- final_result[1] = _mm512_shuffle_i32x4(shuffle[0], shuffle[1], 0b11011101);
- final_result[2] = _mm512_shuffle_i32x4(shuffle[2], shuffle[3], 0b10001000);
- final_result[3] = _mm512_shuffle_i32x4(shuffle[2], shuffle[3], 0b11011101);
- final_result[4] = _mm512_shuffle_i32x4(shuffle[4], shuffle[5], 0b10001000);
- final_result[5] = _mm512_shuffle_i32x4(shuffle[4], shuffle[5], 0b11011101);
- final_result[6] = _mm512_shuffle_i32x4(shuffle[6], shuffle[7], 0b10001000);
- final_result[7] = _mm512_shuffle_i32x4(shuffle[6], shuffle[7], 0b11011101);
- } else {
- // Path for float.
- // 1. Processed hierarchically to 32i blocks using the unpack intrinsics.
- // 2. Pack 128i lanes using _mm512_permutexvar_epi32.
- // 3. Pack the final 512i block with _mm512_shuffle_i32x4.
- for (size_t i = 0; i < kNumStreams; ++i)
- permutex[i] = _mm512_permutexvar_epi32(permutex_mask, unpack[KNumUnpack][i]);
-
- final_result[0] = _mm512_shuffle_i32x4(permutex[0], permutex[2], 0b01000100);
- final_result[1] = _mm512_shuffle_i32x4(permutex[0], permutex[2], 0b11101110);
- final_result[2] = _mm512_shuffle_i32x4(permutex[1], permutex[3], 0b01000100);
- final_result[3] = _mm512_shuffle_i32x4(permutex[1], permutex[3], 0b11101110);
- }
-
- for (size_t i = 0; i < kNumStreams; ++i) {
- _mm512_storeu_si512(&output_buffer_streams[i][block_index], final_result[i]);
- }
- }
-}
-#endif // ARROW_HAVE_AVX512
-
-#if defined(ARROW_HAVE_SIMD_SPLIT)
-template <typename T>
-void inline ByteStreamSplitDecodeSimd(const uint8_t* data, int64_t num_values,
- int64_t stride, T* out) {
-#if defined(ARROW_HAVE_AVX512)
- return ByteStreamSplitDecodeAvx512(data, num_values, stride, out);
-#elif defined(ARROW_HAVE_AVX2)
- return ByteStreamSplitDecodeAvx2(data, num_values, stride, out);
-#elif defined(ARROW_HAVE_SSE4_2)
- return ByteStreamSplitDecodeSse2(data, num_values, stride, out);
-#else
-#error "ByteStreamSplitDecodeSimd not implemented"
-#endif
-}
-
-template <typename T>
-void inline ByteStreamSplitEncodeSimd(const uint8_t* raw_values, const size_t num_values,
- uint8_t* output_buffer_raw) {
-#if defined(ARROW_HAVE_AVX512)
- return ByteStreamSplitEncodeAvx512<T>(raw_values, num_values, output_buffer_raw);
-#elif defined(ARROW_HAVE_AVX2)
- return ByteStreamSplitEncodeAvx2<T>(raw_values, num_values, output_buffer_raw);
-#elif defined(ARROW_HAVE_SSE4_2)
- return ByteStreamSplitEncodeSse2<T>(raw_values, num_values, output_buffer_raw);
-#else
-#error "ByteStreamSplitEncodeSimd not implemented"
-#endif
-}
-#endif
-
-template <typename T>
-void ByteStreamSplitEncodeScalar(const uint8_t* raw_values, const size_t num_values,
- uint8_t* output_buffer_raw) {
- constexpr size_t kNumStreams = sizeof(T);
- for (size_t i = 0U; i < num_values; ++i) {
- for (size_t j = 0U; j < kNumStreams; ++j) {
- const uint8_t byte_in_value = raw_values[i * kNumStreams + j];
- output_buffer_raw[j * num_values + i] = byte_in_value;
- }
- }
-}
-
-template <typename T>
-void ByteStreamSplitDecodeScalar(const uint8_t* data, int64_t num_values, int64_t stride,
- T* out) {
- constexpr size_t kNumStreams = sizeof(T);
- auto output_buffer_raw = reinterpret_cast<uint8_t*>(out);
-
- for (int64_t i = 0; i < num_values; ++i) {
- for (size_t b = 0; b < kNumStreams; ++b) {
- const size_t byte_index = b * stride + i;
- output_buffer_raw[i * kNumStreams + b] = data[byte_index];
- }
- }
-}
-
-template <typename T>
-void inline ByteStreamSplitEncode(const uint8_t* raw_values, const size_t num_values,
- uint8_t* output_buffer_raw) {
-#if defined(ARROW_HAVE_SIMD_SPLIT)
- return ByteStreamSplitEncodeSimd<T>(raw_values, num_values, output_buffer_raw);
-#else
- return ByteStreamSplitEncodeScalar<T>(raw_values, num_values, output_buffer_raw);
-#endif
-}
-
-template <typename T>
-void inline ByteStreamSplitDecode(const uint8_t* data, int64_t num_values, int64_t stride,
- T* out) {
-#if defined(ARROW_HAVE_SIMD_SPLIT)
- return ByteStreamSplitDecodeSimd(data, num_values, stride, out);
-#else
- return ByteStreamSplitDecodeScalar(data, num_values, stride, out);
-#endif
-}
-
-} // namespace internal
-} // namespace util
-} // namespace arrow
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+
+#include "arrow/util/simd.h"
+#include "arrow/util/ubsan.h"
+
+#include <stdint.h>
+#include <algorithm>
+
+#ifdef ARROW_HAVE_SSE4_2
+// Enable SIMD for the ByteStreamSplit encoder/decoder
+#define ARROW_HAVE_SIMD_SPLIT
+#endif // ARROW_HAVE_SSE4_2
+
+namespace arrow {
+namespace util {
+namespace internal {
+
+#if defined(ARROW_HAVE_SSE4_2)
+template <typename T>
+void ByteStreamSplitDecodeSse2(const uint8_t* data, int64_t num_values, int64_t stride,
+ T* out) {
+ constexpr size_t kNumStreams = sizeof(T);
+ static_assert(kNumStreams == 4U || kNumStreams == 8U, "Invalid number of streams.");
+ constexpr size_t kNumStreamsLog2 = (kNumStreams == 8U ? 3U : 2U);
+
+ const int64_t size = num_values * sizeof(T);
+ constexpr int64_t kBlockSize = sizeof(__m128i) * kNumStreams;
+ const int64_t num_blocks = size / kBlockSize;
+ uint8_t* output_data = reinterpret_cast<uint8_t*>(out);
+
+ // First handle suffix.
+ // This helps catch cases where the SIMD-based processing overflows into
+ // the suffix, since a test would then almost surely fail.
+ const int64_t num_processed_elements = (num_blocks * kBlockSize) / kNumStreams;
+ for (int64_t i = num_processed_elements; i < num_values; ++i) {
+ uint8_t gathered_byte_data[kNumStreams];
+ for (size_t b = 0; b < kNumStreams; ++b) {
+ const size_t byte_index = b * stride + i;
+ gathered_byte_data[b] = data[byte_index];
+ }
+ out[i] = arrow::util::SafeLoadAs<T>(&gathered_byte_data[0]);
+ }
+
+ // The blocks get processed hierarchically using the unpack intrinsics.
+ // Example with four streams:
+ // Stage 1: AAAA BBBB CCCC DDDD
+ // Stage 2: ACAC ACAC BDBD BDBD
+ // Stage 3: ABCD ABCD ABCD ABCD
+ __m128i stage[kNumStreamsLog2 + 1U][kNumStreams];
+ constexpr size_t kNumStreamsHalf = kNumStreams / 2U;
+
+ for (int64_t i = 0; i < num_blocks; ++i) {
+ for (size_t j = 0; j < kNumStreams; ++j) {
+ stage[0][j] = _mm_loadu_si128(
+ reinterpret_cast<const __m128i*>(&data[i * sizeof(__m128i) + j * stride]));
+ }
+ for (size_t step = 0; step < kNumStreamsLog2; ++step) {
+ for (size_t j = 0; j < kNumStreamsHalf; ++j) {
+ stage[step + 1U][j * 2] =
+ _mm_unpacklo_epi8(stage[step][j], stage[step][kNumStreamsHalf + j]);
+ stage[step + 1U][j * 2 + 1U] =
+ _mm_unpackhi_epi8(stage[step][j], stage[step][kNumStreamsHalf + j]);
+ }
+ }
+ for (size_t j = 0; j < kNumStreams; ++j) {
+ _mm_storeu_si128(reinterpret_cast<__m128i*>(
+ &output_data[(i * kNumStreams + j) * sizeof(__m128i)]),
+ stage[kNumStreamsLog2][j]);
+ }
+ }
+}
+
+template <typename T>
+void ByteStreamSplitEncodeSse2(const uint8_t* raw_values, const size_t num_values,
+ uint8_t* output_buffer_raw) {
+ constexpr size_t kNumStreams = sizeof(T);
+ static_assert(kNumStreams == 4U || kNumStreams == 8U, "Invalid number of streams.");
+ __m128i stage[3][kNumStreams];
+ __m128i final_result[kNumStreams];
+
+ const size_t size = num_values * sizeof(T);
+ constexpr size_t kBlockSize = sizeof(__m128i) * kNumStreams;
+ const size_t num_blocks = size / kBlockSize;
+ const __m128i* raw_values_sse = reinterpret_cast<const __m128i*>(raw_values);
+ __m128i* output_buffer_streams[kNumStreams];
+ for (size_t i = 0; i < kNumStreams; ++i) {
+ output_buffer_streams[i] =
+ reinterpret_cast<__m128i*>(&output_buffer_raw[num_values * i]);
+ }
+
+ // First handle suffix.
+ const size_t num_processed_elements = (num_blocks * kBlockSize) / sizeof(T);
+ for (size_t i = num_processed_elements; i < num_values; ++i) {
+ for (size_t j = 0U; j < kNumStreams; ++j) {
+ const uint8_t byte_in_value = raw_values[i * kNumStreams + j];
+ output_buffer_raw[j * num_values + i] = byte_in_value;
+ }
+ }
+ // The current shuffling algorithm diverges for float and double types but the compiler
+ // should be able to remove the branch since only one path is taken for each template
+ // instantiation.
+ // Example run for floats:
+ // Step 0, copy:
+ // 0: ABCD ABCD ABCD ABCD 1: ABCD ABCD ABCD ABCD ...
+ // Step 1: _mm_unpacklo_epi8 and _mm_unpackhi_epi8:
+ //   0: AABB CCDD AABB CCDD 1: AABB CCDD AABB CCDD ...
+ // Step 2: _mm_unpacklo_epi8 and _mm_unpackhi_epi8:
+ //   0: AAAA BBBB CCCC DDDD 1: AAAA BBBB CCCC DDDD ...
+ // Step 3: _mm_unpacklo_epi8 and _mm_unpackhi_epi8:
+ //   0: AAAA AAAA BBBB BBBB 1: CCCC CCCC DDDD DDDD ...
+ // Step 4: _mm_unpacklo_epi64 and _mm_unpackhi_epi64:
+ // 0: AAAA AAAA AAAA AAAA 1: BBBB BBBB BBBB BBBB ...
+ for (size_t block_index = 0; block_index < num_blocks; ++block_index) {
+ // First copy the data to stage 0.
+ for (size_t i = 0; i < kNumStreams; ++i) {
+ stage[0][i] = _mm_loadu_si128(&raw_values_sse[block_index * kNumStreams + i]);
+ }
+
+ // The shuffling of bytes is performed through the unpack intrinsics.
+    // In my measurements this gives better performance than an
+    // implementation which uses the shuffle intrinsics.
+ for (size_t stage_lvl = 0; stage_lvl < 2U; ++stage_lvl) {
+ for (size_t i = 0; i < kNumStreams / 2U; ++i) {
+ stage[stage_lvl + 1][i * 2] =
+ _mm_unpacklo_epi8(stage[stage_lvl][i * 2], stage[stage_lvl][i * 2 + 1]);
+ stage[stage_lvl + 1][i * 2 + 1] =
+ _mm_unpackhi_epi8(stage[stage_lvl][i * 2], stage[stage_lvl][i * 2 + 1]);
+ }
+ }
+ if (kNumStreams == 8U) {
+ // This is the path for double.
+ __m128i tmp[8];
+ for (size_t i = 0; i < 4; ++i) {
+ tmp[i * 2] = _mm_unpacklo_epi32(stage[2][i], stage[2][i + 4]);
+ tmp[i * 2 + 1] = _mm_unpackhi_epi32(stage[2][i], stage[2][i + 4]);
+ }
+
+ for (size_t i = 0; i < 4; ++i) {
+ final_result[i * 2] = _mm_unpacklo_epi32(tmp[i], tmp[i + 4]);
+ final_result[i * 2 + 1] = _mm_unpackhi_epi32(tmp[i], tmp[i + 4]);
+ }
+ } else {
+      // This is the path for float.
+ __m128i tmp[4];
+ for (size_t i = 0; i < 2; ++i) {
+ tmp[i * 2] = _mm_unpacklo_epi8(stage[2][i * 2], stage[2][i * 2 + 1]);
+ tmp[i * 2 + 1] = _mm_unpackhi_epi8(stage[2][i * 2], stage[2][i * 2 + 1]);
+ }
+ for (size_t i = 0; i < 2; ++i) {
+ final_result[i * 2] = _mm_unpacklo_epi64(tmp[i], tmp[i + 2]);
+ final_result[i * 2 + 1] = _mm_unpackhi_epi64(tmp[i], tmp[i + 2]);
+ }
+ }
+ for (size_t i = 0; i < kNumStreams; ++i) {
+ _mm_storeu_si128(&output_buffer_streams[i][block_index], final_result[i]);
+ }
+ }
+}
+#endif // ARROW_HAVE_SSE4_2
+
+#if defined(ARROW_HAVE_AVX2)
+template <typename T>
+void ByteStreamSplitDecodeAvx2(const uint8_t* data, int64_t num_values, int64_t stride,
+ T* out) {
+ constexpr size_t kNumStreams = sizeof(T);
+ static_assert(kNumStreams == 4U || kNumStreams == 8U, "Invalid number of streams.");
+ constexpr size_t kNumStreamsLog2 = (kNumStreams == 8U ? 3U : 2U);
+
+ const int64_t size = num_values * sizeof(T);
+ constexpr int64_t kBlockSize = sizeof(__m256i) * kNumStreams;
+  if (size < kBlockSize) // Fall back to SSE for small sizes
+ return ByteStreamSplitDecodeSse2(data, num_values, stride, out);
+ const int64_t num_blocks = size / kBlockSize;
+ uint8_t* output_data = reinterpret_cast<uint8_t*>(out);
+
+ // First handle suffix.
+ const int64_t num_processed_elements = (num_blocks * kBlockSize) / kNumStreams;
+ for (int64_t i = num_processed_elements; i < num_values; ++i) {
+ uint8_t gathered_byte_data[kNumStreams];
+ for (size_t b = 0; b < kNumStreams; ++b) {
+ const size_t byte_index = b * stride + i;
+ gathered_byte_data[b] = data[byte_index];
+ }
+ out[i] = arrow::util::SafeLoadAs<T>(&gathered_byte_data[0]);
+ }
+
+  // The blocks get processed hierarchically using the unpack intrinsics,
+  // then the permute intrinsics.
+ __m256i stage[kNumStreamsLog2 + 1U][kNumStreams];
+ __m256i final_result[kNumStreams];
+ constexpr size_t kNumStreamsHalf = kNumStreams / 2U;
+
+ for (int64_t i = 0; i < num_blocks; ++i) {
+ for (size_t j = 0; j < kNumStreams; ++j) {
+ stage[0][j] = _mm256_loadu_si256(
+ reinterpret_cast<const __m256i*>(&data[i * sizeof(__m256i) + j * stride]));
+ }
+
+ for (size_t step = 0; step < kNumStreamsLog2; ++step) {
+ for (size_t j = 0; j < kNumStreamsHalf; ++j) {
+ stage[step + 1U][j * 2] =
+ _mm256_unpacklo_epi8(stage[step][j], stage[step][kNumStreamsHalf + j]);
+ stage[step + 1U][j * 2 + 1U] =
+ _mm256_unpackhi_epi8(stage[step][j], stage[step][kNumStreamsHalf + j]);
+ }
+ }
+
+ if (kNumStreams == 8U) {
+ // path for double, 128i index:
+ // {0x00, 0x08}, {0x01, 0x09}, {0x02, 0x0A}, {0x03, 0x0B},
+ // {0x04, 0x0C}, {0x05, 0x0D}, {0x06, 0x0E}, {0x07, 0x0F},
+ final_result[0] = _mm256_permute2x128_si256(stage[kNumStreamsLog2][0],
+ stage[kNumStreamsLog2][1], 0b00100000);
+ final_result[1] = _mm256_permute2x128_si256(stage[kNumStreamsLog2][2],
+ stage[kNumStreamsLog2][3], 0b00100000);
+ final_result[2] = _mm256_permute2x128_si256(stage[kNumStreamsLog2][4],
+ stage[kNumStreamsLog2][5], 0b00100000);
+ final_result[3] = _mm256_permute2x128_si256(stage[kNumStreamsLog2][6],
+ stage[kNumStreamsLog2][7], 0b00100000);
+ final_result[4] = _mm256_permute2x128_si256(stage[kNumStreamsLog2][0],
+ stage[kNumStreamsLog2][1], 0b00110001);
+ final_result[5] = _mm256_permute2x128_si256(stage[kNumStreamsLog2][2],
+ stage[kNumStreamsLog2][3], 0b00110001);
+ final_result[6] = _mm256_permute2x128_si256(stage[kNumStreamsLog2][4],
+ stage[kNumStreamsLog2][5], 0b00110001);
+ final_result[7] = _mm256_permute2x128_si256(stage[kNumStreamsLog2][6],
+ stage[kNumStreamsLog2][7], 0b00110001);
+ } else {
+ // path for float, 128i index:
+ // {0x00, 0x04}, {0x01, 0x05}, {0x02, 0x06}, {0x03, 0x07}
+ final_result[0] = _mm256_permute2x128_si256(stage[kNumStreamsLog2][0],
+ stage[kNumStreamsLog2][1], 0b00100000);
+ final_result[1] = _mm256_permute2x128_si256(stage[kNumStreamsLog2][2],
+ stage[kNumStreamsLog2][3], 0b00100000);
+ final_result[2] = _mm256_permute2x128_si256(stage[kNumStreamsLog2][0],
+ stage[kNumStreamsLog2][1], 0b00110001);
+ final_result[3] = _mm256_permute2x128_si256(stage[kNumStreamsLog2][2],
+ stage[kNumStreamsLog2][3], 0b00110001);
+ }
+
+ for (size_t j = 0; j < kNumStreams; ++j) {
+ _mm256_storeu_si256(reinterpret_cast<__m256i*>(
+ &output_data[(i * kNumStreams + j) * sizeof(__m256i)]),
+ final_result[j]);
+ }
+ }
+}
+
+template <typename T>
+void ByteStreamSplitEncodeAvx2(const uint8_t* raw_values, const size_t num_values,
+ uint8_t* output_buffer_raw) {
+ constexpr size_t kNumStreams = sizeof(T);
+ static_assert(kNumStreams == 4U || kNumStreams == 8U, "Invalid number of streams.");
+  if (kNumStreams == 8U) // Fall back to SSE; there is currently no AVX2 path for double.
+ return ByteStreamSplitEncodeSse2<T>(raw_values, num_values, output_buffer_raw);
+
+ const size_t size = num_values * sizeof(T);
+ constexpr size_t kBlockSize = sizeof(__m256i) * kNumStreams;
+  if (size < kBlockSize) // Fall back to SSE for small sizes
+ return ByteStreamSplitEncodeSse2<T>(raw_values, num_values, output_buffer_raw);
+ const size_t num_blocks = size / kBlockSize;
+ const __m256i* raw_values_simd = reinterpret_cast<const __m256i*>(raw_values);
+ __m256i* output_buffer_streams[kNumStreams];
+
+ for (size_t i = 0; i < kNumStreams; ++i) {
+ output_buffer_streams[i] =
+ reinterpret_cast<__m256i*>(&output_buffer_raw[num_values * i]);
+ }
+
+ // First handle suffix.
+ const size_t num_processed_elements = (num_blocks * kBlockSize) / sizeof(T);
+ for (size_t i = num_processed_elements; i < num_values; ++i) {
+ for (size_t j = 0U; j < kNumStreams; ++j) {
+ const uint8_t byte_in_value = raw_values[i * kNumStreams + j];
+ output_buffer_raw[j * num_values + i] = byte_in_value;
+ }
+ }
+
+  // Path for float.
+  // 1. Process hierarchically to 32i blocks using the unpack intrinsics.
+  // 2. Pack 128i blocks using _mm256_permutevar8x32_epi32.
+  // 3. Pack the final 256i blocks with _mm256_permute2x128_si256.
+ constexpr size_t kNumUnpack = 3U;
+ __m256i stage[kNumUnpack + 1][kNumStreams];
+ static const __m256i kPermuteMask =
+ _mm256_set_epi32(0x07, 0x03, 0x06, 0x02, 0x05, 0x01, 0x04, 0x00);
+ __m256i permute[kNumStreams];
+ __m256i final_result[kNumStreams];
+
+ for (size_t block_index = 0; block_index < num_blocks; ++block_index) {
+ for (size_t i = 0; i < kNumStreams; ++i) {
+ stage[0][i] = _mm256_loadu_si256(&raw_values_simd[block_index * kNumStreams + i]);
+ }
+
+ for (size_t stage_lvl = 0; stage_lvl < kNumUnpack; ++stage_lvl) {
+ for (size_t i = 0; i < kNumStreams / 2U; ++i) {
+ stage[stage_lvl + 1][i * 2] =
+ _mm256_unpacklo_epi8(stage[stage_lvl][i * 2], stage[stage_lvl][i * 2 + 1]);
+ stage[stage_lvl + 1][i * 2 + 1] =
+ _mm256_unpackhi_epi8(stage[stage_lvl][i * 2], stage[stage_lvl][i * 2 + 1]);
+ }
+ }
+
+ for (size_t i = 0; i < kNumStreams; ++i) {
+ permute[i] = _mm256_permutevar8x32_epi32(stage[kNumUnpack][i], kPermuteMask);
+ }
+
+ final_result[0] = _mm256_permute2x128_si256(permute[0], permute[2], 0b00100000);
+ final_result[1] = _mm256_permute2x128_si256(permute[0], permute[2], 0b00110001);
+ final_result[2] = _mm256_permute2x128_si256(permute[1], permute[3], 0b00100000);
+ final_result[3] = _mm256_permute2x128_si256(permute[1], permute[3], 0b00110001);
+
+ for (size_t i = 0; i < kNumStreams; ++i) {
+ _mm256_storeu_si256(&output_buffer_streams[i][block_index], final_result[i]);
+ }
+ }
+}
+#endif // ARROW_HAVE_AVX2
+
+#if defined(ARROW_HAVE_AVX512)
+template <typename T>
+void ByteStreamSplitDecodeAvx512(const uint8_t* data, int64_t num_values, int64_t stride,
+ T* out) {
+ constexpr size_t kNumStreams = sizeof(T);
+ static_assert(kNumStreams == 4U || kNumStreams == 8U, "Invalid number of streams.");
+ constexpr size_t kNumStreamsLog2 = (kNumStreams == 8U ? 3U : 2U);
+
+ const int64_t size = num_values * sizeof(T);
+ constexpr int64_t kBlockSize = sizeof(__m512i) * kNumStreams;
+  if (size < kBlockSize) // Fall back to AVX2 for small sizes
+ return ByteStreamSplitDecodeAvx2(data, num_values, stride, out);
+ const int64_t num_blocks = size / kBlockSize;
+ uint8_t* output_data = reinterpret_cast<uint8_t*>(out);
+
+ // First handle suffix.
+ const int64_t num_processed_elements = (num_blocks * kBlockSize) / kNumStreams;
+ for (int64_t i = num_processed_elements; i < num_values; ++i) {
+ uint8_t gathered_byte_data[kNumStreams];
+ for (size_t b = 0; b < kNumStreams; ++b) {
+ const size_t byte_index = b * stride + i;
+ gathered_byte_data[b] = data[byte_index];
+ }
+ out[i] = arrow::util::SafeLoadAs<T>(&gathered_byte_data[0]);
+ }
+
+  // The blocks get processed hierarchically using the unpack intrinsics,
+  // then two 128-bit lane shuffles.
+ __m512i stage[kNumStreamsLog2 + 1U][kNumStreams];
+ __m512i shuffle[kNumStreams];
+ __m512i final_result[kNumStreams];
+ constexpr size_t kNumStreamsHalf = kNumStreams / 2U;
+
+ for (int64_t i = 0; i < num_blocks; ++i) {
+ for (size_t j = 0; j < kNumStreams; ++j) {
+ stage[0][j] = _mm512_loadu_si512(
+ reinterpret_cast<const __m512i*>(&data[i * sizeof(__m512i) + j * stride]));
+ }
+
+ for (size_t step = 0; step < kNumStreamsLog2; ++step) {
+ for (size_t j = 0; j < kNumStreamsHalf; ++j) {
+ stage[step + 1U][j * 2] =
+ _mm512_unpacklo_epi8(stage[step][j], stage[step][kNumStreamsHalf + j]);
+ stage[step + 1U][j * 2 + 1U] =
+ _mm512_unpackhi_epi8(stage[step][j], stage[step][kNumStreamsHalf + j]);
+ }
+ }
+
+ if (kNumStreams == 8U) {
+ // path for double, 128i index:
+ // {0x00, 0x04, 0x08, 0x0C}, {0x10, 0x14, 0x18, 0x1C},
+ // {0x01, 0x05, 0x09, 0x0D}, {0x11, 0x15, 0x19, 0x1D},
+ // {0x02, 0x06, 0x0A, 0x0E}, {0x12, 0x16, 0x1A, 0x1E},
+ // {0x03, 0x07, 0x0B, 0x0F}, {0x13, 0x17, 0x1B, 0x1F},
+ shuffle[0] = _mm512_shuffle_i32x4(stage[kNumStreamsLog2][0],
+ stage[kNumStreamsLog2][1], 0b01000100);
+ shuffle[1] = _mm512_shuffle_i32x4(stage[kNumStreamsLog2][2],
+ stage[kNumStreamsLog2][3], 0b01000100);
+ shuffle[2] = _mm512_shuffle_i32x4(stage[kNumStreamsLog2][4],
+ stage[kNumStreamsLog2][5], 0b01000100);
+ shuffle[3] = _mm512_shuffle_i32x4(stage[kNumStreamsLog2][6],
+ stage[kNumStreamsLog2][7], 0b01000100);
+ shuffle[4] = _mm512_shuffle_i32x4(stage[kNumStreamsLog2][0],
+ stage[kNumStreamsLog2][1], 0b11101110);
+ shuffle[5] = _mm512_shuffle_i32x4(stage[kNumStreamsLog2][2],
+ stage[kNumStreamsLog2][3], 0b11101110);
+ shuffle[6] = _mm512_shuffle_i32x4(stage[kNumStreamsLog2][4],
+ stage[kNumStreamsLog2][5], 0b11101110);
+ shuffle[7] = _mm512_shuffle_i32x4(stage[kNumStreamsLog2][6],
+ stage[kNumStreamsLog2][7], 0b11101110);
+
+ final_result[0] = _mm512_shuffle_i32x4(shuffle[0], shuffle[1], 0b10001000);
+ final_result[1] = _mm512_shuffle_i32x4(shuffle[2], shuffle[3], 0b10001000);
+ final_result[2] = _mm512_shuffle_i32x4(shuffle[0], shuffle[1], 0b11011101);
+ final_result[3] = _mm512_shuffle_i32x4(shuffle[2], shuffle[3], 0b11011101);
+ final_result[4] = _mm512_shuffle_i32x4(shuffle[4], shuffle[5], 0b10001000);
+ final_result[5] = _mm512_shuffle_i32x4(shuffle[6], shuffle[7], 0b10001000);
+ final_result[6] = _mm512_shuffle_i32x4(shuffle[4], shuffle[5], 0b11011101);
+ final_result[7] = _mm512_shuffle_i32x4(shuffle[6], shuffle[7], 0b11011101);
+ } else {
+ // path for float, 128i index:
+ // {0x00, 0x04, 0x08, 0x0C}, {0x01, 0x05, 0x09, 0x0D}
+ // {0x02, 0x06, 0x0A, 0x0E}, {0x03, 0x07, 0x0B, 0x0F},
+ shuffle[0] = _mm512_shuffle_i32x4(stage[kNumStreamsLog2][0],
+ stage[kNumStreamsLog2][1], 0b01000100);
+ shuffle[1] = _mm512_shuffle_i32x4(stage[kNumStreamsLog2][2],
+ stage[kNumStreamsLog2][3], 0b01000100);
+ shuffle[2] = _mm512_shuffle_i32x4(stage[kNumStreamsLog2][0],
+ stage[kNumStreamsLog2][1], 0b11101110);
+ shuffle[3] = _mm512_shuffle_i32x4(stage[kNumStreamsLog2][2],
+ stage[kNumStreamsLog2][3], 0b11101110);
+
+ final_result[0] = _mm512_shuffle_i32x4(shuffle[0], shuffle[1], 0b10001000);
+ final_result[1] = _mm512_shuffle_i32x4(shuffle[0], shuffle[1], 0b11011101);
+ final_result[2] = _mm512_shuffle_i32x4(shuffle[2], shuffle[3], 0b10001000);
+ final_result[3] = _mm512_shuffle_i32x4(shuffle[2], shuffle[3], 0b11011101);
+ }
+
+ for (size_t j = 0; j < kNumStreams; ++j) {
+ _mm512_storeu_si512(reinterpret_cast<__m512i*>(
+ &output_data[(i * kNumStreams + j) * sizeof(__m512i)]),
+ final_result[j]);
+ }
+ }
+}
+
+template <typename T>
+void ByteStreamSplitEncodeAvx512(const uint8_t* raw_values, const size_t num_values,
+ uint8_t* output_buffer_raw) {
+ constexpr size_t kNumStreams = sizeof(T);
+ static_assert(kNumStreams == 4U || kNumStreams == 8U, "Invalid number of streams.");
+ const size_t size = num_values * sizeof(T);
+ constexpr size_t kBlockSize = sizeof(__m512i) * kNumStreams;
+  if (size < kBlockSize) // Fall back to AVX2 for small sizes
+ return ByteStreamSplitEncodeAvx2<T>(raw_values, num_values, output_buffer_raw);
+
+ const size_t num_blocks = size / kBlockSize;
+ const __m512i* raw_values_simd = reinterpret_cast<const __m512i*>(raw_values);
+ __m512i* output_buffer_streams[kNumStreams];
+ for (size_t i = 0; i < kNumStreams; ++i) {
+ output_buffer_streams[i] =
+ reinterpret_cast<__m512i*>(&output_buffer_raw[num_values * i]);
+ }
+
+ // First handle suffix.
+ const size_t num_processed_elements = (num_blocks * kBlockSize) / sizeof(T);
+ for (size_t i = num_processed_elements; i < num_values; ++i) {
+ for (size_t j = 0U; j < kNumStreams; ++j) {
+ const uint8_t byte_in_value = raw_values[i * kNumStreams + j];
+ output_buffer_raw[j * num_values + i] = byte_in_value;
+ }
+ }
+
+  constexpr size_t kNumUnpack = (kNumStreams == 8U) ? 2U : 3U;
+  __m512i final_result[kNumStreams];
+  __m512i unpack[kNumUnpack + 1][kNumStreams];
+ __m512i permutex[kNumStreams];
+ __m512i permutex_mask;
+ if (kNumStreams == 8U) {
+    // Use _mm512_set_epi32: some older gcc versions do not provide _mm512_set_epi16.
+ permutex_mask = _mm512_set_epi32(0x001F0017, 0x000F0007, 0x001E0016, 0x000E0006,
+ 0x001D0015, 0x000D0005, 0x001C0014, 0x000C0004,
+ 0x001B0013, 0x000B0003, 0x001A0012, 0x000A0002,
+ 0x00190011, 0x00090001, 0x00180010, 0x00080000);
+ } else {
+ permutex_mask = _mm512_set_epi32(0x0F, 0x0B, 0x07, 0x03, 0x0E, 0x0A, 0x06, 0x02, 0x0D,
+ 0x09, 0x05, 0x01, 0x0C, 0x08, 0x04, 0x00);
+ }
+
+ for (size_t block_index = 0; block_index < num_blocks; ++block_index) {
+ for (size_t i = 0; i < kNumStreams; ++i) {
+ unpack[0][i] = _mm512_loadu_si512(&raw_values_simd[block_index * kNumStreams + i]);
+ }
+
+    for (size_t unpack_lvl = 0; unpack_lvl < kNumUnpack; ++unpack_lvl) {
+ for (size_t i = 0; i < kNumStreams / 2U; ++i) {
+ unpack[unpack_lvl + 1][i * 2] = _mm512_unpacklo_epi8(
+ unpack[unpack_lvl][i * 2], unpack[unpack_lvl][i * 2 + 1]);
+ unpack[unpack_lvl + 1][i * 2 + 1] = _mm512_unpackhi_epi8(
+ unpack[unpack_lvl][i * 2], unpack[unpack_lvl][i * 2 + 1]);
+ }
+ }
+
+ if (kNumStreams == 8U) {
+ // path for double
+ // 1. unpack to epi16 block
+ // 2. permutexvar_epi16 to 128i block
+ // 3. shuffle 128i to final 512i target, index:
+ // {0x00, 0x04, 0x08, 0x0C}, {0x10, 0x14, 0x18, 0x1C},
+ // {0x01, 0x05, 0x09, 0x0D}, {0x11, 0x15, 0x19, 0x1D},
+ // {0x02, 0x06, 0x0A, 0x0E}, {0x12, 0x16, 0x1A, 0x1E},
+ // {0x03, 0x07, 0x0B, 0x0F}, {0x13, 0x17, 0x1B, 0x1F},
+ for (size_t i = 0; i < kNumStreams; ++i)
+        permutex[i] = _mm512_permutexvar_epi16(permutex_mask, unpack[kNumUnpack][i]);
+
+ __m512i shuffle[kNumStreams];
+ shuffle[0] = _mm512_shuffle_i32x4(permutex[0], permutex[2], 0b01000100);
+ shuffle[1] = _mm512_shuffle_i32x4(permutex[4], permutex[6], 0b01000100);
+ shuffle[2] = _mm512_shuffle_i32x4(permutex[0], permutex[2], 0b11101110);
+ shuffle[3] = _mm512_shuffle_i32x4(permutex[4], permutex[6], 0b11101110);
+ shuffle[4] = _mm512_shuffle_i32x4(permutex[1], permutex[3], 0b01000100);
+ shuffle[5] = _mm512_shuffle_i32x4(permutex[5], permutex[7], 0b01000100);
+ shuffle[6] = _mm512_shuffle_i32x4(permutex[1], permutex[3], 0b11101110);
+ shuffle[7] = _mm512_shuffle_i32x4(permutex[5], permutex[7], 0b11101110);
+
+ final_result[0] = _mm512_shuffle_i32x4(shuffle[0], shuffle[1], 0b10001000);
+ final_result[1] = _mm512_shuffle_i32x4(shuffle[0], shuffle[1], 0b11011101);
+ final_result[2] = _mm512_shuffle_i32x4(shuffle[2], shuffle[3], 0b10001000);
+ final_result[3] = _mm512_shuffle_i32x4(shuffle[2], shuffle[3], 0b11011101);
+ final_result[4] = _mm512_shuffle_i32x4(shuffle[4], shuffle[5], 0b10001000);
+ final_result[5] = _mm512_shuffle_i32x4(shuffle[4], shuffle[5], 0b11011101);
+ final_result[6] = _mm512_shuffle_i32x4(shuffle[6], shuffle[7], 0b10001000);
+ final_result[7] = _mm512_shuffle_i32x4(shuffle[6], shuffle[7], 0b11011101);
+ } else {
+      // Path for float.
+      // 1. Process hierarchically to 32i blocks using the unpack intrinsics.
+      // 2. Pack 128i blocks using _mm512_permutexvar_epi32.
+      // 3. Pack the final 512i blocks with _mm512_shuffle_i32x4.
+      for (size_t i = 0; i < kNumStreams; ++i)
+        permutex[i] = _mm512_permutexvar_epi32(permutex_mask, unpack[kNumUnpack][i]);
+
+ final_result[0] = _mm512_shuffle_i32x4(permutex[0], permutex[2], 0b01000100);
+ final_result[1] = _mm512_shuffle_i32x4(permutex[0], permutex[2], 0b11101110);
+ final_result[2] = _mm512_shuffle_i32x4(permutex[1], permutex[3], 0b01000100);
+ final_result[3] = _mm512_shuffle_i32x4(permutex[1], permutex[3], 0b11101110);
+ }
+
+ for (size_t i = 0; i < kNumStreams; ++i) {
+ _mm512_storeu_si512(&output_buffer_streams[i][block_index], final_result[i]);
+ }
+ }
+}
+#endif // ARROW_HAVE_AVX512
+
+#if defined(ARROW_HAVE_SIMD_SPLIT)
+template <typename T>
+void inline ByteStreamSplitDecodeSimd(const uint8_t* data, int64_t num_values,
+ int64_t stride, T* out) {
+#if defined(ARROW_HAVE_AVX512)
+ return ByteStreamSplitDecodeAvx512(data, num_values, stride, out);
+#elif defined(ARROW_HAVE_AVX2)
+ return ByteStreamSplitDecodeAvx2(data, num_values, stride, out);
+#elif defined(ARROW_HAVE_SSE4_2)
+ return ByteStreamSplitDecodeSse2(data, num_values, stride, out);
+#else
+#error "ByteStreamSplitDecodeSimd not implemented"
+#endif
+}
+
+template <typename T>
+void inline ByteStreamSplitEncodeSimd(const uint8_t* raw_values, const size_t num_values,
+ uint8_t* output_buffer_raw) {
+#if defined(ARROW_HAVE_AVX512)
+ return ByteStreamSplitEncodeAvx512<T>(raw_values, num_values, output_buffer_raw);
+#elif defined(ARROW_HAVE_AVX2)
+ return ByteStreamSplitEncodeAvx2<T>(raw_values, num_values, output_buffer_raw);
+#elif defined(ARROW_HAVE_SSE4_2)
+ return ByteStreamSplitEncodeSse2<T>(raw_values, num_values, output_buffer_raw);
+#else
+#error "ByteStreamSplitEncodeSimd not implemented"
+#endif
+}
+#endif
+
+template <typename T>
+void ByteStreamSplitEncodeScalar(const uint8_t* raw_values, const size_t num_values,
+ uint8_t* output_buffer_raw) {
+ constexpr size_t kNumStreams = sizeof(T);
+ for (size_t i = 0U; i < num_values; ++i) {
+ for (size_t j = 0U; j < kNumStreams; ++j) {
+ const uint8_t byte_in_value = raw_values[i * kNumStreams + j];
+ output_buffer_raw[j * num_values + i] = byte_in_value;
+ }
+ }
+}
+
+template <typename T>
+void ByteStreamSplitDecodeScalar(const uint8_t* data, int64_t num_values, int64_t stride,
+ T* out) {
+ constexpr size_t kNumStreams = sizeof(T);
+ auto output_buffer_raw = reinterpret_cast<uint8_t*>(out);
+
+ for (int64_t i = 0; i < num_values; ++i) {
+ for (size_t b = 0; b < kNumStreams; ++b) {
+ const size_t byte_index = b * stride + i;
+ output_buffer_raw[i * kNumStreams + b] = data[byte_index];
+ }
+ }
+}
+
+template <typename T>
+void inline ByteStreamSplitEncode(const uint8_t* raw_values, const size_t num_values,
+ uint8_t* output_buffer_raw) {
+#if defined(ARROW_HAVE_SIMD_SPLIT)
+ return ByteStreamSplitEncodeSimd<T>(raw_values, num_values, output_buffer_raw);
+#else
+ return ByteStreamSplitEncodeScalar<T>(raw_values, num_values, output_buffer_raw);
+#endif
+}
+
+template <typename T>
+void inline ByteStreamSplitDecode(const uint8_t* data, int64_t num_values, int64_t stride,
+ T* out) {
+#if defined(ARROW_HAVE_SIMD_SPLIT)
+ return ByteStreamSplitDecodeSimd(data, num_values, stride, out);
+#else
+ return ByteStreamSplitDecodeScalar(data, num_values, stride, out);
+#endif
+}
+
+} // namespace internal
+} // namespace util
+} // namespace arrow
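Taken together, the encoder scatters byte j of value i to stream j at offset i, and the decoder gathers those bytes back; for a freshly encoded buffer the stride between streams is simply num_values. Below is a self-contained sketch of that round trip mirroring the scalar paths above (the names here are illustrative, not the Arrow API):

#include <cassert>
#include <cstdint>
#include <cstring>
#include <vector>

// Scatter: byte j of value i goes to stream j at offset i.
template <typename T>
void EncodeScalar(const uint8_t* raw, size_t n, uint8_t* out) {
  constexpr size_t kNumStreams = sizeof(T);
  for (size_t i = 0; i < n; ++i)
    for (size_t j = 0; j < kNumStreams; ++j)
      out[j * n + i] = raw[i * kNumStreams + j];
}

// Gather: reassemble value i from byte i of each of the sizeof(T) streams.
template <typename T>
void DecodeScalar(const uint8_t* data, int64_t n, int64_t stride, T* out) {
  constexpr size_t kNumStreams = sizeof(T);
  auto* raw_out = reinterpret_cast<uint8_t*>(out);
  for (int64_t i = 0; i < n; ++i)
    for (size_t b = 0; b < kNumStreams; ++b)
      raw_out[i * kNumStreams + b] = data[b * stride + i];
}

int main() {
  const std::vector<float> values = {1.0f, -2.5f, 3.25f, 0.125f};
  std::vector<uint8_t> split(values.size() * sizeof(float));
  EncodeScalar<float>(reinterpret_cast<const uint8_t*>(values.data()),
                      values.size(), split.data());
  std::vector<float> round_trip(values.size());
  // For a freshly encoded buffer the stride between streams is num_values.
  DecodeScalar<float>(split.data(), static_cast<int64_t>(values.size()),
                      static_cast<int64_t>(values.size()), round_trip.data());
  assert(std::memcmp(values.data(), round_trip.data(),
                     values.size() * sizeof(float)) == 0);
  return 0;
}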
diff --git a/contrib/libs/apache/arrow/cpp/src/arrow/util/cancel.cc b/contrib/libs/apache/arrow/cpp/src/arrow/util/cancel.cc
index 671280a0a17..874b2c2c886 100644
--- a/contrib/libs/apache/arrow/cpp/src/arrow/util/cancel.cc
+++ b/contrib/libs/apache/arrow/cpp/src/arrow/util/cancel.cc
@@ -1,226 +1,226 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-#include "arrow/util/cancel.h"
-
-#include <atomic>
-#include <mutex>
-#include <sstream>
-#include <utility>
-
-#include "arrow/result.h"
-#include "arrow/util/atomic_shared_ptr.h"
-#include "arrow/util/io_util.h"
-#include "arrow/util/logging.h"
-#include "arrow/util/visibility.h"
-
-namespace arrow {
-
-#if ATOMIC_INT_LOCK_FREE != 2
-#error Lock-free atomic int required for signal safety
-#endif
-
-using internal::ReinstateSignalHandler;
-using internal::SetSignalHandler;
-using internal::SignalHandler;
-
-// NOTE: We care mainly about making the common case (not cancelled) fast.
-
-struct StopSourceImpl {
- std::atomic<int> requested_{0}; // will be -1 or signal number if requested
- std::mutex mutex_;
- Status cancel_error_;
-};
-
-StopSource::StopSource() : impl_(new StopSourceImpl) {}
-
-StopSource::~StopSource() = default;
-
-void StopSource::RequestStop() { RequestStop(Status::Cancelled("Operation cancelled")); }
-
-void StopSource::RequestStop(Status st) {
- std::lock_guard<std::mutex> lock(impl_->mutex_);
- DCHECK(!st.ok());
- if (!impl_->requested_) {
- impl_->requested_ = -1;
- impl_->cancel_error_ = std::move(st);
- }
-}
-
-void StopSource::RequestStopFromSignal(int signum) {
- // Only async-signal-safe code allowed here
- impl_->requested_.store(signum);
-}
-
-void StopSource::Reset() {
- std::lock_guard<std::mutex> lock(impl_->mutex_);
- impl_->cancel_error_ = Status::OK();
- impl_->requested_.store(0);
-}
-
-StopToken StopSource::token() { return StopToken(impl_); }
-
-bool StopToken::IsStopRequested() const {
- if (!impl_) {
- return false;
- }
- return impl_->requested_.load() != 0;
-}
-
-Status StopToken::Poll() const {
- if (!impl_) {
- return Status::OK();
- }
- if (!impl_->requested_.load()) {
- return Status::OK();
- }
-
- std::lock_guard<std::mutex> lock(impl_->mutex_);
- if (impl_->cancel_error_.ok()) {
- auto signum = impl_->requested_.load();
- DCHECK_GT(signum, 0);
- impl_->cancel_error_ = internal::CancelledFromSignal(signum, "Operation cancelled");
- }
- return impl_->cancel_error_;
-}
-
-namespace {
-
-struct SignalStopState {
- struct SavedSignalHandler {
- int signum;
- SignalHandler handler;
- };
-
- Status RegisterHandlers(const std::vector<int>& signals) {
- if (!saved_handlers_.empty()) {
- return Status::Invalid("Signal handlers already registered");
- }
- for (int signum : signals) {
- ARROW_ASSIGN_OR_RAISE(auto handler,
- SetSignalHandler(signum, SignalHandler{&HandleSignal}));
- saved_handlers_.push_back({signum, handler});
- }
- return Status::OK();
- }
-
- void UnregisterHandlers() {
- auto handlers = std::move(saved_handlers_);
- for (const auto& h : handlers) {
- ARROW_CHECK_OK(SetSignalHandler(h.signum, h.handler).status());
- }
- }
-
- ~SignalStopState() {
- UnregisterHandlers();
- Disable();
- }
-
- StopSource* stop_source() { return stop_source_.get(); }
-
- bool enabled() { return stop_source_ != nullptr; }
-
- void Enable() {
- // Before creating a new StopSource, delete any lingering reference to
- // the previous one in the trash can. See DoHandleSignal() for details.
- EmptyTrashCan();
- internal::atomic_store(&stop_source_, std::make_shared<StopSource>());
- }
-
- void Disable() { internal::atomic_store(&stop_source_, NullSource()); }
-
- static SignalStopState* instance() { return &instance_; }
-
- private:
- // For readability
- std::shared_ptr<StopSource> NullSource() { return nullptr; }
-
- void EmptyTrashCan() { internal::atomic_store(&trash_can_, NullSource()); }
-
- static void HandleSignal(int signum) { instance_.DoHandleSignal(signum); }
-
- void DoHandleSignal(int signum) {
- // async-signal-safe code only
- auto source = internal::atomic_load(&stop_source_);
- if (source) {
- source->RequestStopFromSignal(signum);
- // Disable() may have been called in the meantime, but we can't
- // deallocate a shared_ptr here, so instead move it to a "trash can".
- // This minimizes the possibility of running a deallocator here,
-      // though it doesn't entirely preclude it.
- //
- // Possible case:
- // - a signal handler (A) starts running, fetches the current source
- // - Disable() then Enable() are called, emptying the trash can and
- // replacing the current source
- // - a signal handler (B) starts running, fetches the current source
- // - signal handler A resumes, moves its source (the old source) into
- // the trash can (the only remaining reference)
- // - signal handler B resumes, moves its source (the current source)
- // into the trash can. This triggers deallocation of the old source,
- // since the trash can had the only remaining reference to it.
- //
- // This case should be sufficiently unlikely, but we cannot entirely
- // rule it out. The problem might be solved properly with a lock-free
- // linked list of StopSources.
- internal::atomic_store(&trash_can_, std::move(source));
- }
- ReinstateSignalHandler(signum, &HandleSignal);
- }
-
- std::shared_ptr<StopSource> stop_source_;
- std::shared_ptr<StopSource> trash_can_;
-
- std::vector<SavedSignalHandler> saved_handlers_;
-
- static SignalStopState instance_;
-};
-
-SignalStopState SignalStopState::instance_{};
-
-} // namespace
-
-Result<StopSource*> SetSignalStopSource() {
- auto stop_state = SignalStopState::instance();
- if (stop_state->enabled()) {
- return Status::Invalid("Signal stop source already set up");
- }
- stop_state->Enable();
- return stop_state->stop_source();
-}
-
-void ResetSignalStopSource() {
- auto stop_state = SignalStopState::instance();
- DCHECK(stop_state->enabled());
- stop_state->Disable();
-}
-
-Status RegisterCancellingSignalHandler(const std::vector<int>& signals) {
- auto stop_state = SignalStopState::instance();
- if (!stop_state->enabled()) {
- return Status::Invalid("Signal stop source was not set up");
- }
- return stop_state->RegisterHandlers(signals);
-}
-
-void UnregisterCancellingSignalHandler() {
- auto stop_state = SignalStopState::instance();
- DCHECK(stop_state->enabled());
- stop_state->UnregisterHandlers();
-}
-
-} // namespace arrow
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "arrow/util/cancel.h"
+
+#include <atomic>
+#include <mutex>
+#include <sstream>
+#include <utility>
+
+#include "arrow/result.h"
+#include "arrow/util/atomic_shared_ptr.h"
+#include "arrow/util/io_util.h"
+#include "arrow/util/logging.h"
+#include "arrow/util/visibility.h"
+
+namespace arrow {
+
+#if ATOMIC_INT_LOCK_FREE != 2
+#error Lock-free atomic int required for signal safety
+#endif
+
+using internal::ReinstateSignalHandler;
+using internal::SetSignalHandler;
+using internal::SignalHandler;
+
+// NOTE: We care mainly about making the common case (not cancelled) fast.
+
+struct StopSourceImpl {
+ std::atomic<int> requested_{0}; // will be -1 or signal number if requested
+ std::mutex mutex_;
+ Status cancel_error_;
+};
+
+StopSource::StopSource() : impl_(new StopSourceImpl) {}
+
+StopSource::~StopSource() = default;
+
+void StopSource::RequestStop() { RequestStop(Status::Cancelled("Operation cancelled")); }
+
+void StopSource::RequestStop(Status st) {
+ std::lock_guard<std::mutex> lock(impl_->mutex_);
+ DCHECK(!st.ok());
+ if (!impl_->requested_) {
+ impl_->requested_ = -1;
+ impl_->cancel_error_ = std::move(st);
+ }
+}
+
+void StopSource::RequestStopFromSignal(int signum) {
+ // Only async-signal-safe code allowed here
+ impl_->requested_.store(signum);
+}
+
+void StopSource::Reset() {
+ std::lock_guard<std::mutex> lock(impl_->mutex_);
+ impl_->cancel_error_ = Status::OK();
+ impl_->requested_.store(0);
+}
+
+StopToken StopSource::token() { return StopToken(impl_); }
+
+bool StopToken::IsStopRequested() const {
+ if (!impl_) {
+ return false;
+ }
+ return impl_->requested_.load() != 0;
+}
+
+Status StopToken::Poll() const {
+ if (!impl_) {
+ return Status::OK();
+ }
+ if (!impl_->requested_.load()) {
+ return Status::OK();
+ }
+
+ std::lock_guard<std::mutex> lock(impl_->mutex_);
+ if (impl_->cancel_error_.ok()) {
+ auto signum = impl_->requested_.load();
+ DCHECK_GT(signum, 0);
+ impl_->cancel_error_ = internal::CancelledFromSignal(signum, "Operation cancelled");
+ }
+ return impl_->cancel_error_;
+}
+
+namespace {
+
+struct SignalStopState {
+ struct SavedSignalHandler {
+ int signum;
+ SignalHandler handler;
+ };
+
+ Status RegisterHandlers(const std::vector<int>& signals) {
+ if (!saved_handlers_.empty()) {
+ return Status::Invalid("Signal handlers already registered");
+ }
+ for (int signum : signals) {
+ ARROW_ASSIGN_OR_RAISE(auto handler,
+ SetSignalHandler(signum, SignalHandler{&HandleSignal}));
+ saved_handlers_.push_back({signum, handler});
+ }
+ return Status::OK();
+ }
+
+ void UnregisterHandlers() {
+ auto handlers = std::move(saved_handlers_);
+ for (const auto& h : handlers) {
+ ARROW_CHECK_OK(SetSignalHandler(h.signum, h.handler).status());
+ }
+ }
+
+ ~SignalStopState() {
+ UnregisterHandlers();
+ Disable();
+ }
+
+ StopSource* stop_source() { return stop_source_.get(); }
+
+ bool enabled() { return stop_source_ != nullptr; }
+
+ void Enable() {
+ // Before creating a new StopSource, delete any lingering reference to
+ // the previous one in the trash can. See DoHandleSignal() for details.
+ EmptyTrashCan();
+ internal::atomic_store(&stop_source_, std::make_shared<StopSource>());
+ }
+
+ void Disable() { internal::atomic_store(&stop_source_, NullSource()); }
+
+ static SignalStopState* instance() { return &instance_; }
+
+ private:
+ // For readability
+ std::shared_ptr<StopSource> NullSource() { return nullptr; }
+
+ void EmptyTrashCan() { internal::atomic_store(&trash_can_, NullSource()); }
+
+ static void HandleSignal(int signum) { instance_.DoHandleSignal(signum); }
+
+ void DoHandleSignal(int signum) {
+ // async-signal-safe code only
+ auto source = internal::atomic_load(&stop_source_);
+ if (source) {
+ source->RequestStopFromSignal(signum);
+ // Disable() may have been called in the meantime, but we can't
+ // deallocate a shared_ptr here, so instead move it to a "trash can".
+ // This minimizes the possibility of running a deallocator here,
+      // though it doesn't entirely preclude it.
+ //
+ // Possible case:
+ // - a signal handler (A) starts running, fetches the current source
+ // - Disable() then Enable() are called, emptying the trash can and
+ // replacing the current source
+ // - a signal handler (B) starts running, fetches the current source
+ // - signal handler A resumes, moves its source (the old source) into
+ // the trash can (the only remaining reference)
+ // - signal handler B resumes, moves its source (the current source)
+ // into the trash can. This triggers deallocation of the old source,
+ // since the trash can had the only remaining reference to it.
+ //
+ // This case should be sufficiently unlikely, but we cannot entirely
+ // rule it out. The problem might be solved properly with a lock-free
+ // linked list of StopSources.
+ internal::atomic_store(&trash_can_, std::move(source));
+ }
+ ReinstateSignalHandler(signum, &HandleSignal);
+ }
+
+ std::shared_ptr<StopSource> stop_source_;
+ std::shared_ptr<StopSource> trash_can_;
+
+ std::vector<SavedSignalHandler> saved_handlers_;
+
+ static SignalStopState instance_;
+};
+
+SignalStopState SignalStopState::instance_{};
+
+} // namespace
+
+Result<StopSource*> SetSignalStopSource() {
+ auto stop_state = SignalStopState::instance();
+ if (stop_state->enabled()) {
+ return Status::Invalid("Signal stop source already set up");
+ }
+ stop_state->Enable();
+ return stop_state->stop_source();
+}
+
+void ResetSignalStopSource() {
+ auto stop_state = SignalStopState::instance();
+ DCHECK(stop_state->enabled());
+ stop_state->Disable();
+}
+
+Status RegisterCancellingSignalHandler(const std::vector<int>& signals) {
+ auto stop_state = SignalStopState::instance();
+ if (!stop_state->enabled()) {
+ return Status::Invalid("Signal stop source was not set up");
+ }
+ return stop_state->RegisterHandlers(signals);
+}
+
+void UnregisterCancellingSignalHandler() {
+ auto stop_state = SignalStopState::instance();
+ DCHECK(stop_state->enabled());
+ stop_state->UnregisterHandlers();
+}
+
+} // namespace arrow
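As a usage sketch (assuming the Arrow headers above are available; LongRunningWork is a hypothetical worker), the producer side polls its token inside the hot loop, and per the note above the not-cancelled path is just a lock-free atomic load:

#include <utility>
#include <arrow/status.h>
#include <arrow/util/cancel.h>

// Hypothetical worker: returns early with the cancellation Status once the
// consumer side has called RequestStop().
arrow::Status LongRunningWork(arrow::StopToken token) {
  for (int i = 0; i < 1000000; ++i) {
    ARROW_RETURN_NOT_OK(token.Poll());  // cheap while no stop was requested
    // ... one unit of work ...
  }
  return arrow::Status::OK();
}

void Example() {
  arrow::StopSource source;
  arrow::StopToken token = source.token();
  // Another thread could call source.RequestStop() to interrupt the worker.
  arrow::Status st = LongRunningWork(std::move(token));
  (void)st;
}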
diff --git a/contrib/libs/apache/arrow/cpp/src/arrow/util/cancel.h b/contrib/libs/apache/arrow/cpp/src/arrow/util/cancel.h
index 7c755c02d68..9e00f673a21 100644
--- a/contrib/libs/apache/arrow/cpp/src/arrow/util/cancel.h
+++ b/contrib/libs/apache/arrow/cpp/src/arrow/util/cancel.h
@@ -1,102 +1,102 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-#pragma once
-
-#include <functional>
-#include <memory>
-#include <string>
-#include <vector>
-
-#include "arrow/status.h"
-#include "arrow/type_fwd.h"
-#include "arrow/util/macros.h"
-#include "arrow/util/visibility.h"
-
-namespace arrow {
-
-class StopToken;
-
-struct StopSourceImpl;
-
-/// EXPERIMENTAL
-class ARROW_EXPORT StopSource {
- public:
- StopSource();
- ~StopSource();
-
- // Consumer API (the side that stops)
- void RequestStop();
- void RequestStop(Status error);
- void RequestStopFromSignal(int signum);
-
- StopToken token();
-
- // For internal use only
- void Reset();
-
- protected:
- std::shared_ptr<StopSourceImpl> impl_;
-};
-
-/// EXPERIMENTAL
-class ARROW_EXPORT StopToken {
- public:
- // Public for Cython
- StopToken() {}
-
- explicit StopToken(std::shared_ptr<StopSourceImpl> impl) : impl_(std::move(impl)) {}
-
- // A trivial token that never propagates any stop request
- static StopToken Unstoppable() { return StopToken(); }
-
-  // Producer API (the side that gets asked to stop)
- Status Poll() const;
- bool IsStopRequested() const;
-
- protected:
- std::shared_ptr<StopSourceImpl> impl_;
-};
-
-/// EXPERIMENTAL: Set a global StopSource that can receive signals
-///
-/// The only allowed order of calls is the following:
-/// - SetSignalStopSource()
-/// - any number of pairs of (RegisterCancellingSignalHandler,
-/// UnregisterCancellingSignalHandler) calls
-/// - ResetSignalStopSource()
-///
-/// Beware that these settings are process-wide. Typically, only one
-/// thread should call these APIs, even in a multithreaded setting.
-ARROW_EXPORT
-Result<StopSource*> SetSignalStopSource();
-
-/// EXPERIMENTAL: Reset the global signal-receiving StopSource
-///
-/// This will invalidate the pointer returned by SetSignalStopSource.
-ARROW_EXPORT
-void ResetSignalStopSource();
-
-/// EXPERIMENTAL: Register signal handlers triggering the signal-receiving StopSource
-ARROW_EXPORT
-Status RegisterCancellingSignalHandler(const std::vector<int>& signals);
-
-/// EXPERIMENTAL: Unregister the signal handlers set up by RegisterCancellingSignalHandler
-ARROW_EXPORT
-void UnregisterCancellingSignalHandler();
-
-} // namespace arrow
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+
+#include <functional>
+#include <memory>
+#include <string>
+#include <vector>
+
+#include "arrow/status.h"
+#include "arrow/type_fwd.h"
+#include "arrow/util/macros.h"
+#include "arrow/util/visibility.h"
+
+namespace arrow {
+
+class StopToken;
+
+struct StopSourceImpl;
+
+/// EXPERIMENTAL
+class ARROW_EXPORT StopSource {
+ public:
+ StopSource();
+ ~StopSource();
+
+ // Consumer API (the side that stops)
+ void RequestStop();
+ void RequestStop(Status error);
+ void RequestStopFromSignal(int signum);
+
+ StopToken token();
+
+ // For internal use only
+ void Reset();
+
+ protected:
+ std::shared_ptr<StopSourceImpl> impl_;
+};
+
+/// EXPERIMENTAL
+class ARROW_EXPORT StopToken {
+ public:
+ // Public for Cython
+ StopToken() {}
+
+ explicit StopToken(std::shared_ptr<StopSourceImpl> impl) : impl_(std::move(impl)) {}
+
+ // A trivial token that never propagates any stop request
+ static StopToken Unstoppable() { return StopToken(); }
+
+  // Producer API (the side that gets asked to stop)
+ Status Poll() const;
+ bool IsStopRequested() const;
+
+ protected:
+ std::shared_ptr<StopSourceImpl> impl_;
+};
+
+/// EXPERIMENTAL: Set a global StopSource that can receive signals
+///
+/// The only allowed order of calls is the following:
+/// - SetSignalStopSource()
+/// - any number of pairs of (RegisterCancellingSignalHandler,
+/// UnregisterCancellingSignalHandler) calls
+/// - ResetSignalStopSource()
+///
+/// Beware that these settings are process-wide. Typically, only one
+/// thread should call these APIs, even in a multithreaded setting.
+ARROW_EXPORT
+Result<StopSource*> SetSignalStopSource();
+
+/// EXPERIMENTAL: Reset the global signal-receiving StopSource
+///
+/// This will invalidate the pointer returned by SetSignalStopSource.
+ARROW_EXPORT
+void ResetSignalStopSource();
+
+/// EXPERIMENTAL: Register signal handlers triggering the signal-receiving StopSource
+ARROW_EXPORT
+Status RegisterCancellingSignalHandler(const std::vector<int>& signals);
+
+/// EXPERIMENTAL: Unregister the signal handlers set up by RegisterCancellingSignalHandler
+ARROW_EXPORT
+void UnregisterCancellingSignalHandler();
+
+} // namespace arrow
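The allowed call order documented above, sketched end to end (DoWork is a hypothetical placeholder for the caller's own cancellable function):

#include <csignal>
#include <arrow/result.h>
#include <arrow/status.h>
#include <arrow/util/cancel.h>

arrow::Status DoWork(arrow::StopToken token);  // hypothetical, defined elsewhere

// Make Ctrl-C (SIGINT) cancel in-flight work, following the documented order:
// SetSignalStopSource, register/unregister pairs, then ResetSignalStopSource.
arrow::Status RunCancellableBySigint() {
  ARROW_ASSIGN_OR_RAISE(arrow::StopSource* source, arrow::SetSignalStopSource());
  ARROW_RETURN_NOT_OK(arrow::RegisterCancellingSignalHandler({SIGINT}));
  arrow::Status st = DoWork(source->token());
  arrow::UnregisterCancellingSignalHandler();
  arrow::ResetSignalStopSource();
  return st;
}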
diff --git a/contrib/libs/apache/arrow/cpp/src/arrow/util/compression.cc b/contrib/libs/apache/arrow/cpp/src/arrow/util/compression.cc
index 41109e80faa..8db199b4e76 100644
--- a/contrib/libs/apache/arrow/cpp/src/arrow/util/compression.cc
+++ b/contrib/libs/apache/arrow/cpp/src/arrow/util/compression.cc
@@ -29,18 +29,18 @@
namespace arrow {
namespace util {
-namespace {
-
-Status CheckSupportsCompressionLevel(Compression::type type) {
- if (!Codec::SupportsCompressionLevel(type)) {
- return Status::Invalid(
- "The specified codec does not support the compression level parameter");
- }
- return Status::OK();
-}
-
-} // namespace
-
+namespace {
+
+Status CheckSupportsCompressionLevel(Compression::type type) {
+ if (!Codec::SupportsCompressionLevel(type)) {
+ return Status::Invalid(
+ "The specified codec does not support the compression level parameter");
+ }
+ return Status::OK();
+}
+
+} // namespace
+
int Codec::UseDefaultCompressionLevel() { return kUseDefaultCompressionLevel; }
Status Codec::Init() { return Status::OK(); }
@@ -115,24 +115,24 @@ bool Codec::SupportsCompressionLevel(Compression::type codec) {
}
}
-Result<int> Codec::MaximumCompressionLevel(Compression::type codec_type) {
- RETURN_NOT_OK(CheckSupportsCompressionLevel(codec_type));
- ARROW_ASSIGN_OR_RAISE(auto codec, Codec::Create(codec_type));
- return codec->maximum_compression_level();
-}
-
-Result<int> Codec::MinimumCompressionLevel(Compression::type codec_type) {
- RETURN_NOT_OK(CheckSupportsCompressionLevel(codec_type));
- ARROW_ASSIGN_OR_RAISE(auto codec, Codec::Create(codec_type));
- return codec->minimum_compression_level();
-}
-
-Result<int> Codec::DefaultCompressionLevel(Compression::type codec_type) {
- RETURN_NOT_OK(CheckSupportsCompressionLevel(codec_type));
- ARROW_ASSIGN_OR_RAISE(auto codec, Codec::Create(codec_type));
- return codec->default_compression_level();
-}
-
+Result<int> Codec::MaximumCompressionLevel(Compression::type codec_type) {
+ RETURN_NOT_OK(CheckSupportsCompressionLevel(codec_type));
+ ARROW_ASSIGN_OR_RAISE(auto codec, Codec::Create(codec_type));
+ return codec->maximum_compression_level();
+}
+
+Result<int> Codec::MinimumCompressionLevel(Compression::type codec_type) {
+ RETURN_NOT_OK(CheckSupportsCompressionLevel(codec_type));
+ ARROW_ASSIGN_OR_RAISE(auto codec, Codec::Create(codec_type));
+ return codec->minimum_compression_level();
+}
+
+Result<int> Codec::DefaultCompressionLevel(Compression::type codec_type) {
+ RETURN_NOT_OK(CheckSupportsCompressionLevel(codec_type));
+ ARROW_ASSIGN_OR_RAISE(auto codec, Codec::Create(codec_type));
+ return codec->default_compression_level();
+}
+
Result<std::unique_ptr<Codec>> Codec::Create(Compression::type codec_type,
int compression_level) {
if (!IsAvailable(codec_type)) {
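A sketch of how the static level helpers restored above compose with Codec::Create (assuming gzip support is compiled in):

#include <algorithm>
#include <memory>
#include <arrow/result.h>
#include <arrow/util/compression.h>

arrow::Result<std::unique_ptr<arrow::util::Codec>> MakeGzipCodec(int requested_level) {
  using arrow::Compression;
  using arrow::util::Codec;
  // Each of these creates a temporary Codec instance, as noted in the header.
  ARROW_ASSIGN_OR_RAISE(int min_level, Codec::MinimumCompressionLevel(Compression::GZIP));
  ARROW_ASSIGN_OR_RAISE(int max_level, Codec::MaximumCompressionLevel(Compression::GZIP));
  // Clamp the requested level into the supported range before creating.
  const int level = std::max(min_level, std::min(max_level, requested_level));
  return Codec::Create(Compression::GZIP, level);
}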
diff --git a/contrib/libs/apache/arrow/cpp/src/arrow/util/compression.h b/contrib/libs/apache/arrow/cpp/src/arrow/util/compression.h
index bd5f2d1c647..0832e82a606 100644
--- a/contrib/libs/apache/arrow/cpp/src/arrow/util/compression.h
+++ b/contrib/libs/apache/arrow/cpp/src/arrow/util/compression.h
@@ -24,13 +24,13 @@
#include "arrow/result.h"
#include "arrow/status.h"
-#include "arrow/util/type_fwd.h"
+#include "arrow/util/type_fwd.h"
#include "arrow/util/visibility.h"
namespace arrow {
namespace util {
-constexpr int kUseDefaultCompressionLevel = std::numeric_limits<int>::min();
+constexpr int kUseDefaultCompressionLevel = std::numeric_limits<int>::min();
/// \brief Streaming compressor interface
///
@@ -132,27 +132,27 @@ class ARROW_EXPORT Codec {
/// \brief Return true if indicated codec supports setting a compression level
static bool SupportsCompressionLevel(Compression::type codec);
- /// \brief Return the smallest supported compression level for the codec
- /// Note: This function creates a temporary Codec instance
- static Result<int> MinimumCompressionLevel(Compression::type codec);
-
- /// \brief Return the largest supported compression level for the codec
- /// Note: This function creates a temporary Codec instance
- static Result<int> MaximumCompressionLevel(Compression::type codec);
-
- /// \brief Return the default compression level
- /// Note: This function creates a temporary Codec instance
- static Result<int> DefaultCompressionLevel(Compression::type codec);
-
- /// \brief Return the smallest supported compression level
- virtual int minimum_compression_level() const = 0;
-
- /// \brief Return the largest supported compression level
- virtual int maximum_compression_level() const = 0;
-
- /// \brief Return the default compression level
- virtual int default_compression_level() const = 0;
-
+ /// \brief Return the smallest supported compression level for the codec
+ /// Note: This function creates a temporary Codec instance
+ static Result<int> MinimumCompressionLevel(Compression::type codec);
+
+ /// \brief Return the largest supported compression level for the codec
+ /// Note: This function creates a temporary Codec instance
+ static Result<int> MaximumCompressionLevel(Compression::type codec);
+
+ /// \brief Return the default compression level
+ /// Note: This function creates a temporary Codec instance
+ static Result<int> DefaultCompressionLevel(Compression::type codec);
+
+ /// \brief Return the smallest supported compression level
+ virtual int minimum_compression_level() const = 0;
+
+ /// \brief Return the largest supported compression level
+ virtual int maximum_compression_level() const = 0;
+
+ /// \brief Return the default compression level
+ virtual int default_compression_level() const = 0;
+
/// \brief One-shot decompression function
///
/// output_buffer_len must be correct and therefore be obtained in advance.
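A one-shot round trip against this interface, as a sketch (it assumes the usual one-shot members MaxCompressedLen and Compress declared alongside the Decompress excerpt above, and that zstd support is compiled in); note the exact decompressed size must be known up front, as the comment above requires:

#include <cstdint>
#include <vector>
#include <arrow/result.h>
#include <arrow/status.h>
#include <arrow/util/compression.h>

arrow::Status RoundTrip(const std::vector<uint8_t>& input) {
  using arrow::util::Codec;
  const auto input_len = static_cast<int64_t>(input.size());
  ARROW_ASSIGN_OR_RAISE(auto codec, Codec::Create(arrow::Compression::ZSTD));
  std::vector<uint8_t> compressed(codec->MaxCompressedLen(input_len, input.data()));
  ARROW_ASSIGN_OR_RAISE(int64_t compressed_len,
                        codec->Compress(input_len, input.data(),
                                        compressed.size(), compressed.data()));
  // The exact decompressed size must be supplied; here it is the input size.
  std::vector<uint8_t> output(input.size());
  ARROW_ASSIGN_OR_RAISE(int64_t decompressed_len,
                        codec->Decompress(compressed_len, compressed.data(),
                                          output.size(), output.data()));
  if (decompressed_len != input_len) {
    return arrow::Status::IOError("unexpected decompressed length");
  }
  return arrow::Status::OK();
}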
diff --git a/contrib/libs/apache/arrow/cpp/src/arrow/util/compression_brotli.cc b/contrib/libs/apache/arrow/cpp/src/arrow/util/compression_brotli.cc
index cc41ce43f91..cb547c2c8cf 100644
--- a/contrib/libs/apache/arrow/cpp/src/arrow/util/compression_brotli.cc
+++ b/contrib/libs/apache/arrow/cpp/src/arrow/util/compression_brotli.cc
@@ -224,11 +224,11 @@ class BrotliCodec : public Codec {
Compression::type compression_type() const override { return Compression::BROTLI; }
int compression_level() const override { return compression_level_; }
- int minimum_compression_level() const override { return BROTLI_MIN_QUALITY; }
- int maximum_compression_level() const override { return BROTLI_MAX_QUALITY; }
- int default_compression_level() const override {
- return kBrotliDefaultCompressionLevel;
- }
+ int minimum_compression_level() const override { return BROTLI_MIN_QUALITY; }
+ int maximum_compression_level() const override { return BROTLI_MAX_QUALITY; }
+ int default_compression_level() const override {
+ return kBrotliDefaultCompressionLevel;
+ }
private:
const int compression_level_;
diff --git a/contrib/libs/apache/arrow/cpp/src/arrow/util/compression_lz4.cc b/contrib/libs/apache/arrow/cpp/src/arrow/util/compression_lz4.cc
index 04c13cc4c5f..c783e405590 100644
--- a/contrib/libs/apache/arrow/cpp/src/arrow/util/compression_lz4.cc
+++ b/contrib/libs/apache/arrow/cpp/src/arrow/util/compression_lz4.cc
@@ -27,7 +27,7 @@
#include "arrow/result.h"
#include "arrow/status.h"
#include "arrow/util/bit_util.h"
-#include "arrow/util/endian.h"
+#include "arrow/util/endian.h"
#include "arrow/util/logging.h"
#include "arrow/util/macros.h"
#include "arrow/util/ubsan.h"
@@ -300,9 +300,9 @@ class Lz4FrameCodec : public Codec {
}
Compression::type compression_type() const override { return Compression::LZ4_FRAME; }
- int minimum_compression_level() const override { return kUseDefaultCompressionLevel; }
- int maximum_compression_level() const override { return kUseDefaultCompressionLevel; }
- int default_compression_level() const override { return kUseDefaultCompressionLevel; }
+ int minimum_compression_level() const override { return kUseDefaultCompressionLevel; }
+ int maximum_compression_level() const override { return kUseDefaultCompressionLevel; }
+ int default_compression_level() const override { return kUseDefaultCompressionLevel; }
protected:
const LZ4F_preferences_t prefs_;
@@ -353,9 +353,9 @@ class Lz4Codec : public Codec {
}
Compression::type compression_type() const override { return Compression::LZ4; }
- int minimum_compression_level() const override { return kUseDefaultCompressionLevel; }
- int maximum_compression_level() const override { return kUseDefaultCompressionLevel; }
- int default_compression_level() const override { return kUseDefaultCompressionLevel; }
+ int minimum_compression_level() const override { return kUseDefaultCompressionLevel; }
+ int maximum_compression_level() const override { return kUseDefaultCompressionLevel; }
+ int default_compression_level() const override { return kUseDefaultCompressionLevel; }
};
// ----------------------------------------------------------------------
@@ -424,52 +424,52 @@ class Lz4HadoopCodec : public Lz4Codec {
int64_t TryDecompressHadoop(int64_t input_len, const uint8_t* input,
int64_t output_buffer_len, uint8_t* output_buffer) {
- // Parquet files written with the Hadoop Lz4Codec use their own framing.
- // The input buffer can contain an arbitrary number of "frames", each
- // with the following structure:
- // - bytes 0..3: big-endian uint32_t representing the frame decompressed size
- // - bytes 4..7: big-endian uint32_t representing the frame compressed size
- // - bytes 8...: frame compressed data
+ // Parquet files written with the Hadoop Lz4Codec use their own framing.
+ // The input buffer can contain an arbitrary number of "frames", each
+ // with the following structure:
+ // - bytes 0..3: big-endian uint32_t representing the frame decompressed size
+ // - bytes 4..7: big-endian uint32_t representing the frame compressed size
+ // - bytes 8...: frame compressed data
//
// The Hadoop Lz4Codec source code can be found here:
// https://github.com/apache/hadoop/blob/trunk/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-nativetask/src/main/native/src/codec/Lz4Codec.cc
- int64_t total_decompressed_size = 0;
-
- while (input_len >= kPrefixLength) {
- const uint32_t expected_decompressed_size =
- BitUtil::FromBigEndian(SafeLoadAs<uint32_t>(input));
- const uint32_t expected_compressed_size =
- BitUtil::FromBigEndian(SafeLoadAs<uint32_t>(input + sizeof(uint32_t)));
- input += kPrefixLength;
- input_len -= kPrefixLength;
-
- if (input_len < expected_compressed_size) {
- // Not enough bytes for Hadoop "frame"
- return kNotHadoop;
+ int64_t total_decompressed_size = 0;
+
+ while (input_len >= kPrefixLength) {
+ const uint32_t expected_decompressed_size =
+ BitUtil::FromBigEndian(SafeLoadAs<uint32_t>(input));
+ const uint32_t expected_compressed_size =
+ BitUtil::FromBigEndian(SafeLoadAs<uint32_t>(input + sizeof(uint32_t)));
+ input += kPrefixLength;
+ input_len -= kPrefixLength;
+
+ if (input_len < expected_compressed_size) {
+ // Not enough bytes for Hadoop "frame"
+ return kNotHadoop;
}
- if (output_buffer_len < expected_decompressed_size) {
- // Not enough bytes to hold advertised output => probably not Hadoop
- return kNotHadoop;
- }
- // Try decompressing and compare with expected decompressed length
- auto maybe_decompressed_size = Lz4Codec::Decompress(
- expected_compressed_size, input, output_buffer_len, output_buffer);
- if (!maybe_decompressed_size.ok() ||
- *maybe_decompressed_size != expected_decompressed_size) {
- return kNotHadoop;
- }
- input += expected_compressed_size;
- input_len -= expected_compressed_size;
- output_buffer += expected_decompressed_size;
- output_buffer_len -= expected_decompressed_size;
- total_decompressed_size += expected_decompressed_size;
+ if (output_buffer_len < expected_decompressed_size) {
+ // Not enough bytes to hold advertised output => probably not Hadoop
+ return kNotHadoop;
+ }
+ // Try decompressing and compare with expected decompressed length
+ auto maybe_decompressed_size = Lz4Codec::Decompress(
+ expected_compressed_size, input, output_buffer_len, output_buffer);
+ if (!maybe_decompressed_size.ok() ||
+ *maybe_decompressed_size != expected_decompressed_size) {
+ return kNotHadoop;
+ }
+ input += expected_compressed_size;
+ input_len -= expected_compressed_size;
+ output_buffer += expected_decompressed_size;
+ output_buffer_len -= expected_decompressed_size;
+ total_decompressed_size += expected_decompressed_size;
}
- if (input_len == 0) {
- return total_decompressed_size;
- } else {
- return kNotHadoop;
- }
+ if (input_len == 0) {
+ return total_decompressed_size;
+ } else {
+ return kNotHadoop;
+ }
}
};
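The Hadoop framing described in that comment is simple to parse by hand; a minimal sketch of reading one 8-byte frame prefix (the helper names here are illustrative):

#include <cstdint>

struct HadoopFramePrefix {
  uint32_t decompressed_size;  // bytes 0..3, big-endian
  uint32_t compressed_size;    // bytes 4..7, big-endian
};

// Portable big-endian load: byte 0 is the most significant.
inline uint32_t LoadBigEndian32(const uint8_t* p) {
  return (uint32_t(p[0]) << 24) | (uint32_t(p[1]) << 16) |
         (uint32_t(p[2]) << 8) | uint32_t(p[3]);
}

inline HadoopFramePrefix ParseHadoopPrefix(const uint8_t* frame) {
  return {LoadBigEndian32(frame), LoadBigEndian32(frame + 4)};
}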
diff --git a/contrib/libs/apache/arrow/cpp/src/arrow/util/compression_snappy.cc b/contrib/libs/apache/arrow/cpp/src/arrow/util/compression_snappy.cc
index da00607d13b..3756f957d04 100644
--- a/contrib/libs/apache/arrow/cpp/src/arrow/util/compression_snappy.cc
+++ b/contrib/libs/apache/arrow/cpp/src/arrow/util/compression_snappy.cc
@@ -86,9 +86,9 @@ class SnappyCodec : public Codec {
}
Compression::type compression_type() const override { return Compression::SNAPPY; }
- int minimum_compression_level() const override { return kUseDefaultCompressionLevel; }
- int maximum_compression_level() const override { return kUseDefaultCompressionLevel; }
- int default_compression_level() const override { return kUseDefaultCompressionLevel; }
+ int minimum_compression_level() const override { return kUseDefaultCompressionLevel; }
+ int maximum_compression_level() const override { return kUseDefaultCompressionLevel; }
+ int default_compression_level() const override { return kUseDefaultCompressionLevel; }
};
} // namespace
diff --git a/contrib/libs/apache/arrow/cpp/src/arrow/util/compression_zlib.cc b/contrib/libs/apache/arrow/cpp/src/arrow/util/compression_zlib.cc
index 51373cc227c..e9cb2470ee2 100644
--- a/contrib/libs/apache/arrow/cpp/src/arrow/util/compression_zlib.cc
+++ b/contrib/libs/apache/arrow/cpp/src/arrow/util/compression_zlib.cc
@@ -52,9 +52,9 @@ constexpr int GZIP_CODEC = 16;
// Determine if this is libz or gzip from header.
constexpr int DETECT_CODEC = 32;
-constexpr int kGZipMinCompressionLevel = 1;
-constexpr int kGZipMaxCompressionLevel = 9;
-
+constexpr int kGZipMinCompressionLevel = 1;
+constexpr int kGZipMaxCompressionLevel = 9;
+
int CompressionWindowBitsForFormat(GZipFormat::type format) {
int window_bits = WINDOW_BITS;
switch (format) {
@@ -249,9 +249,9 @@ class GZipCompressor : public Compressor {
// again with the same value of the flush parameter and more output space
// (updated avail_out), until the flush is complete (deflate returns
// with non-zero avail_out)."
- // "Note that Z_BUF_ERROR is not fatal, and deflate() can be called again
- // with more input and more output space to continue compressing."
- return FlushResult{bytes_written, stream_.avail_out == 0};
+ // "Note that Z_BUF_ERROR is not fatal, and deflate() can be called again
+ // with more input and more output space to continue compressing."
+ return FlushResult{bytes_written, stream_.avail_out == 0};
}
Result<EndResult> End(int64_t output_len, uint8_t* output) override {
@@ -471,9 +471,9 @@ class GZipCodec : public Codec {
Compression::type compression_type() const override { return Compression::GZIP; }
int compression_level() const override { return compression_level_; }
- int minimum_compression_level() const override { return kGZipMinCompressionLevel; }
- int maximum_compression_level() const override { return kGZipMaxCompressionLevel; }
- int default_compression_level() const override { return kGZipDefaultCompressionLevel; }
+ int minimum_compression_level() const override { return kGZipMinCompressionLevel; }
+ int maximum_compression_level() const override { return kGZipMaxCompressionLevel; }
+ int default_compression_level() const override { return kGZipDefaultCompressionLevel; }
private:
// zlib is stateful and the z_stream state variable must be initialized
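The zlib contract quoted in the Flush hunk above is surfaced through FlushResult: should_retry is set exactly when avail_out was exhausted, telling the caller to supply more output space and flush again. A minimal caller-side sketch; the buffer-growth policy is illustrative:

#include <vector>
#include "arrow/util/compression.h"

// Sketch: keep flushing while should_retry is set, growing the output
// buffer, per the deflate() contract quoted in the comment above.
arrow::Status FlushAll(arrow::util::Compressor* compressor,
                       std::vector<uint8_t>* out) {
  int64_t chunk = 4096;  // illustrative initial chunk size
  while (true) {
    const size_t old_size = out->size();
    out->resize(old_size + chunk);
    ARROW_ASSIGN_OR_RAISE(auto res,
                          compressor->Flush(chunk, out->data() + old_size));
    out->resize(old_size + static_cast<size_t>(res.bytes_written));
    if (!res.should_retry) break;
    chunk *= 2;  // more output space, then try again
  }
  return arrow::Status::OK();
}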
diff --git a/contrib/libs/apache/arrow/cpp/src/arrow/util/compression_zstd.cc b/contrib/libs/apache/arrow/cpp/src/arrow/util/compression_zstd.cc
index 715b6e7374a..e15ecb4e1fe 100644
--- a/contrib/libs/apache/arrow/cpp/src/arrow/util/compression_zstd.cc
+++ b/contrib/libs/apache/arrow/cpp/src/arrow/util/compression_zstd.cc
@@ -228,9 +228,9 @@ class ZSTDCodec : public Codec {
}
Compression::type compression_type() const override { return Compression::ZSTD; }
- int minimum_compression_level() const override { return ZSTD_minCLevel(); }
- int maximum_compression_level() const override { return ZSTD_maxCLevel(); }
- int default_compression_level() const override { return kZSTDDefaultCompressionLevel; }
+ int minimum_compression_level() const override { return ZSTD_minCLevel(); }
+ int maximum_compression_level() const override { return ZSTD_maxCLevel(); }
+ int default_compression_level() const override { return kZSTDDefaultCompressionLevel; }
int compression_level() const override { return compression_level_; }
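Note that ZSTD_minCLevel() is negative (the "fast" modes), so unlike gzip's 1..9 range the minimum level here is below zero. A hedged sketch of validating a requested level against the advertised bounds before constructing the codec:

#include <memory>
#include "arrow/util/compression.h"

// Sketch: clamp a requested zstd level into the codec's advertised range,
// falling back to the default level when it is out of bounds.
arrow::Result<std::unique_ptr<arrow::util::Codec>> MakeZstdCodec(int level) {
  ARROW_ASSIGN_OR_RAISE(auto probe,
                        arrow::util::Codec::Create(arrow::Compression::ZSTD));
  if (level < probe->minimum_compression_level() ||
      level > probe->maximum_compression_level()) {
    level = probe->default_compression_level();
  }
  return arrow::util::Codec::Create(arrow::Compression::ZSTD, level);
}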
diff --git a/contrib/libs/apache/arrow/cpp/src/arrow/util/cpu_info.cc b/contrib/libs/apache/arrow/cpp/src/arrow/util/cpu_info.cc
index 1f54969539a..d803521a2d9 100644
--- a/contrib/libs/apache/arrow/cpp/src/arrow/util/cpu_info.cc
+++ b/contrib/libs/apache/arrow/cpp/src/arrow/util/cpu_info.cc
@@ -31,11 +31,11 @@
#endif
#ifdef _WIN32
-#include <immintrin.h>
+#include <immintrin.h>
#include <intrin.h>
#include <array>
#include <bitset>
-
+
#include "arrow/util/windows_compatibility.h"
#endif
@@ -51,19 +51,19 @@
#include "arrow/result.h"
#include "arrow/util/io_util.h"
#include "arrow/util/logging.h"
-#include "arrow/util/optional.h"
+#include "arrow/util/optional.h"
#include "arrow/util/string.h"
-namespace arrow {
-namespace internal {
-
-namespace {
-
+namespace arrow {
+namespace internal {
+
+namespace {
+
using std::max;
-constexpr int64_t kDefaultL1CacheSize = 32 * 1024; // Level 1: 32k
-constexpr int64_t kDefaultL2CacheSize = 256 * 1024; // Level 2: 256k
-constexpr int64_t kDefaultL3CacheSize = 3072 * 1024; // Level 3: 3M
+constexpr int64_t kDefaultL1CacheSize = 32 * 1024; // Level 1: 32k
+constexpr int64_t kDefaultL2CacheSize = 256 * 1024; // Level 2: 256k
+constexpr int64_t kDefaultL3CacheSize = 3072 * 1024; // Level 3: 3M
#if defined(__MINGW64_VERSION_MAJOR) && __MINGW64_VERSION_MAJOR < 5
void __cpuidex(int CPUInfo[4], int function_id, int subfunction_id) {
@@ -72,31 +72,31 @@ void __cpuidex(int CPUInfo[4], int function_id, int subfunction_id) {
"=d"(CPUInfo[3])
: "a"(function_id), "c"(subfunction_id));
}
-
-int64_t _xgetbv(int xcr) {
- int out = 0;
- __asm__ __volatile__("xgetbv" : "=a"(out) : "c"(xcr) : "%edx");
- return out;
-}
+
+int64_t _xgetbv(int xcr) {
+ int out = 0;
+ __asm__ __volatile__("xgetbv" : "=a"(out) : "c"(xcr) : "%edx");
+ return out;
+}
+#endif
+
+#ifdef __APPLE__
+util::optional<int64_t> IntegerSysCtlByName(const char* name) {
+ size_t len = sizeof(int64_t);
+ int64_t data = 0;
+ if (sysctlbyname(name, &data, &len, nullptr, 0) == 0) {
+ return data;
+ }
+  // ENOENT is the official errno value for non-existing sysctls,
+ // but EINVAL and ENOTSUP have been seen in the wild.
+ if (errno != ENOENT && errno != EINVAL && errno != ENOTSUP) {
+ auto st = IOErrorFromErrno(errno, "sysctlbyname failed for '", name, "'");
+ ARROW_LOG(WARNING) << st.ToString();
+ }
+ return util::nullopt;
+}
#endif
-#ifdef __APPLE__
-util::optional<int64_t> IntegerSysCtlByName(const char* name) {
- size_t len = sizeof(int64_t);
- int64_t data = 0;
- if (sysctlbyname(name, &data, &len, nullptr, 0) == 0) {
- return data;
- }
- // ENOENT is the official errno value for non-existing sysctl's,
- // but EINVAL and ENOTSUP have been seen in the wild.
- if (errno != ENOENT && errno != EINVAL && errno != ENOTSUP) {
- auto st = IOErrorFromErrno(errno, "sysctlbyname failed for '", name, "'");
- ARROW_LOG(WARNING) << st.ToString();
- }
- return util::nullopt;
-}
-#endif
-
#if defined(__GNUC__) && defined(__linux__) && defined(__aarch64__)
// There is no direct instruction to get cache size on Arm64 like '__cpuid' on x86;
// Get Arm64 cache size by reading '/sys/devices/system/cpu/cpu0/cache/index*/size';
@@ -105,11 +105,11 @@ util::optional<int64_t> IntegerSysCtlByName(const char* name) {
// index1: L1 Icache
// index2: L2 cache
// index3: L3 cache
-const char* kL1CacheSizeFile = "/sys/devices/system/cpu/cpu0/cache/index0/size";
-const char* kL2CacheSizeFile = "/sys/devices/system/cpu/cpu0/cache/index2/size";
-const char* kL3CacheSizeFile = "/sys/devices/system/cpu/cpu0/cache/index3/size";
+const char* kL1CacheSizeFile = "/sys/devices/system/cpu/cpu0/cache/index0/size";
+const char* kL2CacheSizeFile = "/sys/devices/system/cpu/cpu0/cache/index2/size";
+const char* kL3CacheSizeFile = "/sys/devices/system/cpu/cpu0/cache/index3/size";
-int64_t GetArm64CacheSize(const char* filename, int64_t default_size = -1) {
+int64_t GetArm64CacheSize(const char* filename, int64_t default_size = -1) {
char* content = nullptr;
char* last_char = nullptr;
size_t file_len = 0;
@@ -148,8 +148,8 @@ int64_t GetArm64CacheSize(const char* filename, int64_t default_size = -1) {
}
#endif
-#if !defined(_WIN32) && !defined(__APPLE__)
-struct {
+#if !defined(_WIN32) && !defined(__APPLE__)
+struct {
std::string name;
int64_t flag;
} flag_mappings[] = {
@@ -166,7 +166,7 @@ struct {
{"asimd", CpuInfo::ASIMD},
#endif
};
-const int64_t num_flags = sizeof(flag_mappings) / sizeof(flag_mappings[0]);
+const int64_t num_flags = sizeof(flag_mappings) / sizeof(flag_mappings[0]);
// Helper function to parse for hardware flags.
// values contains a list of space-separated flags. check to see if the flags we
@@ -274,13 +274,13 @@ bool RetrieveCPUInfo(int64_t* hardware_flags, std::string* model_name,
}
}
- bool zmm_enabled = false;
- if (features_ECX[27]) { // OSXSAVE
- // Query if the OS supports saving ZMM registers when switching contexts
- int64_t xcr0 = _xgetbv(0);
- zmm_enabled = (xcr0 & 0xE0) == 0xE0;
- }
-
+ bool zmm_enabled = false;
+ if (features_ECX[27]) { // OSXSAVE
+ // Query if the OS supports saving ZMM registers when switching contexts
+ int64_t xcr0 = _xgetbv(0);
+ zmm_enabled = (xcr0 & 0xE0) == 0xE0;
+ }
+
if (features_ECX[9]) *hardware_flags |= CpuInfo::SSSE3;
if (features_ECX[19]) *hardware_flags |= CpuInfo::SSE4_1;
if (features_ECX[20]) *hardware_flags |= CpuInfo::SSE4_2;
@@ -296,22 +296,22 @@ bool RetrieveCPUInfo(int64_t* hardware_flags, std::string* model_name,
if (features_EBX[3]) *hardware_flags |= CpuInfo::BMI1;
if (features_EBX[5]) *hardware_flags |= CpuInfo::AVX2;
if (features_EBX[8]) *hardware_flags |= CpuInfo::BMI2;
- // ARROW-11427: only use AVX512 if enabled by the OS
- if (zmm_enabled) {
- if (features_EBX[16]) *hardware_flags |= CpuInfo::AVX512F;
- if (features_EBX[17]) *hardware_flags |= CpuInfo::AVX512DQ;
- if (features_EBX[28]) *hardware_flags |= CpuInfo::AVX512CD;
- if (features_EBX[30]) *hardware_flags |= CpuInfo::AVX512BW;
- if (features_EBX[31]) *hardware_flags |= CpuInfo::AVX512VL;
- }
+ // ARROW-11427: only use AVX512 if enabled by the OS
+ if (zmm_enabled) {
+ if (features_EBX[16]) *hardware_flags |= CpuInfo::AVX512F;
+ if (features_EBX[17]) *hardware_flags |= CpuInfo::AVX512DQ;
+ if (features_EBX[28]) *hardware_flags |= CpuInfo::AVX512CD;
+ if (features_EBX[30]) *hardware_flags |= CpuInfo::AVX512BW;
+ if (features_EBX[31]) *hardware_flags |= CpuInfo::AVX512VL;
+ }
}
return true;
}
#endif
-} // namespace
-
+} // namespace
+
CpuInfo::CpuInfo()
: hardware_flags_(0),
num_cores_(1),
@@ -348,37 +348,37 @@ void CpuInfo::Init() {
if (QueryPerformanceFrequency(&performance_frequency)) {
max_mhz = static_cast<float>(performance_frequency.QuadPart);
}
-#elif defined(__APPLE__)
- // On macOS, get CPU information from system information base
- struct SysCtlCpuFeature {
- const char* name;
- int64_t flag;
- };
- std::vector<SysCtlCpuFeature> features = {
-#if defined(__aarch64__)
- // ARM64 (note that this is exposed under Rosetta as well)
- {"hw.optional.neon", ASIMD},
+#elif defined(__APPLE__)
+ // On macOS, get CPU information from system information base
+ struct SysCtlCpuFeature {
+ const char* name;
+ int64_t flag;
+ };
+ std::vector<SysCtlCpuFeature> features = {
+#if defined(__aarch64__)
+ // ARM64 (note that this is exposed under Rosetta as well)
+ {"hw.optional.neon", ASIMD},
+#else
+ // x86
+ {"hw.optional.sse4_2", SSSE3 | SSE4_1 | SSE4_2 | POPCNT},
+ {"hw.optional.avx1_0", AVX},
+ {"hw.optional.avx2_0", AVX2},
+ {"hw.optional.bmi1", BMI1},
+ {"hw.optional.bmi2", BMI2},
+ {"hw.optional.avx512f", AVX512F},
+ {"hw.optional.avx512cd", AVX512CD},
+ {"hw.optional.avx512dq", AVX512DQ},
+ {"hw.optional.avx512bw", AVX512BW},
+ {"hw.optional.avx512vl", AVX512VL},
+#endif
+ };
+ for (const auto& feature : features) {
+ auto v = IntegerSysCtlByName(feature.name);
+ if (v.value_or(0)) {
+ hardware_flags_ |= feature.flag;
+ }
+ }
#else
- // x86
- {"hw.optional.sse4_2", SSSE3 | SSE4_1 | SSE4_2 | POPCNT},
- {"hw.optional.avx1_0", AVX},
- {"hw.optional.avx2_0", AVX2},
- {"hw.optional.bmi1", BMI1},
- {"hw.optional.bmi2", BMI2},
- {"hw.optional.avx512f", AVX512F},
- {"hw.optional.avx512cd", AVX512CD},
- {"hw.optional.avx512dq", AVX512DQ},
- {"hw.optional.avx512bw", AVX512BW},
- {"hw.optional.avx512vl", AVX512VL},
-#endif
- };
- for (const auto& feature : features) {
- auto v = IntegerSysCtlByName(feature.name);
- if (v.value_or(0)) {
- hardware_flags_ |= feature.flag;
- }
- }
-#else
// Read from /proc/cpuinfo
std::ifstream cpuinfo("/proc/cpuinfo", std::ios::in);
while (cpuinfo) {
@@ -413,20 +413,20 @@ void CpuInfo::Init() {
#endif
#ifdef __APPLE__
- // On macOS, get cache size from system information base
- SetDefaultCacheSize();
- auto c = IntegerSysCtlByName("hw.l1dcachesize");
- if (c.has_value()) {
- cache_sizes_[0] = *c;
- }
- c = IntegerSysCtlByName("hw.l2cachesize");
- if (c.has_value()) {
- cache_sizes_[1] = *c;
- }
- c = IntegerSysCtlByName("hw.l3cachesize");
- if (c.has_value()) {
- cache_sizes_[2] = *c;
- }
+ // On macOS, get cache size from system information base
+ SetDefaultCacheSize();
+ auto c = IntegerSysCtlByName("hw.l1dcachesize");
+ if (c.has_value()) {
+ cache_sizes_[0] = *c;
+ }
+ c = IntegerSysCtlByName("hw.l2cachesize");
+ if (c.has_value()) {
+ cache_sizes_[1] = *c;
+ }
+ c = IntegerSysCtlByName("hw.l3cachesize");
+ if (c.has_value()) {
+ cache_sizes_[2] = *c;
+ }
#elif _WIN32
if (!RetrieveCacheSize(cache_sizes_)) {
SetDefaultCacheSize();
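The ARROW-11427 gate above deserves a gloss: CPUID leaf 1 bit ECX[27] (OSXSAVE) only says XGETBV may be executed; the AVX512 flags are additionally masked unless XCR0 bits 5..7 (mask 0xE0: opmask, ZMM_Hi256, Hi16_ZMM) show the OS saves ZMM state across context switches. A standalone sketch of the same check for GCC/Clang on x86-64, assuming inline asm mirroring the file's _xgetbv fallback:

#include <cstdint>
#if defined(__GNUC__) && defined(__x86_64__)
#include <cpuid.h>

// Sketch: AVX-512 should be used only when the OS saves ZMM register
// state, advertised by XCR0 bits 5..7 (mask 0xE0).
static bool OsSavesZmmState() {
  unsigned int eax, ebx, ecx, edx;
  if (!__get_cpuid(1, &eax, &ebx, &ecx, &edx)) return false;
  if (!(ecx & (1u << 27))) return false;  // OSXSAVE: XGETBV is usable
  uint32_t xcr0_lo, xcr0_hi;
  __asm__ __volatile__("xgetbv" : "=a"(xcr0_lo), "=d"(xcr0_hi) : "c"(0));
  return (xcr0_lo & 0xE0) == 0xE0;  // opmask + ZMM_Hi256 + Hi16_ZMM
}
#endif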
diff --git a/contrib/libs/apache/arrow/cpp/src/arrow/util/cpu_info.h b/contrib/libs/apache/arrow/cpp/src/arrow/util/cpu_info.h
index 7b434229c1b..83819c25519 100644
--- a/contrib/libs/apache/arrow/cpp/src/arrow/util/cpu_info.h
+++ b/contrib/libs/apache/arrow/cpp/src/arrow/util/cpu_info.h
@@ -70,18 +70,18 @@ class ARROW_EXPORT CpuInfo {
/// Returns all the flags for this cpu
int64_t hardware_flags();
- /// \brief Returns whether or not the given feature is enabled.
- ///
- /// IsSupported() is true iff IsDetected() is also true and the feature
- /// wasn't disabled by the user (for example by setting the ARROW_USER_SIMD_LEVEL
- /// environment variable).
+ /// \brief Returns whether or not the given feature is enabled.
+ ///
+ /// IsSupported() is true iff IsDetected() is also true and the feature
+ /// wasn't disabled by the user (for example by setting the ARROW_USER_SIMD_LEVEL
+ /// environment variable).
bool IsSupported(int64_t flags) const { return (hardware_flags_ & flags) == flags; }
- /// Returns whether or not the given feature is available on the CPU.
- bool IsDetected(int64_t flags) const {
- return (original_hardware_flags_ & flags) == flags;
- }
-
+ /// Returns whether or not the given feature is available on the CPU.
+ bool IsDetected(int64_t flags) const {
+ return (original_hardware_flags_ & flags) == flags;
+ }
+
/// \brief The processor supports SSE4.2 and the Arrow libraries are built
/// with support for it
bool CanUseSSE4_2() const;
@@ -113,15 +113,15 @@ class ARROW_EXPORT CpuInfo {
private:
CpuInfo();
- enum UserSimdLevel {
- USER_SIMD_NONE = 0,
- USER_SIMD_SSE4_2,
- USER_SIMD_AVX,
- USER_SIMD_AVX2,
- USER_SIMD_AVX512,
- USER_SIMD_MAX,
- };
-
+ enum UserSimdLevel {
+ USER_SIMD_NONE = 0,
+ USER_SIMD_SSE4_2,
+ USER_SIMD_AVX,
+ USER_SIMD_AVX2,
+ USER_SIMD_AVX512,
+ USER_SIMD_MAX,
+ };
+
void Init();
/// Inits CPU cache size variables with default values
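The IsSupported()/IsDetected() split above matters for dispatch code: IsDetected() reports raw hardware capability, while IsSupported() also honors a user downgrade via ARROW_USER_SIMD_LEVEL, so kernels should branch on the latter. A usage sketch, assuming the usual CpuInfo::GetInstance() singleton accessor:

#include "arrow/util/cpu_info.h"

// Sketch: branch on IsSupported() so ARROW_USER_SIMD_LEVEL can force the
// portable path even on AVX2-capable hardware.
void DispatchKernel() {
  auto* cpu = arrow::internal::CpuInfo::GetInstance();
  if (cpu->IsSupported(arrow::internal::CpuInfo::AVX2)) {
    // ... AVX2 kernel ...
  } else {
    // ... portable fallback ...
  }
}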
diff --git a/contrib/libs/apache/arrow/cpp/src/arrow/util/decimal.cc b/contrib/libs/apache/arrow/cpp/src/arrow/util/decimal.cc
index bbbb11c7252..7aefd1ab9cd 100644
--- a/contrib/libs/apache/arrow/cpp/src/arrow/util/decimal.cc
+++ b/contrib/libs/apache/arrow/cpp/src/arrow/util/decimal.cc
@@ -30,7 +30,7 @@
#include "arrow/status.h"
#include "arrow/util/decimal.h"
-#include "arrow/util/endian.h"
+#include "arrow/util/endian.h"
#include "arrow/util/formatting.h"
#include "arrow/util/int128_internal.h"
#include "arrow/util/int_util_internal.h"
@@ -94,47 +94,47 @@ static constexpr double kDoublePowersOfTen[2 * 38 + 1] = {
1e17, 1e18, 1e19, 1e20, 1e21, 1e22, 1e23, 1e24, 1e25, 1e26, 1e27,
1e28, 1e29, 1e30, 1e31, 1e32, 1e33, 1e34, 1e35, 1e36, 1e37, 1e38};
-// On the Windows R toolchain, INFINITY is double type instead of float
-static constexpr float kFloatInf = std::numeric_limits<float>::infinity();
-static constexpr float kFloatPowersOfTen76[2 * 76 + 1] = {
- 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 1e-45f, 1e-44f, 1e-43f, 1e-42f,
- 1e-41f, 1e-40f, 1e-39f, 1e-38f, 1e-37f, 1e-36f, 1e-35f,
- 1e-34f, 1e-33f, 1e-32f, 1e-31f, 1e-30f, 1e-29f, 1e-28f,
- 1e-27f, 1e-26f, 1e-25f, 1e-24f, 1e-23f, 1e-22f, 1e-21f,
- 1e-20f, 1e-19f, 1e-18f, 1e-17f, 1e-16f, 1e-15f, 1e-14f,
- 1e-13f, 1e-12f, 1e-11f, 1e-10f, 1e-9f, 1e-8f, 1e-7f,
- 1e-6f, 1e-5f, 1e-4f, 1e-3f, 1e-2f, 1e-1f, 1e0f,
- 1e1f, 1e2f, 1e3f, 1e4f, 1e5f, 1e6f, 1e7f,
- 1e8f, 1e9f, 1e10f, 1e11f, 1e12f, 1e13f, 1e14f,
- 1e15f, 1e16f, 1e17f, 1e18f, 1e19f, 1e20f, 1e21f,
- 1e22f, 1e23f, 1e24f, 1e25f, 1e26f, 1e27f, 1e28f,
- 1e29f, 1e30f, 1e31f, 1e32f, 1e33f, 1e34f, 1e35f,
- 1e36f, 1e37f, 1e38f, kFloatInf, kFloatInf, kFloatInf, kFloatInf,
- kFloatInf, kFloatInf, kFloatInf, kFloatInf, kFloatInf, kFloatInf, kFloatInf,
- kFloatInf, kFloatInf, kFloatInf, kFloatInf, kFloatInf, kFloatInf, kFloatInf,
- kFloatInf, kFloatInf, kFloatInf, kFloatInf, kFloatInf, kFloatInf, kFloatInf,
- kFloatInf, kFloatInf, kFloatInf, kFloatInf, kFloatInf, kFloatInf, kFloatInf,
- kFloatInf, kFloatInf, kFloatInf, kFloatInf, kFloatInf, kFloatInf};
-
-static constexpr double kDoublePowersOfTen76[2 * 76 + 1] = {
- 1e-76, 1e-75, 1e-74, 1e-73, 1e-72, 1e-71, 1e-70, 1e-69, 1e-68, 1e-67, 1e-66, 1e-65,
- 1e-64, 1e-63, 1e-62, 1e-61, 1e-60, 1e-59, 1e-58, 1e-57, 1e-56, 1e-55, 1e-54, 1e-53,
- 1e-52, 1e-51, 1e-50, 1e-49, 1e-48, 1e-47, 1e-46, 1e-45, 1e-44, 1e-43, 1e-42, 1e-41,
- 1e-40, 1e-39, 1e-38, 1e-37, 1e-36, 1e-35, 1e-34, 1e-33, 1e-32, 1e-31, 1e-30, 1e-29,
- 1e-28, 1e-27, 1e-26, 1e-25, 1e-24, 1e-23, 1e-22, 1e-21, 1e-20, 1e-19, 1e-18, 1e-17,
- 1e-16, 1e-15, 1e-14, 1e-13, 1e-12, 1e-11, 1e-10, 1e-9, 1e-8, 1e-7, 1e-6, 1e-5,
- 1e-4, 1e-3, 1e-2, 1e-1, 1e0, 1e1, 1e2, 1e3, 1e4, 1e5, 1e6, 1e7,
- 1e8, 1e9, 1e10, 1e11, 1e12, 1e13, 1e14, 1e15, 1e16, 1e17, 1e18, 1e19,
- 1e20, 1e21, 1e22, 1e23, 1e24, 1e25, 1e26, 1e27, 1e28, 1e29, 1e30, 1e31,
- 1e32, 1e33, 1e34, 1e35, 1e36, 1e37, 1e38, 1e39, 1e40, 1e41, 1e42, 1e43,
- 1e44, 1e45, 1e46, 1e47, 1e48, 1e49, 1e50, 1e51, 1e52, 1e53, 1e54, 1e55,
- 1e56, 1e57, 1e58, 1e59, 1e60, 1e61, 1e62, 1e63, 1e64, 1e65, 1e66, 1e67,
- 1e68, 1e69, 1e70, 1e71, 1e72, 1e73, 1e74, 1e75, 1e76};
-
+// On the Windows R toolchain, INFINITY is of type double instead of float
+static constexpr float kFloatInf = std::numeric_limits<float>::infinity();
+static constexpr float kFloatPowersOfTen76[2 * 76 + 1] = {
+ 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 1e-45f, 1e-44f, 1e-43f, 1e-42f,
+ 1e-41f, 1e-40f, 1e-39f, 1e-38f, 1e-37f, 1e-36f, 1e-35f,
+ 1e-34f, 1e-33f, 1e-32f, 1e-31f, 1e-30f, 1e-29f, 1e-28f,
+ 1e-27f, 1e-26f, 1e-25f, 1e-24f, 1e-23f, 1e-22f, 1e-21f,
+ 1e-20f, 1e-19f, 1e-18f, 1e-17f, 1e-16f, 1e-15f, 1e-14f,
+ 1e-13f, 1e-12f, 1e-11f, 1e-10f, 1e-9f, 1e-8f, 1e-7f,
+ 1e-6f, 1e-5f, 1e-4f, 1e-3f, 1e-2f, 1e-1f, 1e0f,
+ 1e1f, 1e2f, 1e3f, 1e4f, 1e5f, 1e6f, 1e7f,
+ 1e8f, 1e9f, 1e10f, 1e11f, 1e12f, 1e13f, 1e14f,
+ 1e15f, 1e16f, 1e17f, 1e18f, 1e19f, 1e20f, 1e21f,
+ 1e22f, 1e23f, 1e24f, 1e25f, 1e26f, 1e27f, 1e28f,
+ 1e29f, 1e30f, 1e31f, 1e32f, 1e33f, 1e34f, 1e35f,
+ 1e36f, 1e37f, 1e38f, kFloatInf, kFloatInf, kFloatInf, kFloatInf,
+ kFloatInf, kFloatInf, kFloatInf, kFloatInf, kFloatInf, kFloatInf, kFloatInf,
+ kFloatInf, kFloatInf, kFloatInf, kFloatInf, kFloatInf, kFloatInf, kFloatInf,
+ kFloatInf, kFloatInf, kFloatInf, kFloatInf, kFloatInf, kFloatInf, kFloatInf,
+ kFloatInf, kFloatInf, kFloatInf, kFloatInf, kFloatInf, kFloatInf, kFloatInf,
+ kFloatInf, kFloatInf, kFloatInf, kFloatInf, kFloatInf, kFloatInf};
+
+static constexpr double kDoublePowersOfTen76[2 * 76 + 1] = {
+ 1e-76, 1e-75, 1e-74, 1e-73, 1e-72, 1e-71, 1e-70, 1e-69, 1e-68, 1e-67, 1e-66, 1e-65,
+ 1e-64, 1e-63, 1e-62, 1e-61, 1e-60, 1e-59, 1e-58, 1e-57, 1e-56, 1e-55, 1e-54, 1e-53,
+ 1e-52, 1e-51, 1e-50, 1e-49, 1e-48, 1e-47, 1e-46, 1e-45, 1e-44, 1e-43, 1e-42, 1e-41,
+ 1e-40, 1e-39, 1e-38, 1e-37, 1e-36, 1e-35, 1e-34, 1e-33, 1e-32, 1e-31, 1e-30, 1e-29,
+ 1e-28, 1e-27, 1e-26, 1e-25, 1e-24, 1e-23, 1e-22, 1e-21, 1e-20, 1e-19, 1e-18, 1e-17,
+ 1e-16, 1e-15, 1e-14, 1e-13, 1e-12, 1e-11, 1e-10, 1e-9, 1e-8, 1e-7, 1e-6, 1e-5,
+ 1e-4, 1e-3, 1e-2, 1e-1, 1e0, 1e1, 1e2, 1e3, 1e4, 1e5, 1e6, 1e7,
+ 1e8, 1e9, 1e10, 1e11, 1e12, 1e13, 1e14, 1e15, 1e16, 1e17, 1e18, 1e19,
+ 1e20, 1e21, 1e22, 1e23, 1e24, 1e25, 1e26, 1e27, 1e28, 1e29, 1e30, 1e31,
+ 1e32, 1e33, 1e34, 1e35, 1e36, 1e37, 1e38, 1e39, 1e40, 1e41, 1e42, 1e43,
+ 1e44, 1e45, 1e46, 1e47, 1e48, 1e49, 1e50, 1e51, 1e52, 1e53, 1e54, 1e55,
+ 1e56, 1e57, 1e58, 1e59, 1e60, 1e61, 1e62, 1e63, 1e64, 1e65, 1e66, 1e67,
+ 1e68, 1e69, 1e70, 1e71, 1e72, 1e73, 1e74, 1e75, 1e76};
+
namespace {
template <typename Real, typename Derived>
@@ -267,7 +267,7 @@ static void AppendLittleEndianArrayToString(const std::array<uint64_t, n>& array
// *elem = dividend / 1e9;
// remainder = dividend % 1e9.
uint32_t hi = static_cast<uint32_t>(*elem >> 32);
- uint32_t lo = static_cast<uint32_t>(*elem & BitUtil::LeastSignificantBitMask(32));
+ uint32_t lo = static_cast<uint32_t>(*elem & BitUtil::LeastSignificantBitMask(32));
uint64_t dividend_hi = (static_cast<uint64_t>(remainder) << 32) | hi;
uint64_t quotient_hi = dividend_hi / k1e9;
remainder = static_cast<uint32_t>(dividend_hi % k1e9);
@@ -486,24 +486,24 @@ bool ParseDecimalComponents(const char* s, size_t size, DecimalComponents* out)
return pos == size;
}
-inline Status ToArrowStatus(DecimalStatus dstatus, int num_bits) {
- switch (dstatus) {
- case DecimalStatus::kSuccess:
- return Status::OK();
-
- case DecimalStatus::kDivideByZero:
- return Status::Invalid("Division by 0 in Decimal", num_bits);
-
- case DecimalStatus::kOverflow:
- return Status::Invalid("Overflow occurred during Decimal", num_bits, " operation.");
-
- case DecimalStatus::kRescaleDataLoss:
- return Status::Invalid("Rescaling Decimal", num_bits,
- " value would cause data loss");
- }
- return Status::OK();
-}
-
+inline Status ToArrowStatus(DecimalStatus dstatus, int num_bits) {
+ switch (dstatus) {
+ case DecimalStatus::kSuccess:
+ return Status::OK();
+
+ case DecimalStatus::kDivideByZero:
+ return Status::Invalid("Division by 0 in Decimal", num_bits);
+
+ case DecimalStatus::kOverflow:
+ return Status::Invalid("Overflow occurred during Decimal", num_bits, " operation.");
+
+ case DecimalStatus::kRescaleDataLoss:
+ return Status::Invalid("Rescaling Decimal", num_bits,
+ " value would cause data loss");
+ }
+ return Status::OK();
+}
+
} // namespace
Status Decimal128::FromString(const util::string_view& s, Decimal128* out,
@@ -609,7 +609,7 @@ Result<Decimal128> Decimal128::FromBigEndian(const uint8_t* bytes, int32_t lengt
int64_t high, low;
- if (ARROW_PREDICT_FALSE(length < kMinDecimalBytes || length > kMaxDecimalBytes)) {
+ if (ARROW_PREDICT_FALSE(length < kMinDecimalBytes || length > kMaxDecimalBytes)) {
return Status::Invalid("Length of byte array passed to Decimal128::FromBigEndian ",
"was ", length, ", but must be between ", kMinDecimalBytes,
" and ", kMaxDecimalBytes);
@@ -657,275 +657,275 @@ Result<Decimal128> Decimal128::FromBigEndian(const uint8_t* bytes, int32_t lengt
}
Status Decimal128::ToArrowStatus(DecimalStatus dstatus) const {
- return arrow::ToArrowStatus(dstatus, 128);
-}
-
-std::ostream& operator<<(std::ostream& os, const Decimal128& decimal) {
- os << decimal.ToIntegerString();
- return os;
-}
-
-Decimal256::Decimal256(const std::string& str) : Decimal256() {
- *this = Decimal256::FromString(str).ValueOrDie();
-}
-
-std::string Decimal256::ToIntegerString() const {
- std::string result;
- if (static_cast<int64_t>(little_endian_array()[3]) < 0) {
- result.push_back('-');
- Decimal256 abs = *this;
- abs.Negate();
- AppendLittleEndianArrayToString(abs.little_endian_array(), &result);
- } else {
- AppendLittleEndianArrayToString(little_endian_array(), &result);
- }
- return result;
-}
-
-std::string Decimal256::ToString(int32_t scale) const {
- std::string str(ToIntegerString());
- AdjustIntegerStringWithScale(scale, &str);
- return str;
-}
-
-Status Decimal256::FromString(const util::string_view& s, Decimal256* out,
- int32_t* precision, int32_t* scale) {
- if (s.empty()) {
- return Status::Invalid("Empty string cannot be converted to decimal");
- }
-
- DecimalComponents dec;
- if (!ParseDecimalComponents(s.data(), s.size(), &dec)) {
- return Status::Invalid("The string '", s, "' is not a valid decimal number");
- }
-
- // Count number of significant digits (without leading zeros)
- size_t first_non_zero = dec.whole_digits.find_first_not_of('0');
- size_t significant_digits = dec.fractional_digits.size();
- if (first_non_zero != std::string::npos) {
- significant_digits += dec.whole_digits.size() - first_non_zero;
- }
-
- if (precision != nullptr) {
- *precision = static_cast<int32_t>(significant_digits);
- }
-
- if (scale != nullptr) {
- if (dec.has_exponent) {
- auto adjusted_exponent = dec.exponent;
- auto len = static_cast<int32_t>(significant_digits);
- *scale = -adjusted_exponent + len - 1;
- } else {
- *scale = static_cast<int32_t>(dec.fractional_digits.size());
- }
- }
-
- if (out != nullptr) {
- std::array<uint64_t, 4> little_endian_array = {0, 0, 0, 0};
- ShiftAndAdd(dec.whole_digits, little_endian_array.data(), little_endian_array.size());
- ShiftAndAdd(dec.fractional_digits, little_endian_array.data(),
- little_endian_array.size());
- *out = Decimal256(little_endian_array);
-
- if (dec.sign == '-') {
- out->Negate();
- }
- }
-
- return Status::OK();
-}
-
-Status Decimal256::FromString(const std::string& s, Decimal256* out, int32_t* precision,
- int32_t* scale) {
- return FromString(util::string_view(s), out, precision, scale);
-}
-
-Status Decimal256::FromString(const char* s, Decimal256* out, int32_t* precision,
- int32_t* scale) {
- return FromString(util::string_view(s), out, precision, scale);
-}
-
-Result<Decimal256> Decimal256::FromString(const util::string_view& s) {
- Decimal256 out;
- RETURN_NOT_OK(FromString(s, &out, nullptr, nullptr));
- return std::move(out);
-}
-
-Result<Decimal256> Decimal256::FromString(const std::string& s) {
- return FromString(util::string_view(s));
-}
-
-Result<Decimal256> Decimal256::FromString(const char* s) {
- return FromString(util::string_view(s));
-}
-
-Result<Decimal256> Decimal256::FromBigEndian(const uint8_t* bytes, int32_t length) {
- static constexpr int32_t kMinDecimalBytes = 1;
- static constexpr int32_t kMaxDecimalBytes = 32;
-
- std::array<uint64_t, 4> little_endian_array;
-
- if (ARROW_PREDICT_FALSE(length < kMinDecimalBytes || length > kMaxDecimalBytes)) {
- return Status::Invalid("Length of byte array passed to Decimal128::FromBigEndian ",
- "was ", length, ", but must be between ", kMinDecimalBytes,
- " and ", kMaxDecimalBytes);
- }
-
- // Bytes are coming in big-endian, so the first byte is the MSB and therefore holds the
- // sign bit.
- const bool is_negative = static_cast<int8_t>(bytes[0]) < 0;
-
- for (int word_idx = 0; word_idx < 4; word_idx++) {
- const int32_t word_length = std::min(length, static_cast<int32_t>(sizeof(uint64_t)));
-
- if (word_length == 8) {
- // Full words can be assigned as is (and are UB with the shift below).
- little_endian_array[word_idx] =
- UInt64FromBigEndian(bytes + length - word_length, word_length);
- } else {
- // Sign extend the word its if necessary
- uint64_t word = -1 * is_negative;
- if (length > 0) {
- // Incorporate the actual values if present.
- // Shift left enough bits to make room for the incoming int64_t
- word = SafeLeftShift(word, word_length * CHAR_BIT);
- // Preserve the upper bits by inplace OR-ing the int64_t
- word |= UInt64FromBigEndian(bytes + length - word_length, word_length);
- }
- little_endian_array[word_idx] = word;
- }
- // Move on to the next word.
- length -= word_length;
- }
-
- return Decimal256(little_endian_array);
-}
-
-Status Decimal256::ToArrowStatus(DecimalStatus dstatus) const {
- return arrow::ToArrowStatus(dstatus, 256);
-}
-
-namespace {
-
-template <typename Real, typename Derived>
-struct Decimal256RealConversion {
- static Result<Decimal256> FromPositiveReal(Real real, int32_t precision,
- int32_t scale) {
- auto x = real;
- if (scale >= -76 && scale <= 76) {
- x *= Derived::powers_of_ten()[scale + 76];
- } else {
- x *= std::pow(static_cast<Real>(10), static_cast<Real>(scale));
- }
- x = std::nearbyint(x);
- const auto max_abs = Derived::powers_of_ten()[precision + 76];
- if (x >= max_abs) {
- return Status::Invalid("Cannot convert ", real,
- " to Decimal256(precision = ", precision,
- ", scale = ", scale, "): overflow");
- }
- // Extract parts
- const auto part3 = std::floor(std::ldexp(x, -192));
- x -= std::ldexp(part3, 192);
- const auto part2 = std::floor(std::ldexp(x, -128));
- x -= std::ldexp(part2, 128);
- const auto part1 = std::floor(std::ldexp(x, -64));
- x -= std::ldexp(part1, 64);
- const auto part0 = x;
-
- DCHECK_GE(part3, 0);
- DCHECK_LT(part3, 1.8446744073709552e+19); // 2**64
- DCHECK_GE(part2, 0);
- DCHECK_LT(part2, 1.8446744073709552e+19); // 2**64
- DCHECK_GE(part1, 0);
- DCHECK_LT(part1, 1.8446744073709552e+19); // 2**64
- DCHECK_GE(part0, 0);
- DCHECK_LT(part0, 1.8446744073709552e+19); // 2**64
- return Decimal256(std::array<uint64_t, 4>{
- static_cast<uint64_t>(part0), static_cast<uint64_t>(part1),
- static_cast<uint64_t>(part2), static_cast<uint64_t>(part3)});
- }
-
- static Result<Decimal256> FromReal(Real x, int32_t precision, int32_t scale) {
- DCHECK_GT(precision, 0);
- DCHECK_LE(precision, 76);
-
- if (!std::isfinite(x)) {
- return Status::Invalid("Cannot convert ", x, " to Decimal256");
- }
- if (x < 0) {
- ARROW_ASSIGN_OR_RAISE(auto dec, FromPositiveReal(-x, precision, scale));
- return dec.Negate();
- } else {
- // Includes negative zero
- return FromPositiveReal(x, precision, scale);
- }
- }
-
- static Real ToRealPositive(const Decimal256& decimal, int32_t scale) {
- DCHECK_GE(decimal, 0);
- Real x = 0;
- const auto& parts = decimal.little_endian_array();
- x += Derived::two_to_192(static_cast<Real>(parts[3]));
- x += Derived::two_to_128(static_cast<Real>(parts[2]));
- x += Derived::two_to_64(static_cast<Real>(parts[1]));
- x += static_cast<Real>(parts[0]);
- if (scale >= -76 && scale <= 76) {
- x *= Derived::powers_of_ten()[-scale + 76];
- } else {
- x *= std::pow(static_cast<Real>(10), static_cast<Real>(-scale));
- }
- return x;
- }
-
- static Real ToReal(Decimal256 decimal, int32_t scale) {
- if (decimal.little_endian_array()[3] & (1ULL << 63)) {
- // Convert the absolute value to avoid precision loss
- decimal.Negate();
- return -ToRealPositive(decimal, scale);
- } else {
- return ToRealPositive(decimal, scale);
- }
- }
-};
-
-struct Decimal256FloatConversion
- : public Decimal256RealConversion<float, Decimal256FloatConversion> {
- static constexpr const float* powers_of_ten() { return kFloatPowersOfTen76; }
-
- static float two_to_64(float x) { return x * 1.8446744e+19f; }
- static float two_to_128(float x) { return x == 0 ? 0 : INFINITY; }
- static float two_to_192(float x) { return x == 0 ? 0 : INFINITY; }
-};
-
-struct Decimal256DoubleConversion
- : public Decimal256RealConversion<double, Decimal256DoubleConversion> {
- static constexpr const double* powers_of_ten() { return kDoublePowersOfTen76; }
-
- static double two_to_64(double x) { return x * 1.8446744073709552e+19; }
- static double two_to_128(double x) { return x * 3.402823669209385e+38; }
- static double two_to_192(double x) { return x * 6.277101735386681e+57; }
-};
-
-} // namespace
-
-Result<Decimal256> Decimal256::FromReal(float x, int32_t precision, int32_t scale) {
- return Decimal256FloatConversion::FromReal(x, precision, scale);
-}
-
-Result<Decimal256> Decimal256::FromReal(double x, int32_t precision, int32_t scale) {
- return Decimal256DoubleConversion::FromReal(x, precision, scale);
-}
-
-float Decimal256::ToFloat(int32_t scale) const {
- return Decimal256FloatConversion::ToReal(*this, scale);
-}
-
-double Decimal256::ToDouble(int32_t scale) const {
- return Decimal256DoubleConversion::ToReal(*this, scale);
-}
-
-std::ostream& operator<<(std::ostream& os, const Decimal256& decimal) {
+ return arrow::ToArrowStatus(dstatus, 128);
+}
+
+std::ostream& operator<<(std::ostream& os, const Decimal128& decimal) {
+ os << decimal.ToIntegerString();
+ return os;
+}
+
+Decimal256::Decimal256(const std::string& str) : Decimal256() {
+ *this = Decimal256::FromString(str).ValueOrDie();
+}
+
+std::string Decimal256::ToIntegerString() const {
+ std::string result;
+ if (static_cast<int64_t>(little_endian_array()[3]) < 0) {
+ result.push_back('-');
+ Decimal256 abs = *this;
+ abs.Negate();
+ AppendLittleEndianArrayToString(abs.little_endian_array(), &result);
+ } else {
+ AppendLittleEndianArrayToString(little_endian_array(), &result);
+ }
+ return result;
+}
+
+std::string Decimal256::ToString(int32_t scale) const {
+ std::string str(ToIntegerString());
+ AdjustIntegerStringWithScale(scale, &str);
+ return str;
+}
+
+Status Decimal256::FromString(const util::string_view& s, Decimal256* out,
+ int32_t* precision, int32_t* scale) {
+ if (s.empty()) {
+ return Status::Invalid("Empty string cannot be converted to decimal");
+ }
+
+ DecimalComponents dec;
+ if (!ParseDecimalComponents(s.data(), s.size(), &dec)) {
+ return Status::Invalid("The string '", s, "' is not a valid decimal number");
+ }
+
+ // Count number of significant digits (without leading zeros)
+ size_t first_non_zero = dec.whole_digits.find_first_not_of('0');
+ size_t significant_digits = dec.fractional_digits.size();
+ if (first_non_zero != std::string::npos) {
+ significant_digits += dec.whole_digits.size() - first_non_zero;
+ }
+
+ if (precision != nullptr) {
+ *precision = static_cast<int32_t>(significant_digits);
+ }
+
+ if (scale != nullptr) {
+ if (dec.has_exponent) {
+ auto adjusted_exponent = dec.exponent;
+ auto len = static_cast<int32_t>(significant_digits);
+ *scale = -adjusted_exponent + len - 1;
+ } else {
+ *scale = static_cast<int32_t>(dec.fractional_digits.size());
+ }
+ }
+
+ if (out != nullptr) {
+ std::array<uint64_t, 4> little_endian_array = {0, 0, 0, 0};
+ ShiftAndAdd(dec.whole_digits, little_endian_array.data(), little_endian_array.size());
+ ShiftAndAdd(dec.fractional_digits, little_endian_array.data(),
+ little_endian_array.size());
+ *out = Decimal256(little_endian_array);
+
+ if (dec.sign == '-') {
+ out->Negate();
+ }
+ }
+
+ return Status::OK();
+}
+
+Status Decimal256::FromString(const std::string& s, Decimal256* out, int32_t* precision,
+ int32_t* scale) {
+ return FromString(util::string_view(s), out, precision, scale);
+}
+
+Status Decimal256::FromString(const char* s, Decimal256* out, int32_t* precision,
+ int32_t* scale) {
+ return FromString(util::string_view(s), out, precision, scale);
+}
+
+Result<Decimal256> Decimal256::FromString(const util::string_view& s) {
+ Decimal256 out;
+ RETURN_NOT_OK(FromString(s, &out, nullptr, nullptr));
+ return std::move(out);
+}
+
+Result<Decimal256> Decimal256::FromString(const std::string& s) {
+ return FromString(util::string_view(s));
+}
+
+Result<Decimal256> Decimal256::FromString(const char* s) {
+ return FromString(util::string_view(s));
+}
+
+Result<Decimal256> Decimal256::FromBigEndian(const uint8_t* bytes, int32_t length) {
+ static constexpr int32_t kMinDecimalBytes = 1;
+ static constexpr int32_t kMaxDecimalBytes = 32;
+
+ std::array<uint64_t, 4> little_endian_array;
+
+ if (ARROW_PREDICT_FALSE(length < kMinDecimalBytes || length > kMaxDecimalBytes)) {
+    return Status::Invalid("Length of byte array passed to Decimal256::FromBigEndian ",
+ "was ", length, ", but must be between ", kMinDecimalBytes,
+ " and ", kMaxDecimalBytes);
+ }
+
+ // Bytes are coming in big-endian, so the first byte is the MSB and therefore holds the
+ // sign bit.
+ const bool is_negative = static_cast<int8_t>(bytes[0]) < 0;
+
+ for (int word_idx = 0; word_idx < 4; word_idx++) {
+ const int32_t word_length = std::min(length, static_cast<int32_t>(sizeof(uint64_t)));
+
+ if (word_length == 8) {
+      // Full words can be assigned as is (the shift below would be UB for them).
+ little_endian_array[word_idx] =
+ UInt64FromBigEndian(bytes + length - word_length, word_length);
+ } else {
+      // Sign extend the word if necessary
+ uint64_t word = -1 * is_negative;
+ if (length > 0) {
+ // Incorporate the actual values if present.
+ // Shift left enough bits to make room for the incoming int64_t
+ word = SafeLeftShift(word, word_length * CHAR_BIT);
+ // Preserve the upper bits by inplace OR-ing the int64_t
+ word |= UInt64FromBigEndian(bytes + length - word_length, word_length);
+ }
+ little_endian_array[word_idx] = word;
+ }
+ // Move on to the next word.
+ length -= word_length;
+ }
+
+ return Decimal256(little_endian_array);
+}
+
+Status Decimal256::ToArrowStatus(DecimalStatus dstatus) const {
+ return arrow::ToArrowStatus(dstatus, 256);
+}
+
+namespace {
+
+template <typename Real, typename Derived>
+struct Decimal256RealConversion {
+ static Result<Decimal256> FromPositiveReal(Real real, int32_t precision,
+ int32_t scale) {
+ auto x = real;
+ if (scale >= -76 && scale <= 76) {
+ x *= Derived::powers_of_ten()[scale + 76];
+ } else {
+ x *= std::pow(static_cast<Real>(10), static_cast<Real>(scale));
+ }
+ x = std::nearbyint(x);
+ const auto max_abs = Derived::powers_of_ten()[precision + 76];
+ if (x >= max_abs) {
+ return Status::Invalid("Cannot convert ", real,
+ " to Decimal256(precision = ", precision,
+ ", scale = ", scale, "): overflow");
+ }
+ // Extract parts
+ const auto part3 = std::floor(std::ldexp(x, -192));
+ x -= std::ldexp(part3, 192);
+ const auto part2 = std::floor(std::ldexp(x, -128));
+ x -= std::ldexp(part2, 128);
+ const auto part1 = std::floor(std::ldexp(x, -64));
+ x -= std::ldexp(part1, 64);
+ const auto part0 = x;
+
+ DCHECK_GE(part3, 0);
+ DCHECK_LT(part3, 1.8446744073709552e+19); // 2**64
+ DCHECK_GE(part2, 0);
+ DCHECK_LT(part2, 1.8446744073709552e+19); // 2**64
+ DCHECK_GE(part1, 0);
+ DCHECK_LT(part1, 1.8446744073709552e+19); // 2**64
+ DCHECK_GE(part0, 0);
+ DCHECK_LT(part0, 1.8446744073709552e+19); // 2**64
+ return Decimal256(std::array<uint64_t, 4>{
+ static_cast<uint64_t>(part0), static_cast<uint64_t>(part1),
+ static_cast<uint64_t>(part2), static_cast<uint64_t>(part3)});
+ }
+
+ static Result<Decimal256> FromReal(Real x, int32_t precision, int32_t scale) {
+ DCHECK_GT(precision, 0);
+ DCHECK_LE(precision, 76);
+
+ if (!std::isfinite(x)) {
+ return Status::Invalid("Cannot convert ", x, " to Decimal256");
+ }
+ if (x < 0) {
+ ARROW_ASSIGN_OR_RAISE(auto dec, FromPositiveReal(-x, precision, scale));
+ return dec.Negate();
+ } else {
+ // Includes negative zero
+ return FromPositiveReal(x, precision, scale);
+ }
+ }
+
+ static Real ToRealPositive(const Decimal256& decimal, int32_t scale) {
+ DCHECK_GE(decimal, 0);
+ Real x = 0;
+ const auto& parts = decimal.little_endian_array();
+ x += Derived::two_to_192(static_cast<Real>(parts[3]));
+ x += Derived::two_to_128(static_cast<Real>(parts[2]));
+ x += Derived::two_to_64(static_cast<Real>(parts[1]));
+ x += static_cast<Real>(parts[0]);
+ if (scale >= -76 && scale <= 76) {
+ x *= Derived::powers_of_ten()[-scale + 76];
+ } else {
+ x *= std::pow(static_cast<Real>(10), static_cast<Real>(-scale));
+ }
+ return x;
+ }
+
+ static Real ToReal(Decimal256 decimal, int32_t scale) {
+ if (decimal.little_endian_array()[3] & (1ULL << 63)) {
+ // Convert the absolute value to avoid precision loss
+ decimal.Negate();
+ return -ToRealPositive(decimal, scale);
+ } else {
+ return ToRealPositive(decimal, scale);
+ }
+ }
+};
+
+struct Decimal256FloatConversion
+ : public Decimal256RealConversion<float, Decimal256FloatConversion> {
+ static constexpr const float* powers_of_ten() { return kFloatPowersOfTen76; }
+
+ static float two_to_64(float x) { return x * 1.8446744e+19f; }
+ static float two_to_128(float x) { return x == 0 ? 0 : INFINITY; }
+ static float two_to_192(float x) { return x == 0 ? 0 : INFINITY; }
+};
+
+struct Decimal256DoubleConversion
+ : public Decimal256RealConversion<double, Decimal256DoubleConversion> {
+ static constexpr const double* powers_of_ten() { return kDoublePowersOfTen76; }
+
+ static double two_to_64(double x) { return x * 1.8446744073709552e+19; }
+ static double two_to_128(double x) { return x * 3.402823669209385e+38; }
+ static double two_to_192(double x) { return x * 6.277101735386681e+57; }
+};
+
+} // namespace
+
+Result<Decimal256> Decimal256::FromReal(float x, int32_t precision, int32_t scale) {
+ return Decimal256FloatConversion::FromReal(x, precision, scale);
+}
+
+Result<Decimal256> Decimal256::FromReal(double x, int32_t precision, int32_t scale) {
+ return Decimal256DoubleConversion::FromReal(x, precision, scale);
+}
+
+float Decimal256::ToFloat(int32_t scale) const {
+ return Decimal256FloatConversion::ToReal(*this, scale);
+}
+
+double Decimal256::ToDouble(int32_t scale) const {
+ return Decimal256DoubleConversion::ToReal(*this, scale);
+}
+
+std::ostream& operator<<(std::ostream& os, const Decimal256& decimal) {
os << decimal.ToIntegerString();
return os;
}
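To make the FromString bookkeeping above concrete: precision counts significant digits (leading zeros of the whole part stripped), and scale is either the fractional digit count or, with an exponent, -exponent + digits - 1. A small sketch with values worked out from that arithmetic:

#include <cassert>
#include "arrow/util/decimal.h"

// Sketch: precision/scale reported by Decimal256::FromString, per the
// parsing code above.
void FromStringExamples() {
  arrow::Decimal256 v;
  int32_t precision = 0, scale = 0;
  // 5 significant digits, 3 fractional digits.
  assert(arrow::Decimal256::FromString("12.345", &v, &precision, &scale).ok());
  assert(precision == 5 && scale == 3);
  // With an exponent: scale = -3 + 3 - 1 = -1, i.e. the stored digits
  // 123 are scaled by 10^1 to represent 1230.
  assert(arrow::Decimal256::FromString("1.23E+3", &v, &precision, &scale).ok());
  assert(precision == 3 && scale == -1);
}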
diff --git a/contrib/libs/apache/arrow/cpp/src/arrow/util/decimal.h b/contrib/libs/apache/arrow/cpp/src/arrow/util/decimal.h
index 4c6cc9dd1db..4a158728833 100644
--- a/contrib/libs/apache/arrow/cpp/src/arrow/util/decimal.h
+++ b/contrib/libs/apache/arrow/cpp/src/arrow/util/decimal.h
@@ -55,8 +55,8 @@ class ARROW_EXPORT Decimal128 : public BasicDecimal128 {
/// \endcond
/// \brief constructor creates a Decimal128 from a BasicDecimal128.
- constexpr Decimal128(const BasicDecimal128& value) noexcept // NOLINT runtime/explicit
- : BasicDecimal128(value) {}
+ constexpr Decimal128(const BasicDecimal128& value) noexcept // NOLINT runtime/explicit
+ : BasicDecimal128(value) {}
/// \brief Parse the number from a base 10 string representation.
explicit Decimal128(const std::string& value);
@@ -173,119 +173,119 @@ struct Decimal128::ToRealConversion<double> {
}
};
-/// Represents a signed 256-bit integer in two's complement.
-/// The max decimal precision that can be safely represented is
-/// 76 significant digits.
-///
-/// The implementation is split into two parts :
-///
-/// 1. BasicDecimal256
-/// - can be safely compiled to IR without references to libstdc++.
-/// 2. Decimal256
-/// - (TODO) has additional functionality on top of BasicDecimal256 to deal with
-/// strings and streams.
-class ARROW_EXPORT Decimal256 : public BasicDecimal256 {
- public:
- /// \cond FALSE
- // (need to avoid a duplicate definition in Sphinx)
- using BasicDecimal256::BasicDecimal256;
- /// \endcond
-
- /// \brief constructor creates a Decimal256 from a BasicDecimal256.
- constexpr Decimal256(const BasicDecimal256& value) noexcept : BasicDecimal256(value) {}
-
- /// \brief Parse the number from a base 10 string representation.
- explicit Decimal256(const std::string& value);
-
- /// \brief Empty constructor creates a Decimal256 with a value of 0.
- // This is required on some older compilers.
- constexpr Decimal256() noexcept : BasicDecimal256() {}
-
- /// \brief Convert the Decimal256 value to a base 10 decimal string with the given
- /// scale.
- std::string ToString(int32_t scale) const;
-
- /// \brief Convert the value to an integer string
- std::string ToIntegerString() const;
-
- /// \brief Convert a decimal string to a Decimal256 value, optionally including
- /// precision and scale if they're passed in and not null.
- static Status FromString(const util::string_view& s, Decimal256* out,
- int32_t* precision, int32_t* scale = NULLPTR);
- static Status FromString(const std::string& s, Decimal256* out, int32_t* precision,
- int32_t* scale = NULLPTR);
- static Status FromString(const char* s, Decimal256* out, int32_t* precision,
- int32_t* scale = NULLPTR);
- static Result<Decimal256> FromString(const util::string_view& s);
- static Result<Decimal256> FromString(const std::string& s);
- static Result<Decimal256> FromString(const char* s);
-
- /// \brief Convert Decimal256 from one scale to another
- Result<Decimal256> Rescale(int32_t original_scale, int32_t new_scale) const {
- Decimal256 out;
- auto dstatus = BasicDecimal256::Rescale(original_scale, new_scale, &out);
- ARROW_RETURN_NOT_OK(ToArrowStatus(dstatus));
- return std::move(out);
- }
-
- /// Divide this number by right and return the result.
- ///
- /// This operation is not destructive.
- /// The answer rounds to zero. Signs work like:
- /// 21 / 5 -> 4, 1
- /// -21 / 5 -> -4, -1
- /// 21 / -5 -> -4, 1
- /// -21 / -5 -> 4, -1
- /// \param[in] divisor the number to divide by
- /// \return the pair of the quotient and the remainder
- Result<std::pair<Decimal256, Decimal256>> Divide(const Decimal256& divisor) const {
- std::pair<Decimal256, Decimal256> result;
- auto dstatus = BasicDecimal256::Divide(divisor, &result.first, &result.second);
- ARROW_RETURN_NOT_OK(ToArrowStatus(dstatus));
- return std::move(result);
- }
-
- /// \brief Convert from a big-endian byte representation. The length must be
- /// between 1 and 32.
- /// \return error status if the length is an invalid value
- static Result<Decimal256> FromBigEndian(const uint8_t* data, int32_t length);
-
- static Result<Decimal256> FromReal(double real, int32_t precision, int32_t scale);
- static Result<Decimal256> FromReal(float real, int32_t precision, int32_t scale);
-
- /// \brief Convert to a floating-point number (scaled).
- /// May return infinity in case of overflow.
- float ToFloat(int32_t scale) const;
- /// \brief Convert to a floating-point number (scaled)
- double ToDouble(int32_t scale) const;
-
- /// \brief Convert to a floating-point number (scaled)
- template <typename T>
- T ToReal(int32_t scale) const {
- return ToRealConversion<T>::ToReal(*this, scale);
- }
-
- friend ARROW_EXPORT std::ostream& operator<<(std::ostream& os,
- const Decimal256& decimal);
-
- private:
- /// Converts internal error code to Status
- Status ToArrowStatus(DecimalStatus dstatus) const;
-
- template <typename T>
- struct ToRealConversion {};
-};
-
-template <>
-struct Decimal256::ToRealConversion<float> {
- static float ToReal(const Decimal256& dec, int32_t scale) { return dec.ToFloat(scale); }
-};
-
-template <>
-struct Decimal256::ToRealConversion<double> {
- static double ToReal(const Decimal256& dec, int32_t scale) {
- return dec.ToDouble(scale);
- }
-};
-
+/// Represents a signed 256-bit integer in two's complement.
+/// The max decimal precision that can be safely represented is
+/// 76 significant digits.
+///
+/// The implementation is split into two parts:
+///
+/// 1. BasicDecimal256
+/// - can be safely compiled to IR without references to libstdc++.
+/// 2. Decimal256
+/// - (TODO) has additional functionality on top of BasicDecimal256 to deal with
+/// strings and streams.
+class ARROW_EXPORT Decimal256 : public BasicDecimal256 {
+ public:
+ /// \cond FALSE
+ // (need to avoid a duplicate definition in Sphinx)
+ using BasicDecimal256::BasicDecimal256;
+ /// \endcond
+
+ /// \brief constructor creates a Decimal256 from a BasicDecimal256.
+ constexpr Decimal256(const BasicDecimal256& value) noexcept : BasicDecimal256(value) {}
+
+ /// \brief Parse the number from a base 10 string representation.
+ explicit Decimal256(const std::string& value);
+
+ /// \brief Empty constructor creates a Decimal256 with a value of 0.
+ // This is required on some older compilers.
+ constexpr Decimal256() noexcept : BasicDecimal256() {}
+
+ /// \brief Convert the Decimal256 value to a base 10 decimal string with the given
+ /// scale.
+ std::string ToString(int32_t scale) const;
+
+ /// \brief Convert the value to an integer string
+ std::string ToIntegerString() const;
+
+ /// \brief Convert a decimal string to a Decimal256 value, optionally including
+ /// precision and scale if they're passed in and not null.
+ static Status FromString(const util::string_view& s, Decimal256* out,
+ int32_t* precision, int32_t* scale = NULLPTR);
+ static Status FromString(const std::string& s, Decimal256* out, int32_t* precision,
+ int32_t* scale = NULLPTR);
+ static Status FromString(const char* s, Decimal256* out, int32_t* precision,
+ int32_t* scale = NULLPTR);
+ static Result<Decimal256> FromString(const util::string_view& s);
+ static Result<Decimal256> FromString(const std::string& s);
+ static Result<Decimal256> FromString(const char* s);
+
+ /// \brief Convert Decimal256 from one scale to another
+ Result<Decimal256> Rescale(int32_t original_scale, int32_t new_scale) const {
+ Decimal256 out;
+ auto dstatus = BasicDecimal256::Rescale(original_scale, new_scale, &out);
+ ARROW_RETURN_NOT_OK(ToArrowStatus(dstatus));
+ return std::move(out);
+ }
+
+ /// Divide this number by right and return the result.
+ ///
+ /// This operation is not destructive.
+ /// The answer rounds to zero. Signs work like:
+ /// 21 / 5 -> 4, 1
+ /// -21 / 5 -> -4, -1
+ /// 21 / -5 -> -4, 1
+ /// -21 / -5 -> 4, -1
+ /// \param[in] divisor the number to divide by
+ /// \return the pair of the quotient and the remainder
+ Result<std::pair<Decimal256, Decimal256>> Divide(const Decimal256& divisor) const {
+ std::pair<Decimal256, Decimal256> result;
+ auto dstatus = BasicDecimal256::Divide(divisor, &result.first, &result.second);
+ ARROW_RETURN_NOT_OK(ToArrowStatus(dstatus));
+ return std::move(result);
+ }
+
+ /// \brief Convert from a big-endian byte representation. The length must be
+ /// between 1 and 32.
+ /// \return error status if the length is an invalid value
+ static Result<Decimal256> FromBigEndian(const uint8_t* data, int32_t length);
+
+ static Result<Decimal256> FromReal(double real, int32_t precision, int32_t scale);
+ static Result<Decimal256> FromReal(float real, int32_t precision, int32_t scale);
+
+ /// \brief Convert to a floating-point number (scaled).
+ /// May return infinity in case of overflow.
+ float ToFloat(int32_t scale) const;
+ /// \brief Convert to a floating-point number (scaled)
+ double ToDouble(int32_t scale) const;
+
+ /// \brief Convert to a floating-point number (scaled)
+ template <typename T>
+ T ToReal(int32_t scale) const {
+ return ToRealConversion<T>::ToReal(*this, scale);
+ }
+
+ friend ARROW_EXPORT std::ostream& operator<<(std::ostream& os,
+ const Decimal256& decimal);
+
+ private:
+ /// Converts internal error code to Status
+ Status ToArrowStatus(DecimalStatus dstatus) const;
+
+ template <typename T>
+ struct ToRealConversion {};
+};
+
+template <>
+struct Decimal256::ToRealConversion<float> {
+ static float ToReal(const Decimal256& dec, int32_t scale) { return dec.ToFloat(scale); }
+};
+
+template <>
+struct Decimal256::ToRealConversion<double> {
+ static double ToReal(const Decimal256& dec, int32_t scale) {
+ return dec.ToDouble(scale);
+ }
+};
+
} // namespace arrow
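The Divide() documentation above pins down truncation-toward-zero semantics, with the remainder taking the dividend's sign. A short sketch checking one row of the documented sign table, assuming the integer constructor inherited from BasicDecimal256:

#include <cassert>
#include "arrow/util/decimal.h"

// Sketch: -21 / 5 truncates toward zero to -4 with remainder -1, matching
// the sign table in the doc comment.
void DivideExample() {
  arrow::Decimal256 a(-21), b(5);
  auto qr = a.Divide(b).ValueOrDie();
  assert(qr.first == arrow::Decimal256(-4));
  assert(qr.second == arrow::Decimal256(-1));
}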
diff --git a/contrib/libs/apache/arrow/cpp/src/arrow/util/delimiting.cc b/contrib/libs/apache/arrow/cpp/src/arrow/util/delimiting.cc
index a499fdd2562..fe1b6ea3126 100644
--- a/contrib/libs/apache/arrow/cpp/src/arrow/util/delimiting.cc
+++ b/contrib/libs/apache/arrow/cpp/src/arrow/util/delimiting.cc
@@ -17,7 +17,7 @@
#include "arrow/util/delimiting.h"
#include "arrow/buffer.h"
-#include "arrow/util/logging.h"
+#include "arrow/util/logging.h"
namespace arrow {
@@ -61,35 +61,35 @@ class NewlineBoundaryFinder : public BoundaryFinder {
return Status::OK();
}
- Status FindNth(util::string_view partial, util::string_view block, int64_t count,
- int64_t* out_pos, int64_t* num_found) override {
- DCHECK(partial.find_first_of(newline_delimiters) == util::string_view::npos);
-
- int64_t found = 0;
- int64_t pos = kNoDelimiterFound;
-
- auto cur_pos = block.find_first_of(newline_delimiters);
- while (cur_pos != util::string_view::npos) {
- if (block[cur_pos] == '\r' && cur_pos + 1 < block.length() &&
- block[cur_pos + 1] == '\n') {
- cur_pos += 2;
- } else {
- ++cur_pos;
- }
-
- pos = static_cast<int64_t>(cur_pos);
- if (++found >= count) {
- break;
- }
-
- cur_pos = block.find_first_of(newline_delimiters, cur_pos);
- }
-
- *out_pos = pos;
- *num_found = found;
- return Status::OK();
- }
-
+ Status FindNth(util::string_view partial, util::string_view block, int64_t count,
+ int64_t* out_pos, int64_t* num_found) override {
+ DCHECK(partial.find_first_of(newline_delimiters) == util::string_view::npos);
+
+ int64_t found = 0;
+ int64_t pos = kNoDelimiterFound;
+
+ auto cur_pos = block.find_first_of(newline_delimiters);
+ while (cur_pos != util::string_view::npos) {
+ if (block[cur_pos] == '\r' && cur_pos + 1 < block.length() &&
+ block[cur_pos + 1] == '\n') {
+ cur_pos += 2;
+ } else {
+ ++cur_pos;
+ }
+
+ pos = static_cast<int64_t>(cur_pos);
+ if (++found >= count) {
+ break;
+ }
+
+ cur_pos = block.find_first_of(newline_delimiters, cur_pos);
+ }
+
+ *out_pos = pos;
+ *num_found = found;
+ return Status::OK();
+ }
+
protected:
static constexpr const char* newline_delimiters = "\r\n";
};
@@ -168,26 +168,26 @@ Status Chunker::ProcessFinal(std::shared_ptr<Buffer> partial,
return Status::OK();
}
-Status Chunker::ProcessSkip(std::shared_ptr<Buffer> partial,
- std::shared_ptr<Buffer> block, bool final, int64_t* count,
- std::shared_ptr<Buffer>* rest) {
- DCHECK_GT(*count, 0);
- int64_t pos;
- int64_t num_found;
- ARROW_RETURN_NOT_OK(boundary_finder_->FindNth(
- util::string_view(*partial), util::string_view(*block), *count, &pos, &num_found));
- if (pos == BoundaryFinder::kNoDelimiterFound) {
- return StraddlingTooLarge();
- }
- if (ARROW_PREDICT_FALSE(final && *count > num_found && block->size() != pos)) {
- // Skip the last row in the final block which does not have a delimiter
- ++num_found;
- *rest = SliceBuffer(block, 0, 0);
- } else {
- *rest = SliceBuffer(block, pos);
- }
- *count -= num_found;
- return Status::OK();
-}
-
+Status Chunker::ProcessSkip(std::shared_ptr<Buffer> partial,
+ std::shared_ptr<Buffer> block, bool final, int64_t* count,
+ std::shared_ptr<Buffer>* rest) {
+ DCHECK_GT(*count, 0);
+ int64_t pos;
+ int64_t num_found;
+ ARROW_RETURN_NOT_OK(boundary_finder_->FindNth(
+ util::string_view(*partial), util::string_view(*block), *count, &pos, &num_found));
+ if (pos == BoundaryFinder::kNoDelimiterFound) {
+ return StraddlingTooLarge();
+ }
+ if (ARROW_PREDICT_FALSE(final && *count > num_found && block->size() != pos)) {
+ // Skip the last row in the final block which does not have a delimiter
+ ++num_found;
+ *rest = SliceBuffer(block, 0, 0);
+ } else {
+ *rest = SliceBuffer(block, pos);
+ }
+ *count -= num_found;
+ return Status::OK();
+}
+
} // namespace arrow
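One subtlety in FindNth above: a "\r\n" pair is consumed as a single delimiter, and out_pos always points just past the last delimiter found. A standalone re-implementation of the same loop, with a worked trace in the comments:

#include <cstdint>
#include <string>

// Sketch mirroring NewlineBoundaryFinder::FindNth. On block = "a\r\nb\nc"
// with count = 2: "\r\n" at index 1 counts once (cursor -> 3), "\n" at
// index 4 counts again (cursor -> 5), so it returns 5 with *num_found = 2.
int64_t FindNthNewline(const std::string& block, int64_t count,
                       int64_t* num_found) {
  int64_t found = 0;
  int64_t pos = -1;  // kNoDelimiterFound
  auto cur = block.find_first_of("\r\n");
  while (cur != std::string::npos) {
    if (block[cur] == '\r' && cur + 1 < block.length() &&
        block[cur + 1] == '\n') {
      cur += 2;  // CRLF counts as one delimiter
    } else {
      ++cur;
    }
    pos = static_cast<int64_t>(cur);
    if (++found >= count) break;
    cur = block.find_first_of("\r\n", cur);
  }
  *num_found = found;
  return pos;
}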
diff --git a/contrib/libs/apache/arrow/cpp/src/arrow/util/delimiting.h b/contrib/libs/apache/arrow/cpp/src/arrow/util/delimiting.h
index 0ffe652441d..b4b868340db 100644
--- a/contrib/libs/apache/arrow/cpp/src/arrow/util/delimiting.h
+++ b/contrib/libs/apache/arrow/cpp/src/arrow/util/delimiting.h
@@ -53,19 +53,19 @@ class ARROW_EXPORT BoundaryFinder {
/// `out_pos` will be -1 if no delimiter is found.
virtual Status FindLast(util::string_view block, int64_t* out_pos) = 0;
- /// \brief Find the position of the Nth delimiter inside the block
- ///
- /// `partial` is taken to be the beginning of the block, and `block`
- /// its continuation. Also, `partial` doesn't contain a delimiter.
- ///
- /// The returned `out_pos` is relative to `block`'s start and should point
- /// to the first character after the first delimiter.
- /// `out_pos` will be -1 if no delimiter is found.
- ///
- /// The returned `num_found` is the number of delimiters actually found
- virtual Status FindNth(util::string_view partial, util::string_view block,
- int64_t count, int64_t* out_pos, int64_t* num_found) = 0;
-
+ /// \brief Find the position of the Nth delimiter inside the block
+ ///
+ /// `partial` is taken to be the beginning of the block, and `block`
+ /// its continuation. Also, `partial` doesn't contain a delimiter.
+ ///
+ /// The returned `out_pos` is relative to `block`'s start and should point
+ /// to the first character after the first delimiter.
+ /// `out_pos` will be -1 if no delimiter is found.
+ ///
+ /// The returned `num_found` is the number of delimiters actually found
+ virtual Status FindNth(util::string_view partial, util::string_view block,
+ int64_t count, int64_t* out_pos, int64_t* num_found) = 0;
+
static constexpr int64_t kNoDelimiterFound = -1;
protected:
@@ -151,27 +151,27 @@ class ARROW_EXPORT Chunker {
Status ProcessFinal(std::shared_ptr<Buffer> partial, std::shared_ptr<Buffer> block,
std::shared_ptr<Buffer>* completion, std::shared_ptr<Buffer>* rest);
- /// \brief Skip count number of rows
- /// Pre-conditions:
- /// - `partial` is the start of a valid block of delimited data
- /// (i.e. starts just after a delimiter)
- /// - `block` follows `partial` in file order
- ///
- /// Post-conditions:
- /// - `count` is updated to indicate the number of rows that still need to be skipped
- /// - If `count` is > 0 then `rest` is an incomplete block that should be a future
- /// `partial`
- /// - Else `rest` could be one or more valid blocks of delimited data which need to be
- /// parsed
- ///
- /// \param[in] partial incomplete delimited data
- /// \param[in] block delimited data following partial
- /// \param[in] final whether this is the final chunk
- /// \param[in,out] count number of rows that need to be skipped
- /// \param[out] rest subrange of block containing what was not skipped
- Status ProcessSkip(std::shared_ptr<Buffer> partial, std::shared_ptr<Buffer> block,
- bool final, int64_t* count, std::shared_ptr<Buffer>* rest);
-
+  /// \brief Skip up to `count` rows
+ /// Pre-conditions:
+ /// - `partial` is the start of a valid block of delimited data
+ /// (i.e. starts just after a delimiter)
+ /// - `block` follows `partial` in file order
+ ///
+ /// Post-conditions:
+ /// - `count` is updated to indicate the number of rows that still need to be skipped
+ /// - If `count` is > 0 then `rest` is an incomplete block that should be a future
+ /// `partial`
+ /// - Else `rest` could be one or more valid blocks of delimited data which need to be
+ /// parsed
+ ///
+ /// \param[in] partial incomplete delimited data
+ /// \param[in] block delimited data following partial
+ /// \param[in] final whether this is the final chunk
+ /// \param[in,out] count number of rows that need to be skipped
+ /// \param[out] rest subrange of block containing what was not skipped
+ Status ProcessSkip(std::shared_ptr<Buffer> partial, std::shared_ptr<Buffer> block,
+ bool final, int64_t* count, std::shared_ptr<Buffer>* rest);
+
protected:
ARROW_DISALLOW_COPY_AND_ASSIGN(Chunker);
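Per the pre/post-conditions above, ProcessSkip is meant to be driven block by block: while *count stays positive, `rest` is an incomplete tail to be carried forward as the next `partial`; once it reaches zero, `rest` holds unskipped data ready for normal parsing. A hedged caller-side sketch:

#include <memory>
#include "arrow/buffer.h"
#include "arrow/util/delimiting.h"

// Sketch: skip header rows with Chunker::ProcessSkip, then hand `rest`
// back to the regular Process()/ProcessFinal() pipeline.
arrow::Status SkipHeaderRows(arrow::Chunker* chunker,
                             std::shared_ptr<arrow::Buffer> partial,
                             std::shared_ptr<arrow::Buffer> block,
                             bool final_block, int64_t* rows_to_skip,
                             std::shared_ptr<arrow::Buffer>* rest) {
  ARROW_RETURN_NOT_OK(
      chunker->ProcessSkip(partial, block, final_block, rows_to_skip, rest));
  // If *rows_to_skip > 0, *rest becomes the next `partial`; otherwise
  // *rest is ready to be parsed as one or more complete rows.
  return arrow::Status::OK();
}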
diff --git a/contrib/libs/apache/arrow/cpp/src/arrow/util/endian.h b/contrib/libs/apache/arrow/cpp/src/arrow/util/endian.h
index a1d953d12ad..0cb2e44d275 100644
--- a/contrib/libs/apache/arrow/cpp/src/arrow/util/endian.h
+++ b/contrib/libs/apache/arrow/cpp/src/arrow/util/endian.h
@@ -1,181 +1,181 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-#pragma once
-
-#ifdef _WIN32
-#define ARROW_LITTLE_ENDIAN 1
-#else
-#if defined(__APPLE__) || defined(__FreeBSD__)
-#include <machine/endian.h> // IWYU pragma: keep
-#elif defined(sun) || defined(__sun)
-#include <sys/byteorder.h> // IWYU pragma: keep
-#else
-#include <endian.h> // IWYU pragma: keep
-#endif
-#
-#ifndef __BYTE_ORDER__
-#error "__BYTE_ORDER__ not defined"
-#endif
-#
-#ifndef __ORDER_LITTLE_ENDIAN__
-#error "__ORDER_LITTLE_ENDIAN__ not defined"
-#endif
-#
-#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
-#define ARROW_LITTLE_ENDIAN 1
-#else
-#define ARROW_LITTLE_ENDIAN 0
-#endif
-#endif
-
-#if defined(_MSC_VER)
-#include <intrin.h> // IWYU pragma: keep
-#define ARROW_BYTE_SWAP64 _byteswap_uint64
-#define ARROW_BYTE_SWAP32 _byteswap_ulong
-#else
-#define ARROW_BYTE_SWAP64 __builtin_bswap64
-#define ARROW_BYTE_SWAP32 __builtin_bswap32
-#endif
-
-#include "arrow/util/type_traits.h"
-#include "arrow/util/ubsan.h"
-
-namespace arrow {
-namespace BitUtil {
-
-//
-// Byte-swap 16-bit, 32-bit and 64-bit values
-//
-
-// Swap the byte order (i.e. endianness)
-static inline int64_t ByteSwap(int64_t value) { return ARROW_BYTE_SWAP64(value); }
-static inline uint64_t ByteSwap(uint64_t value) {
- return static_cast<uint64_t>(ARROW_BYTE_SWAP64(value));
-}
-static inline int32_t ByteSwap(int32_t value) { return ARROW_BYTE_SWAP32(value); }
-static inline uint32_t ByteSwap(uint32_t value) {
- return static_cast<uint32_t>(ARROW_BYTE_SWAP32(value));
-}
-static inline int16_t ByteSwap(int16_t value) {
- constexpr auto m = static_cast<int16_t>(0xff);
- return static_cast<int16_t>(((value >> 8) & m) | ((value & m) << 8));
-}
-static inline uint16_t ByteSwap(uint16_t value) {
- return static_cast<uint16_t>(ByteSwap(static_cast<int16_t>(value)));
-}
-static inline uint8_t ByteSwap(uint8_t value) { return value; }
-static inline int8_t ByteSwap(int8_t value) { return value; }
-static inline double ByteSwap(double value) {
- const uint64_t swapped = ARROW_BYTE_SWAP64(util::SafeCopy<uint64_t>(value));
- return util::SafeCopy<double>(swapped);
-}
-static inline float ByteSwap(float value) {
- const uint32_t swapped = ARROW_BYTE_SWAP32(util::SafeCopy<uint32_t>(value));
- return util::SafeCopy<float>(swapped);
-}
-
-// Write the swapped bytes into dst. Src and dst cannot overlap.
-static inline void ByteSwap(void* dst, const void* src, int len) {
- switch (len) {
- case 1:
- *reinterpret_cast<int8_t*>(dst) = *reinterpret_cast<const int8_t*>(src);
- return;
- case 2:
- *reinterpret_cast<int16_t*>(dst) = ByteSwap(*reinterpret_cast<const int16_t*>(src));
- return;
- case 4:
- *reinterpret_cast<int32_t*>(dst) = ByteSwap(*reinterpret_cast<const int32_t*>(src));
- return;
- case 8:
- *reinterpret_cast<int64_t*>(dst) = ByteSwap(*reinterpret_cast<const int64_t*>(src));
- return;
- default:
- break;
- }
-
- auto d = reinterpret_cast<uint8_t*>(dst);
- auto s = reinterpret_cast<const uint8_t*>(src);
- for (int i = 0; i < len; ++i) {
- d[i] = s[len - i - 1];
- }
-}
-
-// Convert to little/big endian format from the machine's native endian format.
-#if ARROW_LITTLE_ENDIAN
-template <typename T, typename = internal::EnableIfIsOneOf<
- T, int64_t, uint64_t, int32_t, uint32_t, int16_t, uint16_t,
- uint8_t, int8_t, float, double>>
-static inline T ToBigEndian(T value) {
- return ByteSwap(value);
-}
-
-template <typename T, typename = internal::EnableIfIsOneOf<
- T, int64_t, uint64_t, int32_t, uint32_t, int16_t, uint16_t,
- uint8_t, int8_t, float, double>>
-static inline T ToLittleEndian(T value) {
- return value;
-}
-#else
-template <typename T, typename = internal::EnableIfIsOneOf<
- T, int64_t, uint64_t, int32_t, uint32_t, int16_t, uint16_t,
- uint8_t, int8_t, float, double>>
-static inline T ToBigEndian(T value) {
- return value;
-}
-
-template <typename T, typename = internal::EnableIfIsOneOf<
- T, int64_t, uint64_t, int32_t, uint32_t, int16_t, uint16_t,
- uint8_t, int8_t, float, double>>
-static inline T ToLittleEndian(T value) {
- return ByteSwap(value);
-}
-#endif
-
-// Convert from big/little endian format to the machine's native endian format.
-#if ARROW_LITTLE_ENDIAN
-template <typename T, typename = internal::EnableIfIsOneOf<
- T, int64_t, uint64_t, int32_t, uint32_t, int16_t, uint16_t,
- uint8_t, int8_t, float, double>>
-static inline T FromBigEndian(T value) {
- return ByteSwap(value);
-}
-
-template <typename T, typename = internal::EnableIfIsOneOf<
- T, int64_t, uint64_t, int32_t, uint32_t, int16_t, uint16_t,
- uint8_t, int8_t, float, double>>
-static inline T FromLittleEndian(T value) {
- return value;
-}
-#else
-template <typename T, typename = internal::EnableIfIsOneOf<
- T, int64_t, uint64_t, int32_t, uint32_t, int16_t, uint16_t,
- uint8_t, int8_t, float, double>>
-static inline T FromBigEndian(T value) {
- return value;
-}
-
-template <typename T, typename = internal::EnableIfIsOneOf<
- T, int64_t, uint64_t, int32_t, uint32_t, int16_t, uint16_t,
- uint8_t, int8_t, float, double>>
-static inline T FromLittleEndian(T value) {
- return ByteSwap(value);
-}
-#endif
-
-} // namespace BitUtil
-} // namespace arrow
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+
+#ifdef _WIN32
+#define ARROW_LITTLE_ENDIAN 1
+#else
+#if defined(__APPLE__) || defined(__FreeBSD__)
+#include <machine/endian.h> // IWYU pragma: keep
+#elif defined(sun) || defined(__sun)
+#include <sys/byteorder.h> // IWYU pragma: keep
+#else
+#include <endian.h> // IWYU pragma: keep
+#endif
+#
+#ifndef __BYTE_ORDER__
+#error "__BYTE_ORDER__ not defined"
+#endif
+#
+#ifndef __ORDER_LITTLE_ENDIAN__
+#error "__ORDER_LITTLE_ENDIAN__ not defined"
+#endif
+#
+#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
+#define ARROW_LITTLE_ENDIAN 1
+#else
+#define ARROW_LITTLE_ENDIAN 0
+#endif
+#endif
+
+#if defined(_MSC_VER)
+#include <intrin.h> // IWYU pragma: keep
+#define ARROW_BYTE_SWAP64 _byteswap_uint64
+#define ARROW_BYTE_SWAP32 _byteswap_ulong
+#else
+#define ARROW_BYTE_SWAP64 __builtin_bswap64
+#define ARROW_BYTE_SWAP32 __builtin_bswap32
+#endif
+
+#include "arrow/util/type_traits.h"
+#include "arrow/util/ubsan.h"
+
+namespace arrow {
+namespace BitUtil {
+
+//
+// Byte-swap 16-bit, 32-bit and 64-bit values
+//
+
+// Swap the byte order (i.e. endianness)
+static inline int64_t ByteSwap(int64_t value) { return ARROW_BYTE_SWAP64(value); }
+static inline uint64_t ByteSwap(uint64_t value) {
+ return static_cast<uint64_t>(ARROW_BYTE_SWAP64(value));
+}
+static inline int32_t ByteSwap(int32_t value) { return ARROW_BYTE_SWAP32(value); }
+static inline uint32_t ByteSwap(uint32_t value) {
+ return static_cast<uint32_t>(ARROW_BYTE_SWAP32(value));
+}
+static inline int16_t ByteSwap(int16_t value) {
+ constexpr auto m = static_cast<int16_t>(0xff);
+ return static_cast<int16_t>(((value >> 8) & m) | ((value & m) << 8));
+}
+static inline uint16_t ByteSwap(uint16_t value) {
+ return static_cast<uint16_t>(ByteSwap(static_cast<int16_t>(value)));
+}
+static inline uint8_t ByteSwap(uint8_t value) { return value; }
+static inline int8_t ByteSwap(int8_t value) { return value; }
+static inline double ByteSwap(double value) {
+ const uint64_t swapped = ARROW_BYTE_SWAP64(util::SafeCopy<uint64_t>(value));
+ return util::SafeCopy<double>(swapped);
+}
+static inline float ByteSwap(float value) {
+ const uint32_t swapped = ARROW_BYTE_SWAP32(util::SafeCopy<uint32_t>(value));
+ return util::SafeCopy<float>(swapped);
+}
+
+// Write the swapped bytes into dst. Src and dst cannot overlap.
+static inline void ByteSwap(void* dst, const void* src, int len) {
+ switch (len) {
+ case 1:
+ *reinterpret_cast<int8_t*>(dst) = *reinterpret_cast<const int8_t*>(src);
+ return;
+ case 2:
+ *reinterpret_cast<int16_t*>(dst) = ByteSwap(*reinterpret_cast<const int16_t*>(src));
+ return;
+ case 4:
+ *reinterpret_cast<int32_t*>(dst) = ByteSwap(*reinterpret_cast<const int32_t*>(src));
+ return;
+ case 8:
+ *reinterpret_cast<int64_t*>(dst) = ByteSwap(*reinterpret_cast<const int64_t*>(src));
+ return;
+ default:
+ break;
+ }
+
+ auto d = reinterpret_cast<uint8_t*>(dst);
+ auto s = reinterpret_cast<const uint8_t*>(src);
+ for (int i = 0; i < len; ++i) {
+ d[i] = s[len - i - 1];
+ }
+}
+
+// Convert to little/big endian format from the machine's native endian format.
+#if ARROW_LITTLE_ENDIAN
+template <typename T, typename = internal::EnableIfIsOneOf<
+ T, int64_t, uint64_t, int32_t, uint32_t, int16_t, uint16_t,
+ uint8_t, int8_t, float, double>>
+static inline T ToBigEndian(T value) {
+ return ByteSwap(value);
+}
+
+template <typename T, typename = internal::EnableIfIsOneOf<
+ T, int64_t, uint64_t, int32_t, uint32_t, int16_t, uint16_t,
+ uint8_t, int8_t, float, double>>
+static inline T ToLittleEndian(T value) {
+ return value;
+}
+#else
+template <typename T, typename = internal::EnableIfIsOneOf<
+ T, int64_t, uint64_t, int32_t, uint32_t, int16_t, uint16_t,
+ uint8_t, int8_t, float, double>>
+static inline T ToBigEndian(T value) {
+ return value;
+}
+
+template <typename T, typename = internal::EnableIfIsOneOf<
+ T, int64_t, uint64_t, int32_t, uint32_t, int16_t, uint16_t,
+ uint8_t, int8_t, float, double>>
+static inline T ToLittleEndian(T value) {
+ return ByteSwap(value);
+}
+#endif
+
+// Convert from big/little endian format to the machine's native endian format.
+#if ARROW_LITTLE_ENDIAN
+template <typename T, typename = internal::EnableIfIsOneOf<
+ T, int64_t, uint64_t, int32_t, uint32_t, int16_t, uint16_t,
+ uint8_t, int8_t, float, double>>
+static inline T FromBigEndian(T value) {
+ return ByteSwap(value);
+}
+
+template <typename T, typename = internal::EnableIfIsOneOf<
+ T, int64_t, uint64_t, int32_t, uint32_t, int16_t, uint16_t,
+ uint8_t, int8_t, float, double>>
+static inline T FromLittleEndian(T value) {
+ return value;
+}
+#else
+template <typename T, typename = internal::EnableIfIsOneOf<
+ T, int64_t, uint64_t, int32_t, uint32_t, int16_t, uint16_t,
+ uint8_t, int8_t, float, double>>
+static inline T FromBigEndian(T value) {
+ return value;
+}
+
+template <typename T, typename = internal::EnableIfIsOneOf<
+ T, int64_t, uint64_t, int32_t, uint32_t, int16_t, uint16_t,
+ uint8_t, int8_t, float, double>>
+static inline T FromLittleEndian(T value) {
+ return ByteSwap(value);
+}
+#endif
+
+} // namespace BitUtil
+} // namespace arrow
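// Illustrative sketch (not part of the patch): the conversion helpers above
// are inverse pairs on any host; on a little-endian machine ToLittleEndian is
// the identity and ToBigEndian is a byte swap (and vice versa on big-endian).
#include <cassert>
#include <cstdint>
inline void EndianRoundTrip() {
  const uint32_t native = 0x11223344u;
  const uint32_t big = arrow::BitUtil::ToBigEndian(native);
  assert(arrow::BitUtil::FromBigEndian(big) == native);
  assert(arrow::BitUtil::ToLittleEndian(native) ==
         arrow::BitUtil::FromLittleEndian(native));
}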
diff --git a/contrib/libs/apache/arrow/cpp/src/arrow/util/formatting.cc b/contrib/libs/apache/arrow/cpp/src/arrow/util/formatting.cc
index efa8a997efe..c16d42ce5cf 100644
--- a/contrib/libs/apache/arrow/cpp/src/arrow/util/formatting.cc
+++ b/contrib/libs/apache/arrow/cpp/src/arrow/util/formatting.cc
@@ -43,29 +43,29 @@ struct FloatToStringFormatter::Impl {
: converter_(DoubleToStringConverter::EMIT_POSITIVE_EXPONENT_SIGN, "inf", "nan",
'e', -6, 10, 6, 0) {}
- Impl(int flags, const char* inf_symbol, const char* nan_symbol, char exp_character,
- int decimal_in_shortest_low, int decimal_in_shortest_high,
- int max_leading_padding_zeroes_in_precision_mode,
- int max_trailing_padding_zeroes_in_precision_mode)
- : converter_(flags, inf_symbol, nan_symbol, exp_character, decimal_in_shortest_low,
- decimal_in_shortest_high, max_leading_padding_zeroes_in_precision_mode,
- max_trailing_padding_zeroes_in_precision_mode) {}
-
+ Impl(int flags, const char* inf_symbol, const char* nan_symbol, char exp_character,
+ int decimal_in_shortest_low, int decimal_in_shortest_high,
+ int max_leading_padding_zeroes_in_precision_mode,
+ int max_trailing_padding_zeroes_in_precision_mode)
+ : converter_(flags, inf_symbol, nan_symbol, exp_character, decimal_in_shortest_low,
+ decimal_in_shortest_high, max_leading_padding_zeroes_in_precision_mode,
+ max_trailing_padding_zeroes_in_precision_mode) {}
+
DoubleToStringConverter converter_;
};
FloatToStringFormatter::FloatToStringFormatter() : impl_(new Impl()) {}
-FloatToStringFormatter::FloatToStringFormatter(
- int flags, const char* inf_symbol, const char* nan_symbol, char exp_character,
- int decimal_in_shortest_low, int decimal_in_shortest_high,
- int max_leading_padding_zeroes_in_precision_mode,
- int max_trailing_padding_zeroes_in_precision_mode)
- : impl_(new Impl(flags, inf_symbol, nan_symbol, exp_character,
- decimal_in_shortest_low, decimal_in_shortest_high,
- max_leading_padding_zeroes_in_precision_mode,
- max_trailing_padding_zeroes_in_precision_mode)) {}
-
+FloatToStringFormatter::FloatToStringFormatter(
+ int flags, const char* inf_symbol, const char* nan_symbol, char exp_character,
+ int decimal_in_shortest_low, int decimal_in_shortest_high,
+ int max_leading_padding_zeroes_in_precision_mode,
+ int max_trailing_padding_zeroes_in_precision_mode)
+ : impl_(new Impl(flags, inf_symbol, nan_symbol, exp_character,
+ decimal_in_shortest_low, decimal_in_shortest_high,
+ max_leading_padding_zeroes_in_precision_mode,
+ max_trailing_padding_zeroes_in_precision_mode)) {}
+
FloatToStringFormatter::~FloatToStringFormatter() {}
int FloatToStringFormatter::FormatFloat(float v, char* out_buffer, int out_size) {
diff --git a/contrib/libs/apache/arrow/cpp/src/arrow/util/formatting.h b/contrib/libs/apache/arrow/cpp/src/arrow/util/formatting.h
index ac91ec6a123..566c9795f83 100644
--- a/contrib/libs/apache/arrow/cpp/src/arrow/util/formatting.h
+++ b/contrib/libs/apache/arrow/cpp/src/arrow/util/formatting.h
@@ -31,7 +31,7 @@
#include "arrow/status.h"
#include "arrow/type.h"
#include "arrow/type_traits.h"
-#include "arrow/util/double_conversion.h"
+#include "arrow/util/double_conversion.h"
#include "arrow/util/string_view.h"
#include "arrow/util/time.h"
#include "arrow/util/visibility.h"
@@ -220,11 +220,11 @@ class StringFormatter<UInt64Type> : public IntToStringFormatterMixin<UInt64Type>
class ARROW_EXPORT FloatToStringFormatter {
public:
FloatToStringFormatter();
- FloatToStringFormatter(int flags, const char* inf_symbol, const char* nan_symbol,
- char exp_character, int decimal_in_shortest_low,
- int decimal_in_shortest_high,
- int max_leading_padding_zeroes_in_precision_mode,
- int max_trailing_padding_zeroes_in_precision_mode);
+ FloatToStringFormatter(int flags, const char* inf_symbol, const char* nan_symbol,
+ char exp_character, int decimal_in_shortest_low,
+ int decimal_in_shortest_high,
+ int max_leading_padding_zeroes_in_precision_mode,
+ int max_trailing_padding_zeroes_in_precision_mode);
~FloatToStringFormatter();
// Returns the number of characters written
@@ -245,16 +245,16 @@ class FloatToStringFormatterMixin : public FloatToStringFormatter {
explicit FloatToStringFormatterMixin(const std::shared_ptr<DataType>& = NULLPTR) {}
- FloatToStringFormatterMixin(int flags, const char* inf_symbol, const char* nan_symbol,
- char exp_character, int decimal_in_shortest_low,
- int decimal_in_shortest_high,
- int max_leading_padding_zeroes_in_precision_mode,
- int max_trailing_padding_zeroes_in_precision_mode)
- : FloatToStringFormatter(flags, inf_symbol, nan_symbol, exp_character,
- decimal_in_shortest_low, decimal_in_shortest_high,
- max_leading_padding_zeroes_in_precision_mode,
- max_trailing_padding_zeroes_in_precision_mode) {}
-
+ FloatToStringFormatterMixin(int flags, const char* inf_symbol, const char* nan_symbol,
+ char exp_character, int decimal_in_shortest_low,
+ int decimal_in_shortest_high,
+ int max_leading_padding_zeroes_in_precision_mode,
+ int max_trailing_padding_zeroes_in_precision_mode)
+ : FloatToStringFormatter(flags, inf_symbol, nan_symbol, exp_character,
+ decimal_in_shortest_low, decimal_in_shortest_high,
+ max_leading_padding_zeroes_in_precision_mode,
+ max_trailing_padding_zeroes_in_precision_mode) {}
+
template <typename Appender>
Return<Appender> operator()(value_type value, Appender&& append) {
char buffer[buffer_size];
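// Illustrative sketch (not part of the patch): FormatFloat fills a
// caller-owned buffer and returns the number of characters written; the
// 64-byte buffer is an illustrative guess, and the namespace is assumed to
// follow formatting.h (arrow::internal).
#include <string>
inline std::string FormatOneFloat(float v) {
  arrow::internal::FloatToStringFormatter formatter;  // shortest-form default
  char buffer[64];
  const int n = formatter.FormatFloat(v, buffer, static_cast<int>(sizeof(buffer)));
  return std::string(buffer, static_cast<size_t>(n));
}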
diff --git a/contrib/libs/apache/arrow/cpp/src/arrow/util/functional.h b/contrib/libs/apache/arrow/cpp/src/arrow/util/functional.h
index 30b3066d06e..9da79046fec 100644
--- a/contrib/libs/apache/arrow/cpp/src/arrow/util/functional.h
+++ b/contrib/libs/apache/arrow/cpp/src/arrow/util/functional.h
@@ -17,27 +17,27 @@
#pragma once
-#include <memory>
+#include <memory>
#include <tuple>
#include <type_traits>
-#include "arrow/result.h"
+#include "arrow/result.h"
#include "arrow/util/macros.h"
namespace arrow {
namespace internal {
-struct Empty {
- static Result<Empty> ToResult(Status s) {
- if (ARROW_PREDICT_TRUE(s.ok())) {
- return Empty{};
- }
- return s;
- }
-};
-
+struct Empty {
+ static Result<Empty> ToResult(Status s) {
+ if (ARROW_PREDICT_TRUE(s.ok())) {
+ return Empty{};
+ }
+ return s;
+ }
+};
+
/// Helper struct for examining lambdas and other callables.
-/// TODO(ARROW-12655) support function pointers
+/// TODO(ARROW-12655) support function pointers
struct call_traits {
public:
template <typename R, typename... A>
@@ -63,20 +63,20 @@ struct call_traits {
static typename std::tuple_element<I, std::tuple<A...>>::type argument_type_impl(
R (F::*)(A...) const);
- template <std::size_t I, typename F, typename R, typename... A>
- static typename std::tuple_element<I, std::tuple<A...>>::type argument_type_impl(
- R (F::*)(A...) &&);
-
- template <typename F, typename R, typename... A>
- static std::integral_constant<int, sizeof...(A)> argument_count_impl(R (F::*)(A...));
-
- template <typename F, typename R, typename... A>
- static std::integral_constant<int, sizeof...(A)> argument_count_impl(R (F::*)(A...)
- const);
-
- template <typename F, typename R, typename... A>
- static std::integral_constant<int, sizeof...(A)> argument_count_impl(R (F::*)(A...) &&);
-
+ template <std::size_t I, typename F, typename R, typename... A>
+ static typename std::tuple_element<I, std::tuple<A...>>::type argument_type_impl(
+ R (F::*)(A...) &&);
+
+ template <typename F, typename R, typename... A>
+ static std::integral_constant<int, sizeof...(A)> argument_count_impl(R (F::*)(A...));
+
+ template <typename F, typename R, typename... A>
+ static std::integral_constant<int, sizeof...(A)> argument_count_impl(R (F::*)(A...)
+ const);
+
+ template <typename F, typename R, typename... A>
+ static std::integral_constant<int, sizeof...(A)> argument_count_impl(R (F::*)(A...) &&);
+
/// bool constant indicating whether F is a callable with more than one possible
/// signature. Will be true_type for objects which define multiple operator() or which
/// define a template operator()
@@ -97,64 +97,64 @@ struct call_traits {
using argument_type = decltype(argument_type_impl<I>(&std::decay<F>::type::operator()));
template <typename F>
- using argument_count = decltype(argument_count_impl(&std::decay<F>::type::operator()));
-
- template <typename F>
+ using argument_count = decltype(argument_count_impl(&std::decay<F>::type::operator()));
+
+ template <typename F>
using return_type = decltype(return_type_impl(&std::decay<F>::type::operator()));
template <typename F, typename T, typename RT = T>
using enable_if_return =
typename std::enable_if<std::is_same<return_type<F>, T>::value, RT>;
-
- template <typename T, typename R = void>
- using enable_if_empty = typename std::enable_if<std::is_same<T, Empty>::value, R>::type;
-
- template <typename T, typename R = void>
- using enable_if_not_empty =
- typename std::enable_if<!std::is_same<T, Empty>::value, R>::type;
+
+ template <typename T, typename R = void>
+ using enable_if_empty = typename std::enable_if<std::is_same<T, Empty>::value, R>::type;
+
+ template <typename T, typename R = void>
+ using enable_if_not_empty =
+ typename std::enable_if<!std::is_same<T, Empty>::value, R>::type;
+};
+
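// Illustrative sketch (not part of the patch): what call_traits recovers from
// a plain (non-generic) lambda via &F::operator().
#include <type_traits>
inline void CallTraitsExample() {
  auto f = [](int x, double) -> float { return static_cast<float>(x); };
  using F = decltype(f);
  using traits = arrow::internal::call_traits;
  static_assert(std::is_same<traits::return_type<F>, float>::value, "");
  static_assert(traits::argument_count<F>::value == 2, "");
  static_assert(std::is_same<traits::argument_type<0, F>, int>::value, "");
}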
+/// A type erased callable object which may only be invoked once.
+/// It can be constructed from any lambda which matches the provided call signature.
+/// Invoking it results in destruction of the lambda, freeing any state/references
+/// immediately. Invoking a default constructed FnOnce or one which has already been
+/// invoked will segfault.
+template <typename Signature>
+class FnOnce;
+
+template <typename R, typename... A>
+class FnOnce<R(A...)> {
+ public:
+ FnOnce() = default;
+
+ template <typename Fn,
+ typename = typename std::enable_if<std::is_convertible<
+ typename std::result_of<Fn && (A...)>::type, R>::value>::type>
+ FnOnce(Fn fn) : impl_(new FnImpl<Fn>(std::move(fn))) { // NOLINT runtime/explicit
+ }
+
+ explicit operator bool() const { return impl_ != NULLPTR; }
+
+ R operator()(A... a) && {
+ auto bye = std::move(impl_);
+ return bye->invoke(std::forward<A&&>(a)...);
+ }
+
+ private:
+ struct Impl {
+ virtual ~Impl() = default;
+ virtual R invoke(A&&... a) = 0;
+ };
+
+ template <typename Fn>
+ struct FnImpl : Impl {
+ explicit FnImpl(Fn fn) : fn_(std::move(fn)) {}
+ R invoke(A&&... a) override { return std::move(fn_)(std::forward<A&&>(a)...); }
+ Fn fn_;
+ };
+
+ std::unique_ptr<Impl> impl_;
};
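// Illustrative sketch (not part of the patch): FnOnce must be invoked as an
// rvalue; the wrapped callable (and its captured state) is destroyed by the
// call itself, so a second invocation is undefined (see the warning above).
#include <memory>
#include <utility>
inline int FnOnceExample() {
  std::shared_ptr<int> p(new int(42));
  arrow::internal::FnOnce<int()> once = [p] { return *p; };
  return std::move(once)();  // `once` is empty afterwards
}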
-/// A type erased callable object which may only be invoked once.
-/// It can be constructed from any lambda which matches the provided call signature.
-/// Invoking it results in destruction of the lambda, freeing any state/references
-/// immediately. Invoking a default constructed FnOnce or one which has already been
-/// invoked will segfault.
-template <typename Signature>
-class FnOnce;
-
-template <typename R, typename... A>
-class FnOnce<R(A...)> {
- public:
- FnOnce() = default;
-
- template <typename Fn,
- typename = typename std::enable_if<std::is_convertible<
- typename std::result_of<Fn && (A...)>::type, R>::value>::type>
- FnOnce(Fn fn) : impl_(new FnImpl<Fn>(std::move(fn))) { // NOLINT runtime/explicit
- }
-
- explicit operator bool() const { return impl_ != NULLPTR; }
-
- R operator()(A... a) && {
- auto bye = std::move(impl_);
- return bye->invoke(std::forward<A&&>(a)...);
- }
-
- private:
- struct Impl {
- virtual ~Impl() = default;
- virtual R invoke(A&&... a) = 0;
- };
-
- template <typename Fn>
- struct FnImpl : Impl {
- explicit FnImpl(Fn fn) : fn_(std::move(fn)) {}
- R invoke(A&&... a) override { return std::move(fn_)(std::forward<A&&>(a)...); }
- Fn fn_;
- };
-
- std::unique_ptr<Impl> impl_;
-};
-
} // namespace internal
} // namespace arrow
diff --git a/contrib/libs/apache/arrow/cpp/src/arrow/util/future.cc b/contrib/libs/apache/arrow/cpp/src/arrow/util/future.cc
index 640ff63655a..f288a15be3f 100644
--- a/contrib/libs/apache/arrow/cpp/src/arrow/util/future.cc
+++ b/contrib/libs/apache/arrow/cpp/src/arrow/util/future.cc
@@ -26,7 +26,7 @@
#include "arrow/util/checked_cast.h"
#include "arrow/util/logging.h"
-#include "arrow/util/thread_pool.h"
+#include "arrow/util/thread_pool.h"
namespace arrow {
@@ -40,8 +40,8 @@ using internal::checked_cast;
// should ideally not limit scalability.
static std::mutex global_waiter_mutex;
-const double FutureWaiter::kInfinity = HUGE_VAL;
-
+const double FutureWaiter::kInfinity = HUGE_VAL;
+
class FutureWaiterImpl : public FutureWaiter {
public:
FutureWaiterImpl(Kind kind, std::vector<FutureImpl*> futures)
@@ -76,7 +76,7 @@ class FutureWaiterImpl : public FutureWaiter {
}
}
- ~FutureWaiterImpl() override {
+ ~FutureWaiterImpl() override {
for (auto future : futures_) {
future->RemoveWaiter(this);
}
@@ -177,9 +177,9 @@ FutureWaiterImpl* GetConcreteWaiter(FutureWaiter* waiter) {
} // namespace
-FutureWaiter::FutureWaiter() = default;
+FutureWaiter::FutureWaiter() = default;
-FutureWaiter::~FutureWaiter() = default;
+FutureWaiter::~FutureWaiter() = default;
std::unique_ptr<FutureWaiter> FutureWaiter::Make(Kind kind,
std::vector<FutureImpl*> futures) {
@@ -232,70 +232,70 @@ class ConcreteFutureImpl : public FutureImpl {
void DoMarkFailed() { DoMarkFinishedOrFailed(FutureState::FAILURE); }
- void CheckOptions(const CallbackOptions& opts) {
- if (opts.should_schedule != ShouldSchedule::Never) {
- DCHECK_NE(opts.executor, nullptr)
- << "An executor must be specified when adding a callback that might schedule";
- }
- }
-
- void AddCallback(Callback callback, CallbackOptions opts) {
- CheckOptions(opts);
- std::unique_lock<std::mutex> lock(mutex_);
- CallbackRecord callback_record{std::move(callback), opts};
- if (IsFutureFinished(state_)) {
- lock.unlock();
- RunOrScheduleCallback(std::move(callback_record), /*in_add_callback=*/true);
- } else {
- callbacks_.push_back(std::move(callback_record));
- }
- }
-
- bool TryAddCallback(const std::function<Callback()>& callback_factory,
- CallbackOptions opts) {
- CheckOptions(opts);
- std::unique_lock<std::mutex> lock(mutex_);
- if (IsFutureFinished(state_)) {
- return false;
- } else {
- callbacks_.push_back({callback_factory(), opts});
- return true;
- }
- }
-
- bool ShouldScheduleCallback(const CallbackRecord& callback_record,
- bool in_add_callback) {
- switch (callback_record.options.should_schedule) {
- case ShouldSchedule::Never:
- return false;
- case ShouldSchedule::Always:
- return true;
- case ShouldSchedule::IfUnfinished:
- return !in_add_callback;
- case ShouldSchedule::IfDifferentExecutor:
- return !callback_record.options.executor->OwnsThisThread();
- default:
- DCHECK(false) << "Unrecognized ShouldSchedule option";
- return false;
- }
- }
-
- void RunOrScheduleCallback(CallbackRecord&& callback_record, bool in_add_callback) {
- if (ShouldScheduleCallback(callback_record, in_add_callback)) {
- struct CallbackTask {
- void operator()() { std::move(callback)(*self); }
-
- Callback callback;
- std::shared_ptr<FutureImpl> self;
- };
- // Need to keep `this` alive until the callback has a chance to be scheduled.
- CallbackTask task{std::move(callback_record.callback), shared_from_this()};
- DCHECK_OK(callback_record.options.executor->Spawn(std::move(task)));
- } else {
- std::move(callback_record.callback)(*this);
- }
- }
-
+ void CheckOptions(const CallbackOptions& opts) {
+ if (opts.should_schedule != ShouldSchedule::Never) {
+ DCHECK_NE(opts.executor, nullptr)
+ << "An executor must be specified when adding a callback that might schedule";
+ }
+ }
+
+ void AddCallback(Callback callback, CallbackOptions opts) {
+ CheckOptions(opts);
+ std::unique_lock<std::mutex> lock(mutex_);
+ CallbackRecord callback_record{std::move(callback), opts};
+ if (IsFutureFinished(state_)) {
+ lock.unlock();
+ RunOrScheduleCallback(std::move(callback_record), /*in_add_callback=*/true);
+ } else {
+ callbacks_.push_back(std::move(callback_record));
+ }
+ }
+
+ bool TryAddCallback(const std::function<Callback()>& callback_factory,
+ CallbackOptions opts) {
+ CheckOptions(opts);
+ std::unique_lock<std::mutex> lock(mutex_);
+ if (IsFutureFinished(state_)) {
+ return false;
+ } else {
+ callbacks_.push_back({callback_factory(), opts});
+ return true;
+ }
+ }
+
+ bool ShouldScheduleCallback(const CallbackRecord& callback_record,
+ bool in_add_callback) {
+ switch (callback_record.options.should_schedule) {
+ case ShouldSchedule::Never:
+ return false;
+ case ShouldSchedule::Always:
+ return true;
+ case ShouldSchedule::IfUnfinished:
+ return !in_add_callback;
+ case ShouldSchedule::IfDifferentExecutor:
+ return !callback_record.options.executor->OwnsThisThread();
+ default:
+ DCHECK(false) << "Unrecognized ShouldSchedule option";
+ return false;
+ }
+ }
+
+ void RunOrScheduleCallback(CallbackRecord&& callback_record, bool in_add_callback) {
+ if (ShouldScheduleCallback(callback_record, in_add_callback)) {
+ struct CallbackTask {
+ void operator()() { std::move(callback)(*self); }
+
+ Callback callback;
+ std::shared_ptr<FutureImpl> self;
+ };
+ // Need to keep `this` alive until the callback has a chance to be scheduled.
+ CallbackTask task{std::move(callback_record.callback), shared_from_this()};
+ DCHECK_OK(callback_record.options.executor->Spawn(std::move(task)));
+ } else {
+ std::move(callback_record.callback)(*this);
+ }
+ }
+
void DoMarkFinishedOrFailed(FutureState state) {
{
// Lock the hypothetical waiter first, and the future after.
@@ -310,17 +310,17 @@ class ConcreteFutureImpl : public FutureImpl {
}
}
cv_.notify_all();
-
- // run callbacks, lock not needed since the future is finished by this
- // point so nothing else can modify the callbacks list and it is safe
- // to iterate.
- //
- // In fact, it is important not to hold the locks because the callback
- // may be slow or do its own locking on other resources
- for (auto& callback_record : callbacks_) {
- RunOrScheduleCallback(std::move(callback_record), /*in_add_callback=*/false);
- }
- callbacks_.clear();
+
+ // run callbacks, lock not needed since the future is finished by this
+ // point so nothing else can modify the callbacks list and it is safe
+ // to iterate.
+ //
+ // In fact, it is important not to hold the locks because the callback
+ // may be slow or do its own locking on other resources
+ for (auto& callback_record : callbacks_) {
+ RunOrScheduleCallback(std::move(callback_record), /*in_add_callback=*/false);
+ }
+ callbacks_.clear();
}
void DoWait() {
@@ -355,12 +355,12 @@ std::unique_ptr<FutureImpl> FutureImpl::Make() {
return std::unique_ptr<FutureImpl>(new ConcreteFutureImpl());
}
-std::unique_ptr<FutureImpl> FutureImpl::MakeFinished(FutureState state) {
- std::unique_ptr<ConcreteFutureImpl> ptr(new ConcreteFutureImpl());
- ptr->state_ = state;
- return std::move(ptr);
-}
-
+std::unique_ptr<FutureImpl> FutureImpl::MakeFinished(FutureState state) {
+ std::unique_ptr<ConcreteFutureImpl> ptr(new ConcreteFutureImpl());
+ ptr->state_ = state;
+ return std::move(ptr);
+}
+
FutureImpl::FutureImpl() : state_(FutureState::PENDING) {}
FutureState FutureImpl::SetWaiter(FutureWaiter* w, int future_num) {
@@ -379,43 +379,43 @@ void FutureImpl::MarkFinished() { GetConcreteFuture(this)->DoMarkFinished(); }
void FutureImpl::MarkFailed() { GetConcreteFuture(this)->DoMarkFailed(); }
-void FutureImpl::AddCallback(Callback callback, CallbackOptions opts) {
- GetConcreteFuture(this)->AddCallback(std::move(callback), opts);
-}
-
-bool FutureImpl::TryAddCallback(const std::function<Callback()>& callback_factory,
- CallbackOptions opts) {
- return GetConcreteFuture(this)->TryAddCallback(callback_factory, opts);
-}
-
-Future<> AllComplete(const std::vector<Future<>>& futures) {
- struct State {
- explicit State(int64_t n_futures) : mutex(), n_remaining(n_futures) {}
-
- std::mutex mutex;
- std::atomic<size_t> n_remaining;
- };
-
- if (futures.empty()) {
- return Future<>::MakeFinished();
- }
-
- auto state = std::make_shared<State>(futures.size());
- auto out = Future<>::Make();
- for (const auto& future : futures) {
- future.AddCallback([state, out](const Status& status) mutable {
- if (!status.ok()) {
- std::unique_lock<std::mutex> lock(state->mutex);
- if (!out.is_finished()) {
- out.MarkFinished(status);
- }
- return;
- }
- if (state->n_remaining.fetch_sub(1) != 1) return;
- out.MarkFinished();
- });
- }
- return out;
-}
-
+void FutureImpl::AddCallback(Callback callback, CallbackOptions opts) {
+ GetConcreteFuture(this)->AddCallback(std::move(callback), opts);
+}
+
+bool FutureImpl::TryAddCallback(const std::function<Callback()>& callback_factory,
+ CallbackOptions opts) {
+ return GetConcreteFuture(this)->TryAddCallback(callback_factory, opts);
+}
+
+Future<> AllComplete(const std::vector<Future<>>& futures) {
+ struct State {
+ explicit State(int64_t n_futures) : mutex(), n_remaining(n_futures) {}
+
+ std::mutex mutex;
+ std::atomic<size_t> n_remaining;
+ };
+
+ if (futures.empty()) {
+ return Future<>::MakeFinished();
+ }
+
+ auto state = std::make_shared<State>(futures.size());
+ auto out = Future<>::Make();
+ for (const auto& future : futures) {
+ future.AddCallback([state, out](const Status& status) mutable {
+ if (!status.ok()) {
+ std::unique_lock<std::mutex> lock(state->mutex);
+ if (!out.is_finished()) {
+ out.MarkFinished(status);
+ }
+ return;
+ }
+ if (state->n_remaining.fetch_sub(1) != 1) return;
+ out.MarkFinished();
+ });
+ }
+ return out;
+}
+
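// Illustrative sketch (not part of the patch): combining futures with
// AllComplete; the two component futures are placeholders.
#include <vector>
inline arrow::Future<> WaitForBoth(arrow::Future<> a, arrow::Future<> b) {
  std::vector<arrow::Future<>> futures{std::move(a), std::move(b)};
  // Finishes OK once every future succeeds, or with the first error status.
  return arrow::AllComplete(futures);
}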
} // namespace arrow
diff --git a/contrib/libs/apache/arrow/cpp/src/arrow/util/future.h b/contrib/libs/apache/arrow/cpp/src/arrow/util/future.h
index 9352de6596f..d9e0a939f25 100644
--- a/contrib/libs/apache/arrow/cpp/src/arrow/util/future.h
+++ b/contrib/libs/apache/arrow/cpp/src/arrow/util/future.h
@@ -18,8 +18,8 @@
#pragma once
#include <atomic>
-#include <cmath>
-#include <functional>
+#include <cmath>
+#include <functional>
#include <memory>
#include <type_traits>
#include <utility>
@@ -27,263 +27,263 @@
#include "arrow/result.h"
#include "arrow/status.h"
-#include "arrow/type_fwd.h"
-#include "arrow/util/functional.h"
+#include "arrow/type_fwd.h"
+#include "arrow/util/functional.h"
#include "arrow/util/macros.h"
-#include "arrow/util/optional.h"
-#include "arrow/util/type_fwd.h"
+#include "arrow/util/optional.h"
+#include "arrow/util/type_fwd.h"
#include "arrow/util/visibility.h"
namespace arrow {
-template <typename>
-struct EnsureFuture;
-
-namespace detail {
-
-template <typename>
-struct is_future : std::false_type {};
-
-template <typename T>
-struct is_future<Future<T>> : std::true_type {};
-
-template <typename Signature>
-using result_of_t = typename std::result_of<Signature>::type;
-
-// Helper to find the synchronous counterpart for a Future
-template <typename T>
-struct SyncType {
- using type = Result<T>;
-};
-
-template <>
-struct SyncType<internal::Empty> {
- using type = Status;
-};
-
-template <typename Fn>
-using first_arg_is_status =
- std::is_same<typename std::decay<internal::call_traits::argument_type<0, Fn>>::type,
- Status>;
-
-template <typename Fn, typename Then, typename Else,
- typename Count = internal::call_traits::argument_count<Fn>>
-using if_has_no_args = typename std::conditional<Count::value == 0, Then, Else>::type;
-
-/// Creates a callback that can be added to a future to mark a `dest` future finished
-template <typename Source, typename Dest, bool SourceEmpty = Source::is_empty,
- bool DestEmpty = Dest::is_empty>
-struct MarkNextFinished {};
-
-/// If the source and dest are both empty we can pass on the status
-template <typename Source, typename Dest>
-struct MarkNextFinished<Source, Dest, true, true> {
- void operator()(const Status& status) && { next.MarkFinished(status); }
- Dest next;
-};
-
-/// If the source is not empty but the dest is then we can take the
-/// status out of the result
-template <typename Source, typename Dest>
-struct MarkNextFinished<Source, Dest, false, true> {
- void operator()(const Result<typename Source::ValueType>& res) && {
- next.MarkFinished(internal::Empty::ToResult(res.status()));
- }
- Dest next;
-};
-
-/// If neither are empty we pass on the result
-template <typename Source, typename Dest>
-struct MarkNextFinished<Source, Dest, false, false> {
- void operator()(const Result<typename Source::ValueType>& res) && {
- next.MarkFinished(res);
- }
- Dest next;
-};
-
-/// Helper that contains information about how to apply a continuation
-struct ContinueFuture {
- template <typename Return>
- struct ForReturnImpl;
-
- template <typename Return>
- using ForReturn = typename ForReturnImpl<Return>::type;
-
- template <typename Signature>
- using ForSignature = ForReturn<result_of_t<Signature>>;
-
- // If the callback returns void then we return Future<> that always finishes OK.
- template <typename ContinueFunc, typename... Args,
- typename ContinueResult = result_of_t<ContinueFunc && (Args && ...)>,
- typename NextFuture = ForReturn<ContinueResult>>
- typename std::enable_if<std::is_void<ContinueResult>::value>::type operator()(
- NextFuture next, ContinueFunc&& f, Args&&... a) const {
- std::forward<ContinueFunc>(f)(std::forward<Args>(a)...);
- next.MarkFinished();
- }
-
- /// If the callback returns a non-future then we return Future<T>
- /// and mark the future finished with the callback result. It will get promoted
- /// to Result<T> as part of MarkFinished if it isn't already.
- ///
- /// If the callback returns Status and we return Future<> then also send the callback
- /// result as-is to the destination future.
- template <typename ContinueFunc, typename... Args,
- typename ContinueResult = result_of_t<ContinueFunc && (Args && ...)>,
- typename NextFuture = ForReturn<ContinueResult>>
- typename std::enable_if<
- !std::is_void<ContinueResult>::value && !is_future<ContinueResult>::value &&
- (!NextFuture::is_empty || std::is_same<ContinueResult, Status>::value)>::type
- operator()(NextFuture next, ContinueFunc&& f, Args&&... a) const {
- next.MarkFinished(std::forward<ContinueFunc>(f)(std::forward<Args>(a)...));
- }
-
- /// If the callback returns a Result and the next future is Future<> then we mark
- /// the future finished with the callback result.
- ///
- /// It may seem odd that the next future is Future<> when the callback returns a
- /// result but this can occur if the OnFailure callback returns a result while the
- /// OnSuccess callback is void/Status (e.g. you would get this calling the one-arg
- /// version of Then with an OnSuccess callback that returns void)
- template <typename ContinueFunc, typename... Args,
- typename ContinueResult = result_of_t<ContinueFunc && (Args && ...)>,
- typename NextFuture = ForReturn<ContinueResult>>
- typename std::enable_if<!std::is_void<ContinueResult>::value &&
- !is_future<ContinueResult>::value && NextFuture::is_empty &&
- !std::is_same<ContinueResult, Status>::value>::type
- operator()(NextFuture next, ContinueFunc&& f, Args&&... a) const {
- next.MarkFinished(std::forward<ContinueFunc>(f)(std::forward<Args>(a)...).status());
- }
-
- /// If the callback returns a Future<T> then we return Future<T>. We create a new
- /// future and add a callback to the future given to us by the user that forwards the
- /// result to the future we just created
- template <typename ContinueFunc, typename... Args,
- typename ContinueResult = result_of_t<ContinueFunc && (Args && ...)>,
- typename NextFuture = ForReturn<ContinueResult>>
- typename std::enable_if<is_future<ContinueResult>::value>::type operator()(
- NextFuture next, ContinueFunc&& f, Args&&... a) const {
- ContinueResult signal_to_complete_next =
- std::forward<ContinueFunc>(f)(std::forward<Args>(a)...);
- MarkNextFinished<ContinueResult, NextFuture> callback{std::move(next)};
- signal_to_complete_next.AddCallback(std::move(callback));
- }
-
- /// Helpers to conditionally ignore arguments to ContinueFunc
- template <typename ContinueFunc, typename NextFuture, typename... Args>
- void IgnoringArgsIf(std::true_type, NextFuture&& next, ContinueFunc&& f,
- Args&&...) const {
- operator()(std::forward<NextFuture>(next), std::forward<ContinueFunc>(f));
- }
- template <typename ContinueFunc, typename NextFuture, typename... Args>
- void IgnoringArgsIf(std::false_type, NextFuture&& next, ContinueFunc&& f,
- Args&&... a) const {
- operator()(std::forward<NextFuture>(next), std::forward<ContinueFunc>(f),
- std::forward<Args>(a)...);
- }
-};
-
-/// Helper struct which tells us what kind of Future gets returned from `Then` based on
-/// the return type of the OnSuccess callback
-template <>
-struct ContinueFuture::ForReturnImpl<void> {
- using type = Future<>;
-};
-
-template <>
-struct ContinueFuture::ForReturnImpl<Status> {
- using type = Future<>;
-};
-
-template <typename R>
-struct ContinueFuture::ForReturnImpl {
- using type = Future<R>;
-};
-
-template <typename T>
-struct ContinueFuture::ForReturnImpl<Result<T>> {
- using type = Future<T>;
-};
-
-template <typename T>
-struct ContinueFuture::ForReturnImpl<Future<T>> {
- using type = Future<T>;
-};
-
-} // namespace detail
-
+template <typename>
+struct EnsureFuture;
+
+namespace detail {
+
+template <typename>
+struct is_future : std::false_type {};
+
+template <typename T>
+struct is_future<Future<T>> : std::true_type {};
+
+template <typename Signature>
+using result_of_t = typename std::result_of<Signature>::type;
+
+// Helper to find the synchronous counterpart for a Future
+template <typename T>
+struct SyncType {
+ using type = Result<T>;
+};
+
+template <>
+struct SyncType<internal::Empty> {
+ using type = Status;
+};
+
+template <typename Fn>
+using first_arg_is_status =
+ std::is_same<typename std::decay<internal::call_traits::argument_type<0, Fn>>::type,
+ Status>;
+
+template <typename Fn, typename Then, typename Else,
+ typename Count = internal::call_traits::argument_count<Fn>>
+using if_has_no_args = typename std::conditional<Count::value == 0, Then, Else>::type;
+
+/// Creates a callback that can be added to a future to mark a `dest` future finished
+template <typename Source, typename Dest, bool SourceEmpty = Source::is_empty,
+ bool DestEmpty = Dest::is_empty>
+struct MarkNextFinished {};
+
+/// If the source and dest are both empty we can pass on the status
+template <typename Source, typename Dest>
+struct MarkNextFinished<Source, Dest, true, true> {
+ void operator()(const Status& status) && { next.MarkFinished(status); }
+ Dest next;
+};
+
+/// If the source is not empty but the dest is then we can take the
+/// status out of the result
+template <typename Source, typename Dest>
+struct MarkNextFinished<Source, Dest, false, true> {
+ void operator()(const Result<typename Source::ValueType>& res) && {
+ next.MarkFinished(internal::Empty::ToResult(res.status()));
+ }
+ Dest next;
+};
+
+/// If neither are empty we pass on the result
+template <typename Source, typename Dest>
+struct MarkNextFinished<Source, Dest, false, false> {
+ void operator()(const Result<typename Source::ValueType>& res) && {
+ next.MarkFinished(res);
+ }
+ Dest next;
+};
+
+/// Helper that contains information about how to apply a continuation
+struct ContinueFuture {
+ template <typename Return>
+ struct ForReturnImpl;
+
+ template <typename Return>
+ using ForReturn = typename ForReturnImpl<Return>::type;
+
+ template <typename Signature>
+ using ForSignature = ForReturn<result_of_t<Signature>>;
+
+ // If the callback returns void then we return Future<> that always finishes OK.
+ template <typename ContinueFunc, typename... Args,
+ typename ContinueResult = result_of_t<ContinueFunc && (Args && ...)>,
+ typename NextFuture = ForReturn<ContinueResult>>
+ typename std::enable_if<std::is_void<ContinueResult>::value>::type operator()(
+ NextFuture next, ContinueFunc&& f, Args&&... a) const {
+ std::forward<ContinueFunc>(f)(std::forward<Args>(a)...);
+ next.MarkFinished();
+ }
+
+ /// If the callback returns a non-future then we return Future<T>
+ /// and mark the future finished with the callback result. It will get promoted
+ /// to Result<T> as part of MarkFinished if it isn't already.
+ ///
+ /// If the callback returns Status and we return Future<> then also send the callback
+ /// result as-is to the destination future.
+ template <typename ContinueFunc, typename... Args,
+ typename ContinueResult = result_of_t<ContinueFunc && (Args && ...)>,
+ typename NextFuture = ForReturn<ContinueResult>>
+ typename std::enable_if<
+ !std::is_void<ContinueResult>::value && !is_future<ContinueResult>::value &&
+ (!NextFuture::is_empty || std::is_same<ContinueResult, Status>::value)>::type
+ operator()(NextFuture next, ContinueFunc&& f, Args&&... a) const {
+ next.MarkFinished(std::forward<ContinueFunc>(f)(std::forward<Args>(a)...));
+ }
+
+ /// If the callback returns a Result and the next future is Future<> then we mark
+ /// the future finished with the callback result.
+ ///
+ /// It may seem odd that the next future is Future<> when the callback returns a
+ /// result but this can occur if the OnFailure callback returns a result while the
+ /// OnSuccess callback is void/Status (e.g. you would get this calling the one-arg
+ /// version of Then with an OnSuccess callback that returns void)
+ template <typename ContinueFunc, typename... Args,
+ typename ContinueResult = result_of_t<ContinueFunc && (Args && ...)>,
+ typename NextFuture = ForReturn<ContinueResult>>
+ typename std::enable_if<!std::is_void<ContinueResult>::value &&
+ !is_future<ContinueResult>::value && NextFuture::is_empty &&
+ !std::is_same<ContinueResult, Status>::value>::type
+ operator()(NextFuture next, ContinueFunc&& f, Args&&... a) const {
+ next.MarkFinished(std::forward<ContinueFunc>(f)(std::forward<Args>(a)...).status());
+ }
+
+ /// If the callback returns a Future<T> then we return Future<T>. We create a new
+ /// future and add a callback to the future given to us by the user that forwards the
+ /// result to the future we just created
+ template <typename ContinueFunc, typename... Args,
+ typename ContinueResult = result_of_t<ContinueFunc && (Args && ...)>,
+ typename NextFuture = ForReturn<ContinueResult>>
+ typename std::enable_if<is_future<ContinueResult>::value>::type operator()(
+ NextFuture next, ContinueFunc&& f, Args&&... a) const {
+ ContinueResult signal_to_complete_next =
+ std::forward<ContinueFunc>(f)(std::forward<Args>(a)...);
+ MarkNextFinished<ContinueResult, NextFuture> callback{std::move(next)};
+ signal_to_complete_next.AddCallback(std::move(callback));
+ }
+
+ /// Helpers to conditionally ignore arguments to ContinueFunc
+ template <typename ContinueFunc, typename NextFuture, typename... Args>
+ void IgnoringArgsIf(std::true_type, NextFuture&& next, ContinueFunc&& f,
+ Args&&...) const {
+ operator()(std::forward<NextFuture>(next), std::forward<ContinueFunc>(f));
+ }
+ template <typename ContinueFunc, typename NextFuture, typename... Args>
+ void IgnoringArgsIf(std::false_type, NextFuture&& next, ContinueFunc&& f,
+ Args&&... a) const {
+ operator()(std::forward<NextFuture>(next), std::forward<ContinueFunc>(f),
+ std::forward<Args>(a)...);
+ }
+};
+
+/// Helper struct which tells us what kind of Future gets returned from `Then` based on
+/// the return type of the OnSuccess callback
+template <>
+struct ContinueFuture::ForReturnImpl<void> {
+ using type = Future<>;
+};
+
+template <>
+struct ContinueFuture::ForReturnImpl<Status> {
+ using type = Future<>;
+};
+
+template <typename R>
+struct ContinueFuture::ForReturnImpl {
+ using type = Future<R>;
+};
+
+template <typename T>
+struct ContinueFuture::ForReturnImpl<Result<T>> {
+ using type = Future<T>;
+};
+
+template <typename T>
+struct ContinueFuture::ForReturnImpl<Future<T>> {
+ using type = Future<T>;
+};
+
+} // namespace detail
+
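// Illustrative sketch (not part of the patch): the ForReturn mapping spelled
// out; this is what decides the Future type that Then() hands back.
static_assert(std::is_same<arrow::detail::ContinueFuture::ForReturn<void>,
                           arrow::Future<>>::value,
              "void-returning callbacks yield Future<>");
static_assert(std::is_same<arrow::detail::ContinueFuture::ForReturn<arrow::Status>,
                           arrow::Future<>>::value,
              "Status-returning callbacks yield Future<>");
static_assert(std::is_same<arrow::detail::ContinueFuture::ForReturn<arrow::Result<int>>,
                           arrow::Future<int>>::value,
              "Result<T>-returning callbacks yield Future<T>");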
/// A Future's execution or completion status
enum class FutureState : int8_t { PENDING, SUCCESS, FAILURE };
inline bool IsFutureFinished(FutureState state) { return state != FutureState::PENDING; }
-/// \brief Describe whether the callback should be scheduled or run synchronously
-enum class ShouldSchedule {
- /// Always run the callback synchronously (the default)
- Never = 0,
- /// Schedule a new task only if the future is not finished when the
- /// callback is added
- IfUnfinished = 1,
- /// Always schedule the callback as a new task
- Always = 2,
- /// Schedule a new task only if it would run on an executor other than
- /// the specified executor.
- IfDifferentExecutor = 3,
-};
-
-/// \brief Options that control how a continuation is run
-struct CallbackOptions {
- /// Describe whether the callback should be run synchronously or scheduled
- ShouldSchedule should_schedule = ShouldSchedule::Never;
- /// If the callback is scheduled then this is the executor it should be scheduled
- /// on. If this is NULL then should_schedule must be Never
- internal::Executor* executor = NULLPTR;
-
- static CallbackOptions Defaults() { return {}; }
-};
-
-// Untyped private implementation
-class ARROW_EXPORT FutureImpl : public std::enable_shared_from_this<FutureImpl> {
+/// \brief Describe whether the callback should be scheduled or run synchronously
+enum class ShouldSchedule {
+ /// Always run the callback synchronously (the default)
+ Never = 0,
+ /// Schedule a new task only if the future is not finished when the
+ /// callback is added
+ IfUnfinished = 1,
+ /// Always schedule the callback as a new task
+ Always = 2,
+ /// Schedule a new task only if it would run on an executor other than
+ /// the specified executor.
+ IfDifferentExecutor = 3,
+};
+
+/// \brief Options that control how a continuation is run
+struct CallbackOptions {
+ /// Describe whether the callback should be run synchronously or scheduled
+ ShouldSchedule should_schedule = ShouldSchedule::Never;
+ /// If the callback is scheduled then this is the executor it should be scheduled
+ /// on. If this is NULL then should_schedule must be Never
+ internal::Executor* executor = NULLPTR;
+
+ static CallbackOptions Defaults() { return {}; }
+};
+
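// Illustrative sketch (not part of the patch): opting into scheduled
// callbacks. The executor is assumed to come from somewhere like
// arrow::internal::GetCpuThreadPool() and must be non-null whenever
// should_schedule != ShouldSchedule::Never.
inline arrow::CallbackOptions AlwaysScheduleOn(arrow::internal::Executor* executor) {
  arrow::CallbackOptions options = arrow::CallbackOptions::Defaults();
  options.should_schedule = arrow::ShouldSchedule::Always;
  options.executor = executor;
  return options;
}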
+// Untyped private implementation
+class ARROW_EXPORT FutureImpl : public std::enable_shared_from_this<FutureImpl> {
public:
- FutureImpl();
+ FutureImpl();
virtual ~FutureImpl() = default;
FutureState state() { return state_.load(); }
static std::unique_ptr<FutureImpl> Make();
- static std::unique_ptr<FutureImpl> MakeFinished(FutureState state);
+ static std::unique_ptr<FutureImpl> MakeFinished(FutureState state);
// Future API
void MarkFinished();
void MarkFailed();
void Wait();
bool Wait(double seconds);
- template <typename ValueType>
- Result<ValueType>* CastResult() const {
- return static_cast<Result<ValueType>*>(result_.get());
- }
-
- using Callback = internal::FnOnce<void(const FutureImpl& impl)>;
- void AddCallback(Callback callback, CallbackOptions opts);
- bool TryAddCallback(const std::function<Callback()>& callback_factory,
- CallbackOptions opts);
-
+ template <typename ValueType>
+ Result<ValueType>* CastResult() const {
+ return static_cast<Result<ValueType>*>(result_.get());
+ }
+
+ using Callback = internal::FnOnce<void(const FutureImpl& impl)>;
+ void AddCallback(Callback callback, CallbackOptions opts);
+ bool TryAddCallback(const std::function<Callback()>& callback_factory,
+ CallbackOptions opts);
+
// Waiter API
inline FutureState SetWaiter(FutureWaiter* w, int future_num);
inline void RemoveWaiter(FutureWaiter* w);
- std::atomic<FutureState> state_{FutureState::PENDING};
-
- // Type erased storage for arbitrary results
- // XXX small objects could be stored inline instead of boxed in a pointer
- using Storage = std::unique_ptr<void, void (*)(void*)>;
- Storage result_{NULLPTR, NULLPTR};
-
- struct CallbackRecord {
- Callback callback;
- CallbackOptions options;
- };
- std::vector<CallbackRecord> callbacks_;
+ std::atomic<FutureState> state_{FutureState::PENDING};
+
+ // Type erased storage for arbitrary results
+ // XXX small objects could be stored inline instead of boxed in a pointer
+ using Storage = std::unique_ptr<void, void (*)(void*)>;
+ Storage result_{NULLPTR, NULLPTR};
+
+ struct CallbackRecord {
+ Callback callback;
+ CallbackOptions options;
+ };
+ std::vector<CallbackRecord> callbacks_;
};
// An object that waits on multiple futures at once. Only one waiter
@@ -292,9 +292,9 @@ class ARROW_EXPORT FutureWaiter {
public:
enum Kind : int8_t { ANY, ALL, ALL_OR_FIRST_FAILED, ITERATE };
- // HUGE_VAL isn't constexpr on Windows
- // https://social.msdn.microsoft.com/Forums/vstudio/en-US/47e8b9ff-b205-4189-968e-ee3bc3e2719f/constexpr-compile-error?forum=vclanguage
- static const double kInfinity;
+ // HUGE_VAL isn't constexpr on Windows
+ // https://social.msdn.microsoft.com/Forums/vstudio/en-US/47e8b9ff-b205-4189-968e-ee3bc3e2719f/constexpr-compile-error?forum=vclanguage
+ static const double kInfinity;
static std::unique_ptr<FutureWaiter> Make(Kind kind, std::vector<FutureImpl*> futures);
@@ -318,7 +318,7 @@ class ARROW_EXPORT FutureWaiter {
static std::vector<FutureImpl*> ExtractFutures(const std::vector<FutureType>& futures) {
std::vector<FutureImpl*> base_futures(futures.size());
for (int i = 0; i < static_cast<int>(futures.size()); ++i) {
- base_futures[i] = futures[i].impl_.get();
+ base_futures[i] = futures[i].impl_.get();
}
return base_futures;
}
@@ -329,7 +329,7 @@ class ARROW_EXPORT FutureWaiter {
const std::vector<FutureType*>& futures) {
std::vector<FutureImpl*> base_futures(futures.size());
for (int i = 0; i < static_cast<int>(futures.size()); ++i) {
- base_futures[i] = futures[i]->impl_.get();
+ base_futures[i] = futures[i]->impl_.get();
}
return base_futures;
}
@@ -358,11 +358,11 @@ class ARROW_EXPORT FutureWaiter {
/// to complete, or wait on multiple Futures at once (using WaitForAll,
/// WaitForAny or AsCompletedIterator).
template <typename T>
-class ARROW_MUST_USE_TYPE Future {
+class ARROW_MUST_USE_TYPE Future {
public:
- using ValueType = T;
- using SyncType = typename detail::SyncType<T>::type;
- static constexpr bool is_empty = std::is_same<T, internal::Empty>::value;
+ using ValueType = T;
+ using SyncType = typename detail::SyncType<T>::type;
+ static constexpr bool is_empty = std::is_same<T, internal::Empty>::value;
// The default constructor creates an invalid Future. Use Future::Make()
// for a valid Future. This constructor is mostly for the convenience
// of being able to presize a vector of Futures.
@@ -370,7 +370,7 @@ class ARROW_MUST_USE_TYPE Future {
// Consumer API
- bool is_valid() const { return impl_ != NULLPTR; }
+ bool is_valid() const { return impl_ != NULLPTR; }
/// \brief Return the Future's current state
///
@@ -381,41 +381,41 @@ class ARROW_MUST_USE_TYPE Future {
return impl_->state();
}
- /// \brief Whether the Future is finished
+ /// \brief Whether the Future is finished
///
- /// A false return value is only indicative, as the Future can complete
- /// concurrently. A true return value is definitive, though.
- bool is_finished() const {
+ /// A false return value is only indicative, as the Future can complete
+ /// concurrently. A true return value is definitive, though.
+ bool is_finished() const {
CheckValid();
- return IsFutureFinished(impl_->state());
- }
-
- /// \brief Wait for the Future to complete and return its Result
- const Result<ValueType>& result() const& {
+ return IsFutureFinished(impl_->state());
+ }
+
+ /// \brief Wait for the Future to complete and return its Result
+ const Result<ValueType>& result() const& {
Wait();
- return *GetResult();
+ return *GetResult();
}
- /// \brief Returns an rvalue reference to the result. This method is potentially unsafe
- ///
- /// The future is not the unique owner of the result, copies of a future will
- /// also point to the same result. You must make sure that no other copies
- /// of the future exist. Attempts to add callbacks after you move the result
- /// will result in undefined behavior.
- Result<ValueType>&& MoveResult() {
+ /// \brief Returns an rvalue reference to the result. This method is potentially unsafe
+ ///
+ /// The future is not the unique owner of the result, copies of a future will
+ /// also point to the same result. You must make sure that no other copies
+ /// of the future exist. Attempts to add callbacks after you move the result
+ /// will result in undefined behavior.
+ Result<ValueType>&& MoveResult() {
Wait();
- return std::move(*GetResult());
+ return std::move(*GetResult());
}
/// \brief Wait for the Future to complete and return its Status
- const Status& status() const { return result().status(); }
-
- /// \brief Future<T> is convertible to Future<>, which views only the
- /// Status of the original. Marking the returned Future Finished is not supported.
- explicit operator Future<>() const {
- Future<> status_future;
- status_future.impl_ = impl_;
- return status_future;
+ const Status& status() const { return result().status(); }
+
+ /// \brief Future<T> is convertible to Future<>, which views only the
+ /// Status of the original. Marking the returned Future Finished is not supported.
+ explicit operator Future<>() const {
+ Future<> status_future;
+ status_future.impl_ = impl_;
+ return status_future;
}
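// Illustrative sketch (not part of the patch): typical consumer-side calls.
inline arrow::Status ConsumeExample(arrow::Future<int> fut) {
  if (fut.is_finished()) {
    // A true answer here is definitive and cannot race with completion.
  }
  const arrow::Result<int>& res = fut.result();  // blocks until finished
  return res.status();
}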
/// \brief Wait for the Future to complete
@@ -441,270 +441,270 @@ class ARROW_MUST_USE_TYPE Future {
// Producer API
- /// \brief Producer API: mark Future finished
+ /// \brief Producer API: mark Future finished
///
- /// The Future's result is set to `res`.
- void MarkFinished(Result<ValueType> res) { DoMarkFinished(std::move(res)); }
-
- /// \brief Mark a Future<> completed with the provided Status.
- template <typename E = ValueType, typename = typename std::enable_if<
- std::is_same<E, internal::Empty>::value>::type>
- void MarkFinished(Status s = Status::OK()) {
- return DoMarkFinished(E::ToResult(std::move(s)));
+ /// The Future's result is set to `res`.
+ void MarkFinished(Result<ValueType> res) { DoMarkFinished(std::move(res)); }
+
+ /// \brief Mark a Future<> completed with the provided Status.
+ template <typename E = ValueType, typename = typename std::enable_if<
+ std::is_same<E, internal::Empty>::value>::type>
+ void MarkFinished(Status s = Status::OK()) {
+ return DoMarkFinished(E::ToResult(std::move(s)));
}
/// \brief Producer API: instantiate a valid Future
///
- /// The Future's state is initialized with PENDING. If you are creating a future with
- /// this method you must ensure that the future is eventually completed (with success or
- /// failure). Creating a future, returning it, and never completing the future can lead
- /// to memory leaks (for example, see Loop).
+ /// The Future's state is initialized with PENDING. If you are creating a future with
+ /// this method you must ensure that the future is eventually completed (with success or
+ /// failure). Creating a future, returning it, and never completing the future can lead
+ /// to memory leaks (for example, see Loop).
static Future Make() {
Future fut;
- fut.impl_ = FutureImpl::Make();
+ fut.impl_ = FutureImpl::Make();
return fut;
}
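// A minimal producer-side sketch of Make()/MarkFinished() above (illustrative;
// the helper names are assumptions, not Arrow API):
Future<int> MakePending() {
  auto fut = Future<int>::Make();  // state starts PENDING
  return fut;                      // the producer must eventually complete it
}
void CompleteIt(Future<int> fut) {
  fut.MarkFinished(42);            // callbacks run before this call returns
}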
/// \brief Producer API: instantiate a finished Future
- static Future<ValueType> MakeFinished(Result<ValueType> res) {
- Future<ValueType> fut;
- fut.InitializeFromResult(std::move(res));
+ static Future<ValueType> MakeFinished(Result<ValueType> res) {
+ Future<ValueType> fut;
+ fut.InitializeFromResult(std::move(res));
return fut;
}
- /// \brief Make a finished Future<> with the provided Status.
- template <typename E = ValueType, typename = typename std::enable_if<
- std::is_same<E, internal::Empty>::value>::type>
- static Future<> MakeFinished(Status s = Status::OK()) {
- return MakeFinished(E::ToResult(std::move(s)));
- }
-
- struct WrapResultyOnComplete {
- template <typename OnComplete>
- struct Callback {
- void operator()(const FutureImpl& impl) && {
- std::move(on_complete)(*impl.CastResult<ValueType>());
- }
- OnComplete on_complete;
- };
- };
-
- struct WrapStatusyOnComplete {
- template <typename OnComplete>
- struct Callback {
- static_assert(std::is_same<internal::Empty, ValueType>::value,
- "Only callbacks for Future<> should accept Status and not Result");
-
- void operator()(const FutureImpl& impl) && {
- std::move(on_complete)(impl.CastResult<ValueType>()->status());
- }
- OnComplete on_complete;
- };
- };
-
- template <typename OnComplete>
- using WrapOnComplete = typename std::conditional<
- detail::first_arg_is_status<OnComplete>::value, WrapStatusyOnComplete,
- WrapResultyOnComplete>::type::template Callback<OnComplete>;
-
- /// \brief Consumer API: Register a callback to run when this future completes
- ///
- /// The callback should receive the result of the future (const Result<T>&)
- /// For a void or statusy future this should be (const Status&)
- ///
- /// There is no guarantee to the order in which callbacks will run. In
- /// particular, callbacks added while the future is being marked complete
- /// may be executed immediately, ahead of, or even at the same time as, other
- /// callbacks that have been previously added.
- ///
- /// WARNING: callbacks may hold arbitrary references, including cyclic references.
- /// Since callbacks will only be destroyed after they are invoked, this can lead to
- /// memory leaks if a Future is never marked finished (abandoned):
- ///
- /// {
- /// auto fut = Future<>::Make();
- /// fut.AddCallback([fut]() {});
- /// }
- ///
- /// In this example `fut` falls out of scope but is not destroyed because it holds a
- /// cyclic reference to itself through the callback.
- template <typename OnComplete, typename Callback = WrapOnComplete<OnComplete>>
- void AddCallback(OnComplete on_complete,
- CallbackOptions opts = CallbackOptions::Defaults()) const {
- // We know impl_ will not be dangling when invoking callbacks because at least one
- // thread will be waiting for MarkFinished to return. Thus it's safe to keep a
- // weak reference to impl_ here
- impl_->AddCallback(Callback{std::move(on_complete)}, opts);
- }
-
- /// \brief Overload of AddCallback that will return false instead of running
- /// synchronously
- ///
- /// This overload will guarantee the callback is never run synchronously. If the future
- /// is already finished then it will simply return false. This can be useful to avoid
- /// stack overflow in a situation where you have recursive Futures. For an example
- /// see the Loop function.
- ///
- /// Takes in a callback factory function to allow moving callbacks (the factory function
- /// will only be called if the callback can successfully be added)
- ///
- /// Returns true if the callback was actually added and false if it could not be
- /// added because the future was already marked complete.
- template <typename CallbackFactory,
- typename OnComplete = detail::result_of_t<CallbackFactory()>,
- typename Callback = WrapOnComplete<OnComplete>>
- bool TryAddCallback(const CallbackFactory& callback_factory,
- CallbackOptions opts = CallbackOptions::Defaults()) const {
- return impl_->TryAddCallback([&]() { return Callback{callback_factory()}; }, opts);
- }
-
- template <typename OnSuccess, typename OnFailure>
- struct ThenOnComplete {
- static constexpr bool has_no_args =
- internal::call_traits::argument_count<OnSuccess>::value == 0;
-
- using ContinuedFuture = detail::ContinueFuture::ForSignature<
- detail::if_has_no_args<OnSuccess, OnSuccess && (), OnSuccess && (const T&)>>;
-
- static_assert(
- std::is_same<detail::ContinueFuture::ForSignature<OnFailure && (const Status&)>,
- ContinuedFuture>::value,
- "OnSuccess and OnFailure must continue with the same future type");
-
- struct DummyOnSuccess {
- void operator()(const T&);
- };
- using OnSuccessArg = typename std::decay<internal::call_traits::argument_type<
- 0, detail::if_has_no_args<OnSuccess, DummyOnSuccess, OnSuccess>>>::type;
-
- static_assert(
- !std::is_same<OnSuccessArg, typename EnsureResult<OnSuccessArg>::type>::value,
- "OnSuccess' argument should not be a Result");
-
- void operator()(const Result<T>& result) && {
- detail::ContinueFuture continue_future;
- if (ARROW_PREDICT_TRUE(result.ok())) {
- // move on_failure to a(n immediately destroyed) temporary to free its resources
- ARROW_UNUSED(OnFailure(std::move(on_failure)));
- continue_future.IgnoringArgsIf(
- detail::if_has_no_args<OnSuccess, std::true_type, std::false_type>{},
- std::move(next), std::move(on_success), result.ValueOrDie());
- } else {
- ARROW_UNUSED(OnSuccess(std::move(on_success)));
- continue_future(std::move(next), std::move(on_failure), result.status());
- }
- }
-
- OnSuccess on_success;
- OnFailure on_failure;
- ContinuedFuture next;
- };
-
- template <typename OnSuccess>
- struct PassthruOnFailure {
- using ContinuedFuture = detail::ContinueFuture::ForSignature<
- detail::if_has_no_args<OnSuccess, OnSuccess && (), OnSuccess && (const T&)>>;
-
- Result<typename ContinuedFuture::ValueType> operator()(const Status& s) { return s; }
- };
-
- /// \brief Consumer API: Register a continuation to run when this future completes
- ///
- /// The continuation will run in the same thread that called MarkFinished (whatever
- /// callback is registered with this function will run before MarkFinished returns).
- /// Avoid long-running callbacks in favor of submitting a task to an Executor and
- /// returning the future.
- ///
- /// Two callbacks are supported:
- /// - OnSuccess, called with the result (const ValueType&) on successful completion.
- /// for an empty future this will be called with nothing ()
- /// - OnFailure, called with the error (const Status&) on failed completion.
- /// This callback is optional and defaults to a passthru of any errors.
- ///
- /// Then() returns a Future whose ValueType is derived from the return type of the
- /// callbacks. If a callback returns:
- /// - void, a Future<> will be returned which will complete successfully as soon
- /// as the callback runs.
- /// - Status, a Future<> will be returned which will complete with the returned Status
- /// as soon as the callback runs.
- /// - V or Result<V>, a Future<V> will be returned which will complete with the result
- /// of invoking the callback as soon as the callback runs.
- /// - Future<V>, a Future<V> will be returned which will be marked complete when the
- /// future returned by the callback completes (and will complete with the same
- /// result).
- ///
- /// The continued Future type must be the same for both callbacks.
- ///
- /// Note that OnFailure can swallow errors, allowing continued Futures to successfully
- /// complete even if this Future fails.
- ///
- /// If this future is already completed then the callback will be run immediately
- /// and the returned future may already be marked complete.
- ///
- /// See AddCallback for general considerations when writing callbacks.
- template <typename OnSuccess, typename OnFailure = PassthruOnFailure<OnSuccess>,
- typename OnComplete = ThenOnComplete<OnSuccess, OnFailure>,
- typename ContinuedFuture = typename OnComplete::ContinuedFuture>
- ContinuedFuture Then(OnSuccess on_success, OnFailure on_failure = {},
- CallbackOptions options = CallbackOptions::Defaults()) const {
- auto next = ContinuedFuture::Make();
- AddCallback(OnComplete{std::forward<OnSuccess>(on_success),
- std::forward<OnFailure>(on_failure), next},
- options);
- return next;
- }
-
- /// \brief Implicit constructor to create a finished future from a value
- Future(ValueType val) : Future() { // NOLINT runtime/explicit
- impl_ = FutureImpl::MakeFinished(FutureState::SUCCESS);
- SetResult(std::move(val));
- }
-
- /// \brief Implicit constructor to create a future from a Result, enabling use
- /// of macros like ARROW_ASSIGN_OR_RAISE.
- Future(Result<ValueType> res) : Future() { // NOLINT runtime/explicit
- if (ARROW_PREDICT_TRUE(res.ok())) {
- impl_ = FutureImpl::MakeFinished(FutureState::SUCCESS);
- } else {
- impl_ = FutureImpl::MakeFinished(FutureState::FAILURE);
- }
- SetResult(std::move(res));
- }
-
- /// \brief Implicit constructor to create a future from a Status, enabling use
- /// of macros like ARROW_RETURN_NOT_OK.
- Future(Status s) // NOLINT runtime/explicit
- : Future(Result<ValueType>(std::move(s))) {}
-
+ /// \brief Make a finished Future<> with the provided Status.
+ template <typename E = ValueType, typename = typename std::enable_if<
+ std::is_same<E, internal::Empty>::value>::type>
+ static Future<> MakeFinished(Status s = Status::OK()) {
+ return MakeFinished(E::ToResult(std::move(s)));
+ }
+
+ struct WrapResultyOnComplete {
+ template <typename OnComplete>
+ struct Callback {
+ void operator()(const FutureImpl& impl) && {
+ std::move(on_complete)(*impl.CastResult<ValueType>());
+ }
+ OnComplete on_complete;
+ };
+ };
+
+ struct WrapStatusyOnComplete {
+ template <typename OnComplete>
+ struct Callback {
+ static_assert(std::is_same<internal::Empty, ValueType>::value,
+ "Only callbacks for Future<> should accept Status and not Result");
+
+ void operator()(const FutureImpl& impl) && {
+ std::move(on_complete)(impl.CastResult<ValueType>()->status());
+ }
+ OnComplete on_complete;
+ };
+ };
+
+ template <typename OnComplete>
+ using WrapOnComplete = typename std::conditional<
+ detail::first_arg_is_status<OnComplete>::value, WrapStatusyOnComplete,
+ WrapResultyOnComplete>::type::template Callback<OnComplete>;
+
+ /// \brief Consumer API: Register a callback to run when this future completes
+ ///
+ /// The callback should receive the result of the future (const Result<T>&)
+ /// For a void or statusy future this should be (const Status&)
+ ///
+ /// There is no guarantee to the order in which callbacks will run. In
+ /// particular, callbacks added while the future is being marked complete
+ /// may be executed immediately, ahead of, or even at the same time as, other
+ /// callbacks that have been previously added.
+ ///
+ /// WARNING: callbacks may hold arbitrary references, including cyclic references.
+ /// Since callbacks will only be destroyed after they are invoked, this can lead to
+ /// memory leaks if a Future is never marked finished (abandoned):
+ ///
+ /// {
+ /// auto fut = Future<>::Make();
+ /// fut.AddCallback([fut]() {});
+ /// }
+ ///
+ /// In this example `fut` falls out of scope but is not destroyed because it holds a
+ /// cyclic reference to itself through the callback.
+ template <typename OnComplete, typename Callback = WrapOnComplete<OnComplete>>
+ void AddCallback(OnComplete on_complete,
+ CallbackOptions opts = CallbackOptions::Defaults()) const {
+ // We know impl_ will not be dangling when invoking callbacks because at least one
+ // thread will be waiting for MarkFinished to return. Thus it's safe to keep a
+ // weak reference to impl_ here
+ impl_->AddCallback(Callback{std::move(on_complete)}, opts);
+ }
+
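// A consumer-side sketch of AddCallback (illustrative): the callback receives
// const Result<int>& because this is a Future<int>; capturing the future
// itself in the lambda would create the cyclic reference warned about above.
void WatchFuture(const Future<int>& fut) {
  fut.AddCallback([](const Result<int>& res) {
    if (res.ok()) {
      // use *res
    }
  });
}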
+ /// \brief Overload of AddCallback that will return false instead of running
+ /// synchronously
+ ///
+ /// This overload will guarantee the callback is never run synchronously. If the future
+ /// is already finished then it will simply return false. This can be useful to avoid
+ /// stack overflow in a situation where you have recursive Futures. For an example
+ /// see the Loop function.
+ ///
+ /// Takes in a callback factory function to allow moving callbacks (the factory function
+ /// will only be called if the callback can successfully be added)
+ ///
+ /// Returns true if the callback was actually added and false if it could not be
+ /// added because the future was already marked complete.
+ template <typename CallbackFactory,
+ typename OnComplete = detail::result_of_t<CallbackFactory()>,
+ typename Callback = WrapOnComplete<OnComplete>>
+ bool TryAddCallback(const CallbackFactory& callback_factory,
+ CallbackOptions opts = CallbackOptions::Defaults()) const {
+ return impl_->TryAddCallback([&]() { return Callback{callback_factory()}; }, opts);
+ }
+
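// A sketch of TryAddCallback (illustrative; HandleNow is an assumed helper):
// the factory runs only if registration succeeds, so a move-only callback is
// not consumed when the future turns out to be finished already.
void HandleNow(const Result<int>& res);
void WatchOrHandle(const Future<int>& fut) {
  bool added = fut.TryAddCallback(
      []() { return [](const Result<int>& res) { /* async path */ }; });
  if (!added) {
    HandleNow(fut.result());  // synchronous path, no recursion risk
  }
}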
+ template <typename OnSuccess, typename OnFailure>
+ struct ThenOnComplete {
+ static constexpr bool has_no_args =
+ internal::call_traits::argument_count<OnSuccess>::value == 0;
+
+ using ContinuedFuture = detail::ContinueFuture::ForSignature<
+ detail::if_has_no_args<OnSuccess, OnSuccess && (), OnSuccess && (const T&)>>;
+
+ static_assert(
+ std::is_same<detail::ContinueFuture::ForSignature<OnFailure && (const Status&)>,
+ ContinuedFuture>::value,
+ "OnSuccess and OnFailure must continue with the same future type");
+
+ struct DummyOnSuccess {
+ void operator()(const T&);
+ };
+ using OnSuccessArg = typename std::decay<internal::call_traits::argument_type<
+ 0, detail::if_has_no_args<OnSuccess, DummyOnSuccess, OnSuccess>>>::type;
+
+ static_assert(
+ !std::is_same<OnSuccessArg, typename EnsureResult<OnSuccessArg>::type>::value,
+ "OnSuccess' argument should not be a Result");
+
+ void operator()(const Result<T>& result) && {
+ detail::ContinueFuture continue_future;
+ if (ARROW_PREDICT_TRUE(result.ok())) {
+ // move on_failure to a(n immediately destroyed) temporary to free its resources
+ ARROW_UNUSED(OnFailure(std::move(on_failure)));
+ continue_future.IgnoringArgsIf(
+ detail::if_has_no_args<OnSuccess, std::true_type, std::false_type>{},
+ std::move(next), std::move(on_success), result.ValueOrDie());
+ } else {
+ ARROW_UNUSED(OnSuccess(std::move(on_success)));
+ continue_future(std::move(next), std::move(on_failure), result.status());
+ }
+ }
+
+ OnSuccess on_success;
+ OnFailure on_failure;
+ ContinuedFuture next;
+ };
+
+ template <typename OnSuccess>
+ struct PassthruOnFailure {
+ using ContinuedFuture = detail::ContinueFuture::ForSignature<
+ detail::if_has_no_args<OnSuccess, OnSuccess && (), OnSuccess && (const T&)>>;
+
+ Result<typename ContinuedFuture::ValueType> operator()(const Status& s) { return s; }
+ };
+
+ /// \brief Consumer API: Register a continuation to run when this future completes
+ ///
+ /// The continuation will run in the same thread that called MarkFinished (whatever
+ /// callback is registered with this function will run before MarkFinished returns).
+ /// Avoid long-running callbacks in favor of submitting a task to an Executor and
+ /// returning the future.
+ ///
+ /// Two callbacks are supported:
+ /// - OnSuccess, called with the result (const ValueType&) on successful completion.
+ /// for an empty future this will be called with nothing ()
+ /// - OnFailure, called with the error (const Status&) on failed completion.
+ /// This callback is optional and defaults to a passthru of any errors.
+ ///
+ /// Then() returns a Future whose ValueType is derived from the return type of the
+ /// callbacks. If a callback returns:
+ /// - void, a Future<> will be returned which will complete successfully as soon
+ /// as the callback runs.
+ /// - Status, a Future<> will be returned which will complete with the returned Status
+ /// as soon as the callback runs.
+ /// - V or Result<V>, a Future<V> will be returned which will complete with the result
+ /// of invoking the callback as soon as the callback runs.
+ /// - Future<V>, a Future<V> will be returned which will be marked complete when the
+ /// future returned by the callback completes (and will complete with the same
+ /// result).
+ ///
+ /// The continued Future type must be the same for both callbacks.
+ ///
+ /// Note that OnFailure can swallow errors, allowing continued Futures to successfully
+ /// complete even if this Future fails.
+ ///
+ /// If this future is already completed then the callback will be run immediately
+ /// and the returned future may already be marked complete.
+ ///
+ /// See AddCallback for general considerations when writing callbacks.
+ template <typename OnSuccess, typename OnFailure = PassthruOnFailure<OnSuccess>,
+ typename OnComplete = ThenOnComplete<OnSuccess, OnFailure>,
+ typename ContinuedFuture = typename OnComplete::ContinuedFuture>
+ ContinuedFuture Then(OnSuccess on_success, OnFailure on_failure = {},
+ CallbackOptions options = CallbackOptions::Defaults()) const {
+ auto next = ContinuedFuture::Make();
+ AddCallback(OnComplete{std::forward<OnSuccess>(on_success),
+ std::forward<OnFailure>(on_failure), next},
+ options);
+ return next;
+ }
+
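// A continuation sketch for Then() (illustrative): OnSuccess returns a
// std::string, so the continued future is Future<std::string>; the explicit
// OnFailure here just forwards the error, mirroring the default passthru.
Future<std::string> Render(const Future<int>& fut) {
  return fut.Then(
      [](const int& v) { return std::to_string(v); },
      [](const Status& s) -> Result<std::string> { return s; });
}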
+ /// \brief Implicit constructor to create a finished future from a value
+ Future(ValueType val) : Future() { // NOLINT runtime/explicit
+ impl_ = FutureImpl::MakeFinished(FutureState::SUCCESS);
+ SetResult(std::move(val));
+ }
+
+ /// \brief Implicit constructor to create a future from a Result, enabling use
+ /// of macros like ARROW_ASSIGN_OR_RAISE.
+ Future(Result<ValueType> res) : Future() { // NOLINT runtime/explicit
+ if (ARROW_PREDICT_TRUE(res.ok())) {
+ impl_ = FutureImpl::MakeFinished(FutureState::SUCCESS);
+ } else {
+ impl_ = FutureImpl::MakeFinished(FutureState::FAILURE);
+ }
+ SetResult(std::move(res));
+ }
+
+ /// \brief Implicit constructor to create a future from a Status, enabling use
+ /// of macros like ARROW_RETURN_NOT_OK.
+ Future(Status s) // NOLINT runtime/explicit
+ : Future(Result<ValueType>(std::move(s))) {}
+
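// A sketch of what the implicit constructors enable (illustrative; Reader and
// ReadInt are assumed): error-propagation macros written for Status/Result
// also work in a function returning a Future, since both convert to a
// finished future.
struct Reader { Result<int> ReadInt(); };
Future<int> ReadAsync(Reader& reader) {
  ARROW_ASSIGN_OR_RAISE(int v, reader.ReadInt());  // error -> failed Future<int>
  return v;                                        // value -> finished Future<int>
}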
protected:
- void InitializeFromResult(Result<ValueType> res) {
- if (ARROW_PREDICT_TRUE(res.ok())) {
- impl_ = FutureImpl::MakeFinished(FutureState::SUCCESS);
- } else {
- impl_ = FutureImpl::MakeFinished(FutureState::FAILURE);
- }
- SetResult(std::move(res));
- }
-
- void Initialize() { impl_ = FutureImpl::Make(); }
-
- Result<ValueType>* GetResult() const { return impl_->CastResult<ValueType>(); }
-
- void SetResult(Result<ValueType> res) {
- impl_->result_ = {new Result<ValueType>(std::move(res)),
- [](void* p) { delete static_cast<Result<ValueType>*>(p); }};
- }
-
- void DoMarkFinished(Result<ValueType> res) {
- SetResult(std::move(res));
-
- if (ARROW_PREDICT_TRUE(GetResult()->ok())) {
- impl_->MarkFinished();
- } else {
- impl_->MarkFailed();
- }
- }
-
+ void InitializeFromResult(Result<ValueType> res) {
+ if (ARROW_PREDICT_TRUE(res.ok())) {
+ impl_ = FutureImpl::MakeFinished(FutureState::SUCCESS);
+ } else {
+ impl_ = FutureImpl::MakeFinished(FutureState::FAILURE);
+ }
+ SetResult(std::move(res));
+ }
+
+ void Initialize() { impl_ = FutureImpl::Make(); }
+
+ Result<ValueType>* GetResult() const { return impl_->CastResult<ValueType>(); }
+
+ void SetResult(Result<ValueType> res) {
+ impl_->result_ = {new Result<ValueType>(std::move(res)),
+ [](void* p) { delete static_cast<Result<ValueType>*>(p); }};
+ }
+
+ void DoMarkFinished(Result<ValueType> res) {
+ SetResult(std::move(res));
+
+ if (ARROW_PREDICT_TRUE(GetResult()->ok())) {
+ impl_->MarkFinished();
+ } else {
+ impl_->MarkFailed();
+ }
+ }
+
void CheckValid() const {
#ifndef NDEBUG
if (!is_valid()) {
@@ -713,54 +713,54 @@ class ARROW_MUST_USE_TYPE Future {
#endif
}
- explicit Future(std::shared_ptr<FutureImpl> impl) : impl_(std::move(impl)) {}
+ explicit Future(std::shared_ptr<FutureImpl> impl) : impl_(std::move(impl)) {}
+
+ std::shared_ptr<FutureImpl> impl_;
- std::shared_ptr<FutureImpl> impl_;
-
friend class FutureWaiter;
- friend struct detail::ContinueFuture;
-
- template <typename U>
- friend class Future;
- friend class WeakFuture<T>;
-
- FRIEND_TEST(FutureRefTest, ChainRemoved);
- FRIEND_TEST(FutureRefTest, TailRemoved);
- FRIEND_TEST(FutureRefTest, HeadRemoved);
+ friend struct detail::ContinueFuture;
+
+ template <typename U>
+ friend class Future;
+ friend class WeakFuture<T>;
+
+ FRIEND_TEST(FutureRefTest, ChainRemoved);
+ FRIEND_TEST(FutureRefTest, TailRemoved);
+ FRIEND_TEST(FutureRefTest, HeadRemoved);
};
-template <typename T>
-typename Future<T>::SyncType FutureToSync(const Future<T>& fut) {
- return fut.result();
-}
-
-template <>
-inline typename Future<internal::Empty>::SyncType FutureToSync<internal::Empty>(
- const Future<internal::Empty>& fut) {
- return fut.status();
-}
-
-template <typename T>
-class WeakFuture {
- public:
- explicit WeakFuture(const Future<T>& future) : impl_(future.impl_) {}
-
- Future<T> get() { return Future<T>{impl_.lock()}; }
-
- private:
- std::weak_ptr<FutureImpl> impl_;
-};
-
-/// If a Result<Future> holds an error instead of a Future, construct a finished Future
-/// holding that error.
-template <typename T>
-static Future<T> DeferNotOk(Result<Future<T>> maybe_future) {
- if (ARROW_PREDICT_FALSE(!maybe_future.ok())) {
- return Future<T>::MakeFinished(std::move(maybe_future).status());
- }
- return std::move(maybe_future).MoveValueUnsafe();
-}
-
+template <typename T>
+typename Future<T>::SyncType FutureToSync(const Future<T>& fut) {
+ return fut.result();
+}
+
+template <>
+inline typename Future<internal::Empty>::SyncType FutureToSync<internal::Empty>(
+ const Future<internal::Empty>& fut) {
+ return fut.status();
+}
+
+template <typename T>
+class WeakFuture {
+ public:
+ explicit WeakFuture(const Future<T>& future) : impl_(future.impl_) {}
+
+ Future<T> get() { return Future<T>{impl_.lock()}; }
+
+ private:
+ std::weak_ptr<FutureImpl> impl_;
+};
+
+/// If a Result<Future> holds an error instead of a Future, construct a finished Future
+/// holding that error.
+template <typename T>
+static Future<T> DeferNotOk(Result<Future<T>> maybe_future) {
+ if (ARROW_PREDICT_FALSE(!maybe_future.ok())) {
+ return Future<T>::MakeFinished(std::move(maybe_future).status());
+ }
+ return std::move(maybe_future).MoveValueUnsafe();
+}
+
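// A sketch of DeferNotOk (illustrative; StartJob is an assumed factory): a
// synchronous failure to even start the work is folded into the same failed
// future the asynchronous path would produce.
Result<Future<int>> StartJob();
Future<int> StartOrFail() { return DeferNotOk(StartJob()); }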
/// \brief Wait for all the futures to end, or for the given timeout to expire.
///
/// `true` is returned if all the futures completed before the timeout was reached,
@@ -783,53 +783,53 @@ inline bool WaitForAll(const std::vector<Future<T>*>& futures,
return waiter->Wait(seconds);
}
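// A sketch of WaitForAll (illustrative; the timeout parameter is assumed to
// be a double of seconds, per the surrounding hunk): a bounded wait over raw
// future pointers.
bool BothDoneWithin(Future<int>& fut_a, Future<int>& fut_b, double seconds) {
  std::vector<Future<int>*> ptrs = {&fut_a, &fut_b};
  return WaitForAll(ptrs, seconds);
}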
-/// \brief Create a Future which completes when all of `futures` complete.
-///
-/// The future's result is a vector of the results of `futures`.
-/// Note that this future will never be marked "failed"; failed results
-/// will be stored in the result vector alongside successful results.
-template <typename T>
-Future<std::vector<Result<T>>> All(std::vector<Future<T>> futures) {
- struct State {
- explicit State(std::vector<Future<T>> f)
- : futures(std::move(f)), n_remaining(futures.size()) {}
-
- std::vector<Future<T>> futures;
- std::atomic<size_t> n_remaining;
- };
-
- if (futures.size() == 0) {
- return {std::vector<Result<T>>{}};
- }
-
- auto state = std::make_shared<State>(std::move(futures));
-
- auto out = Future<std::vector<Result<T>>>::Make();
- for (const Future<T>& future : state->futures) {
- future.AddCallback([state, out](const Result<T>&) mutable {
- if (state->n_remaining.fetch_sub(1) != 1) return;
-
- std::vector<Result<T>> results(state->futures.size());
- for (size_t i = 0; i < results.size(); ++i) {
- results[i] = state->futures[i].result();
- }
- out.MarkFinished(std::move(results));
- });
- }
- return out;
-}
-
-template <>
-inline Future<>::Future(Status s) : Future(internal::Empty::ToResult(std::move(s))) {}
-
-/// \brief Create a Future which completes when all of `futures` complete.
-///
-/// The future will be marked complete if all `futures` complete
-/// successfully. Otherwise, it will be marked failed with the status of
-/// the first failing future.
-ARROW_EXPORT
-Future<> AllComplete(const std::vector<Future<>>& futures);
-
+/// \brief Create a Future which completes when all of `futures` complete.
+///
+/// The future's result is a vector of the results of `futures`.
+/// Note that this future will never be marked "failed"; failed results
+/// will be stored in the result vector alongside successful results.
+template <typename T>
+Future<std::vector<Result<T>>> All(std::vector<Future<T>> futures) {
+ struct State {
+ explicit State(std::vector<Future<T>> f)
+ : futures(std::move(f)), n_remaining(futures.size()) {}
+
+ std::vector<Future<T>> futures;
+ std::atomic<size_t> n_remaining;
+ };
+
+ if (futures.size() == 0) {
+ return {std::vector<Result<T>>{}};
+ }
+
+ auto state = std::make_shared<State>(std::move(futures));
+
+ auto out = Future<std::vector<Result<T>>>::Make();
+ for (const Future<T>& future : state->futures) {
+ future.AddCallback([state, out](const Result<T>&) mutable {
+ if (state->n_remaining.fetch_sub(1) != 1) return;
+
+ std::vector<Result<T>> results(state->futures.size());
+ for (size_t i = 0; i < results.size(); ++i) {
+ results[i] = state->futures[i].result();
+ }
+ out.MarkFinished(std::move(results));
+ });
+ }
+ return out;
+}
+
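// A sketch of All() (illustrative): failures appear inside the result vector,
// so the combined future itself always completes "successfully".
void GatherExample() {
  std::vector<Future<int>> futures;
  futures.push_back(Future<int>::MakeFinished(Result<int>(1)));
  futures.push_back(Future<int>::MakeFinished(Result<int>(Status::IOError("x"))));
  All(std::move(futures))
      .AddCallback([](const Result<std::vector<Result<int>>>& results) {
        // (*results)[0].ok() is true, (*results)[1].ok() is false
      });
}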
+template <>
+inline Future<>::Future(Status s) : Future(internal::Empty::ToResult(std::move(s))) {}
+
+/// \brief Create a Future which completes when all of `futures` complete.
+///
+/// The future will be marked complete if all `futures` complete
+/// successfully. Otherwise, it will be marked failed with the status of
+/// the first failing future.
+ARROW_EXPORT
+Future<> AllComplete(const std::vector<Future<>>& futures);
+
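// A sketch of AllComplete (illustrative): unlike All(), the combined future
// fails with the status of the first failing input.
Future<> WhenAllDone(const std::vector<Future<>>& work) {
  return AllComplete(work);
}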
/// \brief Wait for one of the futures to end, or for the given timeout to expire.
///
/// The indices of all completed futures are returned. Note that some futures
@@ -854,104 +854,104 @@ inline std::vector<int> WaitForAny(const std::vector<Future<T>*>& futures,
return waiter->MoveFinishedFutures();
}
-struct Continue {
- template <typename T>
- operator util::optional<T>() && { // NOLINT explicit
- return {};
- }
-};
-
-template <typename T = internal::Empty>
-util::optional<T> Break(T break_value = {}) {
- return util::optional<T>{std::move(break_value)};
-}
-
-template <typename T = internal::Empty>
-using ControlFlow = util::optional<T>;
-
-/// \brief Loop through an asynchronous sequence
-///
-/// \param[in] iterate A generator of Future<ControlFlow<BreakValue>>. On completion
-/// of each yielded future the resulting ControlFlow will be examined. A Break will
-/// terminate the loop, while a Continue will re-invoke `iterate`.
-///
-/// \return A future which will complete when a Future returned by iterate completes with
-/// a Break
-template <typename Iterate,
- typename Control = typename detail::result_of_t<Iterate()>::ValueType,
- typename BreakValueType = typename Control::value_type>
-Future<BreakValueType> Loop(Iterate iterate) {
- struct Callback {
- bool CheckForTermination(const Result<Control>& control_res) {
- if (!control_res.ok()) {
- break_fut.MarkFinished(control_res.status());
- return true;
- }
- if (control_res->has_value()) {
- break_fut.MarkFinished(**control_res);
- return true;
- }
- return false;
- }
-
- void operator()(const Result<Control>& maybe_control) && {
- if (CheckForTermination(maybe_control)) return;
-
- auto control_fut = iterate();
- while (true) {
- if (control_fut.TryAddCallback([this]() { return *this; })) {
- // Adding a callback succeeded; control_fut was not finished
- // and we must wait to CheckForTermination.
- return;
- }
- // Adding a callback failed; control_fut was finished and we
- // can CheckForTermination immediately. This also avoids recursion and potential
- // stack overflow.
- if (CheckForTermination(control_fut.result())) return;
-
- control_fut = iterate();
- }
- }
-
- Iterate iterate;
-
- // If the future returned by control_fut is never completed then we will be hanging on
- // to break_fut forever even if the listener has given up listening on it. Instead we
- // rely on the fact that a producer (the caller of Future<>::Make) is always
- // responsible for completing the futures they create.
- // TODO: Could avoid this kind of situation with "future abandonment" similar to mesos
- Future<BreakValueType> break_fut;
- };
-
- auto break_fut = Future<BreakValueType>::Make();
- auto control_fut = iterate();
- control_fut.AddCallback(Callback{std::move(iterate), break_fut});
-
- return break_fut;
-}
-
-inline Future<> ToFuture(Status status) {
- return Future<>::MakeFinished(std::move(status));
-}
-
-template <typename T>
-Future<T> ToFuture(T value) {
- return Future<T>::MakeFinished(std::move(value));
-}
-
-template <typename T>
-Future<T> ToFuture(Result<T> maybe_value) {
- return Future<T>::MakeFinished(std::move(maybe_value));
-}
-
-template <typename T>
-Future<T> ToFuture(Future<T> fut) {
- return std::move(fut);
-}
-
-template <typename T>
-struct EnsureFuture {
- using type = decltype(ToFuture(std::declval<T>()));
-};
-
+struct Continue {
+ template <typename T>
+ operator util::optional<T>() && { // NOLINT explicit
+ return {};
+ }
+};
+
+template <typename T = internal::Empty>
+util::optional<T> Break(T break_value = {}) {
+ return util::optional<T>{std::move(break_value)};
+}
+
+template <typename T = internal::Empty>
+using ControlFlow = util::optional<T>;
+
+/// \brief Loop through an asynchronous sequence
+///
+/// \param[in] iterate A generator of Future<ControlFlow<BreakValue>>. On completion
+/// of each yielded future the resulting ControlFlow will be examined. A Break will
+/// terminate the loop, while a Continue will re-invoke `iterate`.
+///
+/// \return A future which will complete when a Future returned by iterate completes with
+/// a Break
+template <typename Iterate,
+ typename Control = typename detail::result_of_t<Iterate()>::ValueType,
+ typename BreakValueType = typename Control::value_type>
+Future<BreakValueType> Loop(Iterate iterate) {
+ struct Callback {
+ bool CheckForTermination(const Result<Control>& control_res) {
+ if (!control_res.ok()) {
+ break_fut.MarkFinished(control_res.status());
+ return true;
+ }
+ if (control_res->has_value()) {
+ break_fut.MarkFinished(**control_res);
+ return true;
+ }
+ return false;
+ }
+
+ void operator()(const Result<Control>& maybe_control) && {
+ if (CheckForTermination(maybe_control)) return;
+
+ auto control_fut = iterate();
+ while (true) {
+ if (control_fut.TryAddCallback([this]() { return *this; })) {
+ // Adding a callback succeeded; control_fut was not finished
+ // and we must wait to CheckForTermination.
+ return;
+ }
+ // Adding a callback failed; control_fut was finished and we
+ // can CheckForTermination immediately. This also avoids recursion and potential
+ // stack overflow.
+ if (CheckForTermination(control_fut.result())) return;
+
+ control_fut = iterate();
+ }
+ }
+
+ Iterate iterate;
+
+ // If the future returned by control_fut is never completed then we will be hanging on
+ // to break_fut forever even if the listener has given up listening on it. Instead we
+ // rely on the fact that a producer (the caller of Future<>::Make) is always
+ // responsible for completing the futures they create.
+ // TODO: Could avoid this kind of situation with "future abandonment" similar to mesos
+ Future<BreakValueType> break_fut;
+ };
+
+ auto break_fut = Future<BreakValueType>::Make();
+ auto control_fut = iterate();
+ control_fut.AddCallback(Callback{std::move(iterate), break_fut});
+
+ return break_fut;
+}
+
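// A sketch of Loop() (illustrative): iterate three times, then break with the
// counter value; the returned future completes as Future<int> with 3.
Future<int> CountToThree() {
  auto counter = std::make_shared<int>(0);
  return Loop([counter]() -> Future<ControlFlow<int>> {
    if (*counter < 3) {
      ++*counter;
      return Future<ControlFlow<int>>::MakeFinished(ControlFlow<int>(Continue()));
    }
    return Future<ControlFlow<int>>::MakeFinished(Break(*counter));
  });
}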
+inline Future<> ToFuture(Status status) {
+ return Future<>::MakeFinished(std::move(status));
+}
+
+template <typename T>
+Future<T> ToFuture(T value) {
+ return Future<T>::MakeFinished(std::move(value));
+}
+
+template <typename T>
+Future<T> ToFuture(Result<T> maybe_value) {
+ return Future<T>::MakeFinished(std::move(maybe_value));
+}
+
+template <typename T>
+Future<T> ToFuture(Future<T> fut) {
+ return std::move(fut);
+}
+
+template <typename T>
+struct EnsureFuture {
+ using type = decltype(ToFuture(std::declval<T>()));
+};
+
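// A sketch of ToFuture/EnsureFuture (illustrative): generic code can accept a
// plain value, a Status, a Result<T>, or an existing Future<T> and normalize
// all of them into a future of the matching type.
template <typename T>
typename EnsureFuture<T>::type Normalized(T value) {
  return ToFuture(std::move(value));
}
// Normalized(42)           -> finished Future<int>
// Normalized(Status::OK()) -> finished Future<>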
} // namespace arrow
diff --git a/contrib/libs/apache/arrow/cpp/src/arrow/util/hashing.h b/contrib/libs/apache/arrow/cpp/src/arrow/util/hashing.h
index 2b887cfbfeb..ac1adcfb13e 100644
--- a/contrib/libs/apache/arrow/cpp/src/arrow/util/hashing.h
+++ b/contrib/libs/apache/arrow/cpp/src/arrow/util/hashing.h
@@ -39,7 +39,7 @@
#include "arrow/type_traits.h"
#include "arrow/util/bit_util.h"
#include "arrow/util/bitmap_builders.h"
-#include "arrow/util/endian.h"
+#include "arrow/util/endian.h"
#include "arrow/util/logging.h"
#include "arrow/util/macros.h"
#include "arrow/util/ubsan.h"
@@ -329,7 +329,7 @@ class HashTable {
// Stash old entries and seal builder, effectively resetting the Buffer
const Entry* old_entries = entries_;
- ARROW_ASSIGN_OR_RAISE(auto previous, entries_builder_.FinishWithLength(capacity_));
+ ARROW_ASSIGN_OR_RAISE(auto previous, entries_builder_.FinishWithLength(capacity_));
// Allocate new buffer
RETURN_NOT_OK(UpsizeBuffer(new_capacity));
@@ -460,13 +460,13 @@ class ScalarMemoTable : public MemoTable {
out_data[index] = entry->payload.value;
}
});
- // Zero-initialize the null entry
- if (null_index_ != kKeyNotFound) {
- int32_t index = null_index_ - start;
- if (index >= 0) {
- out_data[index] = Scalar{};
- }
- }
+ // Zero-initialize the null entry
+ if (null_index_ != kKeyNotFound) {
+ int32_t index = null_index_ - start;
+ if (index >= 0) {
+ out_data[index] = Scalar{};
+ }
+ }
}
void CopyValues(Scalar* out_data) const { CopyValues(0, out_data); }
@@ -697,8 +697,8 @@ class BinaryMemoTable : public MemoTable {
DCHECK_LE(start, size());
const builder_offset_type* offsets = binary_builder_.offsets_data();
- const builder_offset_type delta =
- start < binary_builder_.length() ? offsets[start] : 0;
+ const builder_offset_type delta =
+ start < binary_builder_.length() ? offsets[start] : 0;
for (int32_t i = start; i < size(); ++i) {
const builder_offset_type adjusted_offset = offsets[i] - delta;
Offset cast_offset = static_cast<Offset>(adjusted_offset);
@@ -781,8 +781,8 @@ class BinaryMemoTable : public MemoTable {
if (left_size > 0) {
memcpy(out_data, in_data + left_offset, left_size);
}
- // Zero-initialize the null entry
- memset(out_data + left_size, 0, width_size);
+ // Zero-initialize the null entry
+ memset(out_data + left_size, 0, width_size);
auto right_size = values_size() - static_cast<size_t>(null_data_offset);
if (right_size > 0) {
@@ -852,8 +852,8 @@ struct HashTraits<T, enable_if_t<has_string_view<T>::value &&
using MemoTableType = BinaryMemoTable<BinaryBuilder>;
};
-template <typename T>
-struct HashTraits<T, enable_if_decimal<T>> {
+template <typename T>
+struct HashTraits<T, enable_if_decimal<T>> {
using MemoTableType = BinaryMemoTable<BinaryBuilder>;
};
diff --git a/contrib/libs/apache/arrow/cpp/src/arrow/util/int128_internal.h b/contrib/libs/apache/arrow/cpp/src/arrow/util/int128_internal.h
index 34665dcf00c..1d494671a9f 100644
--- a/contrib/libs/apache/arrow/cpp/src/arrow/util/int128_internal.h
+++ b/contrib/libs/apache/arrow/cpp/src/arrow/util/int128_internal.h
@@ -16,10 +16,10 @@
// under the License.
#pragma once
-#include "arrow/util/config.h"
+#include "arrow/util/config.h"
#include "arrow/util/macros.h"
-#ifndef ARROW_USE_NATIVE_INT128
+#ifndef ARROW_USE_NATIVE_INT128
#include <boost/multiprecision/cpp_int.hpp>
#endif
diff --git a/contrib/libs/apache/arrow/cpp/src/arrow/util/int_util.cc b/contrib/libs/apache/arrow/cpp/src/arrow/util/int_util.cc
index 91ab77c64c7..24c5fe56eff 100644
--- a/contrib/libs/apache/arrow/cpp/src/arrow/util/int_util.cc
+++ b/contrib/libs/apache/arrow/cpp/src/arrow/util/int_util.cc
@@ -26,13 +26,13 @@
#include "arrow/type.h"
#include "arrow/type_traits.h"
#include "arrow/util/bit_block_counter.h"
-#include "arrow/util/bit_run_reader.h"
+#include "arrow/util/bit_run_reader.h"
#include "arrow/util/bit_util.h"
#include "arrow/util/checked_cast.h"
#include "arrow/util/logging.h"
#include "arrow/util/macros.h"
#include "arrow/util/ubsan.h"
-#include "arrow/visitor_inline.h"
+#include "arrow/visitor_inline.h"
namespace arrow {
namespace internal {
@@ -59,7 +59,7 @@ static const uint64_t max_uints[] = {0, max_uint8, max_uint16, 0, max_ui
0, 0, 0, max_uint64};
// Check if we would need to expand the underlying storage type
-static inline uint8_t ExpandedUIntWidth(uint64_t val, uint8_t current_width) {
+static inline uint8_t ExpandedUIntWidth(uint64_t val, uint8_t current_width) {
// Optimize for the common case where width doesn't change
if (ARROW_PREDICT_TRUE(val <= max_uints[current_width])) {
return current_width;
@@ -366,7 +366,7 @@ width8:
}
template <typename Source, typename Dest>
-static inline void CastIntsInternal(const Source* src, Dest* dest, int64_t length) {
+static inline void CastIntsInternal(const Source* src, Dest* dest, int64_t length) {
while (length >= 4) {
dest[0] = static_cast<Dest>(src[0]);
dest[1] = static_cast<Dest>(src[1]);
@@ -383,15 +383,15 @@ static inline void CastIntsInternal(const Source* src, Dest* dest, int64_t lengt
}
void DowncastInts(const int64_t* source, int8_t* dest, int64_t length) {
- CastIntsInternal(source, dest, length);
+ CastIntsInternal(source, dest, length);
}
void DowncastInts(const int64_t* source, int16_t* dest, int64_t length) {
- CastIntsInternal(source, dest, length);
+ CastIntsInternal(source, dest, length);
}
void DowncastInts(const int64_t* source, int32_t* dest, int64_t length) {
- CastIntsInternal(source, dest, length);
+ CastIntsInternal(source, dest, length);
}
void DowncastInts(const int64_t* source, int64_t* dest, int64_t length) {
@@ -399,25 +399,25 @@ void DowncastInts(const int64_t* source, int64_t* dest, int64_t length) {
}
void DowncastUInts(const uint64_t* source, uint8_t* dest, int64_t length) {
- CastIntsInternal(source, dest, length);
+ CastIntsInternal(source, dest, length);
}
void DowncastUInts(const uint64_t* source, uint16_t* dest, int64_t length) {
- CastIntsInternal(source, dest, length);
+ CastIntsInternal(source, dest, length);
}
void DowncastUInts(const uint64_t* source, uint32_t* dest, int64_t length) {
- CastIntsInternal(source, dest, length);
+ CastIntsInternal(source, dest, length);
}
void DowncastUInts(const uint64_t* source, uint64_t* dest, int64_t length) {
memcpy(dest, source, length * sizeof(int64_t));
}
-void UpcastInts(const int32_t* source, int64_t* dest, int64_t length) {
- CastIntsInternal(source, dest, length);
-}
-
+void UpcastInts(const int32_t* source, int64_t* dest, int64_t length) {
+ CastIntsInternal(source, dest, length);
+}
+
template <typename InputInt, typename OutputInt>
void TransposeInts(const InputInt* src, OutputInt* dest, int64_t length,
const int32_t* transpose_map) {
@@ -466,72 +466,72 @@ INSTANTIATE_ALL()
#undef INSTANTIATE_ALL
#undef INSTANTIATE_ALL_DEST
-namespace {
-
-template <typename SrcType>
-struct TransposeIntsDest {
- const SrcType* src;
- uint8_t* dest;
- int64_t dest_offset;
- int64_t length;
- const int32_t* transpose_map;
-
- template <typename T>
- enable_if_integer<T, Status> Visit(const T&) {
- using DestType = typename T::c_type;
- TransposeInts(src, reinterpret_cast<DestType*>(dest) + dest_offset, length,
- transpose_map);
- return Status::OK();
- }
-
- Status Visit(const DataType& type) {
- return Status::TypeError("TransposeInts received non-integer dest_type");
- }
-
- Status operator()(const DataType& type) { return VisitTypeInline(type, this); }
-};
-
-struct TransposeIntsSrc {
- const uint8_t* src;
- uint8_t* dest;
- int64_t src_offset;
- int64_t dest_offset;
- int64_t length;
- const int32_t* transpose_map;
- const DataType& dest_type;
-
- template <typename T>
- enable_if_integer<T, Status> Visit(const T&) {
- using SrcType = typename T::c_type;
- return TransposeIntsDest<SrcType>{reinterpret_cast<const SrcType*>(src) + src_offset,
- dest, dest_offset, length,
- transpose_map}(dest_type);
- }
-
- Status Visit(const DataType& type) {
- return Status::TypeError("TransposeInts received non-integer dest_type");
- }
-
- Status operator()(const DataType& type) { return VisitTypeInline(type, this); }
-};
-
-}; // namespace
-
-Status TransposeInts(const DataType& src_type, const DataType& dest_type,
- const uint8_t* src, uint8_t* dest, int64_t src_offset,
- int64_t dest_offset, int64_t length, const int32_t* transpose_map) {
- TransposeIntsSrc transposer{src, dest, src_offset, dest_offset,
- length, transpose_map, dest_type};
- return transposer(src_type);
-}
-
+namespace {
+
+template <typename SrcType>
+struct TransposeIntsDest {
+ const SrcType* src;
+ uint8_t* dest;
+ int64_t dest_offset;
+ int64_t length;
+ const int32_t* transpose_map;
+
+ template <typename T>
+ enable_if_integer<T, Status> Visit(const T&) {
+ using DestType = typename T::c_type;
+ TransposeInts(src, reinterpret_cast<DestType*>(dest) + dest_offset, length,
+ transpose_map);
+ return Status::OK();
+ }
+
+ Status Visit(const DataType& type) {
+ return Status::TypeError("TransposeInts received non-integer dest_type");
+ }
+
+ Status operator()(const DataType& type) { return VisitTypeInline(type, this); }
+};
+
+struct TransposeIntsSrc {
+ const uint8_t* src;
+ uint8_t* dest;
+ int64_t src_offset;
+ int64_t dest_offset;
+ int64_t length;
+ const int32_t* transpose_map;
+ const DataType& dest_type;
+
+ template <typename T>
+ enable_if_integer<T, Status> Visit(const T&) {
+ using SrcType = typename T::c_type;
+ return TransposeIntsDest<SrcType>{reinterpret_cast<const SrcType*>(src) + src_offset,
+ dest, dest_offset, length,
+ transpose_map}(dest_type);
+ }
+
+ Status Visit(const DataType& type) {
+ return Status::TypeError("TransposeInts received non-integer dest_type");
+ }
+
+ Status operator()(const DataType& type) { return VisitTypeInline(type, this); }
+};
+
+}; // namespace
+
+Status TransposeInts(const DataType& src_type, const DataType& dest_type,
+ const uint8_t* src, uint8_t* dest, int64_t src_offset,
+ int64_t dest_offset, int64_t length, const int32_t* transpose_map) {
+ TransposeIntsSrc transposer{src, dest, src_offset, dest_offset,
+ length, transpose_map, dest_type};
+ return transposer(src_type);
+}
+
template <typename T>
-static std::string FormatInt(T val) {
+static std::string FormatInt(T val) {
return std::to_string(val);
}
template <typename IndexCType, bool IsSigned = std::is_signed<IndexCType>::value>
-static Status CheckIndexBoundsImpl(const ArrayData& indices, uint64_t upper_limit) {
+static Status CheckIndexBoundsImpl(const ArrayData& indices, uint64_t upper_limit) {
// For unsigned integers, if the values array is larger than the maximum
// index value (e.g. especially for UINT8 / UINT16), then there is no need to
// boundscheck.
@@ -549,22 +549,22 @@ static Status CheckIndexBoundsImpl(const ArrayData& indices, uint64_t upper_limi
return ((IsSigned && val < 0) ||
(val >= 0 && static_cast<uint64_t>(val) >= upper_limit));
};
- return VisitSetBitRuns(
- bitmap, indices.offset, indices.length, [&](int64_t offset, int64_t length) {
- bool block_out_of_bounds = false;
- for (int64_t i = 0; i < length; ++i) {
- block_out_of_bounds |= IsOutOfBounds(indices_data[offset + i]);
+ return VisitSetBitRuns(
+ bitmap, indices.offset, indices.length, [&](int64_t offset, int64_t length) {
+ bool block_out_of_bounds = false;
+ for (int64_t i = 0; i < length; ++i) {
+ block_out_of_bounds |= IsOutOfBounds(indices_data[offset + i]);
}
- if (ARROW_PREDICT_FALSE(block_out_of_bounds)) {
- for (int64_t i = 0; i < length; ++i) {
- if (IsOutOfBounds(indices_data[offset + i])) {
- return Status::IndexError("Index ", FormatInt(indices_data[offset + i]),
- " out of bounds");
- }
+ if (ARROW_PREDICT_FALSE(block_out_of_bounds)) {
+ for (int64_t i = 0; i < length; ++i) {
+ if (IsOutOfBounds(indices_data[offset + i])) {
+ return Status::IndexError("Index ", FormatInt(indices_data[offset + i]),
+ " out of bounds");
+ }
}
}
- return Status::OK();
- });
+ return Status::OK();
+ });
}
/// \brief Branchless boundschecking of the indices. Processes batches of
@@ -596,8 +596,8 @@ Status CheckIndexBounds(const ArrayData& indices, uint64_t upper_limit) {
// ----------------------------------------------------------------------
// Utilities for casting from one integer type to another
-namespace {
-
+namespace {
+
template <typename InType, typename CType = typename InType::c_type>
Status IntegersInRange(const Datum& datum, CType bound_lower, CType bound_upper) {
if (std::numeric_limits<CType>::lowest() >= bound_lower &&
@@ -696,8 +696,8 @@ Status CheckIntegersInRangeImpl(const Datum& datum, const Scalar& bound_lower,
checked_cast<const ScalarType&>(bound_upper).value);
}
-} // namespace
-
+} // namespace
+
Status CheckIntegersInRange(const Datum& datum, const Scalar& bound_lower,
const Scalar& bound_upper) {
Type::type type_id = datum.type()->id();
@@ -729,8 +729,8 @@ Status CheckIntegersInRange(const Datum& datum, const Scalar& bound_lower,
}
}
-namespace {
-
+namespace {
+
template <typename O, typename I, typename Enable = void>
struct is_number_downcast {
static constexpr bool value = false;
@@ -919,8 +919,8 @@ Status IntegersCanFitImpl(const Datum& datum, const DataType& target_type) {
return CheckIntegersInRange(datum, ScalarType(bound_min), ScalarType(bound_max));
}
-} // namespace
-
+} // namespace
+
Status IntegersCanFit(const Datum& datum, const DataType& target_type) {
if (!is_integer(target_type.id())) {
return Status::Invalid("Target type is not an integer type: ", target_type);
diff --git a/contrib/libs/apache/arrow/cpp/src/arrow/util/int_util.h b/contrib/libs/apache/arrow/cpp/src/arrow/util/int_util.h
index 145a83b3171..bf9226cdf12 100644
--- a/contrib/libs/apache/arrow/cpp/src/arrow/util/int_util.h
+++ b/contrib/libs/apache/arrow/cpp/src/arrow/util/int_util.h
@@ -18,7 +18,7 @@
#pragma once
#include <cstdint>
-#include <type_traits>
+#include <type_traits>
#include "arrow/status.h"
#include "arrow/util/visibility.h"
@@ -70,30 +70,30 @@ void DowncastUInts(const uint64_t* source, uint32_t* dest, int64_t length);
ARROW_EXPORT
void DowncastUInts(const uint64_t* source, uint64_t* dest, int64_t length);
-ARROW_EXPORT
-void UpcastInts(const int32_t* source, int64_t* dest, int64_t length);
-
+ARROW_EXPORT
+void UpcastInts(const int32_t* source, int64_t* dest, int64_t length);
+
+template <typename InputInt, typename OutputInt>
+inline typename std::enable_if<(sizeof(InputInt) >= sizeof(OutputInt))>::type CastInts(
+ const InputInt* source, OutputInt* dest, int64_t length) {
+ DowncastInts(source, dest, length);
+}
+
+template <typename InputInt, typename OutputInt>
+inline typename std::enable_if<(sizeof(InputInt) < sizeof(OutputInt))>::type CastInts(
+ const InputInt* source, OutputInt* dest, int64_t length) {
+ UpcastInts(source, dest, length);
+}
+
template <typename InputInt, typename OutputInt>
-inline typename std::enable_if<(sizeof(InputInt) >= sizeof(OutputInt))>::type CastInts(
- const InputInt* source, OutputInt* dest, int64_t length) {
- DowncastInts(source, dest, length);
-}
-
-template <typename InputInt, typename OutputInt>
-inline typename std::enable_if<(sizeof(InputInt) < sizeof(OutputInt))>::type CastInts(
- const InputInt* source, OutputInt* dest, int64_t length) {
- UpcastInts(source, dest, length);
-}
-
-template <typename InputInt, typename OutputInt>
ARROW_EXPORT void TransposeInts(const InputInt* source, OutputInt* dest, int64_t length,
const int32_t* transpose_map);
-ARROW_EXPORT
-Status TransposeInts(const DataType& src_type, const DataType& dest_type,
- const uint8_t* src, uint8_t* dest, int64_t src_offset,
- int64_t dest_offset, int64_t length, const int32_t* transpose_map);
-
+ARROW_EXPORT
+Status TransposeInts(const DataType& src_type, const DataType& dest_type,
+ const uint8_t* src, uint8_t* dest, int64_t src_offset,
+ int64_t dest_offset, int64_t length, const int32_t* transpose_map);
+
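// A dispatch sketch for CastInts above (illustrative): the enable_if pair
// routes widening casts to UpcastInts and narrowing or equal-width casts to
// DowncastInts.
void WidenExample(const int32_t* src, int64_t* dest, int64_t n) {
  CastInts(src, dest, n);  // sizeof(int32_t) < sizeof(int64_t) -> UpcastInts
}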
/// \brief Do vectorized boundschecking of integer-type array indices. The
/// indices must be non-negative and strictly less than the passed upper
/// limit (which is usually the length of an array that is being indexed-into).
diff --git a/contrib/libs/apache/arrow/cpp/src/arrow/util/int_util_internal.h b/contrib/libs/apache/arrow/cpp/src/arrow/util/int_util_internal.h
index 3760d03c9ff..4136706629f 100644
--- a/contrib/libs/apache/arrow/cpp/src/arrow/util/int_util_internal.h
+++ b/contrib/libs/apache/arrow/cpp/src/arrow/util/int_util_internal.h
@@ -63,27 +63,27 @@ OPS_WITH_OVERFLOW(DivideWithOverflow, div)
#undef OP_WITH_OVERFLOW
#undef OPS_WITH_OVERFLOW
-// Define function NegateWithOverflow with the signature `bool(T u, T* out)`
-// where T is a signed integer type. On overflow, these functions return true.
-// Otherwise, false is returned and `out` is updated with the result of the
-// operation.
-
-#define UNARY_OP_WITH_OVERFLOW(_func_name, _psnip_op, _type, _psnip_type) \
- static inline bool _func_name(_type u, _type* out) { \
- return !psnip_safe_##_psnip_type##_##_psnip_op(out, u); \
- }
-
-#define SIGNED_UNARY_OPS_WITH_OVERFLOW(_func_name, _psnip_op) \
- UNARY_OP_WITH_OVERFLOW(_func_name, _psnip_op, int8_t, int8) \
- UNARY_OP_WITH_OVERFLOW(_func_name, _psnip_op, int16_t, int16) \
- UNARY_OP_WITH_OVERFLOW(_func_name, _psnip_op, int32_t, int32) \
- UNARY_OP_WITH_OVERFLOW(_func_name, _psnip_op, int64_t, int64)
-
-SIGNED_UNARY_OPS_WITH_OVERFLOW(NegateWithOverflow, neg)
-
-#undef UNARY_OP_WITH_OVERFLOW
-#undef SIGNED_UNARY_OPS_WITH_OVERFLOW
-
+// Define function NegateWithOverflow with the signature `bool(T u, T* out)`
+// where T is a signed integer type. On overflow, these functions return true.
+// Otherwise, false is returned and `out` is updated with the result of the
+// operation.
+
+#define UNARY_OP_WITH_OVERFLOW(_func_name, _psnip_op, _type, _psnip_type) \
+ static inline bool _func_name(_type u, _type* out) { \
+ return !psnip_safe_##_psnip_type##_##_psnip_op(out, u); \
+ }
+
+#define SIGNED_UNARY_OPS_WITH_OVERFLOW(_func_name, _psnip_op) \
+ UNARY_OP_WITH_OVERFLOW(_func_name, _psnip_op, int8_t, int8) \
+ UNARY_OP_WITH_OVERFLOW(_func_name, _psnip_op, int16_t, int16) \
+ UNARY_OP_WITH_OVERFLOW(_func_name, _psnip_op, int32_t, int32) \
+ UNARY_OP_WITH_OVERFLOW(_func_name, _psnip_op, int64_t, int64)
+
+SIGNED_UNARY_OPS_WITH_OVERFLOW(NegateWithOverflow, neg)
+
+#undef UNARY_OP_WITH_OVERFLOW
+#undef SIGNED_UNARY_OPS_WITH_OVERFLOW
+
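// A usage sketch for the generated NegateWithOverflow overloads (illustrative;
// needs <cstdint> and <limits>): they return true on overflow, false on
// success.
bool NegatedMinOverflows() {
  int32_t out;
  return NegateWithOverflow(std::numeric_limits<int32_t>::min(), &out);  // true
}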
/// Signed addition with well-defined behaviour on overflow (as unsigned)
template <typename SignedInt>
SignedInt SafeSignedAdd(SignedInt u, SignedInt v) {
@@ -100,13 +100,13 @@ SignedInt SafeSignedSubtract(SignedInt u, SignedInt v) {
static_cast<UnsignedInt>(v));
}
-/// Signed negation with well-defined behaviour on overflow (as unsigned)
-template <typename SignedInt>
-SignedInt SafeSignedNegate(SignedInt u) {
- using UnsignedInt = typename std::make_unsigned<SignedInt>::type;
- return static_cast<SignedInt>(~static_cast<UnsignedInt>(u) + 1);
-}
-
+/// Signed negation with well-defined behaviour on overflow (as unsigned)
+template <typename SignedInt>
+SignedInt SafeSignedNegate(SignedInt u) {
+ using UnsignedInt = typename std::make_unsigned<SignedInt>::type;
+ return static_cast<SignedInt>(~static_cast<UnsignedInt>(u) + 1);
+}
+
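// A sketch of SafeSignedNegate (illustrative): the unsigned round-trip gives
// wrap-around semantics instead of undefined behaviour, so negating the most
// negative value yields itself.
void NegateExample() {
  int32_t v = SafeSignedNegate(std::numeric_limits<int32_t>::min());
  // v == std::numeric_limits<int32_t>::min(), with no UB on the way
}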
/// Signed left shift with well-defined behaviour on negative numbers or overflow
template <typename SignedInt, typename Shift>
SignedInt SafeLeftShift(SignedInt u, Shift shift) {
diff --git a/contrib/libs/apache/arrow/cpp/src/arrow/util/io_util.cc b/contrib/libs/apache/arrow/cpp/src/arrow/util/io_util.cc
index 85f3843f715..f6566ea7e36 100644
--- a/contrib/libs/apache/arrow/cpp/src/arrow/util/io_util.cc
+++ b/contrib/libs/apache/arrow/cpp/src/arrow/util/io_util.cc
@@ -22,15 +22,15 @@
#define _FILE_OFFSET_BITS 64
-#if defined(sun) || defined(__sun)
-// According to https://bugs.python.org/issue1759169#msg82201, __EXTENSIONS__
-// is the best way to enable modern POSIX APIs, such as posix_madvise(), on Solaris.
-// (see also
-// https://github.com/illumos/illumos-gate/blob/master/usr/src/uts/common/sys/mman.h)
-#undef __EXTENSIONS__
-#define __EXTENSIONS__
-#endif
-
+#if defined(sun) || defined(__sun)
+// According to https://bugs.python.org/issue1759169#msg82201, __EXTENSIONS__
+// is the best way to enable modern POSIX APIs, such as posix_madvise(), on Solaris.
+// (see also
+// https://github.com/illumos/illumos-gate/blob/master/usr/src/uts/common/sys/mman.h)
+#undef __EXTENSIONS__
+#define __EXTENSIONS__
+#endif
+
#include "arrow/util/windows_compatibility.h" // IWYU pragma: keep
#include <algorithm>
@@ -41,7 +41,7 @@
#include <random>
#include <sstream>
#include <string>
-#include <thread>
+#include <thread>
#include <utility>
#include <vector>
@@ -244,26 +244,26 @@ class WinErrorDetail : public StatusDetail {
};
#endif
-const char kSignalDetailTypeId[] = "arrow::SignalDetail";
-
-class SignalDetail : public StatusDetail {
- public:
- explicit SignalDetail(int signum) : signum_(signum) {}
-
- const char* type_id() const override { return kSignalDetailTypeId; }
-
- std::string ToString() const override {
- std::stringstream ss;
- ss << "received signal " << signum_;
- return ss.str();
- }
-
- int signum() const { return signum_; }
-
- protected:
- int signum_;
-};
-
+const char kSignalDetailTypeId[] = "arrow::SignalDetail";
+
+class SignalDetail : public StatusDetail {
+ public:
+ explicit SignalDetail(int signum) : signum_(signum) {}
+
+ const char* type_id() const override { return kSignalDetailTypeId; }
+
+ std::string ToString() const override {
+ std::stringstream ss;
+ ss << "received signal " << signum_;
+ return ss.str();
+ }
+
+ int signum() const { return signum_; }
+
+ protected:
+ int signum_;
+};
+
} // namespace
std::shared_ptr<StatusDetail> StatusDetailFromErrno(int errnum) {
@@ -276,10 +276,10 @@ std::shared_ptr<StatusDetail> StatusDetailFromWinError(int errnum) {
}
#endif
-std::shared_ptr<StatusDetail> StatusDetailFromSignal(int signum) {
- return std::make_shared<SignalDetail>(signum);
-}
-
+std::shared_ptr<StatusDetail> StatusDetailFromSignal(int signum) {
+ return std::make_shared<SignalDetail>(signum);
+}
+
int ErrnoFromStatus(const Status& status) {
const auto detail = status.detail();
if (detail != nullptr && detail->type_id() == kErrnoDetailTypeId) {
@@ -298,14 +298,14 @@ int WinErrorFromStatus(const Status& status) {
return 0;
}
-int SignalFromStatus(const Status& status) {
- const auto detail = status.detail();
- if (detail != nullptr && detail->type_id() == kSignalDetailTypeId) {
- return checked_cast<const SignalDetail&>(*detail).signum();
- }
- return 0;
-}
-
+int SignalFromStatus(const Status& status) {
+ const auto detail = status.detail();
+ if (detail != nullptr && detail->type_id() == kSignalDetailTypeId) {
+ return checked_cast<const SignalDetail&>(*detail).signum();
+ }
+ return 0;
+}
+
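// A sketch of the signal helpers above (illustrative): the detail can be
// inspected directly, and SignalFromStatus returns 0 for statuses that do not
// carry a SignalDetail.
void SignalDetailExample() {
  auto detail = StatusDetailFromSignal(2);  // e.g. SIGINT
  // detail->ToString() == "received signal 2"
  // SignalFromStatus(Status::OK()) == 0
}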
//
// PlatformFilename implementation
//
@@ -403,18 +403,18 @@ namespace {
Result<bool> DoCreateDir(const PlatformFilename& dir_path, bool create_parents) {
#ifdef _WIN32
- const auto s = dir_path.ToNative().c_str();
- if (CreateDirectoryW(s, nullptr)) {
+ const auto s = dir_path.ToNative().c_str();
+ if (CreateDirectoryW(s, nullptr)) {
return true;
}
int errnum = GetLastError();
if (errnum == ERROR_ALREADY_EXISTS) {
- const auto attrs = GetFileAttributesW(s);
- if (attrs == INVALID_FILE_ATTRIBUTES || !(attrs & FILE_ATTRIBUTE_DIRECTORY)) {
- // Note we propagate the original error, not the GetFileAttributesW() error
- return IOErrorFromWinError(ERROR_ALREADY_EXISTS, "Cannot create directory '",
- dir_path.ToString(), "': non-directory entry exists");
- }
+ const auto attrs = GetFileAttributesW(s);
+ if (attrs == INVALID_FILE_ATTRIBUTES || !(attrs & FILE_ATTRIBUTE_DIRECTORY)) {
+ // Note we propagate the original error, not the GetFileAttributesW() error
+ return IOErrorFromWinError(ERROR_ALREADY_EXISTS, "Cannot create directory '",
+ dir_path.ToString(), "': non-directory entry exists");
+ }
return false;
}
if (create_parents && errnum == ERROR_PATH_NOT_FOUND) {
@@ -427,17 +427,17 @@ Result<bool> DoCreateDir(const PlatformFilename& dir_path, bool create_parents)
return IOErrorFromWinError(GetLastError(), "Cannot create directory '",
dir_path.ToString(), "'");
#else
- const auto s = dir_path.ToNative().c_str();
- if (mkdir(s, S_IRWXU | S_IRWXG | S_IRWXO) == 0) {
+ const auto s = dir_path.ToNative().c_str();
+ if (mkdir(s, S_IRWXU | S_IRWXG | S_IRWXO) == 0) {
return true;
}
if (errno == EEXIST) {
- struct stat st;
- if (stat(s, &st) || !S_ISDIR(st.st_mode)) {
- // Note we propagate the original errno, not the stat() errno
- return IOErrorFromErrno(EEXIST, "Cannot create directory '", dir_path.ToString(),
- "': non-directory entry exists");
- }
+ struct stat st;
+ if (stat(s, &st) || !S_ISDIR(st.st_mode)) {
+ // Note we propagate the original errno, not the stat() errno
+ return IOErrorFromErrno(EEXIST, "Cannot create directory '", dir_path.ToString(),
+ "': non-directory entry exists");
+ }
return false;
}
if (create_parents && errno == ENOENT) {
@@ -1019,15 +1019,15 @@ Status MemoryMapRemap(void* addr, size_t old_size, size_t new_size, int fildes,
return StatusFromMmapErrno("MapViewOfFile failed");
}
return Status::OK();
-#elif defined(__linux__)
- if (ftruncate(fildes, new_size) == -1) {
- return StatusFromMmapErrno("ftruncate failed");
- }
- *new_addr = mremap(addr, old_size, new_size, MREMAP_MAYMOVE);
- if (*new_addr == MAP_FAILED) {
- return StatusFromMmapErrno("mremap failed");
- }
- return Status::OK();
+#elif defined(__linux__)
+ if (ftruncate(fildes, new_size) == -1) {
+ return StatusFromMmapErrno("ftruncate failed");
+ }
+ *new_addr = mremap(addr, old_size, new_size, MREMAP_MAYMOVE);
+ if (*new_addr == MAP_FAILED) {
+ return StatusFromMmapErrno("mremap failed");
+ }
+ return Status::OK();
#else
// we have to close the mmap first, truncate the file to the new size
// and recreate the mmap
@@ -1089,7 +1089,7 @@ Status MemoryAdviseWillNeed(const std::vector<MemoryRegion>& regions) {
}
}
return Status::OK();
-#elif defined(POSIX_MADV_WILLNEED)
+#elif defined(POSIX_MADV_WILLNEED)
for (const auto& region : regions) {
if (region.size != 0) {
const auto aligned = align_region(region);
@@ -1103,8 +1103,8 @@ Status MemoryAdviseWillNeed(const std::vector<MemoryRegion>& regions) {
}
}
return Status::OK();
-#else
- return Status::OK();
+#else
+ return Status::OK();
#endif
}
@@ -1468,51 +1468,51 @@ std::string MakeRandomName(int num_chars) {
} // namespace
Result<std::unique_ptr<TemporaryDir>> TemporaryDir::Make(const std::string& prefix) {
- const int kNumChars = 8;
-
+ const int kNumChars = 8;
+
NativePathString base_name;
- auto MakeBaseName = [&]() {
- std::string suffix = MakeRandomName(kNumChars);
- return StringToNative(prefix + suffix);
- };
-
- auto TryCreatingDirectory =
- [&](const NativePathString& base_dir) -> Result<std::unique_ptr<TemporaryDir>> {
- Status st;
- for (int attempt = 0; attempt < 3; ++attempt) {
- PlatformFilename fn(base_dir + kNativeSep + base_name + kNativeSep);
- auto result = CreateDir(fn);
- if (!result.ok()) {
- // Probably a permissions error or a non-existing base_dir
- return nullptr;
- }
- if (*result) {
- return std::unique_ptr<TemporaryDir>(new TemporaryDir(std::move(fn)));
- }
- // The random name already exists in base_dir, try with another name
- st = Status::IOError("Path already exists: '", fn.ToString(), "'");
- ARROW_ASSIGN_OR_RAISE(base_name, MakeBaseName());
- }
- return st;
- };
-
- ARROW_ASSIGN_OR_RAISE(base_name, MakeBaseName());
-
+ auto MakeBaseName = [&]() {
+ std::string suffix = MakeRandomName(kNumChars);
+ return StringToNative(prefix + suffix);
+ };
+
+ auto TryCreatingDirectory =
+ [&](const NativePathString& base_dir) -> Result<std::unique_ptr<TemporaryDir>> {
+ Status st;
+ for (int attempt = 0; attempt < 3; ++attempt) {
+ PlatformFilename fn(base_dir + kNativeSep + base_name + kNativeSep);
+ auto result = CreateDir(fn);
+ if (!result.ok()) {
+ // Probably a permissions error or a non-existing base_dir
+ return nullptr;
+ }
+ if (*result) {
+ return std::unique_ptr<TemporaryDir>(new TemporaryDir(std::move(fn)));
+ }
+ // The random name already exists in base_dir, try with another name
+ st = Status::IOError("Path already exists: '", fn.ToString(), "'");
+ ARROW_ASSIGN_OR_RAISE(base_name, MakeBaseName());
+ }
+ return st;
+ };
+
+ ARROW_ASSIGN_OR_RAISE(base_name, MakeBaseName());
+
auto base_dirs = GetPlatformTemporaryDirs();
DCHECK_NE(base_dirs.size(), 0);
- for (const auto& base_dir : base_dirs) {
- ARROW_ASSIGN_OR_RAISE(auto ptr, TryCreatingDirectory(base_dir));
- if (ptr) {
- return std::move(ptr);
+ for (const auto& base_dir : base_dirs) {
+ ARROW_ASSIGN_OR_RAISE(auto ptr, TryCreatingDirectory(base_dir));
+ if (ptr) {
+ return std::move(ptr);
}
- // Cannot create in this directory, try the next one
+ // Cannot create in this directory, try the next one
}
- return Status::IOError(
- "Cannot create temporary subdirectory in any "
- "of the platform temporary directories");
+ return Status::IOError(
+ "Cannot create temporary subdirectory in any "
+ "of the platform temporary directories");
}
TemporaryDir::TemporaryDir(PlatformFilename&& path) : path_(std::move(path)) {}
@@ -1594,64 +1594,64 @@ Result<SignalHandler> SetSignalHandler(int signum, const SignalHandler& handler)
return Status::OK();
}
-void ReinstateSignalHandler(int signum, SignalHandler::Callback handler) {
-#if !ARROW_HAVE_SIGACTION
- // Cannot report any errors from signal() (but there shouldn't be any)
- signal(signum, handler);
-#endif
-}
-
-Status SendSignal(int signum) {
- if (raise(signum) == 0) {
- return Status::OK();
- }
- if (errno == EINVAL) {
- return Status::Invalid("Invalid signal number ", signum);
- }
- return IOErrorFromErrno(errno, "Failed to raise signal");
-}
-
-Status SendSignalToThread(int signum, uint64_t thread_id) {
-#ifdef _WIN32
- return Status::NotImplemented("Cannot send signal to specific thread on Windows");
-#else
- // Have to use a C-style cast because pthread_t can be a pointer *or* integer type
- int r = pthread_kill((pthread_t)thread_id, signum); // NOLINT readability-casting
- if (r == 0) {
- return Status::OK();
- }
- if (r == EINVAL) {
- return Status::Invalid("Invalid signal number ", signum);
- }
- return IOErrorFromErrno(r, "Failed to raise signal");
-#endif
-}
-
+void ReinstateSignalHandler(int signum, SignalHandler::Callback handler) {
+#if !ARROW_HAVE_SIGACTION
+ // Cannot report any errors from signal() (but there shouldn't be any)
+ signal(signum, handler);
+#endif
+}
+
+Status SendSignal(int signum) {
+ if (raise(signum) == 0) {
+ return Status::OK();
+ }
+ if (errno == EINVAL) {
+ return Status::Invalid("Invalid signal number ", signum);
+ }
+ return IOErrorFromErrno(errno, "Failed to raise signal");
+}
+
+Status SendSignalToThread(int signum, uint64_t thread_id) {
+#ifdef _WIN32
+ return Status::NotImplemented("Cannot send signal to specific thread on Windows");
+#else
+ // Have to use a C-style cast because pthread_t can be a pointer *or* integer type
+ int r = pthread_kill((pthread_t)thread_id, signum); // NOLINT readability-casting
+ if (r == 0) {
+ return Status::OK();
+ }
+ if (r == EINVAL) {
+ return Status::Invalid("Invalid signal number ", signum);
+ }
+ return IOErrorFromErrno(r, "Failed to raise signal");
+#endif
+}
+
namespace {
-int64_t GetPid() {
-#ifdef _WIN32
- return GetCurrentProcessId();
-#else
- return getpid();
-#endif
-}
-
+int64_t GetPid() {
+#ifdef _WIN32
+ return GetCurrentProcessId();
+#else
+ return getpid();
+#endif
+}
+
std::mt19937_64 GetSeedGenerator() {
// Initialize Mersenne Twister PRNG with a true random seed.
- // Make sure to mix in the process id to minimize the risk of clashes when running tests in parallel.
+ // Make sure to mix in the process id to minimize the risk of clashes when running tests in parallel.
#ifdef ARROW_VALGRIND
// Valgrind can crash, hang or enter an infinite loop on std::random_device,
// use a crude initializer instead.
const uint8_t dummy = 0;
ARROW_UNUSED(dummy);
std::mt19937_64 seed_gen(reinterpret_cast<uintptr_t>(&dummy) ^
- static_cast<uintptr_t>(GetPid()));
+ static_cast<uintptr_t>(GetPid()));
#else
std::random_device true_random;
std::mt19937_64 seed_gen(static_cast<uint64_t>(true_random()) ^
- (static_cast<uint64_t>(true_random()) << 32) ^
- static_cast<uint64_t>(GetPid()));
+ (static_cast<uint64_t>(true_random()) << 32) ^
+ static_cast<uint64_t>(GetPid()));
#endif
return seed_gen;
}
@@ -1665,21 +1665,21 @@ int64_t GetRandomSeed() {
return static_cast<int64_t>(seed_gen());
}
-uint64_t GetThreadId() {
- uint64_t equiv{0};
- // std::thread::id is trivially copyable as per C++ spec,
- // so type punning as a uint64_t should work
- static_assert(sizeof(std::thread::id) <= sizeof(uint64_t),
- "std::thread::id can't fit into uint64_t");
- const auto tid = std::this_thread::get_id();
- memcpy(&equiv, reinterpret_cast<const void*>(&tid), sizeof(tid));
- return equiv;
-}
-
-uint64_t GetOptionalThreadId() {
- auto tid = GetThreadId();
- return (tid == 0) ? tid - 1 : tid;
-}
-
+uint64_t GetThreadId() {
+ uint64_t equiv{0};
+ // std::thread::id is trivially copyable as per C++ spec,
+ // so type punning as a uint64_t should work
+ static_assert(sizeof(std::thread::id) <= sizeof(uint64_t),
+ "std::thread::id can't fit into uint64_t");
+ const auto tid = std::this_thread::get_id();
+ memcpy(&equiv, reinterpret_cast<const void*>(&tid), sizeof(tid));
+ return equiv;
+}
+
+uint64_t GetOptionalThreadId() {
+ auto tid = GetThreadId();
+ return (tid == 0) ? tid - 1 : tid;
+}
+
} // namespace internal
} // namespace arrow
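The TemporaryDir::Make() body restored above picks a random 8-character suffix, retries up to three times per candidate base directory, and falls through to the next platform temporary directory when creation fails outright. A minimal usage sketch follows; the path() accessor and the cleanup-on-destruction behavior are assumptions about the class not shown in this hunk, and DoWork() is a stand-in for caller code.

#include "arrow/result.h"
#include "arrow/util/io_util.h"

arrow::Status DoWork(const arrow::internal::PlatformFilename&) {
  return arrow::Status::OK();  // stand-in for real work
}

arrow::Status UseScratchDir() {
  // Make() creates e.g. <platform-tmp>/arrow-example-a1b2c3d4/ and returns an
  // owning handle; the directory is assumed to be removed on destruction.
  ARROW_ASSIGN_OR_RAISE(auto tmp,
                        arrow::internal::TemporaryDir::Make("arrow-example-"));
  return DoWork(tmp->path());  // path() accessor assumed
}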
diff --git a/contrib/libs/apache/arrow/cpp/src/arrow/util/io_util.h b/contrib/libs/apache/arrow/cpp/src/arrow/util/io_util.h
index 7aa26f0819e..4255dd37105 100644
--- a/contrib/libs/apache/arrow/cpp/src/arrow/util/io_util.h
+++ b/contrib/libs/apache/arrow/cpp/src/arrow/util/io_util.h
@@ -209,8 +209,8 @@ std::shared_ptr<StatusDetail> StatusDetailFromErrno(int errnum);
ARROW_EXPORT
std::shared_ptr<StatusDetail> StatusDetailFromWinError(int errnum);
#endif
-ARROW_EXPORT
-std::shared_ptr<StatusDetail> StatusDetailFromSignal(int signum);
+ARROW_EXPORT
+std::shared_ptr<StatusDetail> StatusDetailFromSignal(int signum);
template <typename... Args>
Status StatusFromErrno(int errnum, StatusCode code, Args&&... args) {
@@ -236,17 +236,17 @@ Status IOErrorFromWinError(int errnum, Args&&... args) {
}
#endif
-template <typename... Args>
-Status StatusFromSignal(int signum, StatusCode code, Args&&... args) {
- return Status::FromDetailAndArgs(code, StatusDetailFromSignal(signum),
- std::forward<Args>(args)...);
-}
-
-template <typename... Args>
-Status CancelledFromSignal(int signum, Args&&... args) {
- return StatusFromSignal(signum, StatusCode::Cancelled, std::forward<Args>(args)...);
-}
-
+template <typename... Args>
+Status StatusFromSignal(int signum, StatusCode code, Args&&... args) {
+ return Status::FromDetailAndArgs(code, StatusDetailFromSignal(signum),
+ std::forward<Args>(args)...);
+}
+
+template <typename... Args>
+Status CancelledFromSignal(int signum, Args&&... args) {
+ return StatusFromSignal(signum, StatusCode::Cancelled, std::forward<Args>(args)...);
+}
+
ARROW_EXPORT
int ErrnoFromStatus(const Status&);
@@ -254,9 +254,9 @@ int ErrnoFromStatus(const Status&);
ARROW_EXPORT
int WinErrorFromStatus(const Status&);
-ARROW_EXPORT
-int SignalFromStatus(const Status&);
-
+ARROW_EXPORT
+int SignalFromStatus(const Status&);
+
class ARROW_EXPORT TemporaryDir {
public:
~TemporaryDir();
@@ -309,26 +309,26 @@ Result<SignalHandler> GetSignalHandler(int signum);
ARROW_EXPORT
Result<SignalHandler> SetSignalHandler(int signum, const SignalHandler& handler);
-/// \brief Reinstate the signal handler
-///
-/// For use in signal handlers. This is needed on platforms without sigaction()
-/// such as Windows, as the default signal handler is restored there as
-/// soon as a signal is raised.
-ARROW_EXPORT
-void ReinstateSignalHandler(int signum, SignalHandler::Callback handler);
-
-/// \brief Send a signal to the current process
-///
-/// The thread which will receive the signal is unspecified.
-ARROW_EXPORT
-Status SendSignal(int signum);
-
-/// \brief Send a signal to the given thread
-///
-/// This function isn't supported on Windows.
-ARROW_EXPORT
-Status SendSignalToThread(int signum, uint64_t thread_id);
-
+/// \brief Reinstate the signal handler
+///
+/// For use in signal handlers. This is needed on platforms without sigaction()
+/// such as Windows, as the default signal handler is restored there as
+/// soon as a signal is raised.
+ARROW_EXPORT
+void ReinstateSignalHandler(int signum, SignalHandler::Callback handler);
+
+/// \brief Send a signal to the current process
+///
+/// The thread which will receive the signal is unspecified.
+ARROW_EXPORT
+Status SendSignal(int signum);
+
+/// \brief Send a signal to the given thread
+///
+/// This function isn't supported on Windows.
+ARROW_EXPORT
+Status SendSignalToThread(int signum, uint64_t thread_id);
+
/// \brief Get an unpredictable random seed
///
/// This function may be slightly costly, so should only be used to initialize
@@ -338,12 +338,12 @@ Status SendSignalToThread(int signum, uint64_t thread_id);
ARROW_EXPORT
int64_t GetRandomSeed();
-/// \brief Get the current thread id
-///
-/// In addition to having the same properties as std::thread::id, the returned value
-/// is a regular integer value, which is more convenient than an opaque type.
-ARROW_EXPORT
-uint64_t GetThreadId();
-
+/// \brief Get the current thread id
+///
+/// In addition to having the same properties as std::thread::id, the returned value
+/// is a regular integer value, which is more convenient than an opaque type.
+ARROW_EXPORT
+uint64_t GetThreadId();
+
} // namespace internal
} // namespace arrow
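Taken together, the restored declarations let a signal travel through the ordinary Status channel and be recovered at the other end. A short round-trip sketch, using only functions declared in this header:

#include <csignal>
#include "arrow/util/io_util.h"

void SignalStatusRoundTrip() {
  // Wrap SIGINT in a Cancelled status carrying a SignalDetail.
  arrow::Status st = arrow::internal::CancelledFromSignal(SIGINT, "interrupted");
  // Recover the signal number; the unrelated accessor returns 0.
  int signum = arrow::internal::SignalFromStatus(st);  // == SIGINT
  int errnum = arrow::internal::ErrnoFromStatus(st);   // == 0, no ErrnoDetail
  (void)signum;
  (void)errnum;
}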
diff --git a/contrib/libs/apache/arrow/cpp/src/arrow/util/iterator.h b/contrib/libs/apache/arrow/cpp/src/arrow/util/iterator.h
index 374ac1afd4e..2f42803d26f 100644
--- a/contrib/libs/apache/arrow/cpp/src/arrow/util/iterator.h
+++ b/contrib/libs/apache/arrow/cpp/src/arrow/util/iterator.h
@@ -43,40 +43,40 @@ struct IterationTraits {
/// \brief a reserved value which indicates the end of iteration. By
/// default this is NULLPTR since most iterators yield pointer types.
/// Specialize IterationTraits if different end semantics are required.
- ///
- /// Note: This should not be used to determine if a given value is a
- /// terminal value. Use IsIterationEnd (which uses IsEnd) instead. This
- /// is only for returning terminal values.
+ ///
+ /// Note: This should not be used to determine if a given value is a
+ /// terminal value. Use IsIterationEnd (which uses IsEnd) instead. This
+ /// is only for returning terminal values.
static T End() { return T(NULLPTR); }
-
- /// \brief Checks to see if the value is a terminal value.
- /// A method is used here since T is not necessarily comparable in many
- /// cases even though it has a distinct final value
- static bool IsEnd(const T& val) { return val == End(); }
+
+ /// \brief Checks to see if the value is a terminal value.
+ /// A method is used here since T is not necessarily comparable in many
+ /// cases even though it has a distinct final value
+ static bool IsEnd(const T& val) { return val == End(); }
};
template <typename T>
-T IterationEnd() {
- return IterationTraits<T>::End();
-}
-
-template <typename T>
-bool IsIterationEnd(const T& val) {
- return IterationTraits<T>::IsEnd(val);
-}
-
-template <typename T>
+T IterationEnd() {
+ return IterationTraits<T>::End();
+}
+
+template <typename T>
+bool IsIterationEnd(const T& val) {
+ return IterationTraits<T>::IsEnd(val);
+}
+
+template <typename T>
struct IterationTraits<util::optional<T>> {
/// \brief by default when iterating through a sequence of optional,
/// nullopt indicates the end of iteration.
/// Specialize IterationTraits if different end semantics are required.
static util::optional<T> End() { return util::nullopt; }
- /// \brief by default when iterating through a sequence of optional,
- /// nullopt (!has_value()) indicates the end of iteration.
- /// Specialize IterationTraits if different end semantics are required.
- static bool IsEnd(const util::optional<T>& val) { return !val.has_value(); }
-
+ /// \brief by default when iterating through a sequence of optional,
+ /// nullopt (!has_value()) indicates the end of iteration.
+ /// Specialize IterationTraits if different end semantics are required.
+ static bool IsEnd(const util::optional<T>& val) { return !val.has_value(); }
+
// TODO(bkietz) The range-for loop over Iterator<optional<T>> yields
// Result<optional<T>> which is unnecessary (since only the unyielded end optional
// is nullopt). Add IterationTraits::GetRangeElement() to handle this case
@@ -87,8 +87,8 @@ template <typename T>
class Iterator : public util::EqualityComparable<Iterator<T>> {
public:
/// \brief Iterator may be constructed from any type which has a member function
- /// with signature Result<T> Next();
- /// End of iterator is signalled by returning IterationTraits<T>::End();
+ /// with signature Result<T> Next();
+ /// End of iterator is signalled by returning IterationTraits<T>::End();
///
/// The argument is moved or copied to the heap and kept in a unique_ptr<void>. Only
/// its destructor and its Next method (which are stored in function pointers) are
@@ -116,7 +116,7 @@ class Iterator : public util::EqualityComparable<Iterator<T>> {
for (;;) {
ARROW_ASSIGN_OR_RAISE(auto value, Next());
- if (IsIterationEnd(value)) break;
+ if (IsIterationEnd(value)) break;
ARROW_RETURN_NOT_OK(visitor(std::move(value)));
}
@@ -210,132 +210,132 @@ class Iterator : public util::EqualityComparable<Iterator<T>> {
};
template <typename T>
-struct TransformFlow {
- using YieldValueType = T;
-
- TransformFlow(YieldValueType value, bool ready_for_next)
- : finished_(false),
- ready_for_next_(ready_for_next),
- yield_value_(std::move(value)) {}
- TransformFlow(bool finished, bool ready_for_next)
- : finished_(finished), ready_for_next_(ready_for_next), yield_value_() {}
-
- bool HasValue() const { return yield_value_.has_value(); }
- bool Finished() const { return finished_; }
- bool ReadyForNext() const { return ready_for_next_; }
- T Value() const { return *yield_value_; }
-
- bool finished_ = false;
- bool ready_for_next_ = false;
- util::optional<YieldValueType> yield_value_;
-};
-
-struct TransformFinish {
- template <typename T>
- operator TransformFlow<T>() && { // NOLINT explicit
- return TransformFlow<T>(true, true);
- }
-};
-
-struct TransformSkip {
- template <typename T>
- operator TransformFlow<T>() && { // NOLINT explicit
- return TransformFlow<T>(false, true);
- }
-};
-
-template <typename T>
-TransformFlow<T> TransformYield(T value = {}, bool ready_for_next = true) {
- return TransformFlow<T>(std::move(value), ready_for_next);
-}
-
-template <typename T, typename V>
-using Transformer = std::function<Result<TransformFlow<V>>(T)>;
-
-template <typename T, typename V>
-class TransformIterator {
- public:
- explicit TransformIterator(Iterator<T> it, Transformer<T, V> transformer)
- : it_(std::move(it)),
- transformer_(std::move(transformer)),
- last_value_(),
- finished_() {}
-
- Result<V> Next() {
- while (!finished_) {
- ARROW_ASSIGN_OR_RAISE(util::optional<V> next, Pump());
- if (next.has_value()) {
- return std::move(*next);
- }
- ARROW_ASSIGN_OR_RAISE(last_value_, it_.Next());
- }
- return IterationTraits<V>::End();
- }
-
- private:
- // Calls the transform function on the current value. Can return in several ways
- // * If the next value is requested (e.g. skip) it will return an empty optional
- // * If an invalid status is encountered that will be returned
- // * If finished it will return IterationTraits<V>::End()
- // * If a value is returned by the transformer that will be returned
- Result<util::optional<V>> Pump() {
- if (!finished_ && last_value_.has_value()) {
- auto next_res = transformer_(*last_value_);
- if (!next_res.ok()) {
- finished_ = true;
- return next_res.status();
- }
- auto next = *next_res;
- if (next.ReadyForNext()) {
- if (IsIterationEnd(*last_value_)) {
- finished_ = true;
- }
- last_value_.reset();
- }
- if (next.Finished()) {
- finished_ = true;
- }
- if (next.HasValue()) {
- return next.Value();
- }
- }
- if (finished_) {
- return IterationTraits<V>::End();
- }
- return util::nullopt;
- }
-
- Iterator<T> it_;
- Transformer<T, V> transformer_;
- util::optional<T> last_value_;
- bool finished_ = false;
-};
-
-/// \brief Transforms an iterator according to a transformer, returning a new Iterator.
-///
-/// The transformer will be called on each element of the source iterator and for each
-/// call it can yield a value, skip, or finish the iteration. When yielding a value the
-/// transformer can choose to consume the source item (the default, ready_for_next = true)
-/// or to keep it and it will be called again on the same value.
-///
-/// This is essentially a more generic form of the map operation that can return 0, 1, or
-/// many values for each of the source items.
-///
-/// The transformer will be exposed to the end of the source sequence
-/// (IterationTraits::End) in case it needs to return some final item(s).
-///
-/// Any invalid status returned by the transformer will be returned immediately.
-template <typename T, typename V>
-Iterator<V> MakeTransformedIterator(Iterator<T> it, Transformer<T, V> op) {
- return Iterator<V>(TransformIterator<T, V>(std::move(it), std::move(op)));
-}
-
-template <typename T>
+struct TransformFlow {
+ using YieldValueType = T;
+
+ TransformFlow(YieldValueType value, bool ready_for_next)
+ : finished_(false),
+ ready_for_next_(ready_for_next),
+ yield_value_(std::move(value)) {}
+ TransformFlow(bool finished, bool ready_for_next)
+ : finished_(finished), ready_for_next_(ready_for_next), yield_value_() {}
+
+ bool HasValue() const { return yield_value_.has_value(); }
+ bool Finished() const { return finished_; }
+ bool ReadyForNext() const { return ready_for_next_; }
+ T Value() const { return *yield_value_; }
+
+ bool finished_ = false;
+ bool ready_for_next_ = false;
+ util::optional<YieldValueType> yield_value_;
+};
+
+struct TransformFinish {
+ template <typename T>
+ operator TransformFlow<T>() && { // NOLINT explicit
+ return TransformFlow<T>(true, true);
+ }
+};
+
+struct TransformSkip {
+ template <typename T>
+ operator TransformFlow<T>() && { // NOLINT explicit
+ return TransformFlow<T>(false, true);
+ }
+};
+
+template <typename T>
+TransformFlow<T> TransformYield(T value = {}, bool ready_for_next = true) {
+ return TransformFlow<T>(std::move(value), ready_for_next);
+}
+
+template <typename T, typename V>
+using Transformer = std::function<Result<TransformFlow<V>>(T)>;
+
+template <typename T, typename V>
+class TransformIterator {
+ public:
+ explicit TransformIterator(Iterator<T> it, Transformer<T, V> transformer)
+ : it_(std::move(it)),
+ transformer_(std::move(transformer)),
+ last_value_(),
+ finished_() {}
+
+ Result<V> Next() {
+ while (!finished_) {
+ ARROW_ASSIGN_OR_RAISE(util::optional<V> next, Pump());
+ if (next.has_value()) {
+ return std::move(*next);
+ }
+ ARROW_ASSIGN_OR_RAISE(last_value_, it_.Next());
+ }
+ return IterationTraits<V>::End();
+ }
+
+ private:
+ // Calls the transform function on the current value. Can return in several ways
+ // * If the next value is requested (e.g. skip) it will return an empty optional
+ // * If an invalid status is encountered that will be returned
+ // * If finished it will return IterationTraits<V>::End()
+ // * If a value is returned by the transformer that will be returned
+ Result<util::optional<V>> Pump() {
+ if (!finished_ && last_value_.has_value()) {
+ auto next_res = transformer_(*last_value_);
+ if (!next_res.ok()) {
+ finished_ = true;
+ return next_res.status();
+ }
+ auto next = *next_res;
+ if (next.ReadyForNext()) {
+ if (IsIterationEnd(*last_value_)) {
+ finished_ = true;
+ }
+ last_value_.reset();
+ }
+ if (next.Finished()) {
+ finished_ = true;
+ }
+ if (next.HasValue()) {
+ return next.Value();
+ }
+ }
+ if (finished_) {
+ return IterationTraits<V>::End();
+ }
+ return util::nullopt;
+ }
+
+ Iterator<T> it_;
+ Transformer<T, V> transformer_;
+ util::optional<T> last_value_;
+ bool finished_ = false;
+};
+
+/// \brief Transforms an iterator according to a transformer, returning a new Iterator.
+///
+/// The transformer will be called on each element of the source iterator and for each
+/// call it can yield a value, skip, or finish the iteration. When yielding a value the
+/// transformer can choose to consume the source item (the default, ready_for_next = true)
+/// or to keep it and it will be called again on the same value.
+///
+/// This is essentially a more generic form of the map operation that can return 0, 1, or
+/// many values for each of the source items.
+///
+/// The transformer will be exposed to the end of the source sequence
+/// (IterationTraits::End) in case it needs to return some final item(s).
+///
+/// Any invalid status returned by the transformer will be returned immediately.
+template <typename T, typename V>
+Iterator<V> MakeTransformedIterator(Iterator<T> it, Transformer<T, V> op) {
+ return Iterator<V>(TransformIterator<T, V>(std::move(it), std::move(op)));
+}
+
+template <typename T>
struct IterationTraits<Iterator<T>> {
// The end condition for an Iterator of Iterators is a default constructed (null)
// Iterator.
static Iterator<T> End() { return Iterator<T>(); }
- static bool IsEnd(const Iterator<T>& val) { return !val; }
+ static bool IsEnd(const Iterator<T>& val) { return !val; }
};
template <typename Fn, typename T>
@@ -427,7 +427,7 @@ class MapIterator {
Result<O> Next() {
ARROW_ASSIGN_OR_RAISE(I i, it_.Next());
- if (IsIterationEnd(i)) {
+ if (IsIterationEnd(i)) {
return IterationTraits<O>::End();
}
@@ -489,7 +489,7 @@ struct FilterIterator {
for (;;) {
ARROW_ASSIGN_OR_RAISE(From i, it_.Next());
- if (IsIterationEnd(i)) {
+ if (IsIterationEnd(i)) {
return IterationTraits<To>::End();
}
@@ -525,12 +525,12 @@ class FlattenIterator {
explicit FlattenIterator(Iterator<Iterator<T>> it) : parent_(std::move(it)) {}
Result<T> Next() {
- if (IsIterationEnd(child_)) {
+ if (IsIterationEnd(child_)) {
// Pop from parent's iterator.
ARROW_ASSIGN_OR_RAISE(child_, parent_.Next());
// Check if final iteration reached.
- if (IsIterationEnd(child_)) {
+ if (IsIterationEnd(child_)) {
return IterationTraits<T>::End();
}
@@ -539,7 +539,7 @@ class FlattenIterator {
// Pop from child_ and check for depletion.
ARROW_ASSIGN_OR_RAISE(T out, child_.Next());
- if (IsIterationEnd(out)) {
+ if (IsIterationEnd(out)) {
// Reset state such that we pop from parent on the recursive call
child_ = IterationTraits<Iterator<T>>::End();
@@ -559,10 +559,10 @@ Iterator<T> MakeFlattenIterator(Iterator<Iterator<T>> it) {
return Iterator<T>(FlattenIterator<T>(std::move(it)));
}
-template <typename Reader>
-Iterator<typename Reader::ValueType> MakeIteratorFromReader(
- const std::shared_ptr<Reader>& reader) {
- return MakeFunctionIterator([reader] { return reader->Next(); });
+template <typename Reader>
+Iterator<typename Reader::ValueType> MakeIteratorFromReader(
+ const std::shared_ptr<Reader>& reader) {
+ return MakeFunctionIterator([reader] { return reader->Next(); });
}
} // namespace arrow
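As a concrete instance of the transformer protocol restored above, the sketch below keeps only even values from a source iterator. Everything used here (TransformFlow, TransformYield, TransformSkip, TransformFinish, MakeTransformedIterator, and the util::optional end semantics) appears in this header:

#include "arrow/util/iterator.h"

using Item = arrow::util::optional<int>;

arrow::Iterator<Item> KeepEven(arrow::Iterator<Item> source) {
  arrow::Transformer<Item, Item> keep_even =
      [](Item v) -> arrow::Result<arrow::TransformFlow<Item>> {
    if (arrow::IsIterationEnd(v)) {
      return arrow::TransformFinish();  // end of the source sequence
    }
    if (*v % 2 != 0) {
      return arrow::TransformSkip();  // consume the item, emit nothing
    }
    return arrow::TransformYield(v);  // consume the item and emit it
  };
  return arrow::MakeTransformedIterator(std::move(source), std::move(keep_even));
}

Each source item is consumed exactly once here; yielding with ready_for_next = false instead would present the same item to the transformer again, which is how one source item can expand into several outputs.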
diff --git a/contrib/libs/apache/arrow/cpp/src/arrow/util/key_value_metadata.cc b/contrib/libs/apache/arrow/cpp/src/arrow/util/key_value_metadata.cc
index c4a3ac64aab..ad3b686a9bd 100644
--- a/contrib/libs/apache/arrow/cpp/src/arrow/util/key_value_metadata.cc
+++ b/contrib/libs/apache/arrow/cpp/src/arrow/util/key_value_metadata.cc
@@ -70,11 +70,11 @@ KeyValueMetadata::KeyValueMetadata(std::vector<std::string> keys,
ARROW_CHECK_EQ(keys.size(), values.size());
}
-std::shared_ptr<KeyValueMetadata> KeyValueMetadata::Make(
- std::vector<std::string> keys, std::vector<std::string> values) {
- return std::make_shared<KeyValueMetadata>(std::move(keys), std::move(values));
-}
-
+std::shared_ptr<KeyValueMetadata> KeyValueMetadata::Make(
+ std::vector<std::string> keys, std::vector<std::string> values) {
+ return std::make_shared<KeyValueMetadata>(std::move(keys), std::move(values));
+}
+
void KeyValueMetadata::ToUnorderedMap(
std::unordered_map<std::string, std::string>* out) const {
DCHECK_NE(out, nullptr);
diff --git a/contrib/libs/apache/arrow/cpp/src/arrow/util/key_value_metadata.h b/contrib/libs/apache/arrow/cpp/src/arrow/util/key_value_metadata.h
index 9835b1739c7..d42ab78f667 100644
--- a/contrib/libs/apache/arrow/cpp/src/arrow/util/key_value_metadata.h
+++ b/contrib/libs/apache/arrow/cpp/src/arrow/util/key_value_metadata.h
@@ -39,9 +39,9 @@ class ARROW_EXPORT KeyValueMetadata {
explicit KeyValueMetadata(const std::unordered_map<std::string, std::string>& map);
virtual ~KeyValueMetadata() = default;
- static std::shared_ptr<KeyValueMetadata> Make(std::vector<std::string> keys,
- std::vector<std::string> values);
-
+ static std::shared_ptr<KeyValueMetadata> Make(std::vector<std::string> keys,
+ std::vector<std::string> values);
+
void ToUnorderedMap(std::unordered_map<std::string, std::string>* out) const;
void Append(const std::string& key, const std::string& value);
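Make() is a thin forwarding wrapper over std::make_shared, as the .cc hunk above shows; together with Append() and ToUnorderedMap() it covers the usual metadata round trip:

#include <string>
#include <unordered_map>
#include "arrow/util/key_value_metadata.h"

void MetadataRoundTrip() {
  auto md = arrow::KeyValueMetadata::Make({"compression", "origin"},
                                          {"zstd", "example"});
  md->Append("note", "hello");  // keys and values stay index-aligned
  std::unordered_map<std::string, std::string> map;
  md->ToUnorderedMap(&map);  // map now holds all three pairs
}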
diff --git a/contrib/libs/apache/arrow/cpp/src/arrow/util/logging.cc b/contrib/libs/apache/arrow/cpp/src/arrow/util/logging.cc
index 314b277a821..65359b44081 100644
--- a/contrib/libs/apache/arrow/cpp/src/arrow/util/logging.cc
+++ b/contrib/libs/apache/arrow/cpp/src/arrow/util/logging.cc
@@ -24,31 +24,31 @@
#include <iostream>
#ifdef ARROW_USE_GLOG
-
+
#include <signal.h>
#include <vector>
-
+
#error #include "glog/logging.h"
-
-// Restore our versions of DCHECK and friends, as GLog defines its own
-#undef DCHECK
-#undef DCHECK_OK
-#undef DCHECK_EQ
-#undef DCHECK_NE
-#undef DCHECK_LE
-#undef DCHECK_LT
-#undef DCHECK_GE
-#undef DCHECK_GT
-
-#define DCHECK ARROW_DCHECK
-#define DCHECK_OK ARROW_DCHECK_OK
-#define DCHECK_EQ ARROW_DCHECK_EQ
-#define DCHECK_NE ARROW_DCHECK_NE
-#define DCHECK_LE ARROW_DCHECK_LE
-#define DCHECK_LT ARROW_DCHECK_LT
-#define DCHECK_GE ARROW_DCHECK_GE
-#define DCHECK_GT ARROW_DCHECK_GT
-
+
+// Restore our versions of DCHECK and friends, as GLog defines its own
+#undef DCHECK
+#undef DCHECK_OK
+#undef DCHECK_EQ
+#undef DCHECK_NE
+#undef DCHECK_LE
+#undef DCHECK_LT
+#undef DCHECK_GE
+#undef DCHECK_GT
+
+#define DCHECK ARROW_DCHECK
+#define DCHECK_OK ARROW_DCHECK_OK
+#define DCHECK_EQ ARROW_DCHECK_EQ
+#define DCHECK_NE ARROW_DCHECK_NE
+#define DCHECK_LE ARROW_DCHECK_LE
+#define DCHECK_LT ARROW_DCHECK_LT
+#define DCHECK_GE ARROW_DCHECK_GE
+#define DCHECK_GT ARROW_DCHECK_GT
+
#endif
namespace arrow {
diff --git a/contrib/libs/apache/arrow/cpp/src/arrow/util/logging.h b/contrib/libs/apache/arrow/cpp/src/arrow/util/logging.h
index 286cca361b0..15a0188ab76 100644
--- a/contrib/libs/apache/arrow/cpp/src/arrow/util/logging.h
+++ b/contrib/libs/apache/arrow/cpp/src/arrow/util/logging.h
@@ -92,33 +92,33 @@ enum class ArrowLogLevel : int {
// CAUTION: DCHECK_OK() always evaluates its argument, but other DCHECK*() macros
// only do so in debug mode.
-#define ARROW_DCHECK(condition) \
+#define ARROW_DCHECK(condition) \
while (false) ARROW_IGNORE_EXPR(condition); \
while (false) ::arrow::util::detail::NullLog()
-#define ARROW_DCHECK_OK(s) \
- ARROW_IGNORE_EXPR(s); \
+#define ARROW_DCHECK_OK(s) \
+ ARROW_IGNORE_EXPR(s); \
while (false) ::arrow::util::detail::NullLog()
-#define ARROW_DCHECK_EQ(val1, val2) \
+#define ARROW_DCHECK_EQ(val1, val2) \
while (false) ARROW_IGNORE_EXPR(val1); \
while (false) ARROW_IGNORE_EXPR(val2); \
while (false) ::arrow::util::detail::NullLog()
-#define ARROW_DCHECK_NE(val1, val2) \
+#define ARROW_DCHECK_NE(val1, val2) \
while (false) ARROW_IGNORE_EXPR(val1); \
while (false) ARROW_IGNORE_EXPR(val2); \
while (false) ::arrow::util::detail::NullLog()
-#define ARROW_DCHECK_LE(val1, val2) \
+#define ARROW_DCHECK_LE(val1, val2) \
while (false) ARROW_IGNORE_EXPR(val1); \
while (false) ARROW_IGNORE_EXPR(val2); \
while (false) ::arrow::util::detail::NullLog()
-#define ARROW_DCHECK_LT(val1, val2) \
+#define ARROW_DCHECK_LT(val1, val2) \
while (false) ARROW_IGNORE_EXPR(val1); \
while (false) ARROW_IGNORE_EXPR(val2); \
while (false) ::arrow::util::detail::NullLog()
-#define ARROW_DCHECK_GE(val1, val2) \
+#define ARROW_DCHECK_GE(val1, val2) \
while (false) ARROW_IGNORE_EXPR(val1); \
while (false) ARROW_IGNORE_EXPR(val2); \
while (false) ::arrow::util::detail::NullLog()
-#define ARROW_DCHECK_GT(val1, val2) \
+#define ARROW_DCHECK_GT(val1, val2) \
while (false) ARROW_IGNORE_EXPR(val1); \
while (false) ARROW_IGNORE_EXPR(val2); \
while (false) ::arrow::util::detail::NullLog()
@@ -126,26 +126,26 @@ enum class ArrowLogLevel : int {
#else
#define ARROW_DFATAL ::arrow::util::ArrowLogLevel::ARROW_FATAL
-#define ARROW_DCHECK ARROW_CHECK
-#define ARROW_DCHECK_OK ARROW_CHECK_OK
-#define ARROW_DCHECK_EQ ARROW_CHECK_EQ
-#define ARROW_DCHECK_NE ARROW_CHECK_NE
-#define ARROW_DCHECK_LE ARROW_CHECK_LE
-#define ARROW_DCHECK_LT ARROW_CHECK_LT
-#define ARROW_DCHECK_GE ARROW_CHECK_GE
-#define ARROW_DCHECK_GT ARROW_CHECK_GT
+#define ARROW_DCHECK ARROW_CHECK
+#define ARROW_DCHECK_OK ARROW_CHECK_OK
+#define ARROW_DCHECK_EQ ARROW_CHECK_EQ
+#define ARROW_DCHECK_NE ARROW_CHECK_NE
+#define ARROW_DCHECK_LE ARROW_CHECK_LE
+#define ARROW_DCHECK_LT ARROW_CHECK_LT
+#define ARROW_DCHECK_GE ARROW_CHECK_GE
+#define ARROW_DCHECK_GT ARROW_CHECK_GT
#endif // NDEBUG
-#define DCHECK ARROW_DCHECK
-#define DCHECK_OK ARROW_DCHECK_OK
-#define DCHECK_EQ ARROW_DCHECK_EQ
-#define DCHECK_NE ARROW_DCHECK_NE
-#define DCHECK_LE ARROW_DCHECK_LE
-#define DCHECK_LT ARROW_DCHECK_LT
-#define DCHECK_GE ARROW_DCHECK_GE
-#define DCHECK_GT ARROW_DCHECK_GT
-
+#define DCHECK ARROW_DCHECK
+#define DCHECK_OK ARROW_DCHECK_OK
+#define DCHECK_EQ ARROW_DCHECK_EQ
+#define DCHECK_NE ARROW_DCHECK_NE
+#define DCHECK_LE ARROW_DCHECK_LE
+#define DCHECK_LT ARROW_DCHECK_LT
+#define DCHECK_GE ARROW_DCHECK_GE
+#define DCHECK_GT ARROW_DCHECK_GT
+
// This code is adapted from
// https://github.com/ray-project/ray/blob/master/src/ray/util/logging.h.
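The CAUTION note above is worth a worked illustration: in release builds every DCHECK*() macro except DCHECK_OK() compiles its arguments away, so a side effect placed inside one silently vanishes. The names below (CloseThing, PopToken) are hypothetical:

#include "arrow/status.h"
#include "arrow/util/logging.h"

arrow::Status CloseThing();  // hypothetical, returns a Status
bool PopToken();             // hypothetical, has a side effect

void DcheckExample() {
  // Fine: DCHECK_OK always evaluates its argument, so CloseThing() runs
  // in every build.
  DCHECK_OK(CloseThing());

  // Bug in release builds: the condition is compiled out, so PopToken()
  // would never run:
  //   DCHECK(PopToken());
  // Hoist the side effect out of the macro instead:
  bool popped = PopToken();  // evaluated unconditionally
  DCHECK(popped);
}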
diff --git a/contrib/libs/apache/arrow/cpp/src/arrow/util/mutex.h b/contrib/libs/apache/arrow/cpp/src/arrow/util/mutex.h
index 3f665c01838..6c80be380ae 100644
--- a/contrib/libs/apache/arrow/cpp/src/arrow/util/mutex.h
+++ b/contrib/libs/apache/arrow/cpp/src/arrow/util/mutex.h
@@ -37,7 +37,7 @@ class ARROW_EXPORT Mutex {
/// A Guard is falsy if a lock could not be acquired.
class ARROW_EXPORT Guard {
public:
- Guard() : locked_(NULLPTR, [](Mutex* /* mutex */) {}) {}
+ Guard() : locked_(NULLPTR, [](Mutex* /* mutex */) {}) {}
Guard(Guard&&) = default;
Guard& operator=(Guard&&) = default;
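Because a default-constructed Guard holds no lock and a Guard is falsy when acquisition fails, the try-lock idiom reads naturally. A sketch, assuming the TryLock() member this Guard is paired with in the full header:

#include "arrow/util/mutex.h"

arrow::util::Mutex cache_mutex;

void MaybeRefreshCache() {
  if (auto guard = cache_mutex.TryLock()) {  // falsy: lock not acquired
    // ... critical section ...
  }  // lock released when guard goes out of scope
}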
diff --git a/contrib/libs/apache/arrow/cpp/src/arrow/util/parallel.h b/contrib/libs/apache/arrow/cpp/src/arrow/util/parallel.h
index b4858f0bf96..80f60fbdb36 100644
--- a/contrib/libs/apache/arrow/cpp/src/arrow/util/parallel.h
+++ b/contrib/libs/apache/arrow/cpp/src/arrow/util/parallel.h
@@ -21,9 +21,9 @@
#include <vector>
#include "arrow/status.h"
-#include "arrow/util/functional.h"
+#include "arrow/util/functional.h"
#include "arrow/util/thread_pool.h"
-#include "arrow/util/vector.h"
+#include "arrow/util/vector.h"
namespace arrow {
namespace internal {
@@ -32,12 +32,12 @@ namespace internal {
// arguments between 0 and `num_tasks - 1`, on an arbitrary number of threads.
template <class FUNCTION>
-Status ParallelFor(int num_tasks, FUNCTION&& func,
- Executor* executor = internal::GetCpuThreadPool()) {
- std::vector<Future<>> futures(num_tasks);
+Status ParallelFor(int num_tasks, FUNCTION&& func,
+ Executor* executor = internal::GetCpuThreadPool()) {
+ std::vector<Future<>> futures(num_tasks);
for (int i = 0; i < num_tasks; ++i) {
- ARROW_ASSIGN_OR_RAISE(futures[i], executor->Submit(func, i));
+ ARROW_ASSIGN_OR_RAISE(futures[i], executor->Submit(func, i));
}
auto st = Status::OK();
for (auto& fut : futures) {
@@ -46,30 +46,30 @@ Status ParallelFor(int num_tasks, FUNCTION&& func,
return st;
}
-template <class FUNCTION, typename T,
- typename R = typename internal::call_traits::return_type<FUNCTION>::ValueType>
-Future<std::vector<R>> ParallelForAsync(
- std::vector<T> inputs, FUNCTION&& func,
- Executor* executor = internal::GetCpuThreadPool()) {
- std::vector<Future<R>> futures(inputs.size());
- for (size_t i = 0; i < inputs.size(); ++i) {
- ARROW_ASSIGN_OR_RAISE(futures[i], executor->Submit(func, i, std::move(inputs[i])));
- }
- return All(std::move(futures))
- .Then([](const std::vector<Result<R>>& results) -> Result<std::vector<R>> {
- return UnwrapOrRaise(results);
- });
-}
-
+template <class FUNCTION, typename T,
+ typename R = typename internal::call_traits::return_type<FUNCTION>::ValueType>
+Future<std::vector<R>> ParallelForAsync(
+ std::vector<T> inputs, FUNCTION&& func,
+ Executor* executor = internal::GetCpuThreadPool()) {
+ std::vector<Future<R>> futures(inputs.size());
+ for (size_t i = 0; i < inputs.size(); ++i) {
+ ARROW_ASSIGN_OR_RAISE(futures[i], executor->Submit(func, i, std::move(inputs[i])));
+ }
+ return All(std::move(futures))
+ .Then([](const std::vector<Result<R>>& results) -> Result<std::vector<R>> {
+ return UnwrapOrRaise(results);
+ });
+}
+
// A parallelizer that takes a `Status(int)` function and calls it with
// arguments between 0 and `num_tasks - 1`, in sequence or in parallel,
// depending on the input boolean.
template <class FUNCTION>
-Status OptionalParallelFor(bool use_threads, int num_tasks, FUNCTION&& func,
- Executor* executor = internal::GetCpuThreadPool()) {
+Status OptionalParallelFor(bool use_threads, int num_tasks, FUNCTION&& func,
+ Executor* executor = internal::GetCpuThreadPool()) {
if (use_threads) {
- return ParallelFor(num_tasks, std::forward<FUNCTION>(func), executor);
+ return ParallelFor(num_tasks, std::forward<FUNCTION>(func), executor);
} else {
for (int i = 0; i < num_tasks; ++i) {
RETURN_NOT_OK(func(i));
@@ -78,25 +78,25 @@ Status OptionalParallelFor(bool use_threads, int num_tasks, FUNCTION&& func,
}
}
-// A parallelizer that takes a `Result<R>(int index, T item)` function and
-// calls it with each item from the input array, in sequence or in parallel,
-// depending on the input boolean.
-
-template <class FUNCTION, typename T,
- typename R = typename internal::call_traits::return_type<FUNCTION>::ValueType>
-Future<std::vector<R>> OptionalParallelForAsync(
- bool use_threads, std::vector<T> inputs, FUNCTION&& func,
- Executor* executor = internal::GetCpuThreadPool()) {
- if (use_threads) {
- return ParallelForAsync(std::move(inputs), std::forward<FUNCTION>(func), executor);
- } else {
- std::vector<R> result(inputs.size());
- for (size_t i = 0; i < inputs.size(); ++i) {
- ARROW_ASSIGN_OR_RAISE(result[i], func(i, inputs[i]));
- }
- return result;
- }
-}
-
+// A parallelizer that takes a `Result<R>(int index, T item)` function and
+// calls it with each item from the input array, in sequence or in parallel,
+// depending on the input boolean.
+
+template <class FUNCTION, typename T,
+ typename R = typename internal::call_traits::return_type<FUNCTION>::ValueType>
+Future<std::vector<R>> OptionalParallelForAsync(
+ bool use_threads, std::vector<T> inputs, FUNCTION&& func,
+ Executor* executor = internal::GetCpuThreadPool()) {
+ if (use_threads) {
+ return ParallelForAsync(std::move(inputs), std::forward<FUNCTION>(func), executor);
+ } else {
+ std::vector<R> result(inputs.size());
+ for (size_t i = 0; i < inputs.size(); ++i) {
+ ARROW_ASSIGN_OR_RAISE(result[i], func(i, inputs[i]));
+ }
+ return result;
+ }
+}
+
} // namespace internal
} // namespace arrow
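A usage sketch for the parallelizers above; OptionalParallelFor() keeps the call site identical whether the tasks run on the CPU thread pool or sequentially. ProcessChunk() is a stand-in for per-task work:

#include "arrow/status.h"
#include "arrow/util/parallel.h"

arrow::Status ProcessChunk(int /*i*/) {
  return arrow::Status::OK();  // stand-in for real per-chunk work
}

arrow::Status ProcessAll(bool use_threads, int num_chunks) {
  return arrow::internal::OptionalParallelFor(
      use_threads, num_chunks,
      [&](int i) -> arrow::Status { return ProcessChunk(i); });
}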
diff --git a/contrib/libs/apache/arrow/cpp/src/arrow/util/queue.h b/contrib/libs/apache/arrow/cpp/src/arrow/util/queue.h
index 677778774e3..6c71fa6e155 100644
--- a/contrib/libs/apache/arrow/cpp/src/arrow/util/queue.h
+++ b/contrib/libs/apache/arrow/cpp/src/arrow/util/queue.h
@@ -1,29 +1,29 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-#pragma once
-
-#include "arrow/vendored/ProducerConsumerQueue.h"
-
-namespace arrow {
-namespace util {
-
-template <typename T>
-using SpscQueue = arrow_vendored::folly::ProducerConsumerQueue<T>;
-
-}
-} // namespace arrow
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+
+#include "arrow/vendored/ProducerConsumerQueue.h"
+
+namespace arrow {
+namespace util {
+
+template <typename T>
+using SpscQueue = arrow_vendored::folly::ProducerConsumerQueue<T>;
+
+}
+} // namespace arrow
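SpscQueue is only an alias, so the vendored folly ProducerConsumerQueue API applies: capacity is fixed at construction, write() returns false when the queue is full, read() returns false when it is empty, and exactly one producer thread and one consumer thread may touch the queue. A sketch based on that API:

#include "arrow/util/queue.h"

void QueueSketch() {
  arrow::util::SpscQueue<int> queue(16);  // folly reserves one slot internally

  queue.write(42);  // producer side; false would mean the queue is full

  int value = 0;
  if (queue.read(value)) {  // consumer side; false means the queue is empty
    // use value
  }
}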
diff --git a/contrib/libs/apache/arrow/cpp/src/arrow/util/reflection_internal.h b/contrib/libs/apache/arrow/cpp/src/arrow/util/reflection_internal.h
index d9598a6eb34..0440a2eb563 100644
--- a/contrib/libs/apache/arrow/cpp/src/arrow/util/reflection_internal.h
+++ b/contrib/libs/apache/arrow/cpp/src/arrow/util/reflection_internal.h
@@ -1,133 +1,133 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-#pragma once
-
-#include <string>
-#include <tuple>
-#include <utility>
-
-#include "arrow/type_traits.h"
-#include "arrow/util/string_view.h"
-
-namespace arrow {
-namespace internal {
-
-template <size_t...>
-struct index_sequence {};
-
-template <size_t N, size_t Head = N, size_t... Tail>
-struct make_index_sequence_impl;
-
-template <size_t N>
-using make_index_sequence = typename make_index_sequence_impl<N>::type;
-
-template <typename... T>
-using index_sequence_for = make_index_sequence<sizeof...(T)>;
-
-template <size_t N, size_t... I>
-struct make_index_sequence_impl<N, 0, I...> {
- using type = index_sequence<I...>;
-};
-
-template <size_t N, size_t H, size_t... I>
-struct make_index_sequence_impl : make_index_sequence_impl<N, H - 1, H - 1, I...> {};
-
-static_assert(std::is_same<index_sequence<>, make_index_sequence<0>>::value, "");
-static_assert(std::is_same<index_sequence<0, 1, 2>, make_index_sequence<3>>::value, "");
-
-template <typename...>
-struct all_same : std::true_type {};
-
-template <typename One>
-struct all_same<One> : std::true_type {};
-
-template <typename Same, typename... Rest>
-struct all_same<Same, Same, Rest...> : all_same<Same, Rest...> {};
-
-template <typename One, typename Other, typename... Rest>
-struct all_same<One, Other, Rest...> : std::false_type {};
-
-template <size_t... I, typename... T, typename Fn>
-void ForEachTupleMemberImpl(const std::tuple<T...>& tup, Fn&& fn, index_sequence<I...>) {
- (void)std::make_tuple((fn(std::get<I>(tup), I), std::ignore)...);
-}
-
-template <typename... T, typename Fn>
-void ForEachTupleMember(const std::tuple<T...>& tup, Fn&& fn) {
- ForEachTupleMemberImpl(tup, fn, index_sequence_for<T...>());
-}
-
-template <typename C, typename T>
-struct DataMemberProperty {
- using Class = C;
- using Type = T;
-
- constexpr const Type& get(const Class& obj) const { return obj.*ptr_; }
-
- void set(Class* obj, Type value) const { (*obj).*ptr_ = std::move(value); }
-
- constexpr util::string_view name() const { return name_; }
-
- util::string_view name_;
- Type Class::*ptr_;
-};
-
-template <typename Class, typename Type>
-constexpr DataMemberProperty<Class, Type> DataMember(util::string_view name,
- Type Class::*ptr) {
- return {name, ptr};
-}
-
-template <typename... Properties>
-struct PropertyTuple {
- template <typename Fn>
- void ForEach(Fn&& fn) const {
- ForEachTupleMember(props_, fn);
- }
-
- static_assert(all_same<typename Properties::Class...>::value,
- "All properties must be properties of the same class");
-
- size_t size() const { return sizeof...(Properties); }
-
- std::tuple<Properties...> props_;
-};
-
-template <typename... Properties>
-PropertyTuple<Properties...> MakeProperties(Properties... props) {
- return {std::make_tuple(props...)};
-}
-
-template <typename Enum>
-struct EnumTraits {};
-
-template <typename Enum, Enum... Values>
-struct BasicEnumTraits {
- using CType = typename std::underlying_type<Enum>::type;
- using Type = typename CTypeTraits<CType>::ArrowType;
- static std::array<Enum, sizeof...(Values)> values() { return {Values...}; }
-};
-
-template <typename T, typename Enable = void>
-struct has_enum_traits : std::false_type {};
-
-template <typename T>
-struct has_enum_traits<T, void_t<typename EnumTraits<T>::Type>> : std::true_type {};
-
-} // namespace internal
-} // namespace arrow
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+
+#include <string>
+#include <tuple>
+#include <utility>
+
+#include "arrow/type_traits.h"
+#include "arrow/util/string_view.h"
+
+namespace arrow {
+namespace internal {
+
+template <size_t...>
+struct index_sequence {};
+
+template <size_t N, size_t Head = N, size_t... Tail>
+struct make_index_sequence_impl;
+
+template <size_t N>
+using make_index_sequence = typename make_index_sequence_impl<N>::type;
+
+template <typename... T>
+using index_sequence_for = make_index_sequence<sizeof...(T)>;
+
+template <size_t N, size_t... I>
+struct make_index_sequence_impl<N, 0, I...> {
+ using type = index_sequence<I...>;
+};
+
+template <size_t N, size_t H, size_t... I>
+struct make_index_sequence_impl : make_index_sequence_impl<N, H - 1, H - 1, I...> {};
+
+static_assert(std::is_same<index_sequence<>, make_index_sequence<0>>::value, "");
+static_assert(std::is_same<index_sequence<0, 1, 2>, make_index_sequence<3>>::value, "");
+
+template <typename...>
+struct all_same : std::true_type {};
+
+template <typename One>
+struct all_same<One> : std::true_type {};
+
+template <typename Same, typename... Rest>
+struct all_same<Same, Same, Rest...> : all_same<Same, Rest...> {};
+
+template <typename One, typename Other, typename... Rest>
+struct all_same<One, Other, Rest...> : std::false_type {};
+
+template <size_t... I, typename... T, typename Fn>
+void ForEachTupleMemberImpl(const std::tuple<T...>& tup, Fn&& fn, index_sequence<I...>) {
+ (void)std::make_tuple((fn(std::get<I>(tup), I), std::ignore)...);
+}
+
+template <typename... T, typename Fn>
+void ForEachTupleMember(const std::tuple<T...>& tup, Fn&& fn) {
+ ForEachTupleMemberImpl(tup, fn, index_sequence_for<T...>());
+}
+
+template <typename C, typename T>
+struct DataMemberProperty {
+ using Class = C;
+ using Type = T;
+
+ constexpr const Type& get(const Class& obj) const { return obj.*ptr_; }
+
+ void set(Class* obj, Type value) const { (*obj).*ptr_ = std::move(value); }
+
+ constexpr util::string_view name() const { return name_; }
+
+ util::string_view name_;
+ Type Class::*ptr_;
+};
+
+template <typename Class, typename Type>
+constexpr DataMemberProperty<Class, Type> DataMember(util::string_view name,
+ Type Class::*ptr) {
+ return {name, ptr};
+}
+
+template <typename... Properties>
+struct PropertyTuple {
+ template <typename Fn>
+ void ForEach(Fn&& fn) const {
+ ForEachTupleMember(props_, fn);
+ }
+
+ static_assert(all_same<typename Properties::Class...>::value,
+ "All properties must be properties of the same class");
+
+ size_t size() const { return sizeof...(Properties); }
+
+ std::tuple<Properties...> props_;
+};
+
+template <typename... Properties>
+PropertyTuple<Properties...> MakeProperties(Properties... props) {
+ return {std::make_tuple(props...)};
+}
+
+template <typename Enum>
+struct EnumTraits {};
+
+template <typename Enum, Enum... Values>
+struct BasicEnumTraits {
+ using CType = typename std::underlying_type<Enum>::type;
+ using Type = typename CTypeTraits<CType>::ArrowType;
+ static std::array<Enum, sizeof...(Values)> values() { return {Values...}; }
+};
+
+template <typename T, typename Enable = void>
+struct has_enum_traits : std::false_type {};
+
+template <typename T>
+struct has_enum_traits<T, void_t<typename EnumTraits<T>::Type>> : std::true_type {};
+
+} // namespace internal
+} // namespace arrow
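The helpers restored above compose into compile-time property lists. The sketch below uses only DataMember(), MakeProperties(), and PropertyTuple::ForEach() as defined in this header:

#include <iostream>
#include <string>
#include "arrow/util/reflection_internal.h"

struct Point {
  int x;
  int y;
};

static const auto kPointProperties = arrow::internal::MakeProperties(
    arrow::internal::DataMember("x", &Point::x),
    arrow::internal::DataMember("y", &Point::y));

void PrintPoint(const Point& p) {
  // ForEach passes each property plus its index to the callback.
  kPointProperties.ForEach([&](const auto& prop, size_t i) {
    std::cout << i << ": "
              << std::string(prop.name().data(), prop.name().size())
              << " = " << prop.get(p) << "\n";
  });
}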
diff --git a/contrib/libs/apache/arrow/cpp/src/arrow/util/rle_encoding.h b/contrib/libs/apache/arrow/cpp/src/arrow/util/rle_encoding.h
index cf13264e41e..68d29930666 100644
--- a/contrib/libs/apache/arrow/cpp/src/arrow/util/rle_encoding.h
+++ b/contrib/libs/apache/arrow/cpp/src/arrow/util/rle_encoding.h
@@ -1,826 +1,826 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-// Imported from Apache Impala (incubating) on 2016-01-29 and modified for use
-// in parquet-cpp, Arrow
-
-#pragma once
-
-#include <algorithm>
-#include <cmath>
-#include <limits>
-#include <vector>
-
-#include "arrow/util/bit_block_counter.h"
-#include "arrow/util/bit_run_reader.h"
-#include "arrow/util/bit_stream_utils.h"
-#include "arrow/util/bit_util.h"
-#include "arrow/util/macros.h"
-
-namespace arrow {
-namespace util {
-
-/// Utility classes to do run length encoding (RLE) for fixed bit width values. If runs
-/// are sufficiently long, RLE is used, otherwise, the values are just bit-packed
-/// (literal encoding).
-/// For both types of runs, there is a byte-aligned indicator which encodes the length
-/// of the run and the type of the run.
-/// This encoding has the benefit that when there aren't any long enough runs, values
-/// are always decoded at fixed (can be precomputed) bit offsets OR both the value and
-/// the run length are byte aligned. This allows for very efficient decoding
-/// implementations.
-/// The encoding is:
-/// encoded-block := run*
-/// run := literal-run | repeated-run
-/// literal-run := literal-indicator < literal bytes >
-/// repeated-run := repeated-indicator < repeated value. padded to byte boundary >
-/// literal-indicator := varint_encode( number_of_groups << 1 | 1)
-/// repeated-indicator := varint_encode( number_of_repetitions << 1 )
-//
-/// Each run is preceded by a varint. The varint's least significant bit is
-/// used to indicate whether the run is a literal run or a repeated run. The rest
-/// of the varint is used to determine the length of the run (eg how many times the
-/// value repeats).
-//
-/// In the case of literal runs, the run length is always a multiple of 8 (i.e. encoded
-/// in groups of 8), so that no matter the bit-width of the value, the sequence will end
-/// on a byte boundary without padding.
-/// Given that we know it is a multiple of 8, we store the number of 8-groups rather than
-/// the actual number of encoded ints. (This means that the total number of encoded values
-/// cannot be determined from the encoded data, since the number of values in the last
-/// group may not be a multiple of 8). For the last group of literal runs, we pad
-/// the group to 8 with zeros. This allows for 8 at a time decoding on the read side
-/// without the need for additional checks.
-//
-/// There is a break-even point when it is more storage efficient to do run length
-/// encoding. For 1 bit-width values, that point is 8 values. They require 2 bytes
-/// for either the repeated encoding or the literal encoding. This value can always
-/// be computed based on the bit-width.
-/// TODO: think about how to use this for strings. The bit packing isn't quite the same.
-//
-/// Examples with bit-width 1 (eg encoding booleans):
-/// ----------------------------------------
-/// 100 1s followed by 100 0s:
-/// <varint(100 << 1)> <1, padded to 1 byte> <varint(100 << 1)> <0, padded to 1 byte>
-/// - (total 4 bytes)
-//
-/// alternating 1s and 0s (200 total):
-/// 200 ints = 25 groups of 8
-/// <varint((25 << 1) | 1)> <25 bytes of values, bitpacked>
-/// (total 26 bytes, 1 byte overhead)
-//
-
-/// Decoder class for RLE encoded data.
-class RleDecoder {
- public:
- /// Create a decoder object. buffer/buffer_len is the decoded data.
- /// bit_width is the width of each value (before encoding).
- RleDecoder(const uint8_t* buffer, int buffer_len, int bit_width)
- : bit_reader_(buffer, buffer_len),
- bit_width_(bit_width),
- current_value_(0),
- repeat_count_(0),
- literal_count_(0) {
- DCHECK_GE(bit_width_, 0);
- DCHECK_LE(bit_width_, 64);
- }
-
- RleDecoder() : bit_width_(-1) {}
-
- void Reset(const uint8_t* buffer, int buffer_len, int bit_width) {
- DCHECK_GE(bit_width, 0);
- DCHECK_LE(bit_width, 64);
- bit_reader_.Reset(buffer, buffer_len);
- bit_width_ = bit_width;
- current_value_ = 0;
- repeat_count_ = 0;
- literal_count_ = 0;
- }
-
- /// Gets the next value. Returns false if there are no more.
- template <typename T>
- bool Get(T* val);
-
- /// Gets a batch of values. Returns the number of decoded elements.
- template <typename T>
- int GetBatch(T* values, int batch_size);
-
- /// Like GetBatch but add spacing for null entries
- template <typename T>
- int GetBatchSpaced(int batch_size, int null_count, const uint8_t* valid_bits,
- int64_t valid_bits_offset, T* out);
-
- /// Like GetBatch but the values are then decoded using the provided dictionary
- template <typename T>
- int GetBatchWithDict(const T* dictionary, int32_t dictionary_length, T* values,
- int batch_size);
-
- /// Like GetBatchWithDict but add spacing for null entries
- ///
- /// Null entries will be zero-initialized in `values` to avoid leaking
- /// private data.
- template <typename T>
- int GetBatchWithDictSpaced(const T* dictionary, int32_t dictionary_length, T* values,
- int batch_size, int null_count, const uint8_t* valid_bits,
- int64_t valid_bits_offset);
-
- protected:
- BitUtil::BitReader bit_reader_;
- /// Number of bits needed to encode the value. Must be between 0 and 64.
- int bit_width_;
- uint64_t current_value_;
- int32_t repeat_count_;
- int32_t literal_count_;
-
- private:
- /// Fills literal_count_ and repeat_count_ with next values. Returns false if there
- /// are no more.
- template <typename T>
- bool NextCounts();
-
- /// Utility methods for retrieving spaced values.
- template <typename T, typename RunType, typename Converter>
- int GetSpaced(Converter converter, int batch_size, int null_count,
- const uint8_t* valid_bits, int64_t valid_bits_offset, T* out);
-};
-
-/// Class to incrementally build the RLE data. This class does not allocate any memory.
-/// The encoding has two modes: encoding repeated runs and literal runs.
-/// If the run is sufficiently short, it is more efficient to encode as a literal run.
-/// This class does so by buffering 8 values at a time. If they are not all the same
-/// they are added to the literal run. If they are the same, they are added to the
-/// repeated run. When we switch modes, the previous run is flushed out.
-class RleEncoder {
- public:
- /// buffer/buffer_len: preallocated output buffer.
- /// bit_width: max number of bits for value.
- /// TODO: consider adding a min_repeated_run_length so the caller can control
- /// when values should be encoded as repeated runs. Currently this is derived
- /// based on the bit_width, which can determine a storage optimal choice.
- /// TODO: allow 0 bit_width (and have dict encoder use it)
- RleEncoder(uint8_t* buffer, int buffer_len, int bit_width)
- : bit_width_(bit_width), bit_writer_(buffer, buffer_len) {
- DCHECK_GE(bit_width_, 0);
- DCHECK_LE(bit_width_, 64);
- max_run_byte_size_ = MinBufferSize(bit_width);
- DCHECK_GE(buffer_len, max_run_byte_size_) << "Input buffer not big enough.";
- Clear();
- }
-
-  /// Returns the minimum buffer size needed to use the encoder for 'bit_width'.
-  /// This is the maximum length of a single run for 'bit_width'.
-  /// It is not valid to pass a buffer shorter than this length.
- static int MinBufferSize(int bit_width) {
- /// 1 indicator byte and MAX_VALUES_PER_LITERAL_RUN 'bit_width' values.
- int max_literal_run_size =
- 1 +
- static_cast<int>(BitUtil::BytesForBits(MAX_VALUES_PER_LITERAL_RUN * bit_width));
- /// Up to kMaxVlqByteLength indicator and a single 'bit_width' value.
- int max_repeated_run_size = BitUtil::BitReader::kMaxVlqByteLength +
- static_cast<int>(BitUtil::BytesForBits(bit_width));
- return std::max(max_literal_run_size, max_repeated_run_size);
- }
-
- /// Returns the maximum byte size it could take to encode 'num_values'.
- static int MaxBufferSize(int bit_width, int num_values) {
- // For a bit_width > 1, the worst case is the repetition of "literal run of length 8
- // and then a repeated run of length 8".
- // 8 values per smallest run, 8 bits per byte
- int bytes_per_run = bit_width;
- int num_runs = static_cast<int>(BitUtil::CeilDiv(num_values, 8));
- int literal_max_size = num_runs + num_runs * bytes_per_run;
-
- // In the very worst case scenario, the data is a concatenation of repeated
- // runs of 8 values. Repeated run has a 1 byte varint followed by the
- // bit-packed repeated value
- int min_repeated_run_size = 1 + static_cast<int>(BitUtil::BytesForBits(bit_width));
- int repeated_max_size =
- static_cast<int>(BitUtil::CeilDiv(num_values, 8)) * min_repeated_run_size;
-
- return std::max(literal_max_size, repeated_max_size);
- }
-
- /// Encode value. Returns true if the value fits in buffer, false otherwise.
- /// This value must be representable with bit_width_ bits.
- bool Put(uint64_t value);
-
- /// Flushes any pending values to the underlying buffer.
- /// Returns the total number of bytes written
- int Flush();
-
- /// Resets all the state in the encoder.
- void Clear();
-
- /// Returns pointer to underlying buffer
- uint8_t* buffer() { return bit_writer_.buffer(); }
- int32_t len() { return bit_writer_.bytes_written(); }
-
- private:
- /// Flushes any buffered values. If this is part of a repeated run, this is largely
- /// a no-op.
- /// If it is part of a literal run, this will call FlushLiteralRun, which writes
- /// out the buffered literal values.
-  /// If 'done' is true, the current run is written even if it would normally
-  /// have been buffered further. This should only be done at the end, once the
-  /// encoder has received all values.
- void FlushBufferedValues(bool done);
-
- /// Flushes literal values to the underlying buffer. If update_indicator_byte,
- /// then the current literal run is complete and the indicator byte is updated.
- void FlushLiteralRun(bool update_indicator_byte);
-
- /// Flushes a repeated run to the underlying buffer.
- void FlushRepeatedRun();
-
- /// Checks and sets buffer_full_. This must be called after flushing a run to
- /// make sure there are enough bytes remaining to encode the next run.
- void CheckBufferFull();
-
- /// The maximum number of values in a single literal run
- /// (number of groups encodable by a 1-byte indicator * 8)
- static const int MAX_VALUES_PER_LITERAL_RUN = (1 << 6) * 8;
-
- /// Number of bits needed to encode the value. Must be between 0 and 64.
- const int bit_width_;
-
- /// Underlying buffer.
- BitUtil::BitWriter bit_writer_;
-
-  /// If true, the buffer is full and subsequent calls to Put() will fail.
- bool buffer_full_;
-
- /// The maximum byte size a single run can take.
- int max_run_byte_size_;
-
- /// We need to buffer at most 8 values for literals. This happens when the
- /// bit_width is 1 (so 8 values fit in one byte).
- /// TODO: generalize this to other bit widths
- int64_t buffered_values_[8];
-
- /// Number of values in buffered_values_
- int num_buffered_values_;
-
- /// The current (also last) value that was written and the count of how
- /// many times in a row that value has been seen. This is maintained even
-  /// if we are in a literal run. If the repeat_count_ gets high enough, we switch
- /// to encoding repeated runs.
- uint64_t current_value_;
- int repeat_count_;
-
- /// Number of literals in the current run. This does not include the literals
-  /// that might be in buffered_values_. Only once we have a full group can we
-  /// decide whether they should be part of the literal_count_ or repeat_count_.
- int literal_count_;
-
- /// Pointer to a byte in the underlying buffer that stores the indicator byte.
- /// This is reserved as soon as we need a literal run but the value is written
- /// when the literal run is complete.
- uint8_t* literal_indicator_byte_;
-};
-
-template <typename T>
-inline bool RleDecoder::Get(T* val) {
- return GetBatch(val, 1) == 1;
-}
-
-template <typename T>
-inline int RleDecoder::GetBatch(T* values, int batch_size) {
- DCHECK_GE(bit_width_, 0);
- int values_read = 0;
-
- auto* out = values;
-
- while (values_read < batch_size) {
- int remaining = batch_size - values_read;
-
- if (repeat_count_ > 0) { // Repeated value case.
- int repeat_batch = std::min(remaining, repeat_count_);
- std::fill(out, out + repeat_batch, static_cast<T>(current_value_));
-
- repeat_count_ -= repeat_batch;
- values_read += repeat_batch;
- out += repeat_batch;
- } else if (literal_count_ > 0) {
- int literal_batch = std::min(remaining, literal_count_);
- int actual_read = bit_reader_.GetBatch(bit_width_, out, literal_batch);
- if (actual_read != literal_batch) {
- return values_read;
- }
-
- literal_count_ -= literal_batch;
- values_read += literal_batch;
- out += literal_batch;
- } else {
- if (!NextCounts<T>()) return values_read;
- }
- }
-
- return values_read;
-}
-
-template <typename T, typename RunType, typename Converter>
-inline int RleDecoder::GetSpaced(Converter converter, int batch_size, int null_count,
- const uint8_t* valid_bits, int64_t valid_bits_offset,
- T* out) {
- if (ARROW_PREDICT_FALSE(null_count == batch_size)) {
- converter.FillZero(out, out + batch_size);
- return batch_size;
- }
-
- DCHECK_GE(bit_width_, 0);
- int values_read = 0;
- int values_remaining = batch_size - null_count;
-
- // Assume no bits to start.
- arrow::internal::BitRunReader bit_reader(valid_bits, valid_bits_offset,
- /*length=*/batch_size);
- arrow::internal::BitRun valid_run = bit_reader.NextRun();
- while (values_read < batch_size) {
- if (ARROW_PREDICT_FALSE(valid_run.length == 0)) {
- valid_run = bit_reader.NextRun();
- }
-
- DCHECK_GT(batch_size, 0);
- DCHECK_GT(valid_run.length, 0);
-
- if (valid_run.set) {
- if ((repeat_count_ == 0) && (literal_count_ == 0)) {
- if (!NextCounts<RunType>()) return values_read;
- DCHECK((repeat_count_ > 0) ^ (literal_count_ > 0));
- }
-
- if (repeat_count_ > 0) {
- int repeat_batch = 0;
-      // Consume the entire repeat count, incrementing repeat_batch so that it
-      // holds the total of nulls + values consumed. Only the total count is
-      // needed, because the same value is filled in for nulls and non-nulls
-      // alike. This proves to be a big efficiency win.
- while (repeat_count_ > 0 && (values_read + repeat_batch) < batch_size) {
- DCHECK_GT(valid_run.length, 0);
- if (valid_run.set) {
- int update_size = std::min(static_cast<int>(valid_run.length), repeat_count_);
- repeat_count_ -= update_size;
- repeat_batch += update_size;
- valid_run.length -= update_size;
- values_remaining -= update_size;
- } else {
- // We can consume all nulls here because we would do so on
-          // the next loop anyway.
- repeat_batch += static_cast<int>(valid_run.length);
- valid_run.length = 0;
- }
- if (valid_run.length == 0) {
- valid_run = bit_reader.NextRun();
- }
- }
- RunType current_value = static_cast<RunType>(current_value_);
- if (ARROW_PREDICT_FALSE(!converter.IsValid(current_value))) {
- return values_read;
- }
- converter.Fill(out, out + repeat_batch, current_value);
- out += repeat_batch;
- values_read += repeat_batch;
- } else if (literal_count_ > 0) {
- int literal_batch = std::min(values_remaining, literal_count_);
- DCHECK_GT(literal_batch, 0);
-
- // Decode the literals
- constexpr int kBufferSize = 1024;
- RunType indices[kBufferSize];
- literal_batch = std::min(literal_batch, kBufferSize);
- int actual_read = bit_reader_.GetBatch(bit_width_, indices, literal_batch);
- if (ARROW_PREDICT_FALSE(actual_read != literal_batch)) {
- return values_read;
- }
- if (!converter.IsValid(indices, /*length=*/actual_read)) {
- return values_read;
- }
- int skipped = 0;
- int literals_read = 0;
- while (literals_read < literal_batch) {
- if (valid_run.set) {
- int update_size = std::min(literal_batch - literals_read,
- static_cast<int>(valid_run.length));
- converter.Copy(out, indices + literals_read, update_size);
- literals_read += update_size;
- out += update_size;
- valid_run.length -= update_size;
- } else {
- converter.FillZero(out, out + valid_run.length);
- out += valid_run.length;
- skipped += static_cast<int>(valid_run.length);
- valid_run.length = 0;
- }
- if (valid_run.length == 0) {
- valid_run = bit_reader.NextRun();
- }
- }
- literal_count_ -= literal_batch;
- values_remaining -= literal_batch;
- values_read += literal_batch + skipped;
- }
- } else {
- converter.FillZero(out, out + valid_run.length);
- out += valid_run.length;
- values_read += static_cast<int>(valid_run.length);
- valid_run.length = 0;
- }
- }
- DCHECK_EQ(valid_run.length, 0);
- DCHECK_EQ(values_remaining, 0);
- return values_read;
-}
-
-// Converter for GetSpaced that handles runs that get returned
-// directly as output.
-template <typename T>
-struct PlainRleConverter {
- T kZero = {};
- inline bool IsValid(const T& values) const { return true; }
- inline bool IsValid(const T* values, int32_t length) const { return true; }
- inline void Fill(T* begin, T* end, const T& run_value) const {
- std::fill(begin, end, run_value);
- }
- inline void FillZero(T* begin, T* end) { std::fill(begin, end, kZero); }
- inline void Copy(T* out, const T* values, int length) const {
- std::memcpy(out, values, length * sizeof(T));
- }
-};
-
-template <typename T>
-inline int RleDecoder::GetBatchSpaced(int batch_size, int null_count,
- const uint8_t* valid_bits,
- int64_t valid_bits_offset, T* out) {
- if (null_count == 0) {
- return GetBatch<T>(out, batch_size);
- }
-
- PlainRleConverter<T> converter;
- arrow::internal::BitBlockCounter block_counter(valid_bits, valid_bits_offset,
- batch_size);
-
- int total_processed = 0;
- int processed = 0;
- arrow::internal::BitBlockCount block;
-
- do {
- block = block_counter.NextFourWords();
- if (block.length == 0) {
- break;
- }
- if (block.AllSet()) {
- processed = GetBatch<T>(out, block.length);
- } else if (block.NoneSet()) {
- converter.FillZero(out, out + block.length);
- processed = block.length;
- } else {
- processed = GetSpaced<T, /*RunType=*/T, PlainRleConverter<T>>(
- converter, block.length, block.length - block.popcount, valid_bits,
- valid_bits_offset, out);
- }
- total_processed += processed;
- out += block.length;
- valid_bits_offset += block.length;
- } while (processed == block.length);
- return total_processed;
-}
-
-static inline bool IndexInRange(int32_t idx, int32_t dictionary_length) {
- return idx >= 0 && idx < dictionary_length;
-}
-
-// Converter for GetSpaced that handles runs of returned dictionary
-// indices.
-template <typename T>
-struct DictionaryConverter {
- T kZero = {};
- const T* dictionary;
- int32_t dictionary_length;
-
- inline bool IsValid(int32_t value) { return IndexInRange(value, dictionary_length); }
-
- inline bool IsValid(const int32_t* values, int32_t length) const {
- using IndexType = int32_t;
- IndexType min_index = std::numeric_limits<IndexType>::max();
- IndexType max_index = std::numeric_limits<IndexType>::min();
- for (int x = 0; x < length; x++) {
- min_index = std::min(values[x], min_index);
- max_index = std::max(values[x], max_index);
- }
-
- return IndexInRange(min_index, dictionary_length) &&
- IndexInRange(max_index, dictionary_length);
- }
- inline void Fill(T* begin, T* end, const int32_t& run_value) const {
- std::fill(begin, end, dictionary[run_value]);
- }
- inline void FillZero(T* begin, T* end) { std::fill(begin, end, kZero); }
-
- inline void Copy(T* out, const int32_t* values, int length) const {
- for (int x = 0; x < length; x++) {
- out[x] = dictionary[values[x]];
- }
- }
-};
-
-template <typename T>
-inline int RleDecoder::GetBatchWithDict(const T* dictionary, int32_t dictionary_length,
- T* values, int batch_size) {
- // Per https://github.com/apache/parquet-format/blob/master/Encodings.md,
- // the maximum dictionary index width in Parquet is 32 bits.
- using IndexType = int32_t;
- DictionaryConverter<T> converter;
- converter.dictionary = dictionary;
- converter.dictionary_length = dictionary_length;
-
- DCHECK_GE(bit_width_, 0);
- int values_read = 0;
-
- auto* out = values;
-
- while (values_read < batch_size) {
- int remaining = batch_size - values_read;
-
- if (repeat_count_ > 0) {
- auto idx = static_cast<IndexType>(current_value_);
- if (ARROW_PREDICT_FALSE(!IndexInRange(idx, dictionary_length))) {
- return values_read;
- }
- T val = dictionary[idx];
-
- int repeat_batch = std::min(remaining, repeat_count_);
- std::fill(out, out + repeat_batch, val);
-
- /* Upkeep counters */
- repeat_count_ -= repeat_batch;
- values_read += repeat_batch;
- out += repeat_batch;
- } else if (literal_count_ > 0) {
- constexpr int kBufferSize = 1024;
- IndexType indices[kBufferSize];
-
- int literal_batch = std::min(remaining, literal_count_);
- literal_batch = std::min(literal_batch, kBufferSize);
-
- int actual_read = bit_reader_.GetBatch(bit_width_, indices, literal_batch);
- if (ARROW_PREDICT_FALSE(actual_read != literal_batch)) {
- return values_read;
- }
- if (ARROW_PREDICT_FALSE(!converter.IsValid(indices, /*length=*/literal_batch))) {
- return values_read;
- }
- converter.Copy(out, indices, literal_batch);
-
- /* Upkeep counters */
- literal_count_ -= literal_batch;
- values_read += literal_batch;
- out += literal_batch;
- } else {
- if (!NextCounts<IndexType>()) return values_read;
- }
- }
-
- return values_read;
-}
-
-template <typename T>
-inline int RleDecoder::GetBatchWithDictSpaced(const T* dictionary,
- int32_t dictionary_length, T* out,
- int batch_size, int null_count,
- const uint8_t* valid_bits,
- int64_t valid_bits_offset) {
- if (null_count == 0) {
- return GetBatchWithDict<T>(dictionary, dictionary_length, out, batch_size);
- }
- arrow::internal::BitBlockCounter block_counter(valid_bits, valid_bits_offset,
- batch_size);
- using IndexType = int32_t;
- DictionaryConverter<T> converter;
- converter.dictionary = dictionary;
- converter.dictionary_length = dictionary_length;
-
- int total_processed = 0;
- int processed = 0;
- arrow::internal::BitBlockCount block;
- do {
- block = block_counter.NextFourWords();
- if (block.length == 0) {
- break;
- }
- if (block.AllSet()) {
- processed = GetBatchWithDict<T>(dictionary, dictionary_length, out, block.length);
- } else if (block.NoneSet()) {
- converter.FillZero(out, out + block.length);
- processed = block.length;
- } else {
- processed = GetSpaced<T, /*RunType=*/IndexType, DictionaryConverter<T>>(
- converter, block.length, block.length - block.popcount, valid_bits,
- valid_bits_offset, out);
- }
- total_processed += processed;
- out += block.length;
- valid_bits_offset += block.length;
- } while (processed == block.length);
- return total_processed;
-}
-
-template <typename T>
-bool RleDecoder::NextCounts() {
-  // Read the next run's indicator int; it could start a literal or a repeated run.
- // The int is encoded as a vlq-encoded value.
- uint32_t indicator_value = 0;
- if (!bit_reader_.GetVlqInt(&indicator_value)) return false;
-
- // lsb indicates if it is a literal run or repeated run
- bool is_literal = indicator_value & 1;
- uint32_t count = indicator_value >> 1;
- if (is_literal) {
- if (ARROW_PREDICT_FALSE(count == 0 || count > static_cast<uint32_t>(INT32_MAX) / 8)) {
- return false;
- }
- literal_count_ = count * 8;
- } else {
- if (ARROW_PREDICT_FALSE(count == 0 || count > static_cast<uint32_t>(INT32_MAX))) {
- return false;
- }
- repeat_count_ = count;
- T value = {};
- if (!bit_reader_.GetAligned<T>(static_cast<int>(BitUtil::CeilDiv(bit_width_, 8)),
- &value)) {
- return false;
- }
- current_value_ = static_cast<uint64_t>(value);
- }
- return true;
-}
-
-/// This function buffers input values 8 at a time. After seeing all 8 values,
-/// it decides whether they should be encoded as a literal or repeated run.
-inline bool RleEncoder::Put(uint64_t value) {
- DCHECK(bit_width_ == 64 || value < (1ULL << bit_width_));
- if (ARROW_PREDICT_FALSE(buffer_full_)) return false;
-
- if (ARROW_PREDICT_TRUE(current_value_ == value)) {
- ++repeat_count_;
- if (repeat_count_ > 8) {
- // This is just a continuation of the current run, no need to buffer the
- // values.
- // Note that this is the fast path for long repeated runs.
- return true;
- }
- } else {
- if (repeat_count_ >= 8) {
- // We had a run that was long enough but it has ended. Flush the
- // current repeated run.
- DCHECK_EQ(literal_count_, 0);
- FlushRepeatedRun();
- }
- repeat_count_ = 1;
- current_value_ = value;
- }
-
- buffered_values_[num_buffered_values_] = value;
- if (++num_buffered_values_ == 8) {
- DCHECK_EQ(literal_count_ % 8, 0);
- FlushBufferedValues(false);
- }
- return true;
-}
-
-inline void RleEncoder::FlushLiteralRun(bool update_indicator_byte) {
- if (literal_indicator_byte_ == NULL) {
- // The literal indicator byte has not been reserved yet, get one now.
- literal_indicator_byte_ = bit_writer_.GetNextBytePtr();
- DCHECK(literal_indicator_byte_ != NULL);
- }
-
- // Write all the buffered values as bit packed literals
- for (int i = 0; i < num_buffered_values_; ++i) {
- bool success = bit_writer_.PutValue(buffered_values_[i], bit_width_);
- DCHECK(success) << "There is a bug in using CheckBufferFull()";
- }
- num_buffered_values_ = 0;
-
- if (update_indicator_byte) {
- // At this point we need to write the indicator byte for the literal run.
- // We only reserve one byte, to allow for streaming writes of literal values.
- // The logic makes sure we flush literal runs often enough to not overrun
- // the 1 byte.
- DCHECK_EQ(literal_count_ % 8, 0);
- int num_groups = literal_count_ / 8;
- int32_t indicator_value = (num_groups << 1) | 1;
- DCHECK_EQ(indicator_value & 0xFFFFFF00, 0);
- *literal_indicator_byte_ = static_cast<uint8_t>(indicator_value);
- literal_indicator_byte_ = NULL;
- literal_count_ = 0;
- CheckBufferFull();
- }
-}
-
-inline void RleEncoder::FlushRepeatedRun() {
- DCHECK_GT(repeat_count_, 0);
- bool result = true;
- // The lsb of 0 indicates this is a repeated run
- int32_t indicator_value = repeat_count_ << 1 | 0;
- result &= bit_writer_.PutVlqInt(indicator_value);
- result &= bit_writer_.PutAligned(current_value_,
- static_cast<int>(BitUtil::CeilDiv(bit_width_, 8)));
- DCHECK(result);
- num_buffered_values_ = 0;
- repeat_count_ = 0;
- CheckBufferFull();
-}
-
-/// Flush the values that have been buffered. At this point we decide whether
-/// we need to switch between the run types or continue the current one.
-inline void RleEncoder::FlushBufferedValues(bool done) {
- if (repeat_count_ >= 8) {
- // Clear the buffered values. They are part of the repeated run now and we
- // don't want to flush them out as literals.
- num_buffered_values_ = 0;
- if (literal_count_ != 0) {
- // There was a current literal run. All the values in it have been flushed
- // but we still need to update the indicator byte.
- DCHECK_EQ(literal_count_ % 8, 0);
- DCHECK_EQ(repeat_count_, 8);
- FlushLiteralRun(true);
- }
- DCHECK_EQ(literal_count_, 0);
- return;
- }
-
- literal_count_ += num_buffered_values_;
- DCHECK_EQ(literal_count_ % 8, 0);
- int num_groups = literal_count_ / 8;
- if (num_groups + 1 >= (1 << 6)) {
- // We need to start a new literal run because the indicator byte we've reserved
- // cannot store more values.
- DCHECK(literal_indicator_byte_ != NULL);
- FlushLiteralRun(true);
- } else {
- FlushLiteralRun(done);
- }
- repeat_count_ = 0;
-}
-
-inline int RleEncoder::Flush() {
- if (literal_count_ > 0 || repeat_count_ > 0 || num_buffered_values_ > 0) {
- bool all_repeat = literal_count_ == 0 && (repeat_count_ == num_buffered_values_ ||
- num_buffered_values_ == 0);
- // There is something pending, figure out if it's a repeated or literal run
- if (repeat_count_ > 0 && all_repeat) {
- FlushRepeatedRun();
- } else {
- DCHECK_EQ(literal_count_ % 8, 0);
-      // Pad the last group of literals out to 8 values with 0s.
- for (; num_buffered_values_ != 0 && num_buffered_values_ < 8;
- ++num_buffered_values_) {
- buffered_values_[num_buffered_values_] = 0;
- }
- literal_count_ += num_buffered_values_;
- FlushLiteralRun(true);
- repeat_count_ = 0;
- }
- }
- bit_writer_.Flush();
- DCHECK_EQ(num_buffered_values_, 0);
- DCHECK_EQ(literal_count_, 0);
- DCHECK_EQ(repeat_count_, 0);
-
- return bit_writer_.bytes_written();
-}
-
-inline void RleEncoder::CheckBufferFull() {
- int bytes_written = bit_writer_.bytes_written();
- if (bytes_written + max_run_byte_size_ > bit_writer_.buffer_len()) {
- buffer_full_ = true;
- }
-}
-
-inline void RleEncoder::Clear() {
- buffer_full_ = false;
- current_value_ = 0;
- repeat_count_ = 0;
- num_buffered_values_ = 0;
- literal_count_ = 0;
- literal_indicator_byte_ = NULL;
- bit_writer_.Clear();
-}
-
-} // namespace util
-} // namespace arrow
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+// Imported from Apache Impala (incubating) on 2016-01-29 and modified for use
+// in parquet-cpp, Arrow
+
+#pragma once
+
+#include <algorithm>
+#include <cmath>
+#include <limits>
+#include <vector>
+
+#include "arrow/util/bit_block_counter.h"
+#include "arrow/util/bit_run_reader.h"
+#include "arrow/util/bit_stream_utils.h"
+#include "arrow/util/bit_util.h"
+#include "arrow/util/macros.h"
+
+namespace arrow {
+namespace util {
+
+/// Utility classes to do run length encoding (RLE) for fixed bit width values. If runs
+/// are sufficiently long, RLE is used, otherwise, the values are just bit-packed
+/// (literal encoding).
+/// For both types of runs, there is a byte-aligned indicator which encodes the length
+/// of the run and the type of the run.
+/// This encoding has the benefit that when there aren't any long enough runs, values
+/// are always decoded at fixed (can be precomputed) bit offsets OR both the value and
+/// the run length are byte aligned. This allows for very efficient decoding
+/// implementations.
+/// The encoding is:
+/// encoded-block := run*
+/// run := literal-run | repeated-run
+/// literal-run := literal-indicator < literal bytes >
+/// repeated-run := repeated-indicator < repeated value. padded to byte boundary >
+/// literal-indicator := varint_encode( number_of_groups << 1 | 1)
+/// repeated-indicator := varint_encode( number_of_repetitions << 1 )
+//
+/// Each run is preceded by a varint. The varint's least significant bit is
+/// used to indicate whether the run is a literal run or a repeated run. The rest
+/// of the varint is used to determine the length of the run (e.g. how many times the
+/// value repeats).
+//
+/// In the case of literal runs, the run length is always a multiple of 8 (i.e. encode
+/// in groups of 8), so that no matter the bit-width of the value, the sequence will end
+/// on a byte boundary without padding.
+/// Given that we know it is a multiple of 8, we store the number of 8-groups rather than
+/// the actual number of encoded ints. (This means that the total number of encoded values
+/// cannot be determined from the encoded data, since the number of values in the last
+/// group may not be a multiple of 8.) For the last group of a literal run, we pad
+/// the group to 8 with zeros. This allows decoding 8 values at a time on the read side
+/// without the need for additional checks.
+//
+/// There is a break-even point at which run length encoding becomes the more
+/// storage-efficient choice. For 1 bit-width values, that point is 8 values: a run
+/// of 8 takes 2 bytes under either the repeated or the literal encoding. This point
+/// can always be computed from the bit-width.
+/// TODO: think about how to use this for strings. The bit packing isn't quite the same.
+//
+/// Examples with bit-width 1 (e.g. encoding booleans):
+/// ----------------------------------------
+/// 100 1s followed by 100 0s:
+/// <varint(100 << 1)> <1, padded to 1 byte> <varint(100 << 1)> <0, padded to 1 byte>
+/// - (total 4 bytes)
+//
+/// alternating 1s and 0s (200 total):
+/// 200 ints = 25 groups of 8
+/// <varint((25 << 1) | 1)> <25 bytes of values, bitpacked>
+/// (total 26 bytes, 1 byte overhead)
+//
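+/// A further worked example (illustrative, bit-width 3): a run of the value 7
+/// repeated 500 times is encoded as varint(500 << 1) = varint(1000) (2 bytes)
+/// followed by the value 7 padded to one byte, i.e. 3 bytes in total, versus
+/// 190 bytes (1 indicator byte + 504 zero-padded values * 3 bits) if it were
+/// bit-packed as a literal run.
+//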
+
+/// Decoder class for RLE encoded data.
+class RleDecoder {
+ public:
+  /// Create a decoder object. buffer/buffer_len is the RLE-encoded data to decode.
+ /// bit_width is the width of each value (before encoding).
+ RleDecoder(const uint8_t* buffer, int buffer_len, int bit_width)
+ : bit_reader_(buffer, buffer_len),
+ bit_width_(bit_width),
+ current_value_(0),
+ repeat_count_(0),
+ literal_count_(0) {
+ DCHECK_GE(bit_width_, 0);
+ DCHECK_LE(bit_width_, 64);
+ }
+
+ RleDecoder() : bit_width_(-1) {}
+
+ void Reset(const uint8_t* buffer, int buffer_len, int bit_width) {
+ DCHECK_GE(bit_width, 0);
+ DCHECK_LE(bit_width, 64);
+ bit_reader_.Reset(buffer, buffer_len);
+ bit_width_ = bit_width;
+ current_value_ = 0;
+ repeat_count_ = 0;
+ literal_count_ = 0;
+ }
+
+ /// Gets the next value. Returns false if there are no more.
+ template <typename T>
+ bool Get(T* val);
+
+ /// Gets a batch of values. Returns the number of decoded elements.
+ template <typename T>
+ int GetBatch(T* values, int batch_size);
+
+  /// Like GetBatch but adds spacing for null entries
+ template <typename T>
+ int GetBatchSpaced(int batch_size, int null_count, const uint8_t* valid_bits,
+ int64_t valid_bits_offset, T* out);
+
+ /// Like GetBatch but the values are then decoded using the provided dictionary
+ template <typename T>
+ int GetBatchWithDict(const T* dictionary, int32_t dictionary_length, T* values,
+ int batch_size);
+
+  /// Like GetBatchWithDict but adds spacing for null entries
+ ///
+ /// Null entries will be zero-initialized in `values` to avoid leaking
+ /// private data.
+ template <typename T>
+ int GetBatchWithDictSpaced(const T* dictionary, int32_t dictionary_length, T* values,
+ int batch_size, int null_count, const uint8_t* valid_bits,
+ int64_t valid_bits_offset);
+
+ protected:
+ BitUtil::BitReader bit_reader_;
+ /// Number of bits needed to encode the value. Must be between 0 and 64.
+ int bit_width_;
+ uint64_t current_value_;
+ int32_t repeat_count_;
+ int32_t literal_count_;
+
+ private:
+ /// Fills literal_count_ and repeat_count_ with next values. Returns false if there
+ /// are no more.
+ template <typename T>
+ bool NextCounts();
+
+ /// Utility methods for retrieving spaced values.
+ template <typename T, typename RunType, typename Converter>
+ int GetSpaced(Converter converter, int batch_size, int null_count,
+ const uint8_t* valid_bits, int64_t valid_bits_offset, T* out);
+};
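+
+// A minimal usage sketch (illustrative, not part of the library): decoding a
+// buffer of RLE-encoded 3-bit values, e.g. from a Parquet data page. `encoded`
+// and `encoded_len` are assumed inputs.
+//
+//   RleDecoder decoder(encoded, encoded_len, /*bit_width=*/3);
+//   int32_t out[1024];
+//   int n = decoder.GetBatch(out, 1024);
+//   // n < 1024 means the encoded stream was exhausted (or malformed).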
+
+/// Class to incrementally build the RLE data. This class does not allocate any memory.
+/// The encoding has two modes: encoding repeated runs and literal runs.
+/// If the run is sufficiently short, it is more efficient to encode as a literal run.
+/// This class does so by buffering 8 values at a time. If they are not all the same
+/// they are added to the literal run. If they are the same, they are added to the
+/// repeated run. When we switch modes, the previous run is flushed out.
+class RleEncoder {
+ public:
+ /// buffer/buffer_len: preallocated output buffer.
+ /// bit_width: max number of bits for value.
+ /// TODO: consider adding a min_repeated_run_length so the caller can control
+ /// when values should be encoded as repeated runs. Currently this is derived
+ /// based on the bit_width, which can determine a storage optimal choice.
+ /// TODO: allow 0 bit_width (and have dict encoder use it)
+ RleEncoder(uint8_t* buffer, int buffer_len, int bit_width)
+ : bit_width_(bit_width), bit_writer_(buffer, buffer_len) {
+ DCHECK_GE(bit_width_, 0);
+ DCHECK_LE(bit_width_, 64);
+ max_run_byte_size_ = MinBufferSize(bit_width);
+ DCHECK_GE(buffer_len, max_run_byte_size_) << "Input buffer not big enough.";
+ Clear();
+ }
+
+  /// Returns the minimum buffer size needed to use the encoder for 'bit_width'.
+  /// This is the maximum length of a single run for 'bit_width'.
+  /// It is not valid to pass a buffer shorter than this length.
+ static int MinBufferSize(int bit_width) {
+ /// 1 indicator byte and MAX_VALUES_PER_LITERAL_RUN 'bit_width' values.
+ int max_literal_run_size =
+ 1 +
+ static_cast<int>(BitUtil::BytesForBits(MAX_VALUES_PER_LITERAL_RUN * bit_width));
+ /// Up to kMaxVlqByteLength indicator and a single 'bit_width' value.
+ int max_repeated_run_size = BitUtil::BitReader::kMaxVlqByteLength +
+ static_cast<int>(BitUtil::BytesForBits(bit_width));
+ return std::max(max_literal_run_size, max_repeated_run_size);
+ }
+
+ /// Returns the maximum byte size it could take to encode 'num_values'.
+ static int MaxBufferSize(int bit_width, int num_values) {
+ // For a bit_width > 1, the worst case is the repetition of "literal run of length 8
+ // and then a repeated run of length 8".
+ // 8 values per smallest run, 8 bits per byte
+ int bytes_per_run = bit_width;
+ int num_runs = static_cast<int>(BitUtil::CeilDiv(num_values, 8));
+ int literal_max_size = num_runs + num_runs * bytes_per_run;
+
+ // In the very worst case scenario, the data is a concatenation of repeated
+ // runs of 8 values. Repeated run has a 1 byte varint followed by the
+ // bit-packed repeated value
+ int min_repeated_run_size = 1 + static_cast<int>(BitUtil::BytesForBits(bit_width));
+ int repeated_max_size =
+ static_cast<int>(BitUtil::CeilDiv(num_values, 8)) * min_repeated_run_size;
+
+ return std::max(literal_max_size, repeated_max_size);
+ }
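+
+  // Sizing sketch (illustrative only, assuming `bit_width` and `num_values`
+  // are known): allocate a worst-case output buffer up front, then construct
+  // the encoder over it.
+  //
+  //   std::vector<uint8_t> buf(RleEncoder::MaxBufferSize(bit_width, num_values));
+  //   RleEncoder encoder(buf.data(), static_cast<int>(buf.size()), bit_width);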
+
+ /// Encode value. Returns true if the value fits in buffer, false otherwise.
+ /// This value must be representable with bit_width_ bits.
+ bool Put(uint64_t value);
+
+ /// Flushes any pending values to the underlying buffer.
+ /// Returns the total number of bytes written
+ int Flush();
+
+ /// Resets all the state in the encoder.
+ void Clear();
+
+ /// Returns pointer to underlying buffer
+ uint8_t* buffer() { return bit_writer_.buffer(); }
+ int32_t len() { return bit_writer_.bytes_written(); }
+
+ private:
+ /// Flushes any buffered values. If this is part of a repeated run, this is largely
+ /// a no-op.
+ /// If it is part of a literal run, this will call FlushLiteralRun, which writes
+ /// out the buffered literal values.
+  /// If 'done' is true, the current run is written even if it would normally
+  /// have been buffered further. This should only be done at the end, once the
+  /// encoder has received all values.
+ void FlushBufferedValues(bool done);
+
+ /// Flushes literal values to the underlying buffer. If update_indicator_byte,
+ /// then the current literal run is complete and the indicator byte is updated.
+ void FlushLiteralRun(bool update_indicator_byte);
+
+ /// Flushes a repeated run to the underlying buffer.
+ void FlushRepeatedRun();
+
+ /// Checks and sets buffer_full_. This must be called after flushing a run to
+ /// make sure there are enough bytes remaining to encode the next run.
+ void CheckBufferFull();
+
+ /// The maximum number of values in a single literal run
+ /// (number of groups encodable by a 1-byte indicator * 8)
+ static const int MAX_VALUES_PER_LITERAL_RUN = (1 << 6) * 8;
+
+ /// Number of bits needed to encode the value. Must be between 0 and 64.
+ const int bit_width_;
+
+ /// Underlying buffer.
+ BitUtil::BitWriter bit_writer_;
+
+  /// If true, the buffer is full and subsequent calls to Put() will fail.
+ bool buffer_full_;
+
+ /// The maximum byte size a single run can take.
+ int max_run_byte_size_;
+
+ /// We need to buffer at most 8 values for literals. This happens when the
+ /// bit_width is 1 (so 8 values fit in one byte).
+ /// TODO: generalize this to other bit widths
+ int64_t buffered_values_[8];
+
+ /// Number of values in buffered_values_
+ int num_buffered_values_;
+
+ /// The current (also last) value that was written and the count of how
+ /// many times in a row that value has been seen. This is maintained even
+  /// if we are in a literal run. If the repeat_count_ gets high enough, we switch
+ /// to encoding repeated runs.
+ uint64_t current_value_;
+ int repeat_count_;
+
+ /// Number of literals in the current run. This does not include the literals
+  /// that might be in buffered_values_. Only once we have a full group can we
+  /// decide whether they should be part of the literal_count_ or repeat_count_.
+ int literal_count_;
+
+ /// Pointer to a byte in the underlying buffer that stores the indicator byte.
+ /// This is reserved as soon as we need a literal run but the value is written
+ /// when the literal run is complete.
+ uint8_t* literal_indicator_byte_;
+};
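+
+// Illustrative round-trip sketch (assumed inputs, not library code): encode a
+// sequence with RleEncoder, then read it back with RleDecoder.
+//
+//   for (uint64_t v : values) {
+//     if (!encoder.Put(v)) break;  // output buffer is full
+//   }
+//   int encoded_len = encoder.Flush();
+//   RleDecoder decoder(buf.data(), encoded_len, bit_width);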
+
+template <typename T>
+inline bool RleDecoder::Get(T* val) {
+ return GetBatch(val, 1) == 1;
+}
+
+template <typename T>
+inline int RleDecoder::GetBatch(T* values, int batch_size) {
+ DCHECK_GE(bit_width_, 0);
+ int values_read = 0;
+
+ auto* out = values;
+
+ while (values_read < batch_size) {
+ int remaining = batch_size - values_read;
+
+ if (repeat_count_ > 0) { // Repeated value case.
+ int repeat_batch = std::min(remaining, repeat_count_);
+ std::fill(out, out + repeat_batch, static_cast<T>(current_value_));
+
+ repeat_count_ -= repeat_batch;
+ values_read += repeat_batch;
+ out += repeat_batch;
+ } else if (literal_count_ > 0) {
+ int literal_batch = std::min(remaining, literal_count_);
+ int actual_read = bit_reader_.GetBatch(bit_width_, out, literal_batch);
+ if (actual_read != literal_batch) {
+ return values_read;
+ }
+
+ literal_count_ -= literal_batch;
+ values_read += literal_batch;
+ out += literal_batch;
+ } else {
+ if (!NextCounts<T>()) return values_read;
+ }
+ }
+
+ return values_read;
+}
+
+template <typename T, typename RunType, typename Converter>
+inline int RleDecoder::GetSpaced(Converter converter, int batch_size, int null_count,
+ const uint8_t* valid_bits, int64_t valid_bits_offset,
+ T* out) {
+ if (ARROW_PREDICT_FALSE(null_count == batch_size)) {
+ converter.FillZero(out, out + batch_size);
+ return batch_size;
+ }
+
+ DCHECK_GE(bit_width_, 0);
+ int values_read = 0;
+ int values_remaining = batch_size - null_count;
+
+ // Assume no bits to start.
+ arrow::internal::BitRunReader bit_reader(valid_bits, valid_bits_offset,
+ /*length=*/batch_size);
+ arrow::internal::BitRun valid_run = bit_reader.NextRun();
+ while (values_read < batch_size) {
+ if (ARROW_PREDICT_FALSE(valid_run.length == 0)) {
+ valid_run = bit_reader.NextRun();
+ }
+
+ DCHECK_GT(batch_size, 0);
+ DCHECK_GT(valid_run.length, 0);
+
+ if (valid_run.set) {
+ if ((repeat_count_ == 0) && (literal_count_ == 0)) {
+ if (!NextCounts<RunType>()) return values_read;
+ DCHECK((repeat_count_ > 0) ^ (literal_count_ > 0));
+ }
+
+ if (repeat_count_ > 0) {
+ int repeat_batch = 0;
+      // Consume the entire repeat count, incrementing repeat_batch so that it
+      // holds the total of nulls + values consumed. Only the total count is
+      // needed, because the same value is filled in for nulls and non-nulls
+      // alike. This proves to be a big efficiency win.
+ while (repeat_count_ > 0 && (values_read + repeat_batch) < batch_size) {
+ DCHECK_GT(valid_run.length, 0);
+ if (valid_run.set) {
+ int update_size = std::min(static_cast<int>(valid_run.length), repeat_count_);
+ repeat_count_ -= update_size;
+ repeat_batch += update_size;
+ valid_run.length -= update_size;
+ values_remaining -= update_size;
+ } else {
+ // We can consume all nulls here because we would do so on
+          // the next loop anyway.
+ repeat_batch += static_cast<int>(valid_run.length);
+ valid_run.length = 0;
+ }
+ if (valid_run.length == 0) {
+ valid_run = bit_reader.NextRun();
+ }
+ }
+ RunType current_value = static_cast<RunType>(current_value_);
+ if (ARROW_PREDICT_FALSE(!converter.IsValid(current_value))) {
+ return values_read;
+ }
+ converter.Fill(out, out + repeat_batch, current_value);
+ out += repeat_batch;
+ values_read += repeat_batch;
+ } else if (literal_count_ > 0) {
+ int literal_batch = std::min(values_remaining, literal_count_);
+ DCHECK_GT(literal_batch, 0);
+
+ // Decode the literals
+ constexpr int kBufferSize = 1024;
+ RunType indices[kBufferSize];
+ literal_batch = std::min(literal_batch, kBufferSize);
+ int actual_read = bit_reader_.GetBatch(bit_width_, indices, literal_batch);
+ if (ARROW_PREDICT_FALSE(actual_read != literal_batch)) {
+ return values_read;
+ }
+ if (!converter.IsValid(indices, /*length=*/actual_read)) {
+ return values_read;
+ }
+ int skipped = 0;
+ int literals_read = 0;
+ while (literals_read < literal_batch) {
+ if (valid_run.set) {
+ int update_size = std::min(literal_batch - literals_read,
+ static_cast<int>(valid_run.length));
+ converter.Copy(out, indices + literals_read, update_size);
+ literals_read += update_size;
+ out += update_size;
+ valid_run.length -= update_size;
+ } else {
+ converter.FillZero(out, out + valid_run.length);
+ out += valid_run.length;
+ skipped += static_cast<int>(valid_run.length);
+ valid_run.length = 0;
+ }
+ if (valid_run.length == 0) {
+ valid_run = bit_reader.NextRun();
+ }
+ }
+ literal_count_ -= literal_batch;
+ values_remaining -= literal_batch;
+ values_read += literal_batch + skipped;
+ }
+ } else {
+ converter.FillZero(out, out + valid_run.length);
+ out += valid_run.length;
+ values_read += static_cast<int>(valid_run.length);
+ valid_run.length = 0;
+ }
+ }
+ DCHECK_EQ(valid_run.length, 0);
+ DCHECK_EQ(values_remaining, 0);
+ return values_read;
+}
+
+// Converter for GetSpaced that handles runs that get returned
+// directly as output.
+template <typename T>
+struct PlainRleConverter {
+ T kZero = {};
+ inline bool IsValid(const T& values) const { return true; }
+ inline bool IsValid(const T* values, int32_t length) const { return true; }
+ inline void Fill(T* begin, T* end, const T& run_value) const {
+ std::fill(begin, end, run_value);
+ }
+ inline void FillZero(T* begin, T* end) { std::fill(begin, end, kZero); }
+ inline void Copy(T* out, const T* values, int length) const {
+ std::memcpy(out, values, length * sizeof(T));
+ }
+};
+
+template <typename T>
+inline int RleDecoder::GetBatchSpaced(int batch_size, int null_count,
+ const uint8_t* valid_bits,
+ int64_t valid_bits_offset, T* out) {
+ if (null_count == 0) {
+ return GetBatch<T>(out, batch_size);
+ }
+
+ PlainRleConverter<T> converter;
+ arrow::internal::BitBlockCounter block_counter(valid_bits, valid_bits_offset,
+ batch_size);
+
+ int total_processed = 0;
+ int processed = 0;
+ arrow::internal::BitBlockCount block;
+
+ do {
+ block = block_counter.NextFourWords();
+ if (block.length == 0) {
+ break;
+ }
+ if (block.AllSet()) {
+ processed = GetBatch<T>(out, block.length);
+ } else if (block.NoneSet()) {
+ converter.FillZero(out, out + block.length);
+ processed = block.length;
+ } else {
+ processed = GetSpaced<T, /*RunType=*/T, PlainRleConverter<T>>(
+ converter, block.length, block.length - block.popcount, valid_bits,
+ valid_bits_offset, out);
+ }
+ total_processed += processed;
+ out += block.length;
+ valid_bits_offset += block.length;
+ } while (processed == block.length);
+ return total_processed;
+}
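+
+// Illustrative example: spaced decoding zero-fills the slots whose validity
+// bit is 0. With the first byte of valid_bits = 0b00001011 (slots 0, 1 and 3
+// valid out of 4):
+//
+//   int32_t out[4];
+//   decoder.GetBatchSpaced(4, /*null_count=*/1, valid_bits, 0, out);
+//   // out == {v0, v1, 0, v2} for decoded values v0, v1, v2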
+
+static inline bool IndexInRange(int32_t idx, int32_t dictionary_length) {
+ return idx >= 0 && idx < dictionary_length;
+}
+
+// Converter for GetSpaced that handles runs of returned dictionary
+// indices.
+template <typename T>
+struct DictionaryConverter {
+ T kZero = {};
+ const T* dictionary;
+ int32_t dictionary_length;
+
+ inline bool IsValid(int32_t value) { return IndexInRange(value, dictionary_length); }
+
+ inline bool IsValid(const int32_t* values, int32_t length) const {
+ using IndexType = int32_t;
+ IndexType min_index = std::numeric_limits<IndexType>::max();
+ IndexType max_index = std::numeric_limits<IndexType>::min();
+ for (int x = 0; x < length; x++) {
+ min_index = std::min(values[x], min_index);
+ max_index = std::max(values[x], max_index);
+ }
+
+ return IndexInRange(min_index, dictionary_length) &&
+ IndexInRange(max_index, dictionary_length);
+ }
+ inline void Fill(T* begin, T* end, const int32_t& run_value) const {
+ std::fill(begin, end, dictionary[run_value]);
+ }
+ inline void FillZero(T* begin, T* end) { std::fill(begin, end, kZero); }
+
+ inline void Copy(T* out, const int32_t* values, int length) const {
+ for (int x = 0; x < length; x++) {
+ out[x] = dictionary[values[x]];
+ }
+ }
+};
+
+template <typename T>
+inline int RleDecoder::GetBatchWithDict(const T* dictionary, int32_t dictionary_length,
+ T* values, int batch_size) {
+ // Per https://github.com/apache/parquet-format/blob/master/Encodings.md,
+ // the maximum dictionary index width in Parquet is 32 bits.
+ using IndexType = int32_t;
+ DictionaryConverter<T> converter;
+ converter.dictionary = dictionary;
+ converter.dictionary_length = dictionary_length;
+
+ DCHECK_GE(bit_width_, 0);
+ int values_read = 0;
+
+ auto* out = values;
+
+ while (values_read < batch_size) {
+ int remaining = batch_size - values_read;
+
+ if (repeat_count_ > 0) {
+ auto idx = static_cast<IndexType>(current_value_);
+ if (ARROW_PREDICT_FALSE(!IndexInRange(idx, dictionary_length))) {
+ return values_read;
+ }
+ T val = dictionary[idx];
+
+ int repeat_batch = std::min(remaining, repeat_count_);
+ std::fill(out, out + repeat_batch, val);
+
+ /* Upkeep counters */
+ repeat_count_ -= repeat_batch;
+ values_read += repeat_batch;
+ out += repeat_batch;
+ } else if (literal_count_ > 0) {
+ constexpr int kBufferSize = 1024;
+ IndexType indices[kBufferSize];
+
+ int literal_batch = std::min(remaining, literal_count_);
+ literal_batch = std::min(literal_batch, kBufferSize);
+
+ int actual_read = bit_reader_.GetBatch(bit_width_, indices, literal_batch);
+ if (ARROW_PREDICT_FALSE(actual_read != literal_batch)) {
+ return values_read;
+ }
+ if (ARROW_PREDICT_FALSE(!converter.IsValid(indices, /*length=*/literal_batch))) {
+ return values_read;
+ }
+ converter.Copy(out, indices, literal_batch);
+
+ /* Upkeep counters */
+ literal_count_ -= literal_batch;
+ values_read += literal_batch;
+ out += literal_batch;
+ } else {
+ if (!NextCounts<IndexType>()) return values_read;
+ }
+ }
+
+ return values_read;
+}
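+
+// Illustrative sketch (the dictionary contents are assumed): decoding RLE
+// dictionary indices directly into dictionary values.
+//
+//   const double dict[] = {0.5, 1.5, 2.5};
+//   double out[256];
+//   int n = decoder.GetBatchWithDict(dict, /*dictionary_length=*/3, out, 256);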
+
+template <typename T>
+inline int RleDecoder::GetBatchWithDictSpaced(const T* dictionary,
+ int32_t dictionary_length, T* out,
+ int batch_size, int null_count,
+ const uint8_t* valid_bits,
+ int64_t valid_bits_offset) {
+ if (null_count == 0) {
+ return GetBatchWithDict<T>(dictionary, dictionary_length, out, batch_size);
+ }
+ arrow::internal::BitBlockCounter block_counter(valid_bits, valid_bits_offset,
+ batch_size);
+ using IndexType = int32_t;
+ DictionaryConverter<T> converter;
+ converter.dictionary = dictionary;
+ converter.dictionary_length = dictionary_length;
+
+ int total_processed = 0;
+ int processed = 0;
+ arrow::internal::BitBlockCount block;
+ do {
+ block = block_counter.NextFourWords();
+ if (block.length == 0) {
+ break;
+ }
+ if (block.AllSet()) {
+ processed = GetBatchWithDict<T>(dictionary, dictionary_length, out, block.length);
+ } else if (block.NoneSet()) {
+ converter.FillZero(out, out + block.length);
+ processed = block.length;
+ } else {
+ processed = GetSpaced<T, /*RunType=*/IndexType, DictionaryConverter<T>>(
+ converter, block.length, block.length - block.popcount, valid_bits,
+ valid_bits_offset, out);
+ }
+ total_processed += processed;
+ out += block.length;
+ valid_bits_offset += block.length;
+ } while (processed == block.length);
+ return total_processed;
+}
+
+template <typename T>
+bool RleDecoder::NextCounts() {
+  // Read the next run's indicator int; it could start a literal or a repeated run.
+ // The int is encoded as a vlq-encoded value.
+ uint32_t indicator_value = 0;
+ if (!bit_reader_.GetVlqInt(&indicator_value)) return false;
+
+ // lsb indicates if it is a literal run or repeated run
+ bool is_literal = indicator_value & 1;
+ uint32_t count = indicator_value >> 1;
+ if (is_literal) {
+ if (ARROW_PREDICT_FALSE(count == 0 || count > static_cast<uint32_t>(INT32_MAX) / 8)) {
+ return false;
+ }
+ literal_count_ = count * 8;
+ } else {
+ if (ARROW_PREDICT_FALSE(count == 0 || count > static_cast<uint32_t>(INT32_MAX))) {
+ return false;
+ }
+ repeat_count_ = count;
+ T value = {};
+ if (!bit_reader_.GetAligned<T>(static_cast<int>(BitUtil::CeilDiv(bit_width_, 8)),
+ &value)) {
+ return false;
+ }
+ current_value_ = static_cast<uint64_t>(value);
+ }
+ return true;
+}
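+
+// Worked example (illustrative): an indicator byte of 0x33 = 0b110011 has
+// lsb 1, so it starts a literal run, with count = 0x33 >> 1 = 25 groups,
+// i.e. literal_count_ = 25 * 8 = 200 values -- matching the bit-width-1
+// example in the header comment above.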
+
+/// This function buffers input values 8 at a time. After seeing all 8 values,
+/// it decides whether they should be encoded as a literal or repeated run.
+inline bool RleEncoder::Put(uint64_t value) {
+ DCHECK(bit_width_ == 64 || value < (1ULL << bit_width_));
+ if (ARROW_PREDICT_FALSE(buffer_full_)) return false;
+
+ if (ARROW_PREDICT_TRUE(current_value_ == value)) {
+ ++repeat_count_;
+ if (repeat_count_ > 8) {
+ // This is just a continuation of the current run, no need to buffer the
+ // values.
+ // Note that this is the fast path for long repeated runs.
+ return true;
+ }
+ } else {
+ if (repeat_count_ >= 8) {
+ // We had a run that was long enough but it has ended. Flush the
+ // current repeated run.
+ DCHECK_EQ(literal_count_, 0);
+ FlushRepeatedRun();
+ }
+ repeat_count_ = 1;
+ current_value_ = value;
+ }
+
+ buffered_values_[num_buffered_values_] = value;
+ if (++num_buffered_values_ == 8) {
+ DCHECK_EQ(literal_count_ % 8, 0);
+ FlushBufferedValues(false);
+ }
+ return true;
+}
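+
+// Illustrative trace: feeding nine equal values leaves the first eight in
+// buffered_values_ until the 8th Put() triggers FlushBufferedValues(); since
+// repeat_count_ == 8 at that point, they become a repeated run, and the 9th
+// value takes the repeat_count_ > 8 fast path above.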
+
+inline void RleEncoder::FlushLiteralRun(bool update_indicator_byte) {
+ if (literal_indicator_byte_ == NULL) {
+ // The literal indicator byte has not been reserved yet, get one now.
+ literal_indicator_byte_ = bit_writer_.GetNextBytePtr();
+ DCHECK(literal_indicator_byte_ != NULL);
+ }
+
+ // Write all the buffered values as bit packed literals
+ for (int i = 0; i < num_buffered_values_; ++i) {
+ bool success = bit_writer_.PutValue(buffered_values_[i], bit_width_);
+ DCHECK(success) << "There is a bug in using CheckBufferFull()";
+ }
+ num_buffered_values_ = 0;
+
+ if (update_indicator_byte) {
+ // At this point we need to write the indicator byte for the literal run.
+ // We only reserve one byte, to allow for streaming writes of literal values.
+ // The logic makes sure we flush literal runs often enough to not overrun
+ // the 1 byte.
+ DCHECK_EQ(literal_count_ % 8, 0);
+ int num_groups = literal_count_ / 8;
+ int32_t indicator_value = (num_groups << 1) | 1;
+ DCHECK_EQ(indicator_value & 0xFFFFFF00, 0);
+ *literal_indicator_byte_ = static_cast<uint8_t>(indicator_value);
+ literal_indicator_byte_ = NULL;
+ literal_count_ = 0;
+ CheckBufferFull();
+ }
+}
+
+inline void RleEncoder::FlushRepeatedRun() {
+ DCHECK_GT(repeat_count_, 0);
+ bool result = true;
+ // The lsb of 0 indicates this is a repeated run
+ int32_t indicator_value = repeat_count_ << 1 | 0;
+ result &= bit_writer_.PutVlqInt(indicator_value);
+ result &= bit_writer_.PutAligned(current_value_,
+ static_cast<int>(BitUtil::CeilDiv(bit_width_, 8)));
+ DCHECK(result);
+ num_buffered_values_ = 0;
+ repeat_count_ = 0;
+ CheckBufferFull();
+}
+
+/// Flush the values that have been buffered. At this point we decide whether
+/// we need to switch between the run types or continue the current one.
+inline void RleEncoder::FlushBufferedValues(bool done) {
+ if (repeat_count_ >= 8) {
+ // Clear the buffered values. They are part of the repeated run now and we
+ // don't want to flush them out as literals.
+ num_buffered_values_ = 0;
+ if (literal_count_ != 0) {
+ // There was a current literal run. All the values in it have been flushed
+ // but we still need to update the indicator byte.
+ DCHECK_EQ(literal_count_ % 8, 0);
+ DCHECK_EQ(repeat_count_, 8);
+ FlushLiteralRun(true);
+ }
+ DCHECK_EQ(literal_count_, 0);
+ return;
+ }
+
+ literal_count_ += num_buffered_values_;
+ DCHECK_EQ(literal_count_ % 8, 0);
+ int num_groups = literal_count_ / 8;
+ if (num_groups + 1 >= (1 << 6)) {
+ // We need to start a new literal run because the indicator byte we've reserved
+ // cannot store more values.
+ DCHECK(literal_indicator_byte_ != NULL);
+ FlushLiteralRun(true);
+ } else {
+ FlushLiteralRun(done);
+ }
+ repeat_count_ = 0;
+}
+
+inline int RleEncoder::Flush() {
+ if (literal_count_ > 0 || repeat_count_ > 0 || num_buffered_values_ > 0) {
+ bool all_repeat = literal_count_ == 0 && (repeat_count_ == num_buffered_values_ ||
+ num_buffered_values_ == 0);
+ // There is something pending, figure out if it's a repeated or literal run
+ if (repeat_count_ > 0 && all_repeat) {
+ FlushRepeatedRun();
+ } else {
+ DCHECK_EQ(literal_count_ % 8, 0);
+      // Pad the last group of literals out to 8 values with 0s.
+ for (; num_buffered_values_ != 0 && num_buffered_values_ < 8;
+ ++num_buffered_values_) {
+ buffered_values_[num_buffered_values_] = 0;
+ }
+ literal_count_ += num_buffered_values_;
+ FlushLiteralRun(true);
+ repeat_count_ = 0;
+ }
+ }
+ bit_writer_.Flush();
+ DCHECK_EQ(num_buffered_values_, 0);
+ DCHECK_EQ(literal_count_, 0);
+ DCHECK_EQ(repeat_count_, 0);
+
+ return bit_writer_.bytes_written();
+}
+
+inline void RleEncoder::CheckBufferFull() {
+ int bytes_written = bit_writer_.bytes_written();
+ if (bytes_written + max_run_byte_size_ > bit_writer_.buffer_len()) {
+ buffer_full_ = true;
+ }
+}
+
+inline void RleEncoder::Clear() {
+ buffer_full_ = false;
+ current_value_ = 0;
+ repeat_count_ = 0;
+ num_buffered_values_ = 0;
+ literal_count_ = 0;
+ literal_indicator_byte_ = NULL;
+ bit_writer_.Clear();
+}
+
+} // namespace util
+} // namespace arrow
diff --git a/contrib/libs/apache/arrow/cpp/src/arrow/util/spaced.h b/contrib/libs/apache/arrow/cpp/src/arrow/util/spaced.h
index 9414984663f..8265e1d22ae 100644
--- a/contrib/libs/apache/arrow/cpp/src/arrow/util/spaced.h
+++ b/contrib/libs/apache/arrow/cpp/src/arrow/util/spaced.h
@@ -1,98 +1,98 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-#pragma once
-
-#include <cassert>
-#include <cstdint>
-#include <cstring>
-
-#include "arrow/util/bit_run_reader.h"
-
-namespace arrow {
-namespace util {
-namespace internal {
-
-/// \brief Compact a spaced buffer by dropping its null entries.
-///
-/// \param[in] src the source (spaced) buffer
-/// \param[in] num_values the size of the source buffer, including null slots
-/// \param[in] valid_bits bitmap data indicating the positions of valid slots
-/// \param[in] valid_bits_offset offset into valid_bits
-/// \param[out] output the densely packed output buffer
-/// \return The number of valid values written to output.
-template <typename T>
-inline int SpacedCompress(const T* src, int num_values, const uint8_t* valid_bits,
- int64_t valid_bits_offset, T* output) {
- int num_valid_values = 0;
-
- arrow::internal::SetBitRunReader reader(valid_bits, valid_bits_offset, num_values);
- while (true) {
- const auto run = reader.NextRun();
- if (run.length == 0) {
- break;
- }
- std::memcpy(output + num_valid_values, src + run.position, run.length * sizeof(T));
- num_valid_values += static_cast<int32_t>(run.length);
- }
-
- return num_valid_values;
-}
-
-/// \brief Relocate values in buffer into positions of non-null values as indicated by
-/// a validity bitmap.
-///
-/// \param[in, out] buffer the in-place buffer
-/// \param[in] num_values total size of buffer including null slots
-/// \param[in] null_count number of null slots
-/// \param[in] valid_bits bitmap data indicating position of valid slots
-/// \param[in] valid_bits_offset offset into valid_bits
-/// \return The number of values expanded, including nulls.
-template <typename T>
-inline int SpacedExpand(T* buffer, int num_values, int null_count,
- const uint8_t* valid_bits, int64_t valid_bits_offset) {
- // Point to end as we add the spacing from the back.
- int idx_decode = num_values - null_count;
-
- // Depending on the number of nulls, some of the value slots in buffer may
- // be uninitialized, and this will cause valgrind warnings / potentially UB
- std::memset(static_cast<void*>(buffer + idx_decode), 0, null_count * sizeof(T));
- if (idx_decode == 0) {
- // All nulls, nothing more to do
- return num_values;
- }
-
- arrow::internal::ReverseSetBitRunReader reader(valid_bits, valid_bits_offset,
- num_values);
- while (true) {
- const auto run = reader.NextRun();
- if (run.length == 0) {
- break;
- }
- idx_decode -= static_cast<int32_t>(run.length);
- assert(idx_decode >= 0);
- std::memmove(buffer + run.position, buffer + idx_decode, run.length * sizeof(T));
- }
-
-  // If this does not hold, the caller gave an incorrect null_count
- assert(idx_decode == 0);
- return num_values;
-}
-
-} // namespace internal
-} // namespace util
-} // namespace arrow
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+
+#include <cassert>
+#include <cstdint>
+#include <cstring>
+
+#include "arrow/util/bit_run_reader.h"
+
+namespace arrow {
+namespace util {
+namespace internal {
+
+/// \brief Compress a spaced buffer into dense form, excluding the null entries.
+///
+/// \param[in] src the source buffer
+/// \param[in] num_values the size of the source buffer
+/// \param[in] valid_bits bitmap data indicating position of valid slots
+/// \param[in] valid_bits_offset offset into valid_bits
+/// \param[out] output the dense output buffer
+/// \return The number of valid values written to the output buffer.
+template <typename T>
+inline int SpacedCompress(const T* src, int num_values, const uint8_t* valid_bits,
+ int64_t valid_bits_offset, T* output) {
+ int num_valid_values = 0;
+
+ arrow::internal::SetBitRunReader reader(valid_bits, valid_bits_offset, num_values);
+ while (true) {
+ const auto run = reader.NextRun();
+ if (run.length == 0) {
+ break;
+ }
+ std::memcpy(output + num_valid_values, src + run.position, run.length * sizeof(T));
+ num_valid_values += static_cast<int32_t>(run.length);
+ }
+
+ return num_valid_values;
+}
+
+/// \brief Relocate values in buffer into positions of non-null values as indicated by
+/// a validity bitmap.
+///
+/// \param[in, out] buffer the in-place buffer
+/// \param[in] num_values total size of buffer including null slots
+/// \param[in] null_count number of null slots
+/// \param[in] valid_bits bitmap data indicating position of valid slots
+/// \param[in] valid_bits_offset offset into valid_bits
+/// \return The number of values expanded, including nulls.
+template <typename T>
+inline int SpacedExpand(T* buffer, int num_values, int null_count,
+ const uint8_t* valid_bits, int64_t valid_bits_offset) {
+ // Point to end as we add the spacing from the back.
+ int idx_decode = num_values - null_count;
+
+ // Depending on the number of nulls, some of the value slots in buffer may
+ // be uninitialized, and this will cause valgrind warnings / potentially UB
+ std::memset(static_cast<void*>(buffer + idx_decode), 0, null_count * sizeof(T));
+ if (idx_decode == 0) {
+ // All nulls, nothing more to do
+ return num_values;
+ }
+
+ arrow::internal::ReverseSetBitRunReader reader(valid_bits, valid_bits_offset,
+ num_values);
+ while (true) {
+ const auto run = reader.NextRun();
+ if (run.length == 0) {
+ break;
+ }
+ idx_decode -= static_cast<int32_t>(run.length);
+ assert(idx_decode >= 0);
+ std::memmove(buffer + run.position, buffer + idx_decode, run.length * sizeof(T));
+ }
+
+ // Otherwise caller gave an incorrect null_count
+ assert(idx_decode == 0);
+ return num_values;
+}
+
+} // namespace internal
+} // namespace util
+} // namespace arrow
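For reference, a minimal round-trip sketch of the two helpers above — a hypothetical standalone program, assuming the header path "arrow/util/spaced.h" and Arrow's LSB-first validity-bitmap convention; note that after expansion only the valid slots hold meaningful values:

#include <cassert>
#include <cstdint>

#include "arrow/util/spaced.h"

int main() {
  // slots 0 and 2 are valid (bitmap bits are LSB-first)
  const uint8_t valid_bits[] = {0b00000101};
  const int32_t spaced[4] = {10, -1, 20, -1};

  // compact the valid entries into a dense buffer
  int32_t dense[4] = {};
  const int n = arrow::util::internal::SpacedCompress<int32_t>(
      spaced, /*num_values=*/4, valid_bits, /*valid_bits_offset=*/0, dense);
  assert(n == 2);  // dense now begins with {10, 20}

  // expand back in place: the valid values return to slots 0 and 2;
  // the contents of the null slots are unspecified (but initialized)
  arrow::util::internal::SpacedExpand<int32_t>(dense, /*num_values=*/4,
                                               /*null_count=*/2, valid_bits,
                                               /*valid_bits_offset=*/0);
  assert(dense[0] == 10 && dense[2] == 20);
  return 0;
}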
diff --git a/contrib/libs/apache/arrow/cpp/src/arrow/util/string.cc b/contrib/libs/apache/arrow/cpp/src/arrow/util/string.cc
index 5abb2feb446..d922311df1c 100644
--- a/contrib/libs/apache/arrow/cpp/src/arrow/util/string.cc
+++ b/contrib/libs/apache/arrow/cpp/src/arrow/util/string.cc
@@ -92,23 +92,23 @@ Status ParseHexValue(const char* data, uint8_t* out) {
namespace internal {
-std::vector<util::string_view> SplitString(util::string_view v, char delimiter) {
- std::vector<util::string_view> parts;
- size_t start = 0, end;
- while (true) {
- end = v.find(delimiter, start);
- parts.push_back(v.substr(start, end - start));
- if (end == std::string::npos) {
- break;
- }
- start = end + 1;
- }
- return parts;
-}
-
-template <typename StringLike>
-static std::string JoinStringLikes(const std::vector<StringLike>& strings,
- util::string_view delimiter) {
+std::vector<util::string_view> SplitString(util::string_view v, char delimiter) {
+ std::vector<util::string_view> parts;
+ size_t start = 0, end;
+ while (true) {
+ end = v.find(delimiter, start);
+ parts.push_back(v.substr(start, end - start));
+ if (end == std::string::npos) {
+ break;
+ }
+ start = end + 1;
+ }
+ return parts;
+}
+
+template <typename StringLike>
+static std::string JoinStringLikes(const std::vector<StringLike>& strings,
+ util::string_view delimiter) {
if (strings.size() == 0) {
return "";
}
@@ -120,18 +120,18 @@ static std::string JoinStringLikes(const std::vector<StringLike>& strings,
return out;
}
-std::string JoinStrings(const std::vector<util::string_view>& strings,
- util::string_view delimiter) {
- return JoinStringLikes(strings, delimiter);
-}
-
-std::string JoinStrings(const std::vector<std::string>& strings,
- util::string_view delimiter) {
- return JoinStringLikes(strings, delimiter);
-}
-
-static constexpr bool IsWhitespace(char c) { return c == ' ' || c == '\t'; }
-
+std::string JoinStrings(const std::vector<util::string_view>& strings,
+ util::string_view delimiter) {
+ return JoinStringLikes(strings, delimiter);
+}
+
+std::string JoinStrings(const std::vector<std::string>& strings,
+ util::string_view delimiter) {
+ return JoinStringLikes(strings, delimiter);
+}
+
+static constexpr bool IsWhitespace(char c) { return c == ' ' || c == '\t'; }
+
std::string TrimString(std::string value) {
size_t ltrim_chars = 0;
while (ltrim_chars < value.size() && IsWhitespace(value[ltrim_chars])) {
diff --git a/contrib/libs/apache/arrow/cpp/src/arrow/util/string.h b/contrib/libs/apache/arrow/cpp/src/arrow/util/string.h
index 932e599fc21..68b8a54e313 100644
--- a/contrib/libs/apache/arrow/cpp/src/arrow/util/string.h
+++ b/contrib/libs/apache/arrow/cpp/src/arrow/util/string.h
@@ -42,20 +42,20 @@ ARROW_EXPORT Status ParseHexValue(const char* data, uint8_t* out);
namespace internal {
-/// \brief Split a string with a delimiter
-ARROW_EXPORT
-std::vector<util::string_view> SplitString(util::string_view v, char delim);
-
+/// \brief Split a string with a delimiter
+ARROW_EXPORT
+std::vector<util::string_view> SplitString(util::string_view v, char delim);
+
/// \brief Join strings with a delimiter
ARROW_EXPORT
std::string JoinStrings(const std::vector<util::string_view>& strings,
util::string_view delimiter);
-/// \brief Join strings with a delimiter
-ARROW_EXPORT
-std::string JoinStrings(const std::vector<std::string>& strings,
- util::string_view delimiter);
-
+/// \brief Join strings with a delimiter
+ARROW_EXPORT
+std::string JoinStrings(const std::vector<std::string>& strings,
+ util::string_view delimiter);
+
/// \brief Trim whitespace from left and right sides of string
ARROW_EXPORT
std::string TrimString(std::string value);
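A small usage sketch of the three helpers declared above (a hypothetical standalone program against this header; note that SplitString preserves empty fields, as its implementation shows):

#include <iostream>
#include <vector>

#include "arrow/util/string.h"

int main() {
  using arrow::internal::JoinStrings;
  using arrow::internal::SplitString;
  using arrow::internal::TrimString;

  // empty fields are preserved: {"a", "b", "", "c"}
  // (the views point into the string literal, which stays alive here)
  std::vector<arrow::util::string_view> parts = SplitString("a,b,,c", ',');

  // rejoin with a different delimiter: prints "a|b||c"
  std::cout << JoinStrings(parts, "|") << "\n";

  // trims spaces and tabs only, per IsWhitespace: prints "x y"
  std::cout << TrimString("  x y\t") << "\n";
  return 0;
}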
diff --git a/contrib/libs/apache/arrow/cpp/src/arrow/util/task_group.cc b/contrib/libs/apache/arrow/cpp/src/arrow/util/task_group.cc
index 04a6d95cacb..7e8ab64b703 100644
--- a/contrib/libs/apache/arrow/cpp/src/arrow/util/task_group.cc
+++ b/contrib/libs/apache/arrow/cpp/src/arrow/util/task_group.cc
@@ -30,29 +30,29 @@
namespace arrow {
namespace internal {
-namespace {
-
+namespace {
+
////////////////////////////////////////////////////////////////////////
// Serial TaskGroup implementation
class SerialTaskGroup : public TaskGroup {
public:
- explicit SerialTaskGroup(StopToken stop_token) : stop_token_(std::move(stop_token)) {}
-
- void AppendReal(FnOnce<Status()> task) override {
+ explicit SerialTaskGroup(StopToken stop_token) : stop_token_(std::move(stop_token)) {}
+
+ void AppendReal(FnOnce<Status()> task) override {
DCHECK(!finished_);
- if (stop_token_.IsStopRequested()) {
- status_ &= stop_token_.Poll();
- return;
- }
+ if (stop_token_.IsStopRequested()) {
+ status_ &= stop_token_.Poll();
+ return;
+ }
if (status_.ok()) {
- status_ &= std::move(task)();
+ status_ &= std::move(task)();
}
}
Status current_status() override { return status_; }
- bool ok() const override { return status_.ok(); }
+ bool ok() const override { return status_.ok(); }
Status Finish() override {
if (!finished_) {
@@ -61,11 +61,11 @@ class SerialTaskGroup : public TaskGroup {
return status_;
}
- Future<> FinishAsync() override { return Future<>::MakeFinished(Finish()); }
-
+ Future<> FinishAsync() override { return Future<>::MakeFinished(Finish()); }
+
int parallelism() override { return 1; }
- StopToken stop_token_;
+ StopToken stop_token_;
Status status_;
bool finished_ = false;
};
@@ -75,11 +75,11 @@ class SerialTaskGroup : public TaskGroup {
class ThreadedTaskGroup : public TaskGroup {
public:
- ThreadedTaskGroup(Executor* executor, StopToken stop_token)
- : executor_(executor),
- stop_token_(std::move(stop_token)),
- nremaining_(0),
- ok_(true) {}
+ ThreadedTaskGroup(Executor* executor, StopToken stop_token)
+ : executor_(executor),
+ stop_token_(std::move(stop_token)),
+ nremaining_(0),
+ ok_(true) {}
~ThreadedTaskGroup() override {
// Make sure all pending tasks are finished, so that dangling references
@@ -87,42 +87,42 @@ class ThreadedTaskGroup : public TaskGroup {
ARROW_UNUSED(Finish());
}
- void AppendReal(FnOnce<Status()> task) override {
- DCHECK(!finished_);
- if (stop_token_.IsStopRequested()) {
- UpdateStatus(stop_token_.Poll());
- return;
- }
-
+ void AppendReal(FnOnce<Status()> task) override {
+ DCHECK(!finished_);
+ if (stop_token_.IsStopRequested()) {
+ UpdateStatus(stop_token_.Poll());
+ return;
+ }
+
// The hot path is unlocked thanks to atomics
// Only if an error occurs is the lock taken
if (ok_.load(std::memory_order_acquire)) {
nremaining_.fetch_add(1, std::memory_order_acquire);
auto self = checked_pointer_cast<ThreadedTaskGroup>(shared_from_this());
-
- struct Callable {
- void operator()() {
- if (self_->ok_.load(std::memory_order_acquire)) {
- Status st;
- if (stop_token_.IsStopRequested()) {
- st = stop_token_.Poll();
- } else {
- // XXX what about exceptions?
- st = std::move(task_)();
- }
- self_->UpdateStatus(std::move(st));
- }
- self_->OneTaskDone();
+
+ struct Callable {
+ void operator()() {
+ if (self_->ok_.load(std::memory_order_acquire)) {
+ Status st;
+ if (stop_token_.IsStopRequested()) {
+ st = stop_token_.Poll();
+ } else {
+ // XXX what about exceptions?
+ st = std::move(task_)();
+ }
+ self_->UpdateStatus(std::move(st));
+ }
+ self_->OneTaskDone();
}
-
- std::shared_ptr<ThreadedTaskGroup> self_;
- FnOnce<Status()> task_;
- StopToken stop_token_;
- };
-
- Status st =
- executor_->Spawn(Callable{std::move(self), std::move(task), stop_token_});
+
+ std::shared_ptr<ThreadedTaskGroup> self_;
+ FnOnce<Status()> task_;
+ StopToken stop_token_;
+ };
+
+ Status st =
+ executor_->Spawn(Callable{std::move(self), std::move(task), stop_token_});
UpdateStatus(std::move(st));
}
}
@@ -132,7 +132,7 @@ class ThreadedTaskGroup : public TaskGroup {
return status_;
}
- bool ok() const override { return ok_.load(); }
+ bool ok() const override { return ok_.load(); }
Status Finish() override {
std::unique_lock<std::mutex> lock(mutex_);
@@ -144,20 +144,20 @@ class ThreadedTaskGroup : public TaskGroup {
return status_;
}
- Future<> FinishAsync() override {
+ Future<> FinishAsync() override {
std::lock_guard<std::mutex> lock(mutex_);
- if (!completion_future_.has_value()) {
- if (nremaining_.load() == 0) {
- completion_future_ = Future<>::MakeFinished(status_);
- } else {
- completion_future_ = Future<>::Make();
- }
- }
- return *completion_future_;
+ if (!completion_future_.has_value()) {
+ if (nremaining_.load() == 0) {
+ completion_future_ = Future<>::MakeFinished(status_);
+ } else {
+ completion_future_ = Future<>::Make();
+ }
+ }
+ return *completion_future_;
}
- int parallelism() override { return executor_->GetCapacity(); }
-
+ int parallelism() override { return executor_->GetCapacity(); }
+
protected:
void UpdateStatus(Status&& st) {
// Must be called unlocked, only locks on error
@@ -177,27 +177,27 @@ class ThreadedTaskGroup : public TaskGroup {
// before cv.notify_one() has returned
std::unique_lock<std::mutex> lock(mutex_);
cv_.notify_one();
- if (completion_future_.has_value()) {
- // MarkFinished could be slow. We don't want to call it while we are holding
- // the lock.
- auto& future = *completion_future_;
- const auto finished = completion_future_->is_finished();
- const auto& status = status_;
- // This will be redundant if the user calls Finish and not FinishAsync
- if (!finished && !finished_) {
- finished_ = true;
- lock.unlock();
- future.MarkFinished(status);
- } else {
- lock.unlock();
- }
- }
+ if (completion_future_.has_value()) {
+ // MarkFinished could be slow. We don't want to call it while we are holding
+ // the lock.
+ auto& future = *completion_future_;
+ const auto finished = completion_future_->is_finished();
+ const auto& status = status_;
+ // This will be redundant if the user calls Finish and not FinishAsync
+ if (!finished && !finished_) {
+ finished_ = true;
+ lock.unlock();
+ future.MarkFinished(status);
+ } else {
+ lock.unlock();
+ }
+ }
}
}
// These members are usable unlocked
Executor* executor_;
- StopToken stop_token_;
+ StopToken stop_token_;
std::atomic<int32_t> nremaining_;
std::atomic<bool> ok_;
@@ -206,18 +206,18 @@ class ThreadedTaskGroup : public TaskGroup {
std::condition_variable cv_;
Status status_;
bool finished_ = false;
- util::optional<Future<>> completion_future_;
+ util::optional<Future<>> completion_future_;
};
-} // namespace
-
-std::shared_ptr<TaskGroup> TaskGroup::MakeSerial(StopToken stop_token) {
- return std::shared_ptr<TaskGroup>(new SerialTaskGroup{stop_token});
+} // namespace
+
+std::shared_ptr<TaskGroup> TaskGroup::MakeSerial(StopToken stop_token) {
+ return std::shared_ptr<TaskGroup>(new SerialTaskGroup{stop_token});
}
-std::shared_ptr<TaskGroup> TaskGroup::MakeThreaded(Executor* thread_pool,
- StopToken stop_token) {
- return std::shared_ptr<TaskGroup>(new ThreadedTaskGroup{thread_pool, stop_token});
+std::shared_ptr<TaskGroup> TaskGroup::MakeThreaded(Executor* thread_pool,
+ StopToken stop_token) {
+ return std::shared_ptr<TaskGroup>(new ThreadedTaskGroup{thread_pool, stop_token});
}
} // namespace internal
diff --git a/contrib/libs/apache/arrow/cpp/src/arrow/util/task_group.h b/contrib/libs/apache/arrow/cpp/src/arrow/util/task_group.h
index b3692cbcfeb..3bb72f0d9cb 100644
--- a/contrib/libs/apache/arrow/cpp/src/arrow/util/task_group.h
+++ b/contrib/libs/apache/arrow/cpp/src/arrow/util/task_group.h
@@ -21,9 +21,9 @@
#include <utility>
#include "arrow/status.h"
-#include "arrow/type_fwd.h"
-#include "arrow/util/cancel.h"
-#include "arrow/util/functional.h"
+#include "arrow/type_fwd.h"
+#include "arrow/util/cancel.h"
+#include "arrow/util/functional.h"
#include "arrow/util/macros.h"
#include "arrow/util/type_fwd.h"
#include "arrow/util/visibility.h"
@@ -38,18 +38,18 @@ namespace internal {
/// implementation. When Finish() returns, it is guaranteed that all
/// tasks have finished, or at least one has errored.
///
-/// Once an error has occurred, any tasks that are submitted to the task group
-/// will not run. The call to Append will simply return without scheduling the
-/// task.
-///
-/// If the task group is parallel, it is possible that multiple tasks are
-/// running at the same time and one of those tasks fails. This puts the
-/// task group in a failure state (so additional tasks cannot be run); however,
-/// it will not interrupt running tasks. Finish will not complete
-/// until all running tasks have finished, even if one task fails.
-///
-/// Once a task group has finished, new tasks may not be added to it. If you need to
-/// start a new batch of work, create a new task group.
+/// Once an error has occurred, any tasks that are submitted to the task group
+/// will not run. The call to Append will simply return without scheduling the
+/// task.
+///
+/// If the task group is parallel, it is possible that multiple tasks are
+/// running at the same time and one of those tasks fails. This puts the
+/// task group in a failure state (so additional tasks cannot be run); however,
+/// it will not interrupt running tasks. Finish will not complete
+/// until all running tasks have finished, even if one task fails.
+///
+/// Once a task group has finished, new tasks may not be added to it. If you need to
+/// start a new batch of work, create a new task group.
class ARROW_EXPORT TaskGroup : public std::enable_shared_from_this<TaskGroup> {
public:
/// Add a Status-returning function to execute. Execution order is
@@ -65,33 +65,33 @@ class ARROW_EXPORT TaskGroup : public std::enable_shared_from_this<TaskGroup> {
/// task (or subgroup).
virtual Status Finish() = 0;
-  /// Returns a future that will complete the first time all tasks are finished.
-  /// This should be called only after all top-level tasks
-  /// have been added to the task group.
-  ///
-  /// If you are using a TaskGroup asynchronously there are a few considerations to keep
-  /// in mind. The tasks should not block on I/O, etc. (that defeats the purpose of using
-  /// futures) and should not do any nested locking, or you run the risk of tasks
-  /// getting stuck in the thread pool waiting for tasks which cannot get scheduled.
-  ///
-  /// Primarily this call is intended to help migrate existing work written with TaskGroup
-  /// in mind to using futures without having to do a complete conversion on the first
-  /// pass.
-  virtual Future<> FinishAsync() = 0;
-
+  /// Returns a future that will complete the first time all tasks are finished.
+  /// This should be called only after all top-level tasks
+  /// have been added to the task group.
+  ///
+  /// If you are using a TaskGroup asynchronously there are a few considerations to keep
+  /// in mind. The tasks should not block on I/O, etc. (that defeats the purpose of using
+  /// futures) and should not do any nested locking, or you run the risk of tasks
+  /// getting stuck in the thread pool waiting for tasks which cannot get scheduled.
+  ///
+  /// Primarily this call is intended to help migrate existing work written with TaskGroup
+  /// in mind to using futures without having to do a complete conversion on the first
+  /// pass.
+  virtual Future<> FinishAsync() = 0;
+
/// The current aggregate error Status. Non-blocking, useful for stopping early.
virtual Status current_status() = 0;
- /// Whether some tasks have already failed. Non-blocking, useful for stopping early.
- virtual bool ok() const = 0;
+ /// Whether some tasks have already failed. Non-blocking, useful for stopping early.
+ virtual bool ok() const = 0;
/// How many tasks can typically be executed in parallel.
/// This is only a hint, useful for testing or debugging.
virtual int parallelism() = 0;
- static std::shared_ptr<TaskGroup> MakeSerial(StopToken = StopToken::Unstoppable());
- static std::shared_ptr<TaskGroup> MakeThreaded(internal::Executor*,
- StopToken = StopToken::Unstoppable());
+ static std::shared_ptr<TaskGroup> MakeSerial(StopToken = StopToken::Unstoppable());
+ static std::shared_ptr<TaskGroup> MakeThreaded(internal::Executor*,
+ StopToken = StopToken::Unstoppable());
virtual ~TaskGroup() = default;
@@ -99,7 +99,7 @@ class ARROW_EXPORT TaskGroup : public std::enable_shared_from_this<TaskGroup> {
TaskGroup() = default;
ARROW_DISALLOW_COPY_AND_ASSIGN(TaskGroup);
- virtual void AppendReal(FnOnce<Status()> task) = 0;
+ virtual void AppendReal(FnOnce<Status()> task) = 0;
};
} // namespace internal
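A sketch of that contract in use. It assumes the templated TaskGroup::Append wrapper (which forwards to AppendReal) and the process-wide arrow::internal::GetCpuThreadPool() accessor from thread_pool.h; neither appears in this diff, so treat both as assumptions:

#include <iostream>

#include "arrow/status.h"
#include "arrow/util/task_group.h"
#include "arrow/util/thread_pool.h"

int main() {
  // a threaded group runs each task on the given executor
  auto group = arrow::internal::TaskGroup::MakeThreaded(
      arrow::internal::GetCpuThreadPool());

  for (int i = 0; i < 8; ++i) {
    // tasks appended after a failure are silently dropped (see above)
    group->Append([i]() -> arrow::Status {
      return i == 5 ? arrow::Status::Invalid("task ", i, " failed")
                    : arrow::Status::OK();
    });
  }

  // blocks until all running tasks are done, then reports the aggregate status
  std::cout << group->Finish().ToString() << "\n";
  return 0;
}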
diff --git a/contrib/libs/apache/arrow/cpp/src/arrow/util/tdigest.cc b/contrib/libs/apache/arrow/cpp/src/arrow/util/tdigest.cc
index 93527f0c1f7..99b771ca0f2 100644
--- a/contrib/libs/apache/arrow/cpp/src/arrow/util/tdigest.cc
+++ b/contrib/libs/apache/arrow/cpp/src/arrow/util/tdigest.cc
@@ -1,417 +1,417 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-#include "arrow/util/tdigest.h"
-
-#include <algorithm>
-#include <cmath>
-#include <iostream>
-#include <limits>
-#include <queue>
-#include <tuple>
-#include <vector>
-
-#include "arrow/status.h"
-
-#ifndef M_PI
-#define M_PI 3.14159265358979323846
-#endif
-
-namespace arrow {
-namespace internal {
-
-namespace {
-
-// a numerically stable lerp is unbelievably complex
-// but we are *approximating* the quantile, so let's keep it simple
-double Lerp(double a, double b, double t) { return a + t * (b - a); }
-
-// histogram bin
-struct Centroid {
- double mean;
- double weight; // # data points in this bin
-
- // merge with another centroid
- void Merge(const Centroid& centroid) {
- weight += centroid.weight;
- mean += (centroid.mean - mean) * centroid.weight / weight;
- }
-};
-
-// scale function K0: linear function, as baseline
-struct ScalerK0 {
- explicit ScalerK0(uint32_t delta) : delta_norm(delta / 2.0) {}
-
- double K(double q) const { return delta_norm * q; }
- double Q(double k) const { return k / delta_norm; }
-
- const double delta_norm;
-};
-
-// scale function K1
-struct ScalerK1 {
- explicit ScalerK1(uint32_t delta) : delta_norm(delta / (2.0 * M_PI)) {}
-
- double K(double q) const { return delta_norm * std::asin(2 * q - 1); }
- double Q(double k) const { return (std::sin(k / delta_norm) + 1) / 2; }
-
- const double delta_norm;
-};
-
-// implements t-digest merging algorithm
-template <class T = ScalerK1>
-class TDigestMerger : private T {
- public:
- explicit TDigestMerger(uint32_t delta) : T(delta) { Reset(0, nullptr); }
-
- void Reset(double total_weight, std::vector<Centroid>* tdigest) {
- total_weight_ = total_weight;
- tdigest_ = tdigest;
- if (tdigest_) {
- tdigest_->resize(0);
- }
- weight_so_far_ = 0;
- weight_limit_ = -1; // trigger first centroid merge
- }
-
- // merge one centroid from a sorted centroid stream
- void Add(const Centroid& centroid) {
- auto& td = *tdigest_;
- const double weight = weight_so_far_ + centroid.weight;
- if (weight <= weight_limit_) {
- td.back().Merge(centroid);
- } else {
- const double quantile = weight_so_far_ / total_weight_;
- const double next_weight_limit = total_weight_ * this->Q(this->K(quantile) + 1);
- // weight limit should be strictly increasing, until the last centroid
- if (next_weight_limit <= weight_limit_) {
- weight_limit_ = total_weight_;
- } else {
- weight_limit_ = next_weight_limit;
- }
- td.push_back(centroid); // should never exceed capacity and trigger reallocation
- }
- weight_so_far_ = weight;
- }
-
- // validate k-size of a tdigest
- Status Validate(const std::vector<Centroid>& tdigest, double total_weight) const {
- double q_prev = 0, k_prev = this->K(0);
- for (size_t i = 0; i < tdigest.size(); ++i) {
- const double q = q_prev + tdigest[i].weight / total_weight;
- const double k = this->K(q);
- if (tdigest[i].weight != 1 && (k - k_prev) > 1.001) {
- return Status::Invalid("oversized centroid: ", k - k_prev);
- }
- k_prev = k;
- q_prev = q;
- }
- return Status::OK();
- }
-
- private:
- double total_weight_; // total weight of this tdigest
-  double weight_so_far_;  // accumulated weight up to the current bin
- double weight_limit_; // max accumulated weight to move to next bin
- std::vector<Centroid>* tdigest_;
-};
-
-} // namespace
-
-class TDigest::TDigestImpl {
- public:
- explicit TDigestImpl(uint32_t delta)
- : delta_(delta > 10 ? delta : 10), merger_(delta_) {
- tdigests_[0].reserve(delta_);
- tdigests_[1].reserve(delta_);
- Reset();
- }
-
- void Reset() {
- tdigests_[0].resize(0);
- tdigests_[1].resize(0);
- current_ = 0;
- total_weight_ = 0;
- min_ = std::numeric_limits<double>::max();
- max_ = std::numeric_limits<double>::lowest();
- merger_.Reset(0, nullptr);
- }
-
- Status Validate() const {
- // check weight, centroid order
- double total_weight = 0, prev_mean = std::numeric_limits<double>::lowest();
- for (const auto& centroid : tdigests_[current_]) {
- if (std::isnan(centroid.mean) || std::isnan(centroid.weight)) {
- return Status::Invalid("NAN found in tdigest");
- }
- if (centroid.mean < prev_mean) {
- return Status::Invalid("centroid mean decreases");
- }
- if (centroid.weight < 1) {
- return Status::Invalid("invalid centroid weight");
- }
- prev_mean = centroid.mean;
- total_weight += centroid.weight;
- }
- if (total_weight != total_weight_) {
- return Status::Invalid("tdigest total weight mismatch");
- }
- // check if buffer expanded
- if (tdigests_[0].capacity() > delta_ || tdigests_[1].capacity() > delta_) {
- return Status::Invalid("oversized tdigest buffer");
- }
- // check k-size
- return merger_.Validate(tdigests_[current_], total_weight_);
- }
-
- void Dump() const {
- const auto& td = tdigests_[current_];
- for (size_t i = 0; i < td.size(); ++i) {
- std::cerr << i << ": mean = " << td[i].mean << ", weight = " << td[i].weight
- << std::endl;
- }
- std::cerr << "min = " << min_ << ", max = " << max_ << std::endl;
- }
-
- // merge with other tdigests
- void Merge(const std::vector<const TDigestImpl*>& tdigest_impls) {
- // current and end iterator
- using CentroidIter = std::vector<Centroid>::const_iterator;
- using CentroidIterPair = std::pair<CentroidIter, CentroidIter>;
- // use a min-heap to find next minimal centroid from all tdigests
- auto centroid_gt = [](const CentroidIterPair& lhs, const CentroidIterPair& rhs) {
- return lhs.first->mean > rhs.first->mean;
- };
- using CentroidQueue =
- std::priority_queue<CentroidIterPair, std::vector<CentroidIterPair>,
- decltype(centroid_gt)>;
-
- // trivial dynamic memory allocated at runtime
- std::vector<CentroidIterPair> queue_buffer;
- queue_buffer.reserve(tdigest_impls.size() + 1);
- CentroidQueue queue(std::move(centroid_gt), std::move(queue_buffer));
-
- const auto& this_tdigest = tdigests_[current_];
- if (this_tdigest.size() > 0) {
- queue.emplace(this_tdigest.cbegin(), this_tdigest.cend());
- }
- for (const TDigestImpl* td : tdigest_impls) {
- const auto& other_tdigest = td->tdigests_[td->current_];
- if (other_tdigest.size() > 0) {
- queue.emplace(other_tdigest.cbegin(), other_tdigest.cend());
- total_weight_ += td->total_weight_;
- min_ = std::min(min_, td->min_);
- max_ = std::max(max_, td->max_);
- }
- }
-
- merger_.Reset(total_weight_, &tdigests_[1 - current_]);
- CentroidIter current_iter, end_iter;
-    // do k-way merge until only one buffer is left
- while (queue.size() > 1) {
- std::tie(current_iter, end_iter) = queue.top();
- merger_.Add(*current_iter);
- queue.pop();
- if (++current_iter != end_iter) {
- queue.emplace(current_iter, end_iter);
- }
- }
- // merge last buffer
- if (!queue.empty()) {
- std::tie(current_iter, end_iter) = queue.top();
- while (current_iter != end_iter) {
- merger_.Add(*current_iter++);
- }
- }
- merger_.Reset(0, nullptr);
-
- current_ = 1 - current_;
- }
-
- // merge input data with current tdigest
- void MergeInput(std::vector<double>& input) {
- total_weight_ += input.size();
-
- std::sort(input.begin(), input.end());
- min_ = std::min(min_, input.front());
- max_ = std::max(max_, input.back());
-
- // pick next minimal centroid from input and tdigest, feed to merger
- merger_.Reset(total_weight_, &tdigests_[1 - current_]);
- const auto& td = tdigests_[current_];
- uint32_t tdigest_index = 0, input_index = 0;
- while (tdigest_index < td.size() && input_index < input.size()) {
- if (td[tdigest_index].mean < input[input_index]) {
- merger_.Add(td[tdigest_index++]);
- } else {
- merger_.Add(Centroid{input[input_index++], 1});
- }
- }
- while (tdigest_index < td.size()) {
- merger_.Add(td[tdigest_index++]);
- }
- while (input_index < input.size()) {
- merger_.Add(Centroid{input[input_index++], 1});
- }
- merger_.Reset(0, nullptr);
-
- input.resize(0);
- current_ = 1 - current_;
- }
-
- double Quantile(double q) const {
- const auto& td = tdigests_[current_];
-
- if (q < 0 || q > 1 || td.size() == 0) {
- return NAN;
- }
-
- const double index = q * total_weight_;
- if (index <= 1) {
- return min_;
- } else if (index >= total_weight_ - 1) {
- return max_;
- }
-
-    // find the centroid that contains the index
- uint32_t ci = 0;
- double weight_sum = 0;
- for (; ci < td.size(); ++ci) {
- weight_sum += td[ci].weight;
- if (index <= weight_sum) {
- break;
- }
- }
- DCHECK_LT(ci, td.size());
-
- // deviation of index from the centroid center
- double diff = index + td[ci].weight / 2 - weight_sum;
-
-    // index happens to fall in a unit-weight centroid
- if (td[ci].weight == 1 && std::abs(diff) < 0.5) {
- return td[ci].mean;
- }
-
- // find adjacent centroids for interpolation
- uint32_t ci_left = ci, ci_right = ci;
- if (diff > 0) {
- if (ci_right == td.size() - 1) {
- // index larger than center of last bin
- DCHECK_EQ(weight_sum, total_weight_);
- const Centroid* c = &td[ci_right];
- DCHECK_GE(c->weight, 2);
- return Lerp(c->mean, max_, diff / (c->weight / 2));
- }
- ++ci_right;
- } else {
- if (ci_left == 0) {
- // index smaller than center of first bin
- const Centroid* c = &td[0];
- DCHECK_GE(c->weight, 2);
- return Lerp(min_, c->mean, index / (c->weight / 2));
- }
- --ci_left;
- diff += td[ci_left].weight / 2 + td[ci_right].weight / 2;
- }
-
- // interpolate from adjacent centroids
- diff /= (td[ci_left].weight / 2 + td[ci_right].weight / 2);
- return Lerp(td[ci_left].mean, td[ci_right].mean, diff);
- }
-
- double Mean() const {
- double sum = 0;
- for (const auto& centroid : tdigests_[current_]) {
- sum += centroid.mean * centroid.weight;
- }
- return total_weight_ == 0 ? NAN : sum / total_weight_;
- }
-
- double total_weight() const { return total_weight_; }
-
- private:
-  // must be declared before merger_, see constructor initialization list
- const uint32_t delta_;
-
- TDigestMerger<> merger_;
- double total_weight_;
- double min_, max_;
-
- // ping-pong buffer holds two tdigests, size = 2 * delta * sizeof(Centroid)
- std::vector<Centroid> tdigests_[2];
- // index of active tdigest buffer, 0 or 1
- int current_;
-};
-
-TDigest::TDigest(uint32_t delta, uint32_t buffer_size) : impl_(new TDigestImpl(delta)) {
- input_.reserve(buffer_size);
- Reset();
-}
-
-TDigest::~TDigest() = default;
-TDigest::TDigest(TDigest&&) = default;
-TDigest& TDigest::operator=(TDigest&&) = default;
-
-void TDigest::Reset() {
- input_.resize(0);
- impl_->Reset();
-}
-
-Status TDigest::Validate() {
- MergeInput();
- return impl_->Validate();
-}
-
-void TDigest::Dump() {
- MergeInput();
- impl_->Dump();
-}
-
-void TDigest::Merge(std::vector<TDigest>* tdigests) {
- MergeInput();
-
- std::vector<const TDigestImpl*> tdigest_impls;
- tdigest_impls.reserve(tdigests->size());
- for (auto& td : *tdigests) {
- td.MergeInput();
- tdigest_impls.push_back(td.impl_.get());
- }
- impl_->Merge(tdigest_impls);
-}
-
-double TDigest::Quantile(double q) {
- MergeInput();
- return impl_->Quantile(q);
-}
-
-double TDigest::Mean() {
- MergeInput();
- return impl_->Mean();
-}
-
-bool TDigest::is_empty() const {
- return input_.size() == 0 && impl_->total_weight() == 0;
-}
-
-void TDigest::MergeInput() {
- if (input_.size() > 0) {
- impl_->MergeInput(input_); // will mutate input_
- }
-}
-
-} // namespace internal
-} // namespace arrow
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "arrow/util/tdigest.h"
+
+#include <algorithm>
+#include <cmath>
+#include <iostream>
+#include <limits>
+#include <queue>
+#include <tuple>
+#include <vector>
+
+#include "arrow/status.h"
+
+#ifndef M_PI
+#define M_PI 3.14159265358979323846
+#endif
+
+namespace arrow {
+namespace internal {
+
+namespace {
+
+// a numerically stable lerp is unbelievably complex
+// but we are *approximating* the quantile, so let's keep it simple
+double Lerp(double a, double b, double t) { return a + t * (b - a); }
+
+// histogram bin
+struct Centroid {
+ double mean;
+ double weight; // # data points in this bin
+
+ // merge with another centroid
+ void Merge(const Centroid& centroid) {
+ weight += centroid.weight;
+ mean += (centroid.mean - mean) * centroid.weight / weight;
+ }
+};
+
+// scale function K0: linear function, as baseline
+struct ScalerK0 {
+ explicit ScalerK0(uint32_t delta) : delta_norm(delta / 2.0) {}
+
+ double K(double q) const { return delta_norm * q; }
+ double Q(double k) const { return k / delta_norm; }
+
+ const double delta_norm;
+};
+
+// scale function K1
+struct ScalerK1 {
+ explicit ScalerK1(uint32_t delta) : delta_norm(delta / (2.0 * M_PI)) {}
+
+ double K(double q) const { return delta_norm * std::asin(2 * q - 1); }
+ double Q(double k) const { return (std::sin(k / delta_norm) + 1) / 2; }
+
+ const double delta_norm;
+};
+
+// implements t-digest merging algorithm
+template <class T = ScalerK1>
+class TDigestMerger : private T {
+ public:
+ explicit TDigestMerger(uint32_t delta) : T(delta) { Reset(0, nullptr); }
+
+ void Reset(double total_weight, std::vector<Centroid>* tdigest) {
+ total_weight_ = total_weight;
+ tdigest_ = tdigest;
+ if (tdigest_) {
+ tdigest_->resize(0);
+ }
+ weight_so_far_ = 0;
+ weight_limit_ = -1; // trigger first centroid merge
+ }
+
+ // merge one centroid from a sorted centroid stream
+ void Add(const Centroid& centroid) {
+ auto& td = *tdigest_;
+ const double weight = weight_so_far_ + centroid.weight;
+ if (weight <= weight_limit_) {
+ td.back().Merge(centroid);
+ } else {
+ const double quantile = weight_so_far_ / total_weight_;
+ const double next_weight_limit = total_weight_ * this->Q(this->K(quantile) + 1);
+ // weight limit should be strictly increasing, until the last centroid
+ if (next_weight_limit <= weight_limit_) {
+ weight_limit_ = total_weight_;
+ } else {
+ weight_limit_ = next_weight_limit;
+ }
+ td.push_back(centroid); // should never exceed capacity and trigger reallocation
+ }
+ weight_so_far_ = weight;
+ }
+
+ // validate k-size of a tdigest
+ Status Validate(const std::vector<Centroid>& tdigest, double total_weight) const {
+ double q_prev = 0, k_prev = this->K(0);
+ for (size_t i = 0; i < tdigest.size(); ++i) {
+ const double q = q_prev + tdigest[i].weight / total_weight;
+ const double k = this->K(q);
+ if (tdigest[i].weight != 1 && (k - k_prev) > 1.001) {
+ return Status::Invalid("oversized centroid: ", k - k_prev);
+ }
+ k_prev = k;
+ q_prev = q;
+ }
+ return Status::OK();
+ }
+
+ private:
+ double total_weight_; // total weight of this tdigest
+  double weight_so_far_;  // accumulated weight up to the current bin
+ double weight_limit_; // max accumulated weight to move to next bin
+ std::vector<Centroid>* tdigest_;
+};
+
+} // namespace
+
+class TDigest::TDigestImpl {
+ public:
+ explicit TDigestImpl(uint32_t delta)
+ : delta_(delta > 10 ? delta : 10), merger_(delta_) {
+ tdigests_[0].reserve(delta_);
+ tdigests_[1].reserve(delta_);
+ Reset();
+ }
+
+ void Reset() {
+ tdigests_[0].resize(0);
+ tdigests_[1].resize(0);
+ current_ = 0;
+ total_weight_ = 0;
+ min_ = std::numeric_limits<double>::max();
+ max_ = std::numeric_limits<double>::lowest();
+ merger_.Reset(0, nullptr);
+ }
+
+ Status Validate() const {
+ // check weight, centroid order
+ double total_weight = 0, prev_mean = std::numeric_limits<double>::lowest();
+ for (const auto& centroid : tdigests_[current_]) {
+ if (std::isnan(centroid.mean) || std::isnan(centroid.weight)) {
+ return Status::Invalid("NAN found in tdigest");
+ }
+ if (centroid.mean < prev_mean) {
+ return Status::Invalid("centroid mean decreases");
+ }
+ if (centroid.weight < 1) {
+ return Status::Invalid("invalid centroid weight");
+ }
+ prev_mean = centroid.mean;
+ total_weight += centroid.weight;
+ }
+ if (total_weight != total_weight_) {
+ return Status::Invalid("tdigest total weight mismatch");
+ }
+ // check if buffer expanded
+ if (tdigests_[0].capacity() > delta_ || tdigests_[1].capacity() > delta_) {
+ return Status::Invalid("oversized tdigest buffer");
+ }
+ // check k-size
+ return merger_.Validate(tdigests_[current_], total_weight_);
+ }
+
+ void Dump() const {
+ const auto& td = tdigests_[current_];
+ for (size_t i = 0; i < td.size(); ++i) {
+ std::cerr << i << ": mean = " << td[i].mean << ", weight = " << td[i].weight
+ << std::endl;
+ }
+ std::cerr << "min = " << min_ << ", max = " << max_ << std::endl;
+ }
+
+ // merge with other tdigests
+ void Merge(const std::vector<const TDigestImpl*>& tdigest_impls) {
+ // current and end iterator
+ using CentroidIter = std::vector<Centroid>::const_iterator;
+ using CentroidIterPair = std::pair<CentroidIter, CentroidIter>;
+ // use a min-heap to find next minimal centroid from all tdigests
+ auto centroid_gt = [](const CentroidIterPair& lhs, const CentroidIterPair& rhs) {
+ return lhs.first->mean > rhs.first->mean;
+ };
+ using CentroidQueue =
+ std::priority_queue<CentroidIterPair, std::vector<CentroidIterPair>,
+ decltype(centroid_gt)>;
+
+ // trivial dynamic memory allocated at runtime
+ std::vector<CentroidIterPair> queue_buffer;
+ queue_buffer.reserve(tdigest_impls.size() + 1);
+ CentroidQueue queue(std::move(centroid_gt), std::move(queue_buffer));
+
+ const auto& this_tdigest = tdigests_[current_];
+ if (this_tdigest.size() > 0) {
+ queue.emplace(this_tdigest.cbegin(), this_tdigest.cend());
+ }
+ for (const TDigestImpl* td : tdigest_impls) {
+ const auto& other_tdigest = td->tdigests_[td->current_];
+ if (other_tdigest.size() > 0) {
+ queue.emplace(other_tdigest.cbegin(), other_tdigest.cend());
+ total_weight_ += td->total_weight_;
+ min_ = std::min(min_, td->min_);
+ max_ = std::max(max_, td->max_);
+ }
+ }
+
+ merger_.Reset(total_weight_, &tdigests_[1 - current_]);
+ CentroidIter current_iter, end_iter;
+    // do k-way merge until only one buffer is left
+ while (queue.size() > 1) {
+ std::tie(current_iter, end_iter) = queue.top();
+ merger_.Add(*current_iter);
+ queue.pop();
+ if (++current_iter != end_iter) {
+ queue.emplace(current_iter, end_iter);
+ }
+ }
+ // merge last buffer
+ if (!queue.empty()) {
+ std::tie(current_iter, end_iter) = queue.top();
+ while (current_iter != end_iter) {
+ merger_.Add(*current_iter++);
+ }
+ }
+ merger_.Reset(0, nullptr);
+
+ current_ = 1 - current_;
+ }
+
+ // merge input data with current tdigest
+ void MergeInput(std::vector<double>& input) {
+ total_weight_ += input.size();
+
+ std::sort(input.begin(), input.end());
+ min_ = std::min(min_, input.front());
+ max_ = std::max(max_, input.back());
+
+ // pick next minimal centroid from input and tdigest, feed to merger
+ merger_.Reset(total_weight_, &tdigests_[1 - current_]);
+ const auto& td = tdigests_[current_];
+ uint32_t tdigest_index = 0, input_index = 0;
+ while (tdigest_index < td.size() && input_index < input.size()) {
+ if (td[tdigest_index].mean < input[input_index]) {
+ merger_.Add(td[tdigest_index++]);
+ } else {
+ merger_.Add(Centroid{input[input_index++], 1});
+ }
+ }
+ while (tdigest_index < td.size()) {
+ merger_.Add(td[tdigest_index++]);
+ }
+ while (input_index < input.size()) {
+ merger_.Add(Centroid{input[input_index++], 1});
+ }
+ merger_.Reset(0, nullptr);
+
+ input.resize(0);
+ current_ = 1 - current_;
+ }
+
+ double Quantile(double q) const {
+ const auto& td = tdigests_[current_];
+
+ if (q < 0 || q > 1 || td.size() == 0) {
+ return NAN;
+ }
+
+ const double index = q * total_weight_;
+ if (index <= 1) {
+ return min_;
+ } else if (index >= total_weight_ - 1) {
+ return max_;
+ }
+
+    // find the centroid that contains the index
+ uint32_t ci = 0;
+ double weight_sum = 0;
+ for (; ci < td.size(); ++ci) {
+ weight_sum += td[ci].weight;
+ if (index <= weight_sum) {
+ break;
+ }
+ }
+ DCHECK_LT(ci, td.size());
+
+ // deviation of index from the centroid center
+ double diff = index + td[ci].weight / 2 - weight_sum;
+
+    // index happens to fall in a unit-weight centroid
+ if (td[ci].weight == 1 && std::abs(diff) < 0.5) {
+ return td[ci].mean;
+ }
+
+ // find adjacent centroids for interpolation
+ uint32_t ci_left = ci, ci_right = ci;
+ if (diff > 0) {
+ if (ci_right == td.size() - 1) {
+ // index larger than center of last bin
+ DCHECK_EQ(weight_sum, total_weight_);
+ const Centroid* c = &td[ci_right];
+ DCHECK_GE(c->weight, 2);
+ return Lerp(c->mean, max_, diff / (c->weight / 2));
+ }
+ ++ci_right;
+ } else {
+ if (ci_left == 0) {
+ // index smaller than center of first bin
+ const Centroid* c = &td[0];
+ DCHECK_GE(c->weight, 2);
+ return Lerp(min_, c->mean, index / (c->weight / 2));
+ }
+ --ci_left;
+ diff += td[ci_left].weight / 2 + td[ci_right].weight / 2;
+ }
+
+ // interpolate from adjacent centroids
+ diff /= (td[ci_left].weight / 2 + td[ci_right].weight / 2);
+ return Lerp(td[ci_left].mean, td[ci_right].mean, diff);
+ }
+
+ double Mean() const {
+ double sum = 0;
+ for (const auto& centroid : tdigests_[current_]) {
+ sum += centroid.mean * centroid.weight;
+ }
+ return total_weight_ == 0 ? NAN : sum / total_weight_;
+ }
+
+ double total_weight() const { return total_weight_; }
+
+ private:
+  // must be declared before merger_, see constructor initialization list
+ const uint32_t delta_;
+
+ TDigestMerger<> merger_;
+ double total_weight_;
+ double min_, max_;
+
+ // ping-pong buffer holds two tdigests, size = 2 * delta * sizeof(Centroid)
+ std::vector<Centroid> tdigests_[2];
+ // index of active tdigest buffer, 0 or 1
+ int current_;
+};
+
+TDigest::TDigest(uint32_t delta, uint32_t buffer_size) : impl_(new TDigestImpl(delta)) {
+ input_.reserve(buffer_size);
+ Reset();
+}
+
+TDigest::~TDigest() = default;
+TDigest::TDigest(TDigest&&) = default;
+TDigest& TDigest::operator=(TDigest&&) = default;
+
+void TDigest::Reset() {
+ input_.resize(0);
+ impl_->Reset();
+}
+
+Status TDigest::Validate() {
+ MergeInput();
+ return impl_->Validate();
+}
+
+void TDigest::Dump() {
+ MergeInput();
+ impl_->Dump();
+}
+
+void TDigest::Merge(std::vector<TDigest>* tdigests) {
+ MergeInput();
+
+ std::vector<const TDigestImpl*> tdigest_impls;
+ tdigest_impls.reserve(tdigests->size());
+ for (auto& td : *tdigests) {
+ td.MergeInput();
+ tdigest_impls.push_back(td.impl_.get());
+ }
+ impl_->Merge(tdigest_impls);
+}
+
+double TDigest::Quantile(double q) {
+ MergeInput();
+ return impl_->Quantile(q);
+}
+
+double TDigest::Mean() {
+ MergeInput();
+ return impl_->Mean();
+}
+
+bool TDigest::is_empty() const {
+ return input_.size() == 0 && impl_->total_weight() == 0;
+}
+
+void TDigest::MergeInput() {
+ if (input_.size() > 0) {
+ impl_->MergeInput(input_); // will mutate input_
+ }
+}
+
+} // namespace internal
+} // namespace arrow
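To see why the K1 scaler yields small bins at the tails and large ones near the median, here is a tiny standalone sketch, independent of Arrow, that mirrors ScalerK1 above with delta = 100 and prints the q-space width of the bin starting at each quantile:

#include <cmath>
#include <cstdio>

#ifndef M_PI
#define M_PI 3.14159265358979323846
#endif

// mirrors ScalerK1 above with delta = 100, so delta_norm = 100 / (2 * pi)
static double K(double q) { return (100 / (2.0 * M_PI)) * std::asin(2 * q - 1); }
static double Q(double k) { return (std::sin(k / (100 / (2.0 * M_PI))) + 1) / 2; }

int main() {
  // a bin starting at quantile q may grow until Q(K(q) + 1), per Add() above
  for (double q : {0.01, 0.25, 0.50, 0.75, 0.99}) {
    std::printf("q = %.2f  bin width = %.5f\n", q, Q(K(q) + 1) - q);
  }
  return 0;
}

With delta = 100 this prints a width of roughly 0.031 at the median and under 0.01 near the tails, which is what keeps tail quantiles accurate.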
diff --git a/contrib/libs/apache/arrow/cpp/src/arrow/util/tdigest.h b/contrib/libs/apache/arrow/cpp/src/arrow/util/tdigest.h
index 361d176bff4..ae42ce48e7d 100644
--- a/contrib/libs/apache/arrow/cpp/src/arrow/util/tdigest.h
+++ b/contrib/libs/apache/arrow/cpp/src/arrow/util/tdigest.h
@@ -1,103 +1,103 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-// approximate quantiles from an arbitrary-length dataset with O(1) space
-// based on 'Computing Extremely Accurate Quantiles Using t-Digests' by Dunning & Ertl
-// - https://arxiv.org/abs/1902.04023
-// - https://github.com/tdunning/t-digest
-
-#pragma once
-
-#include <cmath>
-#include <memory>
-#include <vector>
-
-#include "arrow/util/logging.h"
-#include "arrow/util/macros.h"
-#include "arrow/util/visibility.h"
-
-namespace arrow {
-
-class Status;
-
-namespace internal {
-
-class ARROW_EXPORT TDigest {
- public:
- explicit TDigest(uint32_t delta = 100, uint32_t buffer_size = 500);
- ~TDigest();
- TDigest(TDigest&&);
- TDigest& operator=(TDigest&&);
-
- // reset and re-use this tdigest
- void Reset();
-
- // validate data integrity
- Status Validate();
-
- // dump internal data, only for debug
- void Dump();
-
-  // buffer a single data point; consume the internal buffer if it is full
-  // this function is called intensively and is performance-critical
-  // call it only if you are sure no NAN exists in the input data
- void Add(double value) {
- DCHECK(!std::isnan(value)) << "cannot add NAN";
- if (ARROW_PREDICT_FALSE(input_.size() == input_.capacity())) {
- MergeInput();
- }
- input_.push_back(value);
- }
-
- // skip NAN on adding
- template <typename T>
- typename std::enable_if<std::is_floating_point<T>::value>::type NanAdd(T value) {
- if (!std::isnan(value)) Add(value);
- }
-
- template <typename T>
- typename std::enable_if<std::is_integral<T>::value>::type NanAdd(T value) {
- Add(static_cast<double>(value));
- }
-
- // merge with other t-digests, called infrequently
- void Merge(std::vector<TDigest>* tdigests);
-
- // calculate quantile
- double Quantile(double q);
-
- double Min() { return Quantile(0); }
- double Max() { return Quantile(1); }
- double Mean();
-
- // check if this tdigest contains no valid data points
- bool is_empty() const;
-
- private:
- // merge input data with current tdigest
- void MergeInput();
-
- // input buffer, size = buffer_size * sizeof(double)
- std::vector<double> input_;
-
- // hide other members with pimpl
- class TDigestImpl;
- std::unique_ptr<TDigestImpl> impl_;
-};
-
-} // namespace internal
-} // namespace arrow
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+// approximate quantiles from an arbitrary-length dataset with O(1) space
+// based on 'Computing Extremely Accurate Quantiles Using t-Digests' by Dunning & Ertl
+// - https://arxiv.org/abs/1902.04023
+// - https://github.com/tdunning/t-digest
+
+#pragma once
+
+#include <cmath>
+#include <memory>
+#include <vector>
+
+#include "arrow/util/logging.h"
+#include "arrow/util/macros.h"
+#include "arrow/util/visibility.h"
+
+namespace arrow {
+
+class Status;
+
+namespace internal {
+
+class ARROW_EXPORT TDigest {
+ public:
+ explicit TDigest(uint32_t delta = 100, uint32_t buffer_size = 500);
+ ~TDigest();
+ TDigest(TDigest&&);
+ TDigest& operator=(TDigest&&);
+
+ // reset and re-use this tdigest
+ void Reset();
+
+ // validate data integrity
+ Status Validate();
+
+ // dump internal data, only for debug
+ void Dump();
+
+  // buffer a single data point; consume the internal buffer if it is full
+  // this function is called intensively and is performance-critical
+  // call it only if you are sure no NAN exists in the input data
+ void Add(double value) {
+ DCHECK(!std::isnan(value)) << "cannot add NAN";
+ if (ARROW_PREDICT_FALSE(input_.size() == input_.capacity())) {
+ MergeInput();
+ }
+ input_.push_back(value);
+ }
+
+ // skip NAN on adding
+ template <typename T>
+ typename std::enable_if<std::is_floating_point<T>::value>::type NanAdd(T value) {
+ if (!std::isnan(value)) Add(value);
+ }
+
+ template <typename T>
+ typename std::enable_if<std::is_integral<T>::value>::type NanAdd(T value) {
+ Add(static_cast<double>(value));
+ }
+
+ // merge with other t-digests, called infrequently
+ void Merge(std::vector<TDigest>* tdigests);
+
+ // calculate quantile
+ double Quantile(double q);
+
+ double Min() { return Quantile(0); }
+ double Max() { return Quantile(1); }
+ double Mean();
+
+ // check if this tdigest contains no valid data points
+ bool is_empty() const;
+
+ private:
+ // merge input data with current tdigest
+ void MergeInput();
+
+ // input buffer, size = buffer_size * sizeof(double)
+ std::vector<double> input_;
+
+ // hide other members with pimpl
+ class TDigestImpl;
+ std::unique_ptr<TDigestImpl> impl_;
+};
+
+} // namespace internal
+} // namespace arrow
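Finally, a minimal end-to-end sketch of this interface — a hypothetical standalone program; the expected outputs assume uniform input, and results are approximate by design:

#include <iostream>

#include "arrow/util/tdigest.h"

int main() {
  arrow::internal::TDigest td;  // delta = 100, buffer_size = 500
  for (int i = 1; i <= 10000; ++i) {
    td.Add(static_cast<double>(i));  // caller guarantees no NAN
  }
  // approximate percentiles of 1..10000
  std::cout << "p50 ~ " << td.Quantile(0.5) << "\n";   // close to 5000
  std::cout << "p99 ~ " << td.Quantile(0.99) << "\n";  // close to 9900
  std::cout << "min = " << td.Min() << ", max = " << td.Max() << "\n";
  return 0;
}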
diff --git a/contrib/libs/apache/arrow/cpp/src/arrow/util/thread_pool.cc b/contrib/libs/apache/arrow/cpp/src/arrow/util/thread_pool.cc
index ee480d0dec9..758295d01ed 100644
--- a/contrib/libs/apache/arrow/cpp/src/arrow/util/thread_pool.cc
+++ b/contrib/libs/apache/arrow/cpp/src/arrow/util/thread_pool.cc
@@ -32,88 +32,88 @@
namespace arrow {
namespace internal {
-Executor::~Executor() = default;
-
-namespace {
-
-struct Task {
- FnOnce<void()> callable;
- StopToken stop_token;
- Executor::StopCallback stop_callback;
-};
-
-} // namespace
-
-struct SerialExecutor::State {
- std::deque<Task> task_queue;
- std::mutex mutex;
- std::condition_variable wait_for_tasks;
- bool finished{false};
-};
-
-SerialExecutor::SerialExecutor() : state_(std::make_shared<State>()) {}
-
-SerialExecutor::~SerialExecutor() = default;
-
-Status SerialExecutor::SpawnReal(TaskHints hints, FnOnce<void()> task,
- StopToken stop_token, StopCallback&& stop_callback) {
- // While the SerialExecutor runs tasks synchronously on its main thread,
- // SpawnReal may be called from external threads (e.g. when transferring back
- // from blocking I/O threads), so we need to keep the state alive *and* to
- // lock its contents.
- //
- // Note that holding the lock while notifying the condition variable may
- // not be sufficient, as some exit paths in the main thread are unlocked.
- auto state = state_;
- {
- std::lock_guard<std::mutex> lk(state->mutex);
- state->task_queue.push_back(
- Task{std::move(task), std::move(stop_token), std::move(stop_callback)});
- }
- state->wait_for_tasks.notify_one();
- return Status::OK();
-}
-
-void SerialExecutor::MarkFinished() {
- // Same comment as SpawnReal above
- auto state = state_;
- {
- std::lock_guard<std::mutex> lk(state->mutex);
- state->finished = true;
- }
- state->wait_for_tasks.notify_one();
-}
-
-void SerialExecutor::RunLoop() {
- // This is called from the SerialExecutor's main thread, so the
- // state is guaranteed to be kept alive.
- std::unique_lock<std::mutex> lk(state_->mutex);
-
- while (!state_->finished) {
- while (!state_->task_queue.empty()) {
- Task task = std::move(state_->task_queue.front());
- state_->task_queue.pop_front();
- lk.unlock();
- if (!task.stop_token.IsStopRequested()) {
- std::move(task.callable)();
- } else {
- if (task.stop_callback) {
- std::move(task.stop_callback)(task.stop_token.Poll());
- }
- // Can't break here because there may be cleanup tasks down the chain we still
- // need to run.
- }
- lk.lock();
- }
- // In this case we must be waiting on work from external (e.g. I/O) executors. Wait
- // for tasks to arrive (typically via transferred futures).
- state_->wait_for_tasks.wait(
- lk, [&] { return state_->finished || !state_->task_queue.empty(); });
- }
-}
-
+Executor::~Executor() = default;
+
+namespace {
+
+struct Task {
+ FnOnce<void()> callable;
+ StopToken stop_token;
+ Executor::StopCallback stop_callback;
+};
+
+} // namespace
+
+struct SerialExecutor::State {
+ std::deque<Task> task_queue;
+ std::mutex mutex;
+ std::condition_variable wait_for_tasks;
+ bool finished{false};
+};
+
+SerialExecutor::SerialExecutor() : state_(std::make_shared<State>()) {}
+
+SerialExecutor::~SerialExecutor() = default;
+
+Status SerialExecutor::SpawnReal(TaskHints hints, FnOnce<void()> task,
+ StopToken stop_token, StopCallback&& stop_callback) {
+ // While the SerialExecutor runs tasks synchronously on its main thread,
+ // SpawnReal may be called from external threads (e.g. when transferring back
+ // from blocking I/O threads), so we need to keep the state alive *and* to
+ // lock its contents.
+ //
+ // Note that holding the lock while notifying the condition variable may
+ // not be sufficient, as some exit paths in the main thread are unlocked.
+ auto state = state_;
+ {
+ std::lock_guard<std::mutex> lk(state->mutex);
+ state->task_queue.push_back(
+ Task{std::move(task), std::move(stop_token), std::move(stop_callback)});
+ }
+ state->wait_for_tasks.notify_one();
+ return Status::OK();
+}
+
+void SerialExecutor::MarkFinished() {
+ // Same comment as SpawnReal above
+ auto state = state_;
+ {
+ std::lock_guard<std::mutex> lk(state->mutex);
+ state->finished = true;
+ }
+ state->wait_for_tasks.notify_one();
+}
+
+void SerialExecutor::RunLoop() {
+ // This is called from the SerialExecutor's main thread, so the
+ // state is guaranteed to be kept alive.
+ std::unique_lock<std::mutex> lk(state_->mutex);
+
+ while (!state_->finished) {
+ while (!state_->task_queue.empty()) {
+ Task task = std::move(state_->task_queue.front());
+ state_->task_queue.pop_front();
+ lk.unlock();
+ if (!task.stop_token.IsStopRequested()) {
+ std::move(task.callable)();
+ } else {
+ if (task.stop_callback) {
+ std::move(task.stop_callback)(task.stop_token.Poll());
+ }
+ // Can't break here because there may be cleanup tasks down the chain we still
+ // need to run.
+ }
+ lk.lock();
+ }
+ // In this case we must be waiting on work from external (e.g. I/O) executors. Wait
+ // for tasks to arrive (typically via transferred futures).
+ state_->wait_for_tasks.wait(
+ lk, [&] { return state_->finished || !state_->task_queue.empty(); });
+ }
+}
+
struct ThreadPool::State {
- State() = default;
+ State() = default;
// NOTE: in case locking becomes too expensive, we can investigate lock-free FIFOs
// such as https://github.com/cameron314/concurrentqueue
@@ -125,17 +125,17 @@ struct ThreadPool::State {
std::list<std::thread> workers_;
// Trashcan for finished threads
std::vector<std::thread> finished_workers_;
- std::deque<Task> pending_tasks_;
+ std::deque<Task> pending_tasks_;
// Desired number of threads
- int desired_capacity_ = 0;
-
- // Total number of tasks that are either queued or running
- int tasks_queued_or_running_ = 0;
-
+ int desired_capacity_ = 0;
+
+ // Total number of tasks that are either queued or running
+ int tasks_queued_or_running_ = 0;
+
// Are we shutting down?
- bool please_shutdown_ = false;
- bool quick_shutdown_ = false;
+ bool please_shutdown_ = false;
+ bool quick_shutdown_ = false;
};
// The worker loop is an independent function so that it can keep running
@@ -165,24 +165,24 @@ static void WorkerLoop(std::shared_ptr<ThreadPool::State> state,
if (should_secede()) {
break;
}
-
- DCHECK_GE(state->tasks_queued_or_running_, 0);
+
+ DCHECK_GE(state->tasks_queued_or_running_, 0);
{
- Task task = std::move(state->pending_tasks_.front());
+ Task task = std::move(state->pending_tasks_.front());
state->pending_tasks_.pop_front();
- StopToken* stop_token = &task.stop_token;
+ StopToken* stop_token = &task.stop_token;
lock.unlock();
- if (!stop_token->IsStopRequested()) {
- std::move(task.callable)();
- } else {
- if (task.stop_callback) {
- std::move(task.stop_callback)(stop_token->Poll());
- }
- }
- ARROW_UNUSED(std::move(task)); // release resources before waiting for lock
- lock.lock();
+ if (!stop_token->IsStopRequested()) {
+ std::move(task.callable)();
+ } else {
+ if (task.stop_callback) {
+ std::move(task.stop_callback)(stop_token->Poll());
+ }
+ }
+ ARROW_UNUSED(std::move(task)); // release resources before waiting for lock
+ lock.lock();
}
- state->tasks_queued_or_running_--;
+ state->tasks_queued_or_running_--;
}
// Now either the queue is empty *or* a quick shutdown was requested
if (state->please_shutdown_ || should_secede()) {
@@ -191,7 +191,7 @@ static void WorkerLoop(std::shared_ptr<ThreadPool::State> state,
// Wait for next wakeup
state->cv_.wait(lock);
}
- DCHECK_GE(state->tasks_queued_or_running_, 0);
+ DCHECK_GE(state->tasks_queued_or_running_, 0);
// We're done. Move our thread object to the trashcan of finished
// workers. This has two motivations:
@@ -262,14 +262,14 @@ Status ThreadPool::SetCapacity(int threads) {
CollectFinishedWorkersUnlocked();
state_->desired_capacity_ = threads;
- // See if we need to increase or decrease the number of running threads
- const int required = std::min(static_cast<int>(state_->pending_tasks_.size()),
- threads - static_cast<int>(state_->workers_.size()));
- if (required > 0) {
- // Some tasks are pending, spawn the number of needed threads immediately
- LaunchWorkersUnlocked(required);
- } else if (required < 0) {
- // Excess threads are running, wake them so that they stop
+ // See if we need to increase or decrease the number of running threads
+ const int required = std::min(static_cast<int>(state_->pending_tasks_.size()),
+ threads - static_cast<int>(state_->workers_.size()));
+ if (required > 0) {
+ // Some tasks are pending, spawn the number of needed threads immediately
+ LaunchWorkersUnlocked(required);
+ } else if (required < 0) {
+ // Excess threads are running, wake them so that they stop
state_->cv_.notify_all();
}
return Status::OK();
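
A worked example of the `required` arithmetic in the hunk above (values are illustrative):

    // pending_tasks_.size() == 5, workers_.size() == 2, threads == 4:
    //   required = min(5, 4 - 2) = 2   -> LaunchWorkersUnlocked(2)
    // pending_tasks_.size() == 5, workers_.size() == 2, threads == 1:
    //   required = min(5, 1 - 2) = -1  -> cv_.notify_all() so excess workers exit
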
@@ -281,12 +281,12 @@ int ThreadPool::GetCapacity() {
return state_->desired_capacity_;
}
-int ThreadPool::GetNumTasks() {
- ProtectAgainstFork();
- std::unique_lock<std::mutex> lock(state_->mutex_);
- return state_->tasks_queued_or_running_;
-}
-
+int ThreadPool::GetNumTasks() {
+ ProtectAgainstFork();
+ std::unique_lock<std::mutex> lock(state_->mutex_);
+ return state_->tasks_queued_or_running_;
+}
+
int ThreadPool::GetActualCapacity() {
ProtectAgainstFork();
std::unique_lock<std::mutex> lock(state_->mutex_);
@@ -321,25 +321,25 @@ void ThreadPool::CollectFinishedWorkersUnlocked() {
state_->finished_workers_.clear();
}
-thread_local ThreadPool* current_thread_pool_ = nullptr;
-
-bool ThreadPool::OwnsThisThread() { return current_thread_pool_ == this; }
-
+thread_local ThreadPool* current_thread_pool_ = nullptr;
+
+bool ThreadPool::OwnsThisThread() { return current_thread_pool_ == this; }
+
void ThreadPool::LaunchWorkersUnlocked(int threads) {
std::shared_ptr<State> state = sp_state_;
for (int i = 0; i < threads; i++) {
state_->workers_.emplace_back();
auto it = --(state_->workers_.end());
- *it = std::thread([this, state, it] {
- current_thread_pool_ = this;
- WorkerLoop(state, it);
- });
+ *it = std::thread([this, state, it] {
+ current_thread_pool_ = this;
+ WorkerLoop(state, it);
+ });
}
}
-Status ThreadPool::SpawnReal(TaskHints hints, FnOnce<void()> task, StopToken stop_token,
- StopCallback&& stop_callback) {
+Status ThreadPool::SpawnReal(TaskHints hints, FnOnce<void()> task, StopToken stop_token,
+ StopCallback&& stop_callback) {
{
ProtectAgainstFork();
std::lock_guard<std::mutex> lock(state_->mutex_);
@@ -347,14 +347,14 @@ Status ThreadPool::SpawnReal(TaskHints hints, FnOnce<void()> task, StopToken sto
return Status::Invalid("operation forbidden during or after shutdown");
}
CollectFinishedWorkersUnlocked();
- state_->tasks_queued_or_running_++;
- if (static_cast<int>(state_->workers_.size()) < state_->tasks_queued_or_running_ &&
- state_->desired_capacity_ > static_cast<int>(state_->workers_.size())) {
-    // We can still spin up more workers, so spin up a new one
- LaunchWorkersUnlocked(/*threads=*/1);
- }
- state_->pending_tasks_.push_back(
- {std::move(task), std::move(stop_token), std::move(stop_callback)});
+ state_->tasks_queued_or_running_++;
+ if (static_cast<int>(state_->workers_.size()) < state_->tasks_queued_or_running_ &&
+ state_->desired_capacity_ > static_cast<int>(state_->workers_.size())) {
+      // We can still spin up more workers, so spin up a new one
+ LaunchWorkersUnlocked(/*threads=*/1);
+ }
+ state_->pending_tasks_.push_back(
+ {std::move(task), std::move(stop_token), std::move(stop_callback)});
}
state_->cv_.notify_one();
return Status::OK();
diff --git a/contrib/libs/apache/arrow/cpp/src/arrow/util/thread_pool.h b/contrib/libs/apache/arrow/cpp/src/arrow/util/thread_pool.h
index 5d866601ab1..9ac8e36a3d8 100644
--- a/contrib/libs/apache/arrow/cpp/src/arrow/util/thread_pool.h
+++ b/contrib/libs/apache/arrow/cpp/src/arrow/util/thread_pool.h
@@ -23,14 +23,14 @@
#include <cstdint>
#include <memory>
-#include <queue>
+#include <queue>
#include <type_traits>
#include <utility>
#include "arrow/result.h"
#include "arrow/status.h"
-#include "arrow/util/cancel.h"
-#include "arrow/util/functional.h"
+#include "arrow/util/cancel.h"
+#include "arrow/util/functional.h"
#include "arrow/util/future.h"
#include "arrow/util/macros.h"
#include "arrow/util/visibility.h"
@@ -76,229 +76,229 @@ struct TaskHints {
class ARROW_EXPORT Executor {
public:
- using StopCallback = internal::FnOnce<void(const Status&)>;
-
+ using StopCallback = internal::FnOnce<void(const Status&)>;
+
virtual ~Executor();
// Spawn a fire-and-forget task.
template <typename Function>
Status Spawn(Function&& func) {
- return SpawnReal(TaskHints{}, std::forward<Function>(func), StopToken::Unstoppable(),
- StopCallback{});
+ return SpawnReal(TaskHints{}, std::forward<Function>(func), StopToken::Unstoppable(),
+ StopCallback{});
+ }
+ template <typename Function>
+ Status Spawn(Function&& func, StopToken stop_token) {
+ return SpawnReal(TaskHints{}, std::forward<Function>(func), std::move(stop_token),
+ StopCallback{});
}
template <typename Function>
- Status Spawn(Function&& func, StopToken stop_token) {
- return SpawnReal(TaskHints{}, std::forward<Function>(func), std::move(stop_token),
- StopCallback{});
- }
- template <typename Function>
Status Spawn(TaskHints hints, Function&& func) {
- return SpawnReal(hints, std::forward<Function>(func), StopToken::Unstoppable(),
- StopCallback{});
+ return SpawnReal(hints, std::forward<Function>(func), StopToken::Unstoppable(),
+ StopCallback{});
+ }
+ template <typename Function>
+ Status Spawn(TaskHints hints, Function&& func, StopToken stop_token) {
+ return SpawnReal(hints, std::forward<Function>(func), std::move(stop_token),
+ StopCallback{});
+ }
+ template <typename Function>
+ Status Spawn(TaskHints hints, Function&& func, StopToken stop_token,
+ StopCallback stop_callback) {
+ return SpawnReal(hints, std::forward<Function>(func), std::move(stop_token),
+ std::move(stop_callback));
+ }
+
+ // Transfers a future to this executor. Any continuations added to the
+ // returned future will run in this executor. Otherwise they would run
+ // on the same thread that called MarkFinished.
+ //
+ // This is necessary when (for example) an I/O task is completing a future.
+ // The continuations of that future should run on the CPU thread pool keeping
+ // CPU heavy work off the I/O thread pool. So the I/O task should transfer
+ // the future to the CPU executor before returning.
+ //
+ // By default this method will only transfer if the future is not already completed. If
+ // the future is already completed then any callback would be run synchronously and so
+ // no transfer is typically necessary. However, in cases where you want to force a
+ // transfer (e.g. to help the scheduler break up units of work across multiple cores)
+ // then you can override this behavior with `always_transfer`.
+ template <typename T>
+ Future<T> Transfer(Future<T> future) {
+ return DoTransfer(std::move(future), false);
}
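
The Transfer comment above describes handing continuations from an I/O thread to the CPU pool. A minimal sketch of that pattern, assuming a caller that already holds an I/O-produced future; the callback's Result parameter follows the Future<T>::SyncType convention used later in this header:

    #include <memory>
    #include "arrow/buffer.h"
    #include "arrow/util/future.h"
    #include "arrow/util/thread_pool.h"

    // io_future is assumed to be completed by an I/O thread elsewhere.
    void ScheduleOnCpu(arrow::Future<std::shared_ptr<arrow::Buffer>> io_future) {
      auto on_cpu = arrow::internal::GetCpuThreadPool()->Transfer(std::move(io_future));
      on_cpu.AddCallback([](const arrow::Result<std::shared_ptr<arrow::Buffer>>& res) {
        // CPU-heavy continuation: runs on the CPU pool, not the I/O thread.
      });
    }
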
- template <typename Function>
- Status Spawn(TaskHints hints, Function&& func, StopToken stop_token) {
- return SpawnReal(hints, std::forward<Function>(func), std::move(stop_token),
- StopCallback{});
- }
- template <typename Function>
- Status Spawn(TaskHints hints, Function&& func, StopToken stop_token,
- StopCallback stop_callback) {
- return SpawnReal(hints, std::forward<Function>(func), std::move(stop_token),
- std::move(stop_callback));
- }
-
- // Transfers a future to this executor. Any continuations added to the
- // returned future will run in this executor. Otherwise they would run
- // on the same thread that called MarkFinished.
- //
- // This is necessary when (for example) an I/O task is completing a future.
- // The continuations of that future should run on the CPU thread pool keeping
- // CPU heavy work off the I/O thread pool. So the I/O task should transfer
- // the future to the CPU executor before returning.
- //
- // By default this method will only transfer if the future is not already completed. If
- // the future is already completed then any callback would be run synchronously and so
- // no transfer is typically necessary. However, in cases where you want to force a
- // transfer (e.g. to help the scheduler break up units of work across multiple cores)
- // then you can override this behavior with `always_transfer`.
- template <typename T>
- Future<T> Transfer(Future<T> future) {
- return DoTransfer(std::move(future), false);
- }
-
- // Overload of Transfer which will always schedule callbacks on new threads even if the
- // future is finished when the callback is added.
- //
- // This can be useful in cases where you want to ensure parallelism
- template <typename T>
- Future<T> TransferAlways(Future<T> future) {
- return DoTransfer(std::move(future), true);
- }
-
+
+ // Overload of Transfer which will always schedule callbacks on new threads even if the
+ // future is finished when the callback is added.
+ //
+ // This can be useful in cases where you want to ensure parallelism
+ template <typename T>
+ Future<T> TransferAlways(Future<T> future) {
+ return DoTransfer(std::move(future), true);
+ }
+
// Submit a callable and arguments for execution. Return a future that
// will yield the callable's result value once it completes.
// The callable's arguments are copied before execution.
- template <typename Function, typename... Args,
- typename FutureType = typename ::arrow::detail::ContinueFuture::ForSignature<
- Function && (Args && ...)>>
- Result<FutureType> Submit(TaskHints hints, StopToken stop_token, Function&& func,
- Args&&... args) {
- using ValueType = typename FutureType::ValueType;
-
- auto future = FutureType::Make();
- auto task = std::bind(::arrow::detail::ContinueFuture{}, future,
- std::forward<Function>(func), std::forward<Args>(args)...);
- struct {
- WeakFuture<ValueType> weak_fut;
-
- void operator()(const Status& st) {
- auto fut = weak_fut.get();
- if (fut.is_valid()) {
- fut.MarkFinished(st);
- }
- }
- } stop_callback{WeakFuture<ValueType>(future)};
- ARROW_RETURN_NOT_OK(SpawnReal(hints, std::move(task), std::move(stop_token),
- std::move(stop_callback)));
+ template <typename Function, typename... Args,
+ typename FutureType = typename ::arrow::detail::ContinueFuture::ForSignature<
+ Function && (Args && ...)>>
+ Result<FutureType> Submit(TaskHints hints, StopToken stop_token, Function&& func,
+ Args&&... args) {
+ using ValueType = typename FutureType::ValueType;
+
+ auto future = FutureType::Make();
+ auto task = std::bind(::arrow::detail::ContinueFuture{}, future,
+ std::forward<Function>(func), std::forward<Args>(args)...);
+ struct {
+ WeakFuture<ValueType> weak_fut;
+
+ void operator()(const Status& st) {
+ auto fut = weak_fut.get();
+ if (fut.is_valid()) {
+ fut.MarkFinished(st);
+ }
+ }
+ } stop_callback{WeakFuture<ValueType>(future)};
+ ARROW_RETURN_NOT_OK(SpawnReal(hints, std::move(task), std::move(stop_token),
+ std::move(stop_callback)));
return future;
}
- template <typename Function, typename... Args,
- typename FutureType = typename ::arrow::detail::ContinueFuture::ForSignature<
- Function && (Args && ...)>>
- Result<FutureType> Submit(StopToken stop_token, Function&& func, Args&&... args) {
- return Submit(TaskHints{}, stop_token, std::forward<Function>(func),
- std::forward<Args>(args)...);
+ template <typename Function, typename... Args,
+ typename FutureType = typename ::arrow::detail::ContinueFuture::ForSignature<
+ Function && (Args && ...)>>
+ Result<FutureType> Submit(StopToken stop_token, Function&& func, Args&&... args) {
+ return Submit(TaskHints{}, stop_token, std::forward<Function>(func),
+ std::forward<Args>(args)...);
+ }
+
+ template <typename Function, typename... Args,
+ typename FutureType = typename ::arrow::detail::ContinueFuture::ForSignature<
+ Function && (Args && ...)>>
+ Result<FutureType> Submit(TaskHints hints, Function&& func, Args&&... args) {
+ return Submit(std::move(hints), StopToken::Unstoppable(),
+ std::forward<Function>(func), std::forward<Args>(args)...);
+ }
+
+ template <typename Function, typename... Args,
+ typename FutureType = typename ::arrow::detail::ContinueFuture::ForSignature<
+ Function && (Args && ...)>>
+ Result<FutureType> Submit(Function&& func, Args&&... args) {
+ return Submit(TaskHints{}, StopToken::Unstoppable(), std::forward<Function>(func),
+ std::forward<Args>(args)...);
}
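
The Submit overloads above wrap a callable into a task that completes a future. A minimal usage sketch against the global CPU pool declared later in this diff; the blocking fut.result() call is an assumption based on Arrow's Future API and is for demonstration only:

    #include "arrow/result.h"
    #include "arrow/util/thread_pool.h"

    arrow::Result<int> SubmitDemo() {
      auto* pool = arrow::internal::GetCpuThreadPool();
      ARROW_ASSIGN_OR_RAISE(arrow::Future<int> fut, pool->Submit([] { return 40 + 2; }));
      return fut.result();  // blocks until the task ran; fine in a test, never in a pool task
    }
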
- template <typename Function, typename... Args,
- typename FutureType = typename ::arrow::detail::ContinueFuture::ForSignature<
- Function && (Args && ...)>>
- Result<FutureType> Submit(TaskHints hints, Function&& func, Args&&... args) {
- return Submit(std::move(hints), StopToken::Unstoppable(),
- std::forward<Function>(func), std::forward<Args>(args)...);
- }
-
- template <typename Function, typename... Args,
- typename FutureType = typename ::arrow::detail::ContinueFuture::ForSignature<
- Function && (Args && ...)>>
- Result<FutureType> Submit(Function&& func, Args&&... args) {
- return Submit(TaskHints{}, StopToken::Unstoppable(), std::forward<Function>(func),
- std::forward<Args>(args)...);
- }
-
// Return the level of parallelism (the number of tasks that may be executed
// concurrently). This may be an approximate number.
virtual int GetCapacity() = 0;
- // Return true if the thread from which this function is called is owned by this
- // Executor. Returns false if this Executor does not support this property.
- virtual bool OwnsThisThread() { return false; }
-
+ // Return true if the thread from which this function is called is owned by this
+ // Executor. Returns false if this Executor does not support this property.
+ virtual bool OwnsThisThread() { return false; }
+
protected:
ARROW_DISALLOW_COPY_AND_ASSIGN(Executor);
Executor() = default;
- template <typename T, typename FT = Future<T>, typename FTSync = typename FT::SyncType>
- Future<T> DoTransfer(Future<T> future, bool always_transfer = false) {
- auto transferred = Future<T>::Make();
- if (always_transfer) {
- CallbackOptions callback_options = CallbackOptions::Defaults();
- callback_options.should_schedule = ShouldSchedule::Always;
- callback_options.executor = this;
- auto sync_callback = [transferred](const FTSync& result) mutable {
- transferred.MarkFinished(result);
- };
- future.AddCallback(sync_callback, callback_options);
- return transferred;
- }
-
- // We could use AddCallback's ShouldSchedule::IfUnfinished but we can save a bit of
- // work by doing the test here.
- auto callback = [this, transferred](const FTSync& result) mutable {
- auto spawn_status =
- Spawn([transferred, result]() mutable { transferred.MarkFinished(result); });
- if (!spawn_status.ok()) {
- transferred.MarkFinished(spawn_status);
- }
- };
- auto callback_factory = [&callback]() { return callback; };
- if (future.TryAddCallback(callback_factory)) {
- return transferred;
- }
- // If the future is already finished and we aren't going to force spawn a thread
- // then we don't need to add another layer of callback and can return the original
- // future
- return future;
- }
-
+ template <typename T, typename FT = Future<T>, typename FTSync = typename FT::SyncType>
+ Future<T> DoTransfer(Future<T> future, bool always_transfer = false) {
+ auto transferred = Future<T>::Make();
+ if (always_transfer) {
+ CallbackOptions callback_options = CallbackOptions::Defaults();
+ callback_options.should_schedule = ShouldSchedule::Always;
+ callback_options.executor = this;
+ auto sync_callback = [transferred](const FTSync& result) mutable {
+ transferred.MarkFinished(result);
+ };
+ future.AddCallback(sync_callback, callback_options);
+ return transferred;
+ }
+
+ // We could use AddCallback's ShouldSchedule::IfUnfinished but we can save a bit of
+ // work by doing the test here.
+ auto callback = [this, transferred](const FTSync& result) mutable {
+ auto spawn_status =
+ Spawn([transferred, result]() mutable { transferred.MarkFinished(result); });
+ if (!spawn_status.ok()) {
+ transferred.MarkFinished(spawn_status);
+ }
+ };
+ auto callback_factory = [&callback]() { return callback; };
+ if (future.TryAddCallback(callback_factory)) {
+ return transferred;
+ }
+ // If the future is already finished and we aren't going to force spawn a thread
+ // then we don't need to add another layer of callback and can return the original
+ // future
+ return future;
+ }
+
// Subclassing API
- virtual Status SpawnReal(TaskHints hints, FnOnce<void()> task, StopToken,
- StopCallback&&) = 0;
+ virtual Status SpawnReal(TaskHints hints, FnOnce<void()> task, StopToken,
+ StopCallback&&) = 0;
};
-/// \brief An executor implementation that runs all tasks on a single thread using an
-/// event loop.
-///
-/// Note: Any sort of nested parallelism will deadlock this executor. Blocking waits are
-/// fine but if one task needs to wait for another task it must be expressed as an
-/// asynchronous continuation.
-class ARROW_EXPORT SerialExecutor : public Executor {
- public:
- template <typename T = ::arrow::internal::Empty>
- using TopLevelTask = internal::FnOnce<Future<T>(Executor*)>;
-
- ~SerialExecutor() override;
-
- int GetCapacity() override { return 1; };
- Status SpawnReal(TaskHints hints, FnOnce<void()> task, StopToken,
- StopCallback&&) override;
-
- /// \brief Runs the TopLevelTask and any scheduled tasks
- ///
- /// The TopLevelTask (or one of the tasks it schedules) must either return an invalid
- /// status or call the finish signal. Failure to do this will result in a deadlock. For
- /// this reason it is preferable (if possible) to use the helper methods (below)
-  /// RunSynchronously/RunSerially which delegate the responsibility to a Future
-  /// producer's existing obligation to always mark a future finished (which can
- /// someday be aided by ARROW-12207).
- template <typename T = internal::Empty, typename FT = Future<T>,
- typename FTSync = typename FT::SyncType>
- static FTSync RunInSerialExecutor(TopLevelTask<T> initial_task) {
- Future<T> fut = SerialExecutor().Run<T>(std::move(initial_task));
- return FutureToSync(fut);
- }
-
- private:
- SerialExecutor();
-
- // State uses mutex
- struct State;
- std::shared_ptr<State> state_;
-
- template <typename T, typename FTSync = typename Future<T>::SyncType>
- Future<T> Run(TopLevelTask<T> initial_task) {
- auto final_fut = std::move(initial_task)(this);
- if (final_fut.is_finished()) {
- return final_fut;
- }
- final_fut.AddCallback([this](const FTSync&) { MarkFinished(); });
- RunLoop();
- return final_fut;
- }
- void RunLoop();
- void MarkFinished();
-};
-
-/// An Executor implementation spawning tasks in a FIFO manner on a fixed-size
-/// pool of worker threads.
-///
-/// Note: Any sort of nested parallelism will deadlock this executor. Blocking waits are
-/// fine but if one task needs to wait for another task it must be expressed as an
-/// asynchronous continuation.
+/// \brief An executor implementation that runs all tasks on a single thread using an
+/// event loop.
+///
+/// Note: Any sort of nested parallelism will deadlock this executor. Blocking waits are
+/// fine but if one task needs to wait for another task it must be expressed as an
+/// asynchronous continuation.
+class ARROW_EXPORT SerialExecutor : public Executor {
+ public:
+ template <typename T = ::arrow::internal::Empty>
+ using TopLevelTask = internal::FnOnce<Future<T>(Executor*)>;
+
+ ~SerialExecutor() override;
+
+ int GetCapacity() override { return 1; };
+ Status SpawnReal(TaskHints hints, FnOnce<void()> task, StopToken,
+ StopCallback&&) override;
+
+ /// \brief Runs the TopLevelTask and any scheduled tasks
+ ///
+ /// The TopLevelTask (or one of the tasks it schedules) must either return an invalid
+ /// status or call the finish signal. Failure to do this will result in a deadlock. For
+ /// this reason it is preferable (if possible) to use the helper methods (below)
+  /// RunSynchronously/RunSerially which delegate the responsibility to a Future
+  /// producer's existing obligation to always mark a future finished (which can
+ /// someday be aided by ARROW-12207).
+ template <typename T = internal::Empty, typename FT = Future<T>,
+ typename FTSync = typename FT::SyncType>
+ static FTSync RunInSerialExecutor(TopLevelTask<T> initial_task) {
+ Future<T> fut = SerialExecutor().Run<T>(std::move(initial_task));
+ return FutureToSync(fut);
+ }
+
+ private:
+ SerialExecutor();
+
+ // State uses mutex
+ struct State;
+ std::shared_ptr<State> state_;
+
+ template <typename T, typename FTSync = typename Future<T>::SyncType>
+ Future<T> Run(TopLevelTask<T> initial_task) {
+ auto final_fut = std::move(initial_task)(this);
+ if (final_fut.is_finished()) {
+ return final_fut;
+ }
+ final_fut.AddCallback([this](const FTSync&) { MarkFinished(); });
+ RunLoop();
+ return final_fut;
+ }
+ void RunLoop();
+ void MarkFinished();
+};
+
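
The class comment above warns that some task must finish the returned future or RunLoop never exits. A minimal sketch of the contract, assuming a producer that immediately finishes its future:

    #include "arrow/result.h"
    #include "arrow/util/future.h"
    #include "arrow/util/thread_pool.h"

    arrow::Result<int> RunOnSingleThread() {
      return arrow::internal::SerialExecutor::RunInSerialExecutor<int>(
          [](arrow::internal::Executor* executor) -> arrow::Future<int> {
            // Any follow-up tasks must be scheduled on `executor`, and some
            // task must finish the returned future, or the run loop never exits.
            return arrow::Future<int>::MakeFinished(7);
          });
    }
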
+/// An Executor implementation spawning tasks in a FIFO manner on a fixed-size
+/// pool of worker threads.
+///
+/// Note: Any sort of nested parallelism will deadlock this executor. Blocking waits are
+/// fine but if one task needs to wait for another task it must be expressed as an
+/// asynchronous continuation.
class ARROW_EXPORT ThreadPool : public Executor {
public:
// Construct a thread pool with the given number of worker threads
@@ -309,25 +309,25 @@ class ARROW_EXPORT ThreadPool : public Executor {
static Result<std::shared_ptr<ThreadPool>> MakeEternal(int threads);
// Destroy thread pool; the pool will first be shut down
- ~ThreadPool() override;
+ ~ThreadPool() override;
// Return the desired number of worker threads.
// The actual number of workers may lag a bit before being adjusted to
// match this value.
int GetCapacity() override;
- bool OwnsThisThread() override;
-
- // Return the number of tasks either running or in the queue.
- int GetNumTasks();
-
+ bool OwnsThisThread() override;
+
+ // Return the number of tasks either running or in the queue.
+ int GetNumTasks();
+
// Dynamically change the number of worker threads.
- //
- // This function always returns immediately.
- // If fewer threads are running than this number, new threads are spawned
- // on-demand when needed for task execution.
- // If more threads are running than this number, excess threads are reaped
- // as soon as possible.
+ //
+ // This function always returns immediately.
+ // If fewer threads are running than this number, new threads are spawned
+ // on-demand when needed for task execution.
+ // If more threads are running than this number, excess threads are reaped
+ // as soon as possible.
Status SetCapacity(int threads);
// Heuristic for the default capacity of a thread pool for CPU-bound tasks.
@@ -350,8 +350,8 @@ class ARROW_EXPORT ThreadPool : public Executor {
ThreadPool();
- Status SpawnReal(TaskHints hints, FnOnce<void()> task, StopToken,
- StopCallback&&) override;
+ Status SpawnReal(TaskHints hints, FnOnce<void()> task, StopToken,
+ StopCallback&&) override;
// Collect finished worker threads, making sure the OS threads have exited
void CollectFinishedWorkersUnlocked();
@@ -375,24 +375,24 @@ class ARROW_EXPORT ThreadPool : public Executor {
// Return the process-global thread pool for CPU-bound tasks.
ARROW_EXPORT ThreadPool* GetCpuThreadPool();
-/// \brief Potentially run an async operation serially (if use_threads is false)
-/// \see RunSerially
-///
-/// If `use_threads` is true, the global CPU executor is used.
-/// If `use_threads` is false, a temporary SerialExecutor is used.
-/// `get_future` is called (from this thread) with the chosen executor and must
-/// return a future that will eventually finish. This function returns once the
-/// future has finished.
-template <typename Fut, typename ValueType = typename Fut::ValueType>
-typename Fut::SyncType RunSynchronously(FnOnce<Fut(Executor*)> get_future,
- bool use_threads) {
- if (use_threads) {
- auto fut = std::move(get_future)(GetCpuThreadPool());
- return FutureToSync(fut);
- } else {
- return SerialExecutor::RunInSerialExecutor<ValueType>(std::move(get_future));
- }
-}
-
+/// \brief Potentially run an async operation serially (if use_threads is false)
+/// \see RunSerially
+///
+/// If `use_threads` is true, the global CPU executor is used.
+/// If `use_threads` is false, a temporary SerialExecutor is used.
+/// `get_future` is called (from this thread) with the chosen executor and must
+/// return a future that will eventually finish. This function returns once the
+/// future has finished.
+template <typename Fut, typename ValueType = typename Fut::ValueType>
+typename Fut::SyncType RunSynchronously(FnOnce<Fut(Executor*)> get_future,
+ bool use_threads) {
+ if (use_threads) {
+ auto fut = std::move(get_future)(GetCpuThreadPool());
+ return FutureToSync(fut);
+ } else {
+ return SerialExecutor::RunInSerialExecutor<ValueType>(std::move(get_future));
+ }
+}
+
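
A usage sketch for RunSynchronously above; the use_threads flag selects between the global CPU pool and a temporary single-threaded executor, and the trivially finished future here stands in for real pipelined work:

    #include "arrow/result.h"
    #include "arrow/util/future.h"
    #include "arrow/util/thread_pool.h"

    arrow::Result<int> ComputeMaybeThreaded(bool use_threads) {
      return arrow::internal::RunSynchronously<arrow::Future<int>>(
          [](arrow::internal::Executor* executor) -> arrow::Future<int> {
            // Real code would fan work out onto `executor` and return the
            // future of the final step.
            return arrow::Future<int>::MakeFinished(7);
          },
          use_threads);
    }
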
} // namespace internal
} // namespace arrow
diff --git a/contrib/libs/apache/arrow/cpp/src/arrow/util/trie.h b/contrib/libs/apache/arrow/cpp/src/arrow/util/trie.h
index ed73fdc6b04..b250cca647d 100644
--- a/contrib/libs/apache/arrow/cpp/src/arrow/util/trie.h
+++ b/contrib/libs/apache/arrow/cpp/src/arrow/util/trie.h
@@ -116,7 +116,7 @@ std::ostream& operator<<(std::ostream& os, const SmallString<N>& str) {
class ARROW_EXPORT Trie {
using index_type = int16_t;
using fast_index_type = int_fast16_t;
- static constexpr auto kMaxIndex = std::numeric_limits<index_type>::max();
+ static constexpr auto kMaxIndex = std::numeric_limits<index_type>::max();
public:
Trie() : size_(0) {}
@@ -126,9 +126,9 @@ class ARROW_EXPORT Trie {
int32_t Find(util::string_view s) const {
const Node* node = &nodes_[0];
fast_index_type pos = 0;
- if (s.length() > static_cast<size_t>(kMaxIndex)) {
- return -1;
- }
+ if (s.length() > static_cast<size_t>(kMaxIndex)) {
+ return -1;
+ }
fast_index_type remaining = static_cast<fast_index_type>(s.length());
while (remaining > 0) {
diff --git a/contrib/libs/apache/arrow/cpp/src/arrow/util/type_fwd.h b/contrib/libs/apache/arrow/cpp/src/arrow/util/type_fwd.h
index b3e69aa632f..ca107c2c69d 100644
--- a/contrib/libs/apache/arrow/cpp/src/arrow/util/type_fwd.h
+++ b/contrib/libs/apache/arrow/cpp/src/arrow/util/type_fwd.h
@@ -19,16 +19,16 @@
namespace arrow {
-namespace internal {
-struct Empty;
-} // namespace internal
-
-template <typename T = internal::Empty>
-class WeakFuture;
-class FutureWaiter;
-
-class TimestampParser;
-
+namespace internal {
+struct Empty;
+} // namespace internal
+
+template <typename T = internal::Empty>
+class WeakFuture;
+class FutureWaiter;
+
+class TimestampParser;
+
namespace internal {
class Executor;
@@ -36,27 +36,27 @@ class TaskGroup;
class ThreadPool;
} // namespace internal
-
-struct Compression {
- /// \brief Compression algorithm
- enum type {
- UNCOMPRESSED,
- SNAPPY,
- GZIP,
- BROTLI,
- ZSTD,
- LZ4,
- LZ4_FRAME,
- LZO,
- BZ2,
- LZ4_HADOOP
- };
-};
-
-namespace util {
-class Compressor;
-class Decompressor;
-class Codec;
-} // namespace util
-
+
+struct Compression {
+ /// \brief Compression algorithm
+ enum type {
+ UNCOMPRESSED,
+ SNAPPY,
+ GZIP,
+ BROTLI,
+ ZSTD,
+ LZ4,
+ LZ4_FRAME,
+ LZO,
+ BZ2,
+ LZ4_HADOOP
+ };
+};
+
+namespace util {
+class Compressor;
+class Decompressor;
+class Codec;
+} // namespace util
+
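
These forward declarations pair the Compression enum with the codec classes. A hedged sketch of how they are typically combined, assuming the arrow::util::Codec::Create factory from Arrow's full compression header (not part of this forward-declaration file):

    #include <memory>
    #include "arrow/result.h"
    #include "arrow/util/compression.h"

    arrow::Result<std::unique_ptr<arrow::util::Codec>> MakeGzipCodec() {
      return arrow::util::Codec::Create(arrow::Compression::GZIP);
    }
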
} // namespace arrow
diff --git a/contrib/libs/apache/arrow/cpp/src/arrow/util/type_traits.h b/contrib/libs/apache/arrow/cpp/src/arrow/util/type_traits.h
index a8cfec5cc04..80cc6297e39 100644
--- a/contrib/libs/apache/arrow/cpp/src/arrow/util/type_traits.h
+++ b/contrib/libs/apache/arrow/cpp/src/arrow/util/type_traits.h
@@ -17,7 +17,7 @@
#pragma once
-#include <cstdint>
+#include <cstdint>
#include <type_traits>
namespace arrow {
@@ -42,45 +42,45 @@ template <typename T>
struct is_null_pointer : std::is_same<std::nullptr_t, typename std::remove_cv<T>::type> {
};
-#ifdef __GLIBCXX__
-
-// An aligned_union backport, because old libstdc++ versions don't include it.
-
-constexpr std::size_t max_size(std::size_t a, std::size_t b) { return (a > b) ? a : b; }
-
-template <typename...>
-struct max_size_traits;
-
-template <typename H, typename... T>
-struct max_size_traits<H, T...> {
- static constexpr std::size_t max_sizeof() {
- return max_size(sizeof(H), max_size_traits<T...>::max_sizeof());
- }
- static constexpr std::size_t max_alignof() {
- return max_size(alignof(H), max_size_traits<T...>::max_alignof());
- }
-};
-
-template <>
-struct max_size_traits<> {
- static constexpr std::size_t max_sizeof() { return 0; }
- static constexpr std::size_t max_alignof() { return 0; }
-};
-
-template <std::size_t Len, typename... T>
-struct aligned_union {
- static constexpr std::size_t alignment_value = max_size_traits<T...>::max_alignof();
- static constexpr std::size_t size_value =
- max_size(Len, max_size_traits<T...>::max_sizeof());
- using type = typename std::aligned_storage<size_value, alignment_value>::type;
-};
-
-#else
-
-template <std::size_t Len, typename... T>
-using aligned_union = std::aligned_union<Len, T...>;
-
-#endif
-
+#ifdef __GLIBCXX__
+
+// An aligned_union backport, because old libstdc++ versions don't include it.
+
+constexpr std::size_t max_size(std::size_t a, std::size_t b) { return (a > b) ? a : b; }
+
+template <typename...>
+struct max_size_traits;
+
+template <typename H, typename... T>
+struct max_size_traits<H, T...> {
+ static constexpr std::size_t max_sizeof() {
+ return max_size(sizeof(H), max_size_traits<T...>::max_sizeof());
+ }
+ static constexpr std::size_t max_alignof() {
+ return max_size(alignof(H), max_size_traits<T...>::max_alignof());
+ }
+};
+
+template <>
+struct max_size_traits<> {
+ static constexpr std::size_t max_sizeof() { return 0; }
+ static constexpr std::size_t max_alignof() { return 0; }
+};
+
+template <std::size_t Len, typename... T>
+struct aligned_union {
+ static constexpr std::size_t alignment_value = max_size_traits<T...>::max_alignof();
+ static constexpr std::size_t size_value =
+ max_size(Len, max_size_traits<T...>::max_sizeof());
+ using type = typename std::aligned_storage<size_value, alignment_value>::type;
+};
+
+#else
+
+template <std::size_t Len, typename... T>
+using aligned_union = std::aligned_union<Len, T...>;
+
+#endif
+
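
A small illustration of the backport above: on either preprocessor branch, aligned_union<Len, T...>::type is large and aligned enough for every listed member (the member types here are illustrative):

    #include <cstdint>
    #include "arrow/util/type_traits.h"

    using Storage = arrow::internal::aligned_union<0, int32_t, double>::type;
    static_assert(sizeof(Storage) >= sizeof(double), "fits the largest member");
    static_assert(alignof(Storage) >= alignof(double), "meets the strictest alignment");
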
} // namespace internal
} // namespace arrow
diff --git a/contrib/libs/apache/arrow/cpp/src/arrow/util/uri.cc b/contrib/libs/apache/arrow/cpp/src/arrow/util/uri.cc
index f644f73fd8e..c19a7bc2eee 100644
--- a/contrib/libs/apache/arrow/cpp/src/arrow/util/uri.cc
+++ b/contrib/libs/apache/arrow/cpp/src/arrow/util/uri.cc
@@ -23,7 +23,7 @@
#include "arrow/util/string_view.h"
#include "arrow/util/value_parsing.h"
-#include "contrib/restricted/uriparser/include/uriparser/Uri.h"
+#include "contrib/restricted/uriparser/include/uriparser/Uri.h"
namespace arrow {
namespace internal {
@@ -71,28 +71,28 @@ std::string UriEscape(const std::string& s) {
return escaped;
}
-std::string UriUnescape(const util::string_view s) {
- std::string result(s);
- if (!result.empty()) {
- auto end = uriUnescapeInPlaceA(&result[0]);
- result.resize(end - &result[0]);
- }
- return result;
-}
-
-std::string UriEncodeHost(const std::string& host) {
- // Fairly naive check: if it contains a ':', it's IPv6 and needs
- // brackets, else it's OK
- if (host.find(":") != std::string::npos) {
- std::string result = "[";
- result += host;
- result += ']';
- return result;
- } else {
- return host;
- }
-}
-
+std::string UriUnescape(const util::string_view s) {
+ std::string result(s);
+ if (!result.empty()) {
+ auto end = uriUnescapeInPlaceA(&result[0]);
+ result.resize(end - &result[0]);
+ }
+ return result;
+}
+
+std::string UriEncodeHost(const std::string& host) {
+ // Fairly naive check: if it contains a ':', it's IPv6 and needs
+ // brackets, else it's OK
+ if (host.find(":") != std::string::npos) {
+ std::string result = "[";
+ result += host;
+ result += ']';
+ return result;
+ } else {
+ return host;
+ }
+}
+
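
A usage sketch for the two helpers above; the expected results in the comments assume uriparser's in-place percent-decoding semantics:

    #include <string>
    #include "arrow/util/uri.h"

    std::string host = arrow::internal::UriEncodeHost("::1");   // "[::1]"
    std::string text = arrow::internal::UriUnescape("a%20b");   // "a b"
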
struct Uri::Impl {
Impl() : string_rep_(""), port_(-1) { memset(&uri_, 0, sizeof(uri_)); }
diff --git a/contrib/libs/apache/arrow/cpp/src/arrow/util/uri.h b/contrib/libs/apache/arrow/cpp/src/arrow/util/uri.h
index 35a9400f92b..b4ffbb04dec 100644
--- a/contrib/libs/apache/arrow/cpp/src/arrow/util/uri.h
+++ b/contrib/libs/apache/arrow/cpp/src/arrow/util/uri.h
@@ -24,7 +24,7 @@
#include <vector>
#include "arrow/type_fwd.h"
-#include "arrow/util/string_view.h"
+#include "arrow/util/string_view.h"
#include "arrow/util/visibility.h"
namespace arrow {
@@ -92,13 +92,13 @@ class ARROW_EXPORT Uri {
ARROW_EXPORT
std::string UriEscape(const std::string& s);
-ARROW_EXPORT
-std::string UriUnescape(const arrow::util::string_view s);
-
-/// Encode a host for use within a URI, such as "localhost",
-/// "127.0.0.1", or "[::1]".
-ARROW_EXPORT
-std::string UriEncodeHost(const std::string& host);
-
+ARROW_EXPORT
+std::string UriUnescape(const arrow::util::string_view s);
+
+/// Encode a host for use within a URI, such as "localhost",
+/// "127.0.0.1", or "[::1]".
+ARROW_EXPORT
+std::string UriEncodeHost(const std::string& host);
+
} // namespace internal
} // namespace arrow
diff --git a/contrib/libs/apache/arrow/cpp/src/arrow/util/utf8.cc b/contrib/libs/apache/arrow/cpp/src/arrow/util/utf8.cc
index af850dfc523..11394d2e64c 100644
--- a/contrib/libs/apache/arrow/cpp/src/arrow/util/utf8.cc
+++ b/contrib/libs/apache/arrow/cpp/src/arrow/util/utf8.cc
@@ -64,8 +64,8 @@ const uint8_t utf8_small_table[] = { // NOLINT
uint16_t utf8_large_table[9 * 256] = {0xffff};
-const uint8_t utf8_byte_size_table[16] = {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 3, 4};
-
+const uint8_t utf8_byte_size_table[16] = {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 3, 4};
+
static void InitializeLargeTable() {
for (uint32_t state = 0; state < 9; ++state) {
for (uint32_t byte = 0; byte < 256; ++byte) {
diff --git a/contrib/libs/apache/arrow/cpp/src/arrow/util/utf8.h b/contrib/libs/apache/arrow/cpp/src/arrow/util/utf8.h
index 54ee9a2820b..0ec3538b95c 100644
--- a/contrib/libs/apache/arrow/cpp/src/arrow/util/utf8.h
+++ b/contrib/libs/apache/arrow/cpp/src/arrow/util/utf8.h
@@ -23,15 +23,15 @@
#include <memory>
#include <string>
-#if defined(ARROW_HAVE_NEON) || defined(ARROW_HAVE_SSE4_2)
-#error #include <xsimd/xsimd.hpp>
-#endif
-
+#if defined(ARROW_HAVE_NEON) || defined(ARROW_HAVE_SSE4_2)
+#error #include <xsimd/xsimd.hpp>
+#endif
+
#include "arrow/type_fwd.h"
#include "arrow/util/macros.h"
#include "arrow/util/simd.h"
#include "arrow/util/string_view.h"
-#include "arrow/util/ubsan.h"
+#include "arrow/util/ubsan.h"
#include "arrow/util/visibility.h"
namespace arrow {
@@ -65,8 +65,8 @@ static constexpr uint8_t kUTF8DecodeReject = 12;
// In this table states are multiples of 256.
ARROW_EXPORT extern uint16_t utf8_large_table[9 * 256];
-ARROW_EXPORT extern const uint8_t utf8_byte_size_table[16];
-
+ARROW_EXPORT extern const uint8_t utf8_byte_size_table[16];
+
// Success / reject states when looked up in the large table
static constexpr uint16_t kUTF8ValidateAccept = 0;
static constexpr uint16_t kUTF8ValidateReject = 256;
@@ -94,9 +94,9 @@ ARROW_EXPORT void InitializeUTF8();
inline bool ValidateUTF8(const uint8_t* data, int64_t size) {
static constexpr uint64_t high_bits_64 = 0x8080808080808080ULL;
- static constexpr uint32_t high_bits_32 = 0x80808080UL;
- static constexpr uint16_t high_bits_16 = 0x8080U;
- static constexpr uint8_t high_bits_8 = 0x80U;
+ static constexpr uint32_t high_bits_32 = 0x80808080UL;
+ static constexpr uint16_t high_bits_16 = 0x8080U;
+ static constexpr uint8_t high_bits_8 = 0x80U;
#ifndef NDEBUG
internal::CheckUTF8Initialized();
@@ -106,8 +106,8 @@ inline bool ValidateUTF8(const uint8_t* data, int64_t size) {
// XXX This is doing an unaligned access. Contemporary architectures
// (x86-64, AArch64, PPC64) support it natively and often have good
// performance nevertheless.
- uint64_t mask64 = SafeLoadAs<uint64_t>(data);
- if (ARROW_PREDICT_TRUE((mask64 & high_bits_64) == 0)) {
+ uint64_t mask64 = SafeLoadAs<uint64_t>(data);
+ if (ARROW_PREDICT_TRUE((mask64 & high_bits_64) == 0)) {
// 8 bytes of pure ASCII, move forward
size -= 8;
data += 8;
@@ -162,50 +162,50 @@ inline bool ValidateUTF8(const uint8_t* data, int64_t size) {
return false;
}
- // Check if string tail is full ASCII (common case, fast)
- if (size >= 4) {
- uint32_t tail_mask = SafeLoadAs<uint32_t>(data + size - 4);
- uint32_t head_mask = SafeLoadAs<uint32_t>(data);
- if (ARROW_PREDICT_TRUE(((head_mask | tail_mask) & high_bits_32) == 0)) {
- return true;
- }
- } else if (size >= 2) {
- uint16_t tail_mask = SafeLoadAs<uint16_t>(data + size - 2);
- uint16_t head_mask = SafeLoadAs<uint16_t>(data);
- if (ARROW_PREDICT_TRUE(((head_mask | tail_mask) & high_bits_16) == 0)) {
- return true;
- }
- } else if (size == 1) {
- if (ARROW_PREDICT_TRUE((*data & high_bits_8) == 0)) {
- return true;
- }
- } else {
- /* size == 0 */
- return true;
- }
-
- // Fall back to UTF8 validation of tail string.
+ // Check if string tail is full ASCII (common case, fast)
+ if (size >= 4) {
+ uint32_t tail_mask = SafeLoadAs<uint32_t>(data + size - 4);
+ uint32_t head_mask = SafeLoadAs<uint32_t>(data);
+ if (ARROW_PREDICT_TRUE(((head_mask | tail_mask) & high_bits_32) == 0)) {
+ return true;
+ }
+ } else if (size >= 2) {
+ uint16_t tail_mask = SafeLoadAs<uint16_t>(data + size - 2);
+ uint16_t head_mask = SafeLoadAs<uint16_t>(data);
+ if (ARROW_PREDICT_TRUE(((head_mask | tail_mask) & high_bits_16) == 0)) {
+ return true;
+ }
+ } else if (size == 1) {
+ if (ARROW_PREDICT_TRUE((*data & high_bits_8) == 0)) {
+ return true;
+ }
+ } else {
+ /* size == 0 */
+ return true;
+ }
+
+ // Fall back to UTF8 validation of tail string.
// Note the state table is designed so that, once in the reject state,
// we remain in that state until the end. So we needn't check for
// rejection at each char (we don't gain much by short-circuiting here).
uint16_t state = internal::kUTF8ValidateAccept;
- switch (size) {
- case 7:
- state = internal::ValidateOneUTF8Byte(data[size - 7], state);
- case 6:
- state = internal::ValidateOneUTF8Byte(data[size - 6], state);
- case 5:
- state = internal::ValidateOneUTF8Byte(data[size - 5], state);
- case 4:
- state = internal::ValidateOneUTF8Byte(data[size - 4], state);
- case 3:
- state = internal::ValidateOneUTF8Byte(data[size - 3], state);
- case 2:
- state = internal::ValidateOneUTF8Byte(data[size - 2], state);
- case 1:
- state = internal::ValidateOneUTF8Byte(data[size - 1], state);
- default:
- break;
+ switch (size) {
+ case 7:
+ state = internal::ValidateOneUTF8Byte(data[size - 7], state);
+ case 6:
+ state = internal::ValidateOneUTF8Byte(data[size - 6], state);
+ case 5:
+ state = internal::ValidateOneUTF8Byte(data[size - 5], state);
+ case 4:
+ state = internal::ValidateOneUTF8Byte(data[size - 4], state);
+ case 3:
+ state = internal::ValidateOneUTF8Byte(data[size - 3], state);
+ case 2:
+ state = internal::ValidateOneUTF8Byte(data[size - 2], state);
+ case 1:
+ state = internal::ValidateOneUTF8Byte(data[size - 1], state);
+ default:
+ break;
}
return ARROW_PREDICT_TRUE(state == internal::kUTF8ValidateAccept);
}
@@ -246,26 +246,26 @@ inline bool ValidateAsciiSw(const uint8_t* data, int64_t len) {
}
}
-#if defined(ARROW_HAVE_NEON) || defined(ARROW_HAVE_SSE4_2)
+#if defined(ARROW_HAVE_NEON) || defined(ARROW_HAVE_SSE4_2)
inline bool ValidateAsciiSimd(const uint8_t* data, int64_t len) {
- using simd_batch = xsimd::batch<int8_t, 16>;
+ using simd_batch = xsimd::batch<int8_t, 16>;
if (len >= 32) {
- const simd_batch zero(static_cast<int8_t>(0));
+ const simd_batch zero(static_cast<int8_t>(0));
const uint8_t* data2 = data + 16;
- simd_batch or1 = zero, or2 = zero;
+ simd_batch or1 = zero, or2 = zero;
while (len >= 32) {
- or1 |= simd_batch(reinterpret_cast<const int8_t*>(data), xsimd::unaligned_mode{});
- or2 |= simd_batch(reinterpret_cast<const int8_t*>(data2), xsimd::unaligned_mode{});
+ or1 |= simd_batch(reinterpret_cast<const int8_t*>(data), xsimd::unaligned_mode{});
+ or2 |= simd_batch(reinterpret_cast<const int8_t*>(data2), xsimd::unaligned_mode{});
data += 32;
data2 += 32;
len -= 32;
}
- // To test for upper bit in all bytes, test whether any of them is negative
- or1 |= or2;
- if (xsimd::any(or1 < zero)) {
+ // To test for upper bit in all bytes, test whether any of them is negative
+ or1 |= or2;
+ if (xsimd::any(or1 < zero)) {
return false;
}
}
@@ -295,34 +295,34 @@ Result<const uint8_t*> SkipUTF8BOM(const uint8_t* data, int64_t size);
static constexpr uint32_t kMaxUnicodeCodepoint = 0x110000;
-// The size of a valid UTF-8 character can be determined from the leading 4 bits of BYTE1
-// utf8_byte_size_table[0..7] --> pure ascii chars --> 1B length
-// utf8_byte_size_table[8..11] --> internal bytes --> 1B length
-// utf8_byte_size_table[12,13] --> 2B long UTF8 chars
-// utf8_byte_size_table[14] --> 3B long UTF8 chars
-// utf8_byte_size_table[15] --> 4B long UTF8 chars
-// NOTE: Results for invalid/malformed UTF-8 sequences are undefined.
-// ex: \xFF... returns 4B
-static inline uint8_t ValidUtf8CodepointByteSize(const uint8_t* codeunit) {
- return internal::utf8_byte_size_table[*codeunit >> 4];
-}
-
+// The size of a valid UTF-8 character can be determined from the leading 4 bits of BYTE1
+// utf8_byte_size_table[0..7] --> pure ascii chars --> 1B length
+// utf8_byte_size_table[8..11] --> internal bytes --> 1B length
+// utf8_byte_size_table[12,13] --> 2B long UTF8 chars
+// utf8_byte_size_table[14] --> 3B long UTF8 chars
+// utf8_byte_size_table[15] --> 4B long UTF8 chars
+// NOTE: Results for invalid/malformed UTF-8 sequences are undefined.
+// ex: \xFF... returns 4B
+static inline uint8_t ValidUtf8CodepointByteSize(const uint8_t* codeunit) {
+ return internal::utf8_byte_size_table[*codeunit >> 4];
+}
+
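
A worked example of the table lookup above: the lead byte's high nibble indexes the table, so the two-byte sequence for U+00E9 (0xC3 0xA9) resolves through index 0xC:

    #include <cstdint>
    #include "arrow/util/utf8.h"

    const uint8_t lead = 0xC3;  // first byte of UTF-8 "é" (0xC3 0xA9)
    // 0xC3 >> 4 == 0xC, and utf8_byte_size_table[12] == 2
    const uint8_t n = arrow::util::ValidUtf8CodepointByteSize(&lead);  // n == 2
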
static inline bool Utf8IsContinuation(const uint8_t codeunit) {
return (codeunit & 0xC0) == 0x80; // upper two bits should be 10
}
-static inline bool Utf8Is2ByteStart(const uint8_t codeunit) {
- return (codeunit & 0xE0) == 0xC0; // upper three bits should be 110
-}
-
-static inline bool Utf8Is3ByteStart(const uint8_t codeunit) {
- return (codeunit & 0xF0) == 0xE0; // upper four bits should be 1110
-}
-
-static inline bool Utf8Is4ByteStart(const uint8_t codeunit) {
- return (codeunit & 0xF8) == 0xF0; // upper five bits should be 11110
-}
-
+static inline bool Utf8Is2ByteStart(const uint8_t codeunit) {
+ return (codeunit & 0xE0) == 0xC0; // upper three bits should be 110
+}
+
+static inline bool Utf8Is3ByteStart(const uint8_t codeunit) {
+ return (codeunit & 0xF0) == 0xE0; // upper four bits should be 1110
+}
+
+static inline bool Utf8Is4ByteStart(const uint8_t codeunit) {
+ return (codeunit & 0xF8) == 0xF0; // upper five bits should be 11110
+}
+
static inline uint8_t* UTF8Encode(uint8_t* str, uint32_t codepoint) {
if (codepoint < 0x80) {
*str++ = codepoint;
@@ -346,7 +346,7 @@ static inline uint8_t* UTF8Encode(uint8_t* str, uint32_t codepoint) {
static inline bool UTF8Decode(const uint8_t** data, uint32_t* codepoint) {
const uint8_t* str = *data;
- if (*str < 0x80) { // ascii
+ if (*str < 0x80) { // ascii
*codepoint = *str++;
} else if (ARROW_PREDICT_FALSE(*str < 0xC0)) { // invalid non-ascii char
return false;
@@ -391,45 +391,45 @@ static inline bool UTF8Decode(const uint8_t** data, uint32_t* codepoint) {
return true;
}
-static inline bool UTF8DecodeReverse(const uint8_t** data, uint32_t* codepoint) {
- const uint8_t* str = *data;
- if (*str < 0x80) { // ascii
- *codepoint = *str--;
- } else {
- if (ARROW_PREDICT_FALSE(!Utf8IsContinuation(*str))) {
- return false;
- }
- uint8_t code_unit_N = (*str--) & 0x3F; // take last 6 bits
- if (Utf8Is2ByteStart(*str)) {
- uint8_t code_unit_1 = (*str--) & 0x1F; // take last 5 bits
- *codepoint = (code_unit_1 << 6) + code_unit_N;
- } else {
- if (ARROW_PREDICT_FALSE(!Utf8IsContinuation(*str))) {
- return false;
- }
- uint8_t code_unit_Nmin1 = (*str--) & 0x3F; // take last 6 bits
- if (Utf8Is3ByteStart(*str)) {
- uint8_t code_unit_1 = (*str--) & 0x0F; // take last 4 bits
- *codepoint = (code_unit_1 << 12) + (code_unit_Nmin1 << 6) + code_unit_N;
- } else {
- if (ARROW_PREDICT_FALSE(!Utf8IsContinuation(*str))) {
- return false;
- }
- uint8_t code_unit_Nmin2 = (*str--) & 0x3F; // take last 6 bits
- if (ARROW_PREDICT_TRUE(Utf8Is4ByteStart(*str))) {
- uint8_t code_unit_1 = (*str--) & 0x07; // take last 3 bits
- *codepoint = (code_unit_1 << 18) + (code_unit_Nmin2 << 12) +
- (code_unit_Nmin1 << 6) + code_unit_N;
- } else {
- return false;
- }
- }
- }
- }
- *data = str;
- return true;
-}
-
+static inline bool UTF8DecodeReverse(const uint8_t** data, uint32_t* codepoint) {
+ const uint8_t* str = *data;
+ if (*str < 0x80) { // ascii
+ *codepoint = *str--;
+ } else {
+ if (ARROW_PREDICT_FALSE(!Utf8IsContinuation(*str))) {
+ return false;
+ }
+ uint8_t code_unit_N = (*str--) & 0x3F; // take last 6 bits
+ if (Utf8Is2ByteStart(*str)) {
+ uint8_t code_unit_1 = (*str--) & 0x1F; // take last 5 bits
+ *codepoint = (code_unit_1 << 6) + code_unit_N;
+ } else {
+ if (ARROW_PREDICT_FALSE(!Utf8IsContinuation(*str))) {
+ return false;
+ }
+ uint8_t code_unit_Nmin1 = (*str--) & 0x3F; // take last 6 bits
+ if (Utf8Is3ByteStart(*str)) {
+ uint8_t code_unit_1 = (*str--) & 0x0F; // take last 4 bits
+ *codepoint = (code_unit_1 << 12) + (code_unit_Nmin1 << 6) + code_unit_N;
+ } else {
+ if (ARROW_PREDICT_FALSE(!Utf8IsContinuation(*str))) {
+ return false;
+ }
+ uint8_t code_unit_Nmin2 = (*str--) & 0x3F; // take last 6 bits
+ if (ARROW_PREDICT_TRUE(Utf8Is4ByteStart(*str))) {
+ uint8_t code_unit_1 = (*str--) & 0x07; // take last 3 bits
+ *codepoint = (code_unit_1 << 18) + (code_unit_Nmin2 << 12) +
+ (code_unit_Nmin1 << 6) + code_unit_N;
+ } else {
+ return false;
+ }
+ }
+ }
+ }
+ *data = str;
+ return true;
+}
+
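
A hedged sketch of UTF8DecodeReverse above: the cursor starts at the final byte and, on success, ends one byte before the codepoint's lead byte, matching the reverse-iterator convention that UTF8FindIfReverse relies on below:

    #include <cstdint>
    #include "arrow/util/utf8.h"

    const uint8_t buf[] = {0x61, 0xC3, 0xA9};  // "aé"
    const uint8_t* it = buf + 2;               // start at the last byte
    uint32_t cp = 0;
    const bool ok = arrow::util::UTF8DecodeReverse(&it, &cp);
    // ok == true, cp == 0xE9, and it == buf: one byte before the 0xC3 lead byte
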
template <class UnaryOperation>
static inline bool UTF8Transform(const uint8_t* first, const uint8_t* last,
uint8_t** destination, UnaryOperation&& unary_op) {
@@ -446,97 +446,97 @@ static inline bool UTF8Transform(const uint8_t* first, const uint8_t* last,
return true;
}
-template <class Predicate>
-static inline bool UTF8FindIf(const uint8_t* first, const uint8_t* last,
- Predicate&& predicate, const uint8_t** position) {
- const uint8_t* i = first;
- while (i < last) {
- uint32_t codepoint = 0;
- const uint8_t* current = i;
- if (ARROW_PREDICT_FALSE(!UTF8Decode(&i, &codepoint))) {
- return false;
- }
- if (predicate(codepoint)) {
- *position = current;
- return true;
- }
- }
- *position = last;
- return true;
-}
-
-// Same semantics as std::find_if using reverse iterators with the return value
-// having the same semantics as std::reverse_iterator<..>.base()
-// A reverse iterator physically points to the next address, e.g.:
-// &*reverse_iterator(i) == &*(i + 1)
-template <class Predicate>
-static inline bool UTF8FindIfReverse(const uint8_t* first, const uint8_t* last,
- Predicate&& predicate, const uint8_t** position) {
-  // converts to a normal pointer
- const uint8_t* i = last - 1;
- while (i >= first) {
- uint32_t codepoint = 0;
- const uint8_t* current = i;
- if (ARROW_PREDICT_FALSE(!UTF8DecodeReverse(&i, &codepoint))) {
- return false;
- }
- if (predicate(codepoint)) {
- // converts normal pointer to 'reverse iterator semantics'.
- *position = current + 1;
- return true;
- }
- }
-  // similar to how an end pointer points to one past the last element, reverse
-  // iterators point to the 'first' pointer to indicate out of range.
- *position = first;
- return true;
-}
-
-static inline bool UTF8AdvanceCodepoints(const uint8_t* first, const uint8_t* last,
- const uint8_t** destination, int64_t n) {
- return UTF8FindIf(
- first, last,
- [&](uint32_t codepoint) {
- bool done = n == 0;
- n--;
- return done;
- },
- destination);
-}
-
-static inline bool UTF8AdvanceCodepointsReverse(const uint8_t* first, const uint8_t* last,
- const uint8_t** destination, int64_t n) {
- return UTF8FindIfReverse(
- first, last,
- [&](uint32_t codepoint) {
- bool done = n == 0;
- n--;
- return done;
- },
- destination);
-}
-
-template <class UnaryFunction>
-static inline bool UTF8ForEach(const uint8_t* first, const uint8_t* last,
- UnaryFunction&& f) {
- const uint8_t* i = first;
- while (i < last) {
- uint32_t codepoint = 0;
- if (ARROW_PREDICT_FALSE(!UTF8Decode(&i, &codepoint))) {
- return false;
- }
- f(codepoint);
- }
- return true;
-}
-
-template <class UnaryFunction>
-static inline bool UTF8ForEach(const std::string& s, UnaryFunction&& f) {
- return UTF8ForEach(reinterpret_cast<const uint8_t*>(s.data()),
- reinterpret_cast<const uint8_t*>(s.data() + s.length()),
- std::forward<UnaryFunction>(f));
-}
-
+template <class Predicate>
+static inline bool UTF8FindIf(const uint8_t* first, const uint8_t* last,
+ Predicate&& predicate, const uint8_t** position) {
+ const uint8_t* i = first;
+ while (i < last) {
+ uint32_t codepoint = 0;
+ const uint8_t* current = i;
+ if (ARROW_PREDICT_FALSE(!UTF8Decode(&i, &codepoint))) {
+ return false;
+ }
+ if (predicate(codepoint)) {
+ *position = current;
+ return true;
+ }
+ }
+ *position = last;
+ return true;
+}
+
+// Same semantics as std::find_if using reverse iterators with the return value
+// having the same semantics as std::reverse_iterator<..>.base()
+// A reverse iterator physically points to the next address, e.g.:
+// &*reverse_iterator(i) == &*(i + 1)
+template <class Predicate>
+static inline bool UTF8FindIfReverse(const uint8_t* first, const uint8_t* last,
+ Predicate&& predicate, const uint8_t** position) {
+  // converts to a normal pointer
+ const uint8_t* i = last - 1;
+ while (i >= first) {
+ uint32_t codepoint = 0;
+ const uint8_t* current = i;
+ if (ARROW_PREDICT_FALSE(!UTF8DecodeReverse(&i, &codepoint))) {
+ return false;
+ }
+ if (predicate(codepoint)) {
+ // converts normal pointer to 'reverse iterator semantics'.
+ *position = current + 1;
+ return true;
+ }
+ }
+  // similar to how an end pointer points to one past the last element, reverse
+  // iterators point to the 'first' pointer to indicate out of range.
+ *position = first;
+ return true;
+}
+
+static inline bool UTF8AdvanceCodepoints(const uint8_t* first, const uint8_t* last,
+ const uint8_t** destination, int64_t n) {
+ return UTF8FindIf(
+ first, last,
+ [&](uint32_t codepoint) {
+ bool done = n == 0;
+ n--;
+ return done;
+ },
+ destination);
+}
+
+static inline bool UTF8AdvanceCodepointsReverse(const uint8_t* first, const uint8_t* last,
+ const uint8_t** destination, int64_t n) {
+ return UTF8FindIfReverse(
+ first, last,
+ [&](uint32_t codepoint) {
+ bool done = n == 0;
+ n--;
+ return done;
+ },
+ destination);
+}
+
+template <class UnaryFunction>
+static inline bool UTF8ForEach(const uint8_t* first, const uint8_t* last,
+ UnaryFunction&& f) {
+ const uint8_t* i = first;
+ while (i < last) {
+ uint32_t codepoint = 0;
+ if (ARROW_PREDICT_FALSE(!UTF8Decode(&i, &codepoint))) {
+ return false;
+ }
+ f(codepoint);
+ }
+ return true;
+}
+
+template <class UnaryFunction>
+static inline bool UTF8ForEach(const std::string& s, UnaryFunction&& f) {
+ return UTF8ForEach(reinterpret_cast<const uint8_t*>(s.data()),
+ reinterpret_cast<const uint8_t*>(s.data() + s.length()),
+ std::forward<UnaryFunction>(f));
+}
+
template <class UnaryPredicate>
static inline bool UTF8AllOf(const uint8_t* first, const uint8_t* last, bool* result,
UnaryPredicate&& predicate) {
@@ -556,15 +556,15 @@ static inline bool UTF8AllOf(const uint8_t* first, const uint8_t* last, bool* re
return true;
}
-/// Count the number of codepoints in the given string (assuming it is valid UTF8).
-static inline int64_t UTF8Length(const uint8_t* first, const uint8_t* last) {
- int64_t length = 0;
- while (first != last) {
- length += ((*first & 0xc0) != 0x80);
- ++first;
- }
- return length;
-}
-
+/// Count the number of codepoints in the given string (assuming it is valid UTF8).
+static inline int64_t UTF8Length(const uint8_t* first, const uint8_t* last) {
+ int64_t length = 0;
+ while (first != last) {
+ length += ((*first & 0xc0) != 0x80);
+ ++first;
+ }
+ return length;
+}
+
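
A quick check of UTF8Length above, which counts non-continuation bytes (upper bits != 10) and therefore codepoints in valid UTF-8:

    #include <cstdint>
    #include "arrow/util/utf8.h"

    const char* s = "h\xC3\xA9llo";  // "héllo": 6 bytes, 5 codepoints
    const uint8_t* p = reinterpret_cast<const uint8_t*>(s);
    const int64_t n = arrow::util::UTF8Length(p, p + 6);  // n == 5
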
} // namespace util
} // namespace arrow
diff --git a/contrib/libs/apache/arrow/cpp/src/arrow/util/value_parsing.cc b/contrib/libs/apache/arrow/cpp/src/arrow/util/value_parsing.cc
index 5460dfb91f9..3b147366636 100644
--- a/contrib/libs/apache/arrow/cpp/src/arrow/util/value_parsing.cc
+++ b/contrib/libs/apache/arrow/cpp/src/arrow/util/value_parsing.cc
@@ -20,19 +20,19 @@
#include <string>
#include <utility>
-#include "contrib/restricted/fast_float/include/fast_float/fast_float.h"
+#include "contrib/restricted/fast_float/include/fast_float/fast_float.h"
namespace arrow {
namespace internal {
bool StringToFloat(const char* s, size_t length, float* out) {
- const auto res = fast_float::from_chars(s, s + length, *out);
- return res.ec == std::errc() && res.ptr == s + length;
+ const auto res = fast_float::from_chars(s, s + length, *out);
+ return res.ec == std::errc() && res.ptr == s + length;
}
bool StringToFloat(const char* s, size_t length, double* out) {
- const auto res = fast_float::from_chars(s, s + length, *out);
- return res.ec == std::errc() && res.ptr == s + length;
+ const auto res = fast_float::from_chars(s, s + length, *out);
+ return res.ec == std::errc() && res.ptr == s + length;
}
// ----------------------------------------------------------------------
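
Both StringToFloat overloads above require fast_float to consume the entire input, so trailing bytes fail the parse. A small sketch:

    float out = 0.0f;
    bool ok1 = arrow::internal::StringToFloat("1.25", 4, &out);   // true, out == 1.25f
    bool ok2 = arrow::internal::StringToFloat("1.25x", 5, &out);  // false: trailing byte unparsed
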
diff --git a/contrib/libs/apache/arrow/cpp/src/arrow/util/value_parsing.h b/contrib/libs/apache/arrow/cpp/src/arrow/util/value_parsing.h
index e8de13287c1..00295d1b51f 100644
--- a/contrib/libs/apache/arrow/cpp/src/arrow/util/value_parsing.h
+++ b/contrib/libs/apache/arrow/cpp/src/arrow/util/value_parsing.h
@@ -486,80 +486,80 @@ static inline bool ParseHH_MM_SS(const char* s, Duration* out) {
static inline bool ParseSubSeconds(const char* s, size_t length, TimeUnit::type unit,
uint32_t* out) {
- // The decimal point has been peeled off at this point
-
- // Fail if number of decimal places provided exceeds what the unit can hold.
- // Calculate how many trailing decimal places are omitted for the unit
- // e.g. if 4 decimal places are provided and unit is MICRO, 2 are missing
- size_t omitted = 0;
- switch (unit) {
- case TimeUnit::MILLI:
- if (ARROW_PREDICT_FALSE(length > 3)) {
- return false;
- }
- if (length < 3) {
- omitted = 3 - length;
- }
+ // The decimal point has been peeled off at this point
+
+ // Fail if number of decimal places provided exceeds what the unit can hold.
+ // Calculate how many trailing decimal places are omitted for the unit
+ // e.g. if 4 decimal places are provided and unit is MICRO, 2 are missing
+ size_t omitted = 0;
+ switch (unit) {
+ case TimeUnit::MILLI:
+ if (ARROW_PREDICT_FALSE(length > 3)) {
+ return false;
+ }
+ if (length < 3) {
+ omitted = 3 - length;
+ }
break;
- case TimeUnit::MICRO:
- if (ARROW_PREDICT_FALSE(length > 6)) {
- return false;
- }
- if (length < 6) {
- omitted = 6 - length;
- }
+ case TimeUnit::MICRO:
+ if (ARROW_PREDICT_FALSE(length > 6)) {
+ return false;
+ }
+ if (length < 6) {
+ omitted = 6 - length;
+ }
break;
- case TimeUnit::NANO:
- if (ARROW_PREDICT_FALSE(length > 9)) {
- return false;
- }
- if (length < 9) {
- omitted = 9 - length;
- }
+ case TimeUnit::NANO:
+ if (ARROW_PREDICT_FALSE(length > 9)) {
+ return false;
+ }
+ if (length < 9) {
+ omitted = 9 - length;
+ }
break;
default:
return false;
}
- if (ARROW_PREDICT_TRUE(omitted == 0)) {
- return ParseUnsigned(s, length, out);
- } else {
- uint32_t subseconds;
- bool success = ParseUnsigned(s, length, &subseconds);
- if (ARROW_PREDICT_TRUE(success)) {
- switch (omitted) {
- case 1:
- *out = subseconds * 10;
- break;
- case 2:
- *out = subseconds * 100;
- break;
- case 3:
- *out = subseconds * 1000;
- break;
- case 4:
- *out = subseconds * 10000;
- break;
- case 5:
- *out = subseconds * 100000;
- break;
- case 6:
- *out = subseconds * 1000000;
- break;
- case 7:
- *out = subseconds * 10000000;
- break;
- case 8:
- *out = subseconds * 100000000;
- break;
- default:
- // Impossible case
- break;
- }
- return true;
- } else {
- return false;
- }
+ if (ARROW_PREDICT_TRUE(omitted == 0)) {
+ return ParseUnsigned(s, length, out);
+ } else {
+ uint32_t subseconds;
+ bool success = ParseUnsigned(s, length, &subseconds);
+ if (ARROW_PREDICT_TRUE(success)) {
+ switch (omitted) {
+ case 1:
+ *out = subseconds * 10;
+ break;
+ case 2:
+ *out = subseconds * 100;
+ break;
+ case 3:
+ *out = subseconds * 1000;
+ break;
+ case 4:
+ *out = subseconds * 10000;
+ break;
+ case 5:
+ *out = subseconds * 100000;
+ break;
+ case 6:
+ *out = subseconds * 1000000;
+ break;
+ case 7:
+ *out = subseconds * 10000000;
+ break;
+ case 8:
+ *out = subseconds * 100000000;
+ break;
+ default:
+ // Impossible case
+ break;
+ }
+ return true;
+ } else {
+ return false;
+ }
}
}
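
The scaling in ParseSubSeconds pads omitted trailing decimal digits up to the unit's precision. A minimal standalone sketch of that rule (not the Arrow API; the function name here is hypothetical):

    #include <cstddef>
    #include <cstdint>
    #include <cstdio>

    // unit_digits is 3 for MILLI, 6 for MICRO, 9 for NANO.
    static uint32_t ScaleSubseconds(uint32_t parsed, size_t length, size_t unit_digits) {
      // e.g. ".4" with MICRO: length == 1, so 5 digits are omitted and the
      // parsed value is multiplied by 10^5: 4 -> 400000 microseconds.
      for (size_t omitted = unit_digits - length; omitted > 0; --omitted) {
        parsed *= 10;
      }
      return parsed;
    }

    int main() {
      std::printf("%u\n", static_cast<unsigned>(ScaleSubseconds(4, 1, 6)));    // 400000
      std::printf("%u\n", static_cast<unsigned>(ScaleSubseconds(123, 3, 9)));  // 123000000
      return 0;
    }
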
@@ -572,21 +572,21 @@ static inline bool ParseTimestampISO8601(const char* s, size_t length,
// We allow the following formats for all units:
// - "YYYY-MM-DD"
- // - "YYYY-MM-DD[ T]hhZ?"
- // - "YYYY-MM-DD[ T]hh:mmZ?"
- // - "YYYY-MM-DD[ T]hh:mm:ssZ?"
+ // - "YYYY-MM-DD[ T]hhZ?"
+ // - "YYYY-MM-DD[ T]hh:mmZ?"
+ // - "YYYY-MM-DD[ T]hh:mm:ssZ?"
//
- // We allow the following formats for unit == MILLI, MICRO, or NANO:
- // - "YYYY-MM-DD[ T]hh:mm:ss.s{1,3}Z?"
+ // We allow the following formats for unit == MILLI, MICRO, or NANO:
+ // - "YYYY-MM-DD[ T]hh:mm:ss.s{1,3}Z?"
//
- // We allow the following formats for unit == MICRO, or NANO:
- // - "YYYY-MM-DD[ T]hh:mm:ss.s{4,6}Z?"
+ // We allow the following formats for unit == MICRO, or NANO:
+ // - "YYYY-MM-DD[ T]hh:mm:ss.s{4,6}Z?"
//
- // We allow the following formats for unit == NANO:
- // - "YYYY-MM-DD[ T]hh:mm:ss.s{7,9}Z?"
+ // We allow the following formats for unit == NANO:
+ // - "YYYY-MM-DD[ T]hh:mm:ss.s{7,9}Z?"
//
// UTC is always assumed, and the DataType's timezone is ignored.
- //
+ //
if (ARROW_PREDICT_FALSE(length < 10)) return false;
@@ -621,15 +621,15 @@ static inline bool ParseTimestampISO8601(const char* s, size_t length,
}
break;
case 19: // YYYY-MM-DD[ T]hh:mm:ss
- case 21: // YYYY-MM-DD[ T]hh:mm:ss.s
- case 22: // YYYY-MM-DD[ T]hh:mm:ss.ss
- case 23: // YYYY-MM-DD[ T]hh:mm:ss.sss
- case 24: // YYYY-MM-DD[ T]hh:mm:ss.ssss
- case 25: // YYYY-MM-DD[ T]hh:mm:ss.sssss
- case 26: // YYYY-MM-DD[ T]hh:mm:ss.ssssss
- case 27: // YYYY-MM-DD[ T]hh:mm:ss.sssssss
- case 28: // YYYY-MM-DD[ T]hh:mm:ss.ssssssss
- case 29: // YYYY-MM-DD[ T]hh:mm:ss.sssssssss
+ case 21: // YYYY-MM-DD[ T]hh:mm:ss.s
+ case 22: // YYYY-MM-DD[ T]hh:mm:ss.ss
+ case 23: // YYYY-MM-DD[ T]hh:mm:ss.sss
+ case 24: // YYYY-MM-DD[ T]hh:mm:ss.ssss
+ case 25: // YYYY-MM-DD[ T]hh:mm:ss.sssss
+ case 26: // YYYY-MM-DD[ T]hh:mm:ss.ssssss
+ case 27: // YYYY-MM-DD[ T]hh:mm:ss.sssssss
+ case 28: // YYYY-MM-DD[ T]hh:mm:ss.ssssssss
+ case 29: // YYYY-MM-DD[ T]hh:mm:ss.sssssssss
if (ARROW_PREDICT_FALSE(!detail::ParseHH_MM_SS(s + 11, &seconds_since_midnight))) {
return false;
}
@@ -645,13 +645,13 @@ static inline bool ParseTimestampISO8601(const char* s, size_t length,
return true;
}
- if (ARROW_PREDICT_FALSE(s[19] != '.')) {
- return false;
- }
-
+ if (ARROW_PREDICT_FALSE(s[19] != '.')) {
+ return false;
+ }
+
uint32_t subseconds = 0;
if (ARROW_PREDICT_FALSE(
- !detail::ParseSubSeconds(s + 20, length - 20, unit, &subseconds))) {
+ !detail::ParseSubSeconds(s + 20, length - 20, unit, &subseconds))) {
return false;
}
@@ -753,7 +753,7 @@ struct StringConverter<TIME_TYPE, enable_if_time<TIME_TYPE>> {
uint32_t subseconds_count = 0;
if (ARROW_PREDICT_FALSE(
- !detail::ParseSubSeconds(s + 9, length - 9, unit, &subseconds_count))) {
+ !detail::ParseSubSeconds(s + 9, length - 9, unit, &subseconds_count))) {
return false;
}
diff --git a/contrib/libs/apache/arrow/cpp/src/arrow/util/variant.h b/contrib/libs/apache/arrow/cpp/src/arrow/util/variant.h
index 8f8d23c2b76..b4b0d8f6f31 100644
--- a/contrib/libs/apache/arrow/cpp/src/arrow/util/variant.h
+++ b/contrib/libs/apache/arrow/cpp/src/arrow/util/variant.h
@@ -17,423 +17,423 @@
#pragma once
-#include <cstddef>
-#include <exception>
-#include <type_traits>
-#include <utility>
-
-#include "arrow/util/macros.h"
-#include "arrow/util/type_traits.h"
-
+#include <cstddef>
+#include <exception>
+#include <type_traits>
+#include <utility>
+
+#include "arrow/util/macros.h"
+#include "arrow/util/type_traits.h"
+
namespace arrow {
namespace util {
-/// \brief a std::variant-like discriminated union
-///
-/// Simplifications from std::variant:
-///
-/// - Strictly defaultable. The first type of T... should be nothrow default constructible
-/// and it will be used for default Variants.
-///
-/// - Never valueless_by_exception. std::variant supports a state outside those specified
-/// by T... to which it can return in the event that a constructor throws. If a Variant
-/// would become valueless_by_exception it will instead return to its default state.
-///
-/// - Strictly nothrow move constructible and assignable
-///
-/// - Less sophisticated type deduction. std::variant<bool, std::string>("hello") will
-/// intelligently construct std::string while Variant<bool, std::string>("hello") will
-/// construct bool.
-///
-/// - Either both copy constructible and assignable or neither (std::variant independently
-/// enables copy construction and copy assignment). Variant is copy constructible if
-/// each of T... is copy constructible and assignable.
-///
-/// - Slimmer interface; several members of std::variant are omitted.
-///
-/// - Throws no exceptions; if a bad_variant_access would be thrown Variant will instead
-/// segfault (nullptr dereference).
-///
-/// - Mutable visit takes a pointer instead of mutable reference or rvalue reference,
-/// which is more conformant with our code style.
-template <typename... T>
-class Variant;
-
-namespace detail {
-
-template <typename T, typename = void>
-struct is_equality_comparable : std::false_type {};
-
-template <typename T>
-struct is_equality_comparable<
- T, typename std::enable_if<std::is_convertible<
- decltype(std::declval<T>() == std::declval<T>()), bool>::value>::type>
- : std::true_type {};
-
-template <bool C, typename T, typename E>
-using conditional_t = typename std::conditional<C, T, E>::type;
-
-template <typename T>
-struct type_constant {
- using type = T;
-};
-
-template <typename...>
-struct first;
-
-template <typename H, typename... T>
-struct first<H, T...> {
- using type = H;
-};
-
-template <typename T>
-using decay_t = typename std::decay<T>::type;
-
-template <bool...>
-struct all : std::true_type {};
-
-template <bool H, bool... T>
-struct all<H, T...> : conditional_t<H, all<T...>, std::false_type> {};
-
-struct delete_copy_constructor {
- template <typename>
- struct type {
- type() = default;
- type(const type& other) = delete;
- type& operator=(const type& other) = delete;
- };
-};
-
-struct explicit_copy_constructor {
- template <typename Copyable>
- struct type {
- type() = default;
- type(const type& other) { static_cast<const Copyable&>(other).copy_to(this); }
- type& operator=(const type& other) {
- static_cast<Copyable*>(this)->destroy();
- static_cast<const Copyable&>(other).copy_to(this);
- return *this;
- }
- };
-};
-
-template <typename... T>
-struct VariantStorage {
- VariantStorage() = default;
- VariantStorage(const VariantStorage&) {}
- VariantStorage& operator=(const VariantStorage&) { return *this; }
- VariantStorage(VariantStorage&&) noexcept {}
- VariantStorage& operator=(VariantStorage&&) noexcept { return *this; }
- ~VariantStorage() {
- static_assert(offsetof(VariantStorage, data_) == 0,
- "(void*)&VariantStorage::data_ == (void*)this");
- }
-
- typename arrow::internal::aligned_union<0, T...>::type data_;
- uint8_t index_ = 0;
-};
-
-template <typename V, typename...>
-struct VariantImpl;
-
-template <typename... T>
-struct VariantImpl<Variant<T...>> : VariantStorage<T...> {
- static void index_of() noexcept {}
- void destroy() noexcept {}
- void move_to(...) noexcept {}
- void copy_to(...) const {}
-
- template <typename R, typename Visitor>
- [[noreturn]] R visit_const(Visitor&& /* visitor */) const {
- std::terminate();
- }
- template <typename R, typename Visitor>
- [[noreturn]] R visit_mutable(Visitor&& /* visitor */) {
- std::terminate();
- }
-};
-
-template <typename... M, typename H, typename... T>
-struct VariantImpl<Variant<M...>, H, T...> : VariantImpl<Variant<M...>, T...> {
- using VariantType = Variant<M...>;
- using Impl = VariantImpl<VariantType, T...>;
-
- static constexpr uint8_t kIndex = sizeof...(M) - sizeof...(T) - 1;
-
- VariantImpl() = default;
-
- using VariantImpl<VariantType, T...>::VariantImpl;
- using Impl::operator=;
- using Impl::index_of;
-
- explicit VariantImpl(H value) {
- new (this) H(std::move(value));
- this->index_ = kIndex;
- }
-
- VariantImpl& operator=(H value) {
- static_cast<VariantType*>(this)->destroy();
- new (this) H(std::move(value));
- this->index_ = kIndex;
- return *this;
- }
-
- H& cast_this() { return *reinterpret_cast<H*>(this); }
- const H& cast_this() const { return *reinterpret_cast<const H*>(this); }
-
- void move_to(VariantType* target) noexcept {
- if (this->index_ == kIndex) {
- new (target) H(std::move(cast_this()));
- target->index_ = kIndex;
- } else {
- Impl::move_to(target);
- }
- }
-
- // Templated to avoid instantiation in case H is not copy constructible
- template <typename Void>
- void copy_to(Void* generic_target) const {
- const auto target = static_cast<VariantType*>(generic_target);
- try {
- if (this->index_ == kIndex) {
- new (target) H(cast_this());
- target->index_ = kIndex;
- } else {
- Impl::copy_to(target);
- }
- } catch (...) {
- target->construct_default();
- throw;
- }
- }
-
- void destroy() noexcept {
- if (this->index_ == kIndex) {
- if (!std::is_trivially_destructible<H>::value) {
- cast_this().~H();
- }
- } else {
- Impl::destroy();
- }
- }
-
- static constexpr std::integral_constant<uint8_t, kIndex> index_of(
- const type_constant<H>&) {
- return {};
- }
-
- template <typename R, typename Visitor>
- R visit_const(Visitor&& visitor) const {
- if (this->index_ == kIndex) {
- return std::forward<Visitor>(visitor)(cast_this());
- }
- return Impl::template visit_const<R>(std::forward<Visitor>(visitor));
- }
-
- template <typename R, typename Visitor>
- R visit_mutable(Visitor&& visitor) {
- if (this->index_ == kIndex) {
- return std::forward<Visitor>(visitor)(&cast_this());
- }
- return Impl::template visit_mutable<R>(std::forward<Visitor>(visitor));
- }
-};
-
-} // namespace detail
-
-template <typename... T>
-class Variant : detail::VariantImpl<Variant<T...>, T...>,
- detail::conditional_t<
- detail::all<(std::is_copy_constructible<T>::value &&
- std::is_copy_assignable<T>::value)...>::value,
- detail::explicit_copy_constructor,
- detail::delete_copy_constructor>::template type<Variant<T...>> {
- template <typename U>
- static constexpr uint8_t index_of() {
- return Impl::index_of(detail::type_constant<U>{});
- }
-
- using Impl = detail::VariantImpl<Variant<T...>, T...>;
-
- public:
- using default_type = typename util::detail::first<T...>::type;
-
- Variant() noexcept { construct_default(); }
-
- Variant(const Variant& other) = default;
- Variant& operator=(const Variant& other) = default;
- Variant& operator=(Variant&& other) noexcept {
- this->destroy();
- other.move_to(this);
- return *this;
- }
-
- using Impl::Impl;
- using Impl::operator=;
-
- Variant(Variant&& other) noexcept { other.move_to(this); }
-
- ~Variant() {
- static_assert(offsetof(Variant, data_) == 0, "(void*)&Variant::data_ == (void*)this");
- this->destroy();
- }
-
- /// \brief Return the zero-based type index of the value held by the variant
- uint8_t index() const noexcept { return this->index_; }
-
- /// \brief Get a const pointer to the value held by the variant
- ///
- /// If the type given as template argument doesn't match, a null pointer is returned.
- template <typename U, uint8_t I = index_of<U>()>
- const U* get() const noexcept {
- return index() == I ? reinterpret_cast<const U*>(this) : NULLPTR;
- }
-
- /// \brief Get a pointer to the value held by the variant
- ///
- /// If the type given as template argument doesn't match, a null pointer is returned.
- template <typename U, uint8_t I = index_of<U>()>
- U* get() noexcept {
- return index() == I ? reinterpret_cast<U*>(this) : NULLPTR;
- }
-
- /// \brief Replace the value held by the variant
- ///
- /// The intended type must be given as a template argument.
- /// The value is constructed in-place using the given function arguments.
- template <typename U, typename... A, uint8_t I = index_of<U>()>
- void emplace(A&&... args) try {
- this->destroy();
- new (this) U(std::forward<A>(args)...);
- this->index_ = I;
- } catch (...) {
- construct_default();
- throw;
- }
-
- template <typename U, typename E, typename... A, uint8_t I = index_of<U>()>
- void emplace(std::initializer_list<E> il, A&&... args) try {
- this->destroy();
- new (this) U(il, std::forward<A>(args)...);
- this->index_ = I;
- } catch (...) {
- construct_default();
- throw;
- }
-
- /// \brief Swap with another variant's contents
- void swap(Variant& other) noexcept { // NOLINT google-runtime-references
- Variant tmp = std::move(other);
- other = std::move(*this);
- *this = std::move(tmp);
- }
-
- using Impl::visit_const;
- using Impl::visit_mutable;
-
- private:
- void construct_default() noexcept {
- new (this) default_type();
- this->index_ = 0;
- }
-
- template <typename V>
- friend struct detail::explicit_copy_constructor::type;
-
- template <typename V, typename...>
- friend struct detail::VariantImpl;
-};
-
-/// \brief Call polymorphic visitor on a const variant's value
-///
-/// The visitor will receive a const reference to the value held by the variant.
-/// It must define overloads for each possible variant type.
-/// The overloads should all return the same type (no attempt
-/// is made to find a generalized return type).
-template <typename Visitor, typename... T,
- typename R = decltype(std::declval<Visitor&&>()(
- std::declval<const typename Variant<T...>::default_type&>()))>
-R visit(Visitor&& visitor, const util::Variant<T...>& v) {
- return v.template visit_const<R>(std::forward<Visitor>(visitor));
-}
-
-/// \brief Call polymorphic visitor on a non-const variant's value
-///
-/// The visitor will receive a pointer to the value held by the variant.
-/// It must define overloads for each possible variant type.
-/// The overloads should all return the same type (no attempt
-/// is made to find a generalized return type).
-template <typename Visitor, typename... T,
- typename R = decltype(std::declval<Visitor&&>()(
- std::declval<typename Variant<T...>::default_type*>()))>
-R visit(Visitor&& visitor, util::Variant<T...>* v) {
- return v->template visit_mutable<R>(std::forward<Visitor>(visitor));
-}
-
-/// \brief Get a const reference to the value held by the variant
-///
-/// If the type given as template argument doesn't match, behavior is undefined
-/// (a null pointer will be dereferenced).
-template <typename U, typename... T>
-const U& get(const Variant<T...>& v) {
- return *v.template get<U>();
-}
-
-/// \brief Get a reference to the value held by the variant
-///
-/// If the type given as template argument doesn't match, behavior is undefined
-/// (a null pointer will be dereferenced).
-template <typename U, typename... T>
-U& get(Variant<T...>& v) {
- return *v.template get<U>();
-}
-
-/// \brief Get a const pointer to the value held by the variant
-///
-/// If the type given as template argument doesn't match, a nullptr is returned.
-template <typename U, typename... T>
-const U* get_if(const Variant<T...>* v) {
- return v->template get<U>();
-}
-
-/// \brief Get a pointer to the value held by the variant
-///
-/// If the type given as template argument doesn't match, a nullptr is returned.
-template <typename U, typename... T>
-U* get_if(Variant<T...>* v) {
- return v->template get<U>();
-}
-
-namespace detail {
-
-template <typename... T>
-struct VariantsEqual {
- template <typename U>
- bool operator()(const U& r) const {
- return get<U>(l_) == r;
- }
- const Variant<T...>& l_;
-};
-
-} // namespace detail
-
-template <typename... T, typename = typename std::enable_if<detail::all<
- detail::is_equality_comparable<T>::value...>::value>>
-bool operator==(const Variant<T...>& l, const Variant<T...>& r) {
- if (l.index() != r.index()) return false;
- return visit(detail::VariantsEqual<T...>{l}, r);
-}
-
-template <typename... T>
-auto operator!=(const Variant<T...>& l, const Variant<T...>& r) -> decltype(l == r) {
- return !(l == r);
-}
-
-/// \brief Return whether the variant holds a value of the given type
-template <typename U, typename... T>
-bool holds_alternative(const Variant<T...>& v) {
- return v.template get<U>();
-}
-
+/// \brief a std::variant-like discriminated union
+///
+/// Simplifications from std::variant:
+///
+/// - Strictly defaultable. The first type of T... should be nothrow default constructible
+/// and it will be used for default Variants.
+///
+/// - Never valueless_by_exception. std::variant supports a state outside those specified
+/// by T... to which it can return in the event that a constructor throws. If a Variant
+/// would become valueless_by_exception it will instead return to its default state.
+///
+/// - Strictly nothrow move constructible and assignable
+///
+/// - Less sophisticated type deduction. std::variant<bool, std::string>("hello") will
+/// intelligently construct std::string while Variant<bool, std::string>("hello") will
+/// construct bool.
+///
+/// - Either both copy constructible and assignable or neither (std::variant independently
+/// enables copy construction and copy assignment). Variant is copy constructible if
+/// each of T... is copy constructible and assignable.
+///
+/// - Slimmer interface; several members of std::variant are omitted.
+///
+/// - Throws no exceptions; if a bad_variant_access would be thrown Variant will instead
+/// segfault (nullptr dereference).
+///
+/// - Mutable visit takes a pointer instead of mutable reference or rvalue reference,
+/// which is more conformant with our code style.
+template <typename... T>
+class Variant;
+
+namespace detail {
+
+template <typename T, typename = void>
+struct is_equality_comparable : std::false_type {};
+
+template <typename T>
+struct is_equality_comparable<
+ T, typename std::enable_if<std::is_convertible<
+ decltype(std::declval<T>() == std::declval<T>()), bool>::value>::type>
+ : std::true_type {};
+
+template <bool C, typename T, typename E>
+using conditional_t = typename std::conditional<C, T, E>::type;
+
+template <typename T>
+struct type_constant {
+ using type = T;
+};
+
+template <typename...>
+struct first;
+
+template <typename H, typename... T>
+struct first<H, T...> {
+ using type = H;
+};
+
+template <typename T>
+using decay_t = typename std::decay<T>::type;
+
+template <bool...>
+struct all : std::true_type {};
+
+template <bool H, bool... T>
+struct all<H, T...> : conditional_t<H, all<T...>, std::false_type> {};
+
+struct delete_copy_constructor {
+ template <typename>
+ struct type {
+ type() = default;
+ type(const type& other) = delete;
+ type& operator=(const type& other) = delete;
+ };
+};
+
+struct explicit_copy_constructor {
+ template <typename Copyable>
+ struct type {
+ type() = default;
+ type(const type& other) { static_cast<const Copyable&>(other).copy_to(this); }
+ type& operator=(const type& other) {
+ static_cast<Copyable*>(this)->destroy();
+ static_cast<const Copyable&>(other).copy_to(this);
+ return *this;
+ }
+ };
+};
+
+template <typename... T>
+struct VariantStorage {
+ VariantStorage() = default;
+ VariantStorage(const VariantStorage&) {}
+ VariantStorage& operator=(const VariantStorage&) { return *this; }
+ VariantStorage(VariantStorage&&) noexcept {}
+ VariantStorage& operator=(VariantStorage&&) noexcept { return *this; }
+ ~VariantStorage() {
+ static_assert(offsetof(VariantStorage, data_) == 0,
+ "(void*)&VariantStorage::data_ == (void*)this");
+ }
+
+ typename arrow::internal::aligned_union<0, T...>::type data_;
+ uint8_t index_ = 0;
+};
+
+template <typename V, typename...>
+struct VariantImpl;
+
+template <typename... T>
+struct VariantImpl<Variant<T...>> : VariantStorage<T...> {
+ static void index_of() noexcept {}
+ void destroy() noexcept {}
+ void move_to(...) noexcept {}
+ void copy_to(...) const {}
+
+ template <typename R, typename Visitor>
+ [[noreturn]] R visit_const(Visitor&& /* visitor */) const {
+ std::terminate();
+ }
+ template <typename R, typename Visitor>
+ [[noreturn]] R visit_mutable(Visitor&& /* visitor */) {
+ std::terminate();
+ }
+};
+
+template <typename... M, typename H, typename... T>
+struct VariantImpl<Variant<M...>, H, T...> : VariantImpl<Variant<M...>, T...> {
+ using VariantType = Variant<M...>;
+ using Impl = VariantImpl<VariantType, T...>;
+
+ static constexpr uint8_t kIndex = sizeof...(M) - sizeof...(T) - 1;
+
+ VariantImpl() = default;
+
+ using VariantImpl<VariantType, T...>::VariantImpl;
+ using Impl::operator=;
+ using Impl::index_of;
+
+ explicit VariantImpl(H value) {
+ new (this) H(std::move(value));
+ this->index_ = kIndex;
+ }
+
+ VariantImpl& operator=(H value) {
+ static_cast<VariantType*>(this)->destroy();
+ new (this) H(std::move(value));
+ this->index_ = kIndex;
+ return *this;
+ }
+
+ H& cast_this() { return *reinterpret_cast<H*>(this); }
+ const H& cast_this() const { return *reinterpret_cast<const H*>(this); }
+
+ void move_to(VariantType* target) noexcept {
+ if (this->index_ == kIndex) {
+ new (target) H(std::move(cast_this()));
+ target->index_ = kIndex;
+ } else {
+ Impl::move_to(target);
+ }
+ }
+
+ // Templated to avoid instantiation in case H is not copy constructible
+ template <typename Void>
+ void copy_to(Void* generic_target) const {
+ const auto target = static_cast<VariantType*>(generic_target);
+ try {
+ if (this->index_ == kIndex) {
+ new (target) H(cast_this());
+ target->index_ = kIndex;
+ } else {
+ Impl::copy_to(target);
+ }
+ } catch (...) {
+ target->construct_default();
+ throw;
+ }
+ }
+
+ void destroy() noexcept {
+ if (this->index_ == kIndex) {
+ if (!std::is_trivially_destructible<H>::value) {
+ cast_this().~H();
+ }
+ } else {
+ Impl::destroy();
+ }
+ }
+
+ static constexpr std::integral_constant<uint8_t, kIndex> index_of(
+ const type_constant<H>&) {
+ return {};
+ }
+
+ template <typename R, typename Visitor>
+ R visit_const(Visitor&& visitor) const {
+ if (this->index_ == kIndex) {
+ return std::forward<Visitor>(visitor)(cast_this());
+ }
+ return Impl::template visit_const<R>(std::forward<Visitor>(visitor));
+ }
+
+ template <typename R, typename Visitor>
+ R visit_mutable(Visitor&& visitor) {
+ if (this->index_ == kIndex) {
+ return std::forward<Visitor>(visitor)(&cast_this());
+ }
+ return Impl::template visit_mutable<R>(std::forward<Visitor>(visitor));
+ }
+};
+
+} // namespace detail
+
+template <typename... T>
+class Variant : detail::VariantImpl<Variant<T...>, T...>,
+ detail::conditional_t<
+ detail::all<(std::is_copy_constructible<T>::value &&
+ std::is_copy_assignable<T>::value)...>::value,
+ detail::explicit_copy_constructor,
+ detail::delete_copy_constructor>::template type<Variant<T...>> {
+ template <typename U>
+ static constexpr uint8_t index_of() {
+ return Impl::index_of(detail::type_constant<U>{});
+ }
+
+ using Impl = detail::VariantImpl<Variant<T...>, T...>;
+
+ public:
+ using default_type = typename util::detail::first<T...>::type;
+
+ Variant() noexcept { construct_default(); }
+
+ Variant(const Variant& other) = default;
+ Variant& operator=(const Variant& other) = default;
+ Variant& operator=(Variant&& other) noexcept {
+ this->destroy();
+ other.move_to(this);
+ return *this;
+ }
+
+ using Impl::Impl;
+ using Impl::operator=;
+
+ Variant(Variant&& other) noexcept { other.move_to(this); }
+
+ ~Variant() {
+ static_assert(offsetof(Variant, data_) == 0, "(void*)&Variant::data_ == (void*)this");
+ this->destroy();
+ }
+
+ /// \brief Return the zero-based type index of the value held by the variant
+ uint8_t index() const noexcept { return this->index_; }
+
+ /// \brief Get a const pointer to the value held by the variant
+ ///
+ /// If the type given as template argument doesn't match, a null pointer is returned.
+ template <typename U, uint8_t I = index_of<U>()>
+ const U* get() const noexcept {
+ return index() == I ? reinterpret_cast<const U*>(this) : NULLPTR;
+ }
+
+ /// \brief Get a pointer to the value held by the variant
+ ///
+ /// If the type given as template argument doesn't match, a null pointer is returned.
+ template <typename U, uint8_t I = index_of<U>()>
+ U* get() noexcept {
+ return index() == I ? reinterpret_cast<U*>(this) : NULLPTR;
+ }
+
+ /// \brief Replace the value held by the variant
+ ///
+ /// The intended type must be given as a template argument.
+ /// The value is constructed in-place using the given function arguments.
+ template <typename U, typename... A, uint8_t I = index_of<U>()>
+ void emplace(A&&... args) try {
+ this->destroy();
+ new (this) U(std::forward<A>(args)...);
+ this->index_ = I;
+ } catch (...) {
+ construct_default();
+ throw;
+ }
+
+ template <typename U, typename E, typename... A, uint8_t I = index_of<U>()>
+ void emplace(std::initializer_list<E> il, A&&... args) try {
+ this->destroy();
+ new (this) U(il, std::forward<A>(args)...);
+ this->index_ = I;
+ } catch (...) {
+ construct_default();
+ throw;
+ }
+
+ /// \brief Swap with another variant's contents
+ void swap(Variant& other) noexcept { // NOLINT google-runtime-references
+ Variant tmp = std::move(other);
+ other = std::move(*this);
+ *this = std::move(tmp);
+ }
+
+ using Impl::visit_const;
+ using Impl::visit_mutable;
+
+ private:
+ void construct_default() noexcept {
+ new (this) default_type();
+ this->index_ = 0;
+ }
+
+ template <typename V>
+ friend struct detail::explicit_copy_constructor::type;
+
+ template <typename V, typename...>
+ friend struct detail::VariantImpl;
+};
+
+/// \brief Call polymorphic visitor on a const variant's value
+///
+/// The visitor will receive a const reference to the value held by the variant.
+/// It must define overloads for each possible variant type.
+/// The overloads should all return the same type (no attempt
+/// is made to find a generalized return type).
+template <typename Visitor, typename... T,
+ typename R = decltype(std::declval<Visitor&&>()(
+ std::declval<const typename Variant<T...>::default_type&>()))>
+R visit(Visitor&& visitor, const util::Variant<T...>& v) {
+ return v.template visit_const<R>(std::forward<Visitor>(visitor));
+}
+
+/// \brief Call polymorphic visitor on a non-const variant's value
+///
+/// The visitor will receive a pointer to the value held by the variant.
+/// It must define overloads for each possible variant type.
+/// The overloads should all return the same type (no attempt
+/// is made to find a generalized return type).
+template <typename Visitor, typename... T,
+ typename R = decltype(std::declval<Visitor&&>()(
+ std::declval<typename Variant<T...>::default_type*>()))>
+R visit(Visitor&& visitor, util::Variant<T...>* v) {
+ return v->template visit_mutable<R>(std::forward<Visitor>(visitor));
+}
+
+/// \brief Get a const reference to the value held by the variant
+///
+/// If the type given as template argument doesn't match, behavior is undefined
+/// (a null pointer will be dereferenced).
+template <typename U, typename... T>
+const U& get(const Variant<T...>& v) {
+ return *v.template get<U>();
+}
+
+/// \brief Get a reference to the value held by the variant
+///
+/// If the type given as template argument doesn't match, behavior is undefined
+/// (a null pointer will be dereferenced).
+template <typename U, typename... T>
+U& get(Variant<T...>& v) {
+ return *v.template get<U>();
+}
+
+/// \brief Get a const pointer to the value held by the variant
+///
+/// If the type given as template argument doesn't match, a nullptr is returned.
+template <typename U, typename... T>
+const U* get_if(const Variant<T...>* v) {
+ return v->template get<U>();
+}
+
+/// \brief Get a pointer to the value held by the variant
+///
+/// If the type given as template argument doesn't match, a nullptr is returned.
+template <typename U, typename... T>
+U* get_if(Variant<T...>* v) {
+ return v->template get<U>();
+}
+
+namespace detail {
+
+template <typename... T>
+struct VariantsEqual {
+ template <typename U>
+ bool operator()(const U& r) const {
+ return get<U>(l_) == r;
+ }
+ const Variant<T...>& l_;
+};
+
+} // namespace detail
+
+template <typename... T, typename = typename std::enable_if<detail::all<
+ detail::is_equality_comparable<T>::value...>::value>>
+bool operator==(const Variant<T...>& l, const Variant<T...>& r) {
+ if (l.index() != r.index()) return false;
+ return visit(detail::VariantsEqual<T...>{l}, r);
+}
+
+template <typename... T>
+auto operator!=(const Variant<T...>& l, const Variant<T...>& r) -> decltype(l == r) {
+ return !(l == r);
+}
+
+/// \brief Return whether the variant holds a value of the given type
+template <typename U, typename... T>
+bool holds_alternative(const Variant<T...>& v) {
+ return v.template get<U>();
+}
+
} // namespace util
} // namespace arrow
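
A short usage sketch of the Variant interface defined above (default construction of the first alternative, typed get, const visit), assuming only that the header is reachable as arrow/util/variant.h:

    #include <iostream>
    #include <string>

    #include "arrow/util/variant.h"

    struct Printer {
      // Overloads must cover every alternative and share one return type.
      void operator()(const bool& b) const { std::cout << "bool: " << b << "\n"; }
      void operator()(const std::string& s) const { std::cout << "string: " << s << "\n"; }
    };

    int main() {
      // The default state holds the first alternative (bool), per "strictly defaultable".
      arrow::util::Variant<bool, std::string> v;
      std::cout << static_cast<int>(v.index()) << "\n";  // 0

      // Assign an explicit std::string: the doc comment above warns that a raw
      // string literal would construct bool, unlike std::variant.
      v = std::string("hello");
      if (const std::string* s = v.get<std::string>()) {
        std::cout << *s << "\n";
      }

      arrow::util::visit(Printer{}, v);                              // string: hello
      std::cout << arrow::util::holds_alternative<bool>(v) << "\n";  // 0
      return 0;
    }
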
diff --git a/contrib/libs/apache/arrow/cpp/src/arrow/util/vector.h b/contrib/libs/apache/arrow/cpp/src/arrow/util/vector.h
index 8bb6f44a4d5..041bdb424a7 100644
--- a/contrib/libs/apache/arrow/cpp/src/arrow/util/vector.h
+++ b/contrib/libs/apache/arrow/cpp/src/arrow/util/vector.h
@@ -17,20 +17,20 @@
#pragma once
-#include <algorithm>
+#include <algorithm>
#include <utility>
#include <vector>
-#include "arrow/result.h"
-#include "arrow/util/algorithm.h"
-#include "arrow/util/functional.h"
+#include "arrow/result.h"
+#include "arrow/util/algorithm.h"
+#include "arrow/util/functional.h"
#include "arrow/util/logging.h"
namespace arrow {
namespace internal {
template <typename T>
-std::vector<T> DeleteVectorElement(const std::vector<T>& values, size_t index) {
+std::vector<T> DeleteVectorElement(const std::vector<T>& values, size_t index) {
DCHECK(!values.empty());
DCHECK_LT(index, values.size());
std::vector<T> out;
@@ -45,8 +45,8 @@ std::vector<T> DeleteVectorElement(const std::vector<T>& values, size_t index) {
}
template <typename T>
-std::vector<T> AddVectorElement(const std::vector<T>& values, size_t index,
- T new_element) {
+std::vector<T> AddVectorElement(const std::vector<T>& values, size_t index,
+ T new_element) {
DCHECK_LE(index, values.size());
std::vector<T> out;
out.reserve(values.size() + 1);
@@ -61,8 +61,8 @@ std::vector<T> AddVectorElement(const std::vector<T>& values, size_t index,
}
template <typename T>
-std::vector<T> ReplaceVectorElement(const std::vector<T>& values, size_t index,
- T new_element) {
+std::vector<T> ReplaceVectorElement(const std::vector<T>& values, size_t index,
+ T new_element) {
DCHECK_LE(index, values.size());
std::vector<T> out;
out.reserve(values.size());
@@ -76,97 +76,97 @@ std::vector<T> ReplaceVectorElement(const std::vector<T>& values, size_t index,
return out;
}
-template <typename T, typename Predicate>
-std::vector<T> FilterVector(std::vector<T> values, Predicate&& predicate) {
- auto new_end =
- std::remove_if(values.begin(), values.end(), std::forward<Predicate>(predicate));
- values.erase(new_end, values.end());
- return values;
-}
-
-template <typename Fn, typename From,
- typename To = decltype(std::declval<Fn>()(std::declval<From>()))>
-std::vector<To> MapVector(Fn&& map, const std::vector<From>& source) {
- std::vector<To> out;
- out.reserve(source.size());
- std::transform(source.begin(), source.end(), std::back_inserter(out),
- std::forward<Fn>(map));
- return out;
-}
-
-template <typename Fn, typename From,
- typename To = decltype(std::declval<Fn>()(std::declval<From>()))>
-std::vector<To> MapVector(Fn&& map, std::vector<From>&& source) {
- std::vector<To> out;
- out.reserve(source.size());
- std::transform(std::make_move_iterator(source.begin()),
- std::make_move_iterator(source.end()), std::back_inserter(out),
- std::forward<Fn>(map));
- return out;
-}
-
-/// \brief Like MapVector, but where the function can fail.
-template <typename Fn, typename From = internal::call_traits::argument_type<0, Fn>,
- typename To = typename internal::call_traits::return_type<Fn>::ValueType>
-Result<std::vector<To>> MaybeMapVector(Fn&& map, const std::vector<From>& source) {
- std::vector<To> out;
- out.reserve(source.size());
- ARROW_RETURN_NOT_OK(MaybeTransform(source.begin(), source.end(),
- std::back_inserter(out), std::forward<Fn>(map)));
- return std::move(out);
-}
-
-template <typename Fn, typename From = internal::call_traits::argument_type<0, Fn>,
- typename To = typename internal::call_traits::return_type<Fn>::ValueType>
-Result<std::vector<To>> MaybeMapVector(Fn&& map, std::vector<From>&& source) {
- std::vector<To> out;
- out.reserve(source.size());
- ARROW_RETURN_NOT_OK(MaybeTransform(std::make_move_iterator(source.begin()),
- std::make_move_iterator(source.end()),
- std::back_inserter(out), std::forward<Fn>(map)));
- return std::move(out);
-}
-
-template <typename T>
-std::vector<T> FlattenVectors(const std::vector<std::vector<T>>& vecs) {
- std::size_t sum = 0;
- for (const auto& vec : vecs) {
- sum += vec.size();
- }
- std::vector<T> out;
- out.reserve(sum);
- for (const auto& vec : vecs) {
- out.insert(out.end(), vec.begin(), vec.end());
- }
- return out;
-}
-
-template <typename T>
-Result<std::vector<T>> UnwrapOrRaise(std::vector<Result<T>>&& results) {
- std::vector<T> out;
- out.reserve(results.size());
- auto end = std::make_move_iterator(results.end());
- for (auto it = std::make_move_iterator(results.begin()); it != end; it++) {
- if (!it->ok()) {
- return it->status();
- }
- out.push_back(it->MoveValueUnsafe());
- }
- return std::move(out);
-}
-
-template <typename T>
-Result<std::vector<T>> UnwrapOrRaise(const std::vector<Result<T>>& results) {
- std::vector<T> out;
- out.reserve(results.size());
- for (const auto& result : results) {
- if (!result.ok()) {
- return result.status();
- }
- out.push_back(result.ValueUnsafe());
- }
- return std::move(out);
-}
-
+template <typename T, typename Predicate>
+std::vector<T> FilterVector(std::vector<T> values, Predicate&& predicate) {
+ auto new_end =
+ std::remove_if(values.begin(), values.end(), std::forward<Predicate>(predicate));
+ values.erase(new_end, values.end());
+ return values;
+}
+
+template <typename Fn, typename From,
+ typename To = decltype(std::declval<Fn>()(std::declval<From>()))>
+std::vector<To> MapVector(Fn&& map, const std::vector<From>& source) {
+ std::vector<To> out;
+ out.reserve(source.size());
+ std::transform(source.begin(), source.end(), std::back_inserter(out),
+ std::forward<Fn>(map));
+ return out;
+}
+
+template <typename Fn, typename From,
+ typename To = decltype(std::declval<Fn>()(std::declval<From>()))>
+std::vector<To> MapVector(Fn&& map, std::vector<From>&& source) {
+ std::vector<To> out;
+ out.reserve(source.size());
+ std::transform(std::make_move_iterator(source.begin()),
+ std::make_move_iterator(source.end()), std::back_inserter(out),
+ std::forward<Fn>(map));
+ return out;
+}
+
+/// \brief Like MapVector, but where the function can fail.
+template <typename Fn, typename From = internal::call_traits::argument_type<0, Fn>,
+ typename To = typename internal::call_traits::return_type<Fn>::ValueType>
+Result<std::vector<To>> MaybeMapVector(Fn&& map, const std::vector<From>& source) {
+ std::vector<To> out;
+ out.reserve(source.size());
+ ARROW_RETURN_NOT_OK(MaybeTransform(source.begin(), source.end(),
+ std::back_inserter(out), std::forward<Fn>(map)));
+ return std::move(out);
+}
+
+template <typename Fn, typename From = internal::call_traits::argument_type<0, Fn>,
+ typename To = typename internal::call_traits::return_type<Fn>::ValueType>
+Result<std::vector<To>> MaybeMapVector(Fn&& map, std::vector<From>&& source) {
+ std::vector<To> out;
+ out.reserve(source.size());
+ ARROW_RETURN_NOT_OK(MaybeTransform(std::make_move_iterator(source.begin()),
+ std::make_move_iterator(source.end()),
+ std::back_inserter(out), std::forward<Fn>(map)));
+ return std::move(out);
+}
+
+template <typename T>
+std::vector<T> FlattenVectors(const std::vector<std::vector<T>>& vecs) {
+ std::size_t sum = 0;
+ for (const auto& vec : vecs) {
+ sum += vec.size();
+ }
+ std::vector<T> out;
+ out.reserve(sum);
+ for (const auto& vec : vecs) {
+ out.insert(out.end(), vec.begin(), vec.end());
+ }
+ return out;
+}
+
+template <typename T>
+Result<std::vector<T>> UnwrapOrRaise(std::vector<Result<T>>&& results) {
+ std::vector<T> out;
+ out.reserve(results.size());
+ auto end = std::make_move_iterator(results.end());
+ for (auto it = std::make_move_iterator(results.begin()); it != end; it++) {
+ if (!it->ok()) {
+ return it->status();
+ }
+ out.push_back(it->MoveValueUnsafe());
+ }
+ return std::move(out);
+}
+
+template <typename T>
+Result<std::vector<T>> UnwrapOrRaise(const std::vector<Result<T>>& results) {
+ std::vector<T> out;
+ out.reserve(results.size());
+ for (const auto& result : results) {
+ if (!result.ok()) {
+ return result.status();
+ }
+ out.push_back(result.ValueUnsafe());
+ }
+ return std::move(out);
+}
+
} // namespace internal
} // namespace arrow
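
A hedged sketch of the helpers above (note these sit in arrow::internal, i.e. internal API). One subtlety worth showing: FilterVector forwards its predicate to std::remove_if, so the predicate selects elements to drop, not to keep:

    #include <iostream>
    #include <vector>

    #include "arrow/util/vector.h"

    int main() {
      std::vector<int> xs = {1, 2, 3, 4};

      // Drop odd values (the predicate marks elements for removal).
      std::vector<int> evens =
          arrow::internal::FilterVector(xs, [](int x) { return x % 2 != 0; });

      // MapVector deduces the output element type from the callable.
      std::vector<int> doubled =
          arrow::internal::MapVector([](int x) { return x * 2; }, evens);

      std::vector<int> flat = arrow::internal::FlattenVectors<int>({{1, 2}, {3}, {}});
      for (int x : doubled) std::cout << x << " ";  // 4 8
      for (int x : flat) std::cout << x << " ";     // 1 2 3
      std::cout << "\n";
      return 0;
    }
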
diff --git a/contrib/libs/apache/arrow/cpp/src/arrow/util/windows_fixup.h b/contrib/libs/apache/arrow/cpp/src/arrow/util/windows_fixup.h
index 6cb5a5e66be..2949ac4ab76 100644
--- a/contrib/libs/apache/arrow/cpp/src/arrow/util/windows_fixup.h
+++ b/contrib/libs/apache/arrow/cpp/src/arrow/util/windows_fixup.h
@@ -19,13 +19,13 @@
#ifdef _WIN32
-#ifdef max
-#undef max
-#endif
-#ifdef min
-#undef min
-#endif
-
+#ifdef max
+#undef max
+#endif
+#ifdef min
+#undef min
+#endif
+
// The Windows API defines macros from *File resolving to either
// *FileA or *FileW. Need to undo them.
#ifdef CopyFile
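
For context, a minimal illustration of the problem these #undefs address: without them (or NOMINMAX), <windows.h> defines function-like min/max macros that break std::max at the call site:

    #include <algorithm>

    // With the min/max macros active, `std::max(a, b)` fails to compile because
    // the preprocessor rewrites `max(`. Extra parentheses suppress function-like
    // macro expansion, which is the usual portable workaround.
    int Largest(int a, int b) {
      return (std::max)(a, b);
    }
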
diff --git a/contrib/libs/apache/arrow/cpp/src/arrow/vendored/ProducerConsumerQueue.h b/contrib/libs/apache/arrow/cpp/src/arrow/vendored/ProducerConsumerQueue.h
index bdd776bfc4c..0b7cfa1cb16 100644
--- a/contrib/libs/apache/arrow/cpp/src/arrow/vendored/ProducerConsumerQueue.h
+++ b/contrib/libs/apache/arrow/cpp/src/arrow/vendored/ProducerConsumerQueue.h
@@ -1,217 +1,217 @@
-// Vendored from git tag v2021.02.15.00
-
-/*
- * Copyright (c) Facebook, Inc. and its affiliates.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-// @author Bo Hu ([email protected])
-// @author Jordan DeLong ([email protected])
-
-// This file has been modified as part of Apache Arrow to conform to
-// Apache Arrow's coding conventions
-
-#pragma once
-
-#include <atomic>
-#include <cassert>
-#include <cstdlib>
-#include <memory>
-#include <stdexcept>
-#include <type_traits>
-#include <utility>
-
-namespace arrow_vendored {
-namespace folly {
-
-// Vendored from folly/Portability.h
-namespace {
-#if defined(__arm__)
-#define FOLLY_ARM 1
-#else
-#define FOLLY_ARM 0
-#endif
-
-#if defined(__s390x__)
-#define FOLLY_S390X 1
-#else
-#define FOLLY_S390X 0
-#endif
-
-constexpr bool kIsArchArm = FOLLY_ARM == 1;
-constexpr bool kIsArchS390X = FOLLY_S390X == 1;
-} // namespace
-
-// Vendored from folly/lang/Align.h
-namespace {
-
-constexpr std::size_t hardware_destructive_interference_size =
- (kIsArchArm || kIsArchS390X) ? 64 : 128;
-
-} // namespace
-
-/*
- * ProducerConsumerQueue is a one producer and one consumer queue
- * without locks.
- */
-template <class T>
-struct ProducerConsumerQueue {
- typedef T value_type;
-
- ProducerConsumerQueue(const ProducerConsumerQueue&) = delete;
- ProducerConsumerQueue& operator=(const ProducerConsumerQueue&) = delete;
-
- // size must be >= 2.
- //
- // Also, note that the number of usable slots in the queue at any
- // given time is actually (size-1), so if you start with an empty queue,
- // IsFull() will return true after size-1 insertions.
- explicit ProducerConsumerQueue(uint32_t size)
- : size_(size),
- records_(static_cast<T*>(std::malloc(sizeof(T) * size))),
- readIndex_(0),
- writeIndex_(0) {
- assert(size >= 2);
- if (!records_) {
- throw std::bad_alloc();
- }
- }
-
- ~ProducerConsumerQueue() {
- // We need to destruct anything that may still exist in our queue.
- // (No real synchronization needed at destructor time: only one
- // thread can be doing this.)
- if (!std::is_trivially_destructible<T>::value) {
- size_t readIndex = readIndex_;
- size_t endIndex = writeIndex_;
- while (readIndex != endIndex) {
- records_[readIndex].~T();
- if (++readIndex == size_) {
- readIndex = 0;
- }
- }
- }
-
- std::free(records_);
- }
-
- template <class... Args>
- bool Write(Args&&... recordArgs) {
- auto const currentWrite = writeIndex_.load(std::memory_order_relaxed);
- auto nextRecord = currentWrite + 1;
- if (nextRecord == size_) {
- nextRecord = 0;
- }
- if (nextRecord != readIndex_.load(std::memory_order_acquire)) {
- new (&records_[currentWrite]) T(std::forward<Args>(recordArgs)...);
- writeIndex_.store(nextRecord, std::memory_order_release);
- return true;
- }
-
- // queue is full
- return false;
- }
-
- // move the value at the front of the queue to given variable
- bool Read(T& record) {
- auto const currentRead = readIndex_.load(std::memory_order_relaxed);
- if (currentRead == writeIndex_.load(std::memory_order_acquire)) {
- // queue is empty
- return false;
- }
-
- auto nextRecord = currentRead + 1;
- if (nextRecord == size_) {
- nextRecord = 0;
- }
- record = std::move(records_[currentRead]);
- records_[currentRead].~T();
- readIndex_.store(nextRecord, std::memory_order_release);
- return true;
- }
-
- // pointer to the value at the front of the queue (for use in-place) or
- // nullptr if empty.
- T* FrontPtr() {
- auto const currentRead = readIndex_.load(std::memory_order_relaxed);
- if (currentRead == writeIndex_.load(std::memory_order_acquire)) {
- // queue is empty
- return nullptr;
- }
- return &records_[currentRead];
- }
-
- // queue must not be empty
- void PopFront() {
- auto const currentRead = readIndex_.load(std::memory_order_relaxed);
- assert(currentRead != writeIndex_.load(std::memory_order_acquire));
-
- auto nextRecord = currentRead + 1;
- if (nextRecord == size_) {
- nextRecord = 0;
- }
- records_[currentRead].~T();
- readIndex_.store(nextRecord, std::memory_order_release);
- }
-
- bool IsEmpty() const {
- return readIndex_.load(std::memory_order_acquire) ==
- writeIndex_.load(std::memory_order_acquire);
- }
-
- bool IsFull() const {
- auto nextRecord = writeIndex_.load(std::memory_order_acquire) + 1;
- if (nextRecord == size_) {
- nextRecord = 0;
- }
- if (nextRecord != readIndex_.load(std::memory_order_acquire)) {
- return false;
- }
- // queue is full
- return true;
- }
-
- // * If called by consumer, then true size may be more (because producer may
- // be adding items concurrently).
- // * If called by producer, then true size may be less (because consumer may
- // be removing items concurrently).
- // * It is undefined to call this from any other thread.
- size_t SizeGuess() const {
- int ret = writeIndex_.load(std::memory_order_acquire) -
- readIndex_.load(std::memory_order_acquire);
- if (ret < 0) {
- ret += size_;
- }
- return ret;
- }
-
- // maximum number of items in the queue.
- size_t capacity() const { return size_ - 1; }
-
- private:
- using AtomicIndex = std::atomic<unsigned int>;
-
- char pad0_[hardware_destructive_interference_size];
- const uint32_t size_;
- T* const records_;
-
- AtomicIndex readIndex_;
- char pad1_[hardware_destructive_interference_size - sizeof(AtomicIndex)];
- AtomicIndex writeIndex_;
-
- char pad2_[hardware_destructive_interference_size - sizeof(AtomicIndex)];
-};
-
-} // namespace folly
-} // namespace arrow_vendored
+// Vendored from git tag v2021.02.15.00
+
+/*
+ * Copyright (c) Facebook, Inc. and its affiliates.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// @author Bo Hu ([email protected])
+// @author Jordan DeLong ([email protected])
+
+// This file has been modified as part of Apache Arrow to conform to
+// Apache Arrow's coding conventions
+
+#pragma once
+
+#include <atomic>
+#include <cassert>
+#include <cstdlib>
+#include <memory>
+#include <stdexcept>
+#include <type_traits>
+#include <utility>
+
+namespace arrow_vendored {
+namespace folly {
+
+// Vendored from folly/Portability.h
+namespace {
+#if defined(__arm__)
+#define FOLLY_ARM 1
+#else
+#define FOLLY_ARM 0
+#endif
+
+#if defined(__s390x__)
+#define FOLLY_S390X 1
+#else
+#define FOLLY_S390X 0
+#endif
+
+constexpr bool kIsArchArm = FOLLY_ARM == 1;
+constexpr bool kIsArchS390X = FOLLY_S390X == 1;
+} // namespace
+
+// Vendored from folly/lang/Align.h
+namespace {
+
+constexpr std::size_t hardware_destructive_interference_size =
+ (kIsArchArm || kIsArchS390X) ? 64 : 128;
+
+} // namespace
+
+/*
+ * ProducerConsumerQueue is a one producer and one consumer queue
+ * without locks.
+ */
+template <class T>
+struct ProducerConsumerQueue {
+ typedef T value_type;
+
+ ProducerConsumerQueue(const ProducerConsumerQueue&) = delete;
+ ProducerConsumerQueue& operator=(const ProducerConsumerQueue&) = delete;
+
+ // size must be >= 2.
+ //
+ // Also, note that the number of usable slots in the queue at any
+ // given time is actually (size-1), so if you start with an empty queue,
+ // IsFull() will return true after size-1 insertions.
+ explicit ProducerConsumerQueue(uint32_t size)
+ : size_(size),
+ records_(static_cast<T*>(std::malloc(sizeof(T) * size))),
+ readIndex_(0),
+ writeIndex_(0) {
+ assert(size >= 2);
+ if (!records_) {
+ throw std::bad_alloc();
+ }
+ }
+
+ ~ProducerConsumerQueue() {
+ // We need to destruct anything that may still exist in our queue.
+ // (No real synchronization needed at destructor time: only one
+ // thread can be doing this.)
+ if (!std::is_trivially_destructible<T>::value) {
+ size_t readIndex = readIndex_;
+ size_t endIndex = writeIndex_;
+ while (readIndex != endIndex) {
+ records_[readIndex].~T();
+ if (++readIndex == size_) {
+ readIndex = 0;
+ }
+ }
+ }
+
+ std::free(records_);
+ }
+
+ template <class... Args>
+ bool Write(Args&&... recordArgs) {
+ auto const currentWrite = writeIndex_.load(std::memory_order_relaxed);
+ auto nextRecord = currentWrite + 1;
+ if (nextRecord == size_) {
+ nextRecord = 0;
+ }
+ if (nextRecord != readIndex_.load(std::memory_order_acquire)) {
+ new (&records_[currentWrite]) T(std::forward<Args>(recordArgs)...);
+ writeIndex_.store(nextRecord, std::memory_order_release);
+ return true;
+ }
+
+ // queue is full
+ return false;
+ }
+
+ // move the value at the front of the queue to given variable
+ bool Read(T& record) {
+ auto const currentRead = readIndex_.load(std::memory_order_relaxed);
+ if (currentRead == writeIndex_.load(std::memory_order_acquire)) {
+ // queue is empty
+ return false;
+ }
+
+ auto nextRecord = currentRead + 1;
+ if (nextRecord == size_) {
+ nextRecord = 0;
+ }
+ record = std::move(records_[currentRead]);
+ records_[currentRead].~T();
+ readIndex_.store(nextRecord, std::memory_order_release);
+ return true;
+ }
+
+ // pointer to the value at the front of the queue (for use in-place) or
+ // nullptr if empty.
+ T* FrontPtr() {
+ auto const currentRead = readIndex_.load(std::memory_order_relaxed);
+ if (currentRead == writeIndex_.load(std::memory_order_acquire)) {
+ // queue is empty
+ return nullptr;
+ }
+ return &records_[currentRead];
+ }
+
+ // queue must not be empty
+ void PopFront() {
+ auto const currentRead = readIndex_.load(std::memory_order_relaxed);
+ assert(currentRead != writeIndex_.load(std::memory_order_acquire));
+
+ auto nextRecord = currentRead + 1;
+ if (nextRecord == size_) {
+ nextRecord = 0;
+ }
+ records_[currentRead].~T();
+ readIndex_.store(nextRecord, std::memory_order_release);
+ }
+
+ bool IsEmpty() const {
+ return readIndex_.load(std::memory_order_acquire) ==
+ writeIndex_.load(std::memory_order_acquire);
+ }
+
+ bool IsFull() const {
+ auto nextRecord = writeIndex_.load(std::memory_order_acquire) + 1;
+ if (nextRecord == size_) {
+ nextRecord = 0;
+ }
+ if (nextRecord != readIndex_.load(std::memory_order_acquire)) {
+ return false;
+ }
+ // queue is full
+ return true;
+ }
+
+ // * If called by consumer, then true size may be more (because producer may
+ // be adding items concurrently).
+ // * If called by producer, then true size may be less (because consumer may
+ // be removing items concurrently).
+ // * It is undefined to call this from any other thread.
+ size_t SizeGuess() const {
+ int ret = writeIndex_.load(std::memory_order_acquire) -
+ readIndex_.load(std::memory_order_acquire);
+ if (ret < 0) {
+ ret += size_;
+ }
+ return ret;
+ }
+
+ // maximum number of items in the queue.
+ size_t capacity() const { return size_ - 1; }
+
+ private:
+ using AtomicIndex = std::atomic<unsigned int>;
+
+ char pad0_[hardware_destructive_interference_size];
+ const uint32_t size_;
+ T* const records_;
+
+ AtomicIndex readIndex_;
+ char pad1_[hardware_destructive_interference_size - sizeof(AtomicIndex)];
+ AtomicIndex writeIndex_;
+
+ char pad2_[hardware_destructive_interference_size - sizeof(AtomicIndex)];
+};
+
+} // namespace folly
+} // namespace arrow_vendored
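
A usage sketch of the queue above. Correctness requires exactly one producer thread calling Write and one consumer thread calling Read, and a queue constructed with size N holds at most N-1 items:

    #include <iostream>
    #include <thread>

    #include "arrow/vendored/ProducerConsumerQueue.h"

    int main() {
      // size == 9 gives capacity() == 8 usable slots.
      arrow_vendored::folly::ProducerConsumerQueue<int> q(9);

      std::thread producer([&q] {
        for (int i = 0; i < 100; ++i) {
          while (!q.Write(i)) {
            // Spin: the queue is full, wait for the consumer to drain a slot.
          }
        }
      });

      long long sum = 0;
      for (int received = 0; received < 100;) {
        int value;
        if (q.Read(value)) {  // false means the queue is currently empty
          sum += value;
          ++received;
        }
      }
      producer.join();
      std::cout << sum << std::endl;  // 4950
      return 0;
    }
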
diff --git a/contrib/libs/apache/arrow/cpp/src/arrow/vendored/portable-snippets/safe-math.h b/contrib/libs/apache/arrow/cpp/src/arrow/vendored/portable-snippets/safe-math.h
index 6430a57af29..7f6426ac765 100644
--- a/contrib/libs/apache/arrow/cpp/src/arrow/vendored/portable-snippets/safe-math.h
+++ b/contrib/libs/apache/arrow/cpp/src/arrow/vendored/portable-snippets/safe-math.h
@@ -58,8 +58,8 @@
# define PSNIP_SAFE__FUNCTION PSNIP_SAFE__COMPILER_ATTRIBUTES static PSNIP_SAFE__INLINE
#endif
-// !defined(__cplusplus) added for Solaris support
-#if !defined(__cplusplus) && defined(__STDC_VERSION__) && __STDC_VERSION__ >= 199901L
+// !defined(__cplusplus) added for Solaris support
+#if !defined(__cplusplus) && defined(__STDC_VERSION__) && __STDC_VERSION__ >= 199901L
# define psnip_safe_bool _Bool
#else
# define psnip_safe_bool int
diff --git a/contrib/libs/apache/arrow/cpp/src/arrow/visitor.cc b/contrib/libs/apache/arrow/cpp/src/arrow/visitor.cc
index 83d4de210d3..851785081c7 100644
--- a/contrib/libs/apache/arrow/cpp/src/arrow/visitor.cc
+++ b/contrib/libs/apache/arrow/cpp/src/arrow/visitor.cc
@@ -67,7 +67,7 @@ ARRAY_VISITOR_DEFAULT(SparseUnionArray)
ARRAY_VISITOR_DEFAULT(DenseUnionArray)
ARRAY_VISITOR_DEFAULT(DictionaryArray)
ARRAY_VISITOR_DEFAULT(Decimal128Array)
-ARRAY_VISITOR_DEFAULT(Decimal256Array)
+ARRAY_VISITOR_DEFAULT(Decimal256Array)
ARRAY_VISITOR_DEFAULT(ExtensionArray)
#undef ARRAY_VISITOR_DEFAULT
@@ -107,7 +107,7 @@ TYPE_VISITOR_DEFAULT(DayTimeIntervalType)
TYPE_VISITOR_DEFAULT(MonthIntervalType)
TYPE_VISITOR_DEFAULT(DurationType)
TYPE_VISITOR_DEFAULT(Decimal128Type)
-TYPE_VISITOR_DEFAULT(Decimal256Type)
+TYPE_VISITOR_DEFAULT(Decimal256Type)
TYPE_VISITOR_DEFAULT(ListType)
TYPE_VISITOR_DEFAULT(LargeListType)
TYPE_VISITOR_DEFAULT(MapType)
@@ -156,7 +156,7 @@ SCALAR_VISITOR_DEFAULT(DayTimeIntervalScalar)
SCALAR_VISITOR_DEFAULT(MonthIntervalScalar)
SCALAR_VISITOR_DEFAULT(DurationScalar)
SCALAR_VISITOR_DEFAULT(Decimal128Scalar)
-SCALAR_VISITOR_DEFAULT(Decimal256Scalar)
+SCALAR_VISITOR_DEFAULT(Decimal256Scalar)
SCALAR_VISITOR_DEFAULT(ListScalar)
SCALAR_VISITOR_DEFAULT(LargeListScalar)
SCALAR_VISITOR_DEFAULT(MapScalar)
diff --git a/contrib/libs/apache/arrow/cpp/src/arrow/visitor.h b/contrib/libs/apache/arrow/cpp/src/arrow/visitor.h
index fe49f51ce3d..0382e461199 100644
--- a/contrib/libs/apache/arrow/cpp/src/arrow/visitor.h
+++ b/contrib/libs/apache/arrow/cpp/src/arrow/visitor.h
@@ -54,7 +54,7 @@ class ARROW_EXPORT ArrayVisitor {
virtual Status Visit(const MonthIntervalArray& array);
virtual Status Visit(const DurationArray& array);
virtual Status Visit(const Decimal128Array& array);
- virtual Status Visit(const Decimal256Array& array);
+ virtual Status Visit(const Decimal256Array& array);
virtual Status Visit(const ListArray& array);
virtual Status Visit(const LargeListArray& array);
virtual Status Visit(const MapArray& array);
@@ -97,7 +97,7 @@ class ARROW_EXPORT TypeVisitor {
virtual Status Visit(const DayTimeIntervalType& type);
virtual Status Visit(const DurationType& type);
virtual Status Visit(const Decimal128Type& type);
- virtual Status Visit(const Decimal256Type& type);
+ virtual Status Visit(const Decimal256Type& type);
virtual Status Visit(const ListType& type);
virtual Status Visit(const LargeListType& type);
virtual Status Visit(const MapType& type);
@@ -140,7 +140,7 @@ class ARROW_EXPORT ScalarVisitor {
virtual Status Visit(const MonthIntervalScalar& scalar);
virtual Status Visit(const DurationScalar& scalar);
virtual Status Visit(const Decimal128Scalar& scalar);
- virtual Status Visit(const Decimal256Scalar& scalar);
+ virtual Status Visit(const Decimal256Scalar& scalar);
virtual Status Visit(const ListScalar& scalar);
virtual Status Visit(const LargeListScalar& scalar);
virtual Status Visit(const MapScalar& scalar);
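
The Decimal256 overloads added above slot into the standard visitor pattern. A hedged sketch of a subclass, assuming the upstream defaults (unhandled types return Status::NotImplemented) and the upstream Array::Accept(ArrayVisitor*) entry point:

    #include "arrow/api.h"
    #include "arrow/visitor.h"

    // Counts decimal-typed arrays; every other array type falls through to the
    // base class's default Visit.
    class DecimalCounter : public arrow::ArrayVisitor {
     public:
      arrow::Status Visit(const arrow::Decimal128Array& array) override {
        ++count_;
        return arrow::Status::OK();
      }
      arrow::Status Visit(const arrow::Decimal256Array& array) override {
        ++count_;
        return arrow::Status::OK();
      }
      int count_ = 0;
    };

    // Usage, for some std::shared_ptr<arrow::Array> arr:
    //   DecimalCounter counter;
    //   ARROW_RETURN_NOT_OK(arr->Accept(&counter));
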
diff --git a/contrib/libs/apache/arrow/cpp/src/arrow/visitor_inline.h b/contrib/libs/apache/arrow/cpp/src/arrow/visitor_inline.h
index 69d443b48cd..132c35aeaa1 100644
--- a/contrib/libs/apache/arrow/cpp/src/arrow/visitor_inline.h
+++ b/contrib/libs/apache/arrow/cpp/src/arrow/visitor_inline.h
@@ -68,7 +68,7 @@ namespace arrow {
ACTION(MonthInterval); \
ACTION(DayTimeInterval); \
ACTION(Decimal128); \
- ACTION(Decimal256); \
+ ACTION(Decimal256); \
ACTION(List); \
ACTION(LargeList); \
ACTION(Map); \
@@ -199,9 +199,9 @@ struct ArrayDataInlineVisitor<T, enable_if_base_binary<T>> {
using offset_type = typename T::offset_type;
constexpr char empty_value = 0;
- if (arr.length == 0) {
- return Status::OK();
- }
+ if (arr.length == 0) {
+ return Status::OK();
+ }
const offset_type* offsets = arr.GetValues<offset_type>(1);
const char* data;
if (!arr.buffers[2]) {
@@ -232,9 +232,9 @@ struct ArrayDataInlineVisitor<T, enable_if_base_binary<T>> {
using offset_type = typename T::offset_type;
constexpr uint8_t empty_value = 0;
- if (arr.length == 0) {
- return;
- }
+ if (arr.length == 0) {
+ return;
+ }
const offset_type* offsets = arr.GetValues<offset_type>(1);
const uint8_t* data;
if (!arr.buffers[2]) {
diff --git a/contrib/libs/apache/arrow/cpp/src/generated/parquet_constants.cpp b/contrib/libs/apache/arrow/cpp/src/generated/parquet_constants.cpp
index 5df101f4369..b1b4ce62673 100644
--- a/contrib/libs/apache/arrow/cpp/src/generated/parquet_constants.cpp
+++ b/contrib/libs/apache/arrow/cpp/src/generated/parquet_constants.cpp
@@ -1,17 +1,17 @@
-/**
- * Autogenerated by Thrift Compiler (0.13.0)
- *
- * DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING
- * @generated
- */
-#include "parquet_constants.h"
-
-namespace parquet { namespace format {
-
-const parquetConstants g_parquet_constants;
-
-parquetConstants::parquetConstants() {
-}
-
-}} // namespace
-
+/**
+ * Autogenerated by Thrift Compiler (0.13.0)
+ *
+ * DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING
+ * @generated
+ */
+#include "parquet_constants.h"
+
+namespace parquet { namespace format {
+
+const parquetConstants g_parquet_constants;
+
+parquetConstants::parquetConstants() {
+}
+
+}} // namespace
+
diff --git a/contrib/libs/apache/arrow/cpp/src/generated/parquet_constants.h b/contrib/libs/apache/arrow/cpp/src/generated/parquet_constants.h
index 98df7236774..1e288c7cd1f 100644
--- a/contrib/libs/apache/arrow/cpp/src/generated/parquet_constants.h
+++ b/contrib/libs/apache/arrow/cpp/src/generated/parquet_constants.h
@@ -1,24 +1,24 @@
-/**
- * Autogenerated by Thrift Compiler (0.13.0)
- *
- * DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING
- * @generated
- */
-#ifndef parquet_CONSTANTS_H
-#define parquet_CONSTANTS_H
-
-#include "parquet_types.h"
-
-namespace parquet { namespace format {
-
-class parquetConstants {
- public:
- parquetConstants();
-
-};
-
-extern const parquetConstants g_parquet_constants;
-
-}} // namespace
-
-#endif
+/**
+ * Autogenerated by Thrift Compiler (0.13.0)
+ *
+ * DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING
+ * @generated
+ */
+#ifndef parquet_CONSTANTS_H
+#define parquet_CONSTANTS_H
+
+#include "parquet_types.h"
+
+namespace parquet { namespace format {
+
+class parquetConstants {
+ public:
+ parquetConstants();
+
+};
+
+extern const parquetConstants g_parquet_constants;
+
+}} // namespace
+
+#endif
diff --git a/contrib/libs/apache/arrow/cpp/src/generated/parquet_types.cpp b/contrib/libs/apache/arrow/cpp/src/generated/parquet_types.cpp
index ca55e9ab0ae..7c7289658ee 100644
--- a/contrib/libs/apache/arrow/cpp/src/generated/parquet_types.cpp
+++ b/contrib/libs/apache/arrow/cpp/src/generated/parquet_types.cpp
@@ -1,7415 +1,7415 @@
-/**
- * Autogenerated by Thrift Compiler (0.13.0)
- *
- * DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING
- * @generated
- */
-#include "parquet_types.h"
-
-#include <algorithm>
-#include <ostream>
-
-#include <thrift/TToString.h>
-
-namespace parquet { namespace format {
-
-int _kTypeValues[] = {
- Type::BOOLEAN,
- Type::INT32,
- Type::INT64,
- Type::INT96,
- Type::FLOAT,
- Type::DOUBLE,
- Type::BYTE_ARRAY,
- Type::FIXED_LEN_BYTE_ARRAY
-};
-const char* _kTypeNames[] = {
- "BOOLEAN",
- "INT32",
- "INT64",
- "INT96",
- "FLOAT",
- "DOUBLE",
- "BYTE_ARRAY",
- "FIXED_LEN_BYTE_ARRAY"
-};
-const std::map<int, const char*> _Type_VALUES_TO_NAMES(::apache::thrift::TEnumIterator(8, _kTypeValues, _kTypeNames), ::apache::thrift::TEnumIterator(-1, NULL, NULL));
-
-std::ostream& operator<<(std::ostream& out, const Type::type& val) {
- std::map<int, const char*>::const_iterator it = _Type_VALUES_TO_NAMES.find(val);
- if (it != _Type_VALUES_TO_NAMES.end()) {
- out << it->second;
- } else {
- out << static_cast<int>(val);
- }
- return out;
-}
-
-std::string to_string(const Type::type& val) {
- std::map<int, const char*>::const_iterator it = _Type_VALUES_TO_NAMES.find(val);
- if (it != _Type_VALUES_TO_NAMES.end()) {
- return std::string(it->second);
- } else {
- return std::to_string(static_cast<int>(val));
- }
-}
-
-int _kConvertedTypeValues[] = {
- ConvertedType::UTF8,
- ConvertedType::MAP,
- ConvertedType::MAP_KEY_VALUE,
- ConvertedType::LIST,
- ConvertedType::ENUM,
- ConvertedType::DECIMAL,
- ConvertedType::DATE,
- ConvertedType::TIME_MILLIS,
- ConvertedType::TIME_MICROS,
- ConvertedType::TIMESTAMP_MILLIS,
- ConvertedType::TIMESTAMP_MICROS,
- ConvertedType::UINT_8,
- ConvertedType::UINT_16,
- ConvertedType::UINT_32,
- ConvertedType::UINT_64,
- ConvertedType::INT_8,
- ConvertedType::INT_16,
- ConvertedType::INT_32,
- ConvertedType::INT_64,
- ConvertedType::JSON,
- ConvertedType::BSON,
- ConvertedType::INTERVAL
-};
-const char* _kConvertedTypeNames[] = {
- "UTF8",
- "MAP",
- "MAP_KEY_VALUE",
- "LIST",
- "ENUM",
- "DECIMAL",
- "DATE",
- "TIME_MILLIS",
- "TIME_MICROS",
- "TIMESTAMP_MILLIS",
- "TIMESTAMP_MICROS",
- "UINT_8",
- "UINT_16",
- "UINT_32",
- "UINT_64",
- "INT_8",
- "INT_16",
- "INT_32",
- "INT_64",
- "JSON",
- "BSON",
- "INTERVAL"
-};
-const std::map<int, const char*> _ConvertedType_VALUES_TO_NAMES(::apache::thrift::TEnumIterator(22, _kConvertedTypeValues, _kConvertedTypeNames), ::apache::thrift::TEnumIterator(-1, NULL, NULL));
-
-std::ostream& operator<<(std::ostream& out, const ConvertedType::type& val) {
- std::map<int, const char*>::const_iterator it = _ConvertedType_VALUES_TO_NAMES.find(val);
- if (it != _ConvertedType_VALUES_TO_NAMES.end()) {
- out << it->second;
- } else {
- out << static_cast<int>(val);
- }
- return out;
-}
-
-std::string to_string(const ConvertedType::type& val) {
- std::map<int, const char*>::const_iterator it = _ConvertedType_VALUES_TO_NAMES.find(val);
- if (it != _ConvertedType_VALUES_TO_NAMES.end()) {
- return std::string(it->second);
- } else {
- return std::to_string(static_cast<int>(val));
- }
-}
-
-int _kFieldRepetitionTypeValues[] = {
- FieldRepetitionType::REQUIRED,
- FieldRepetitionType::OPTIONAL,
- FieldRepetitionType::REPEATED
-};
-const char* _kFieldRepetitionTypeNames[] = {
- "REQUIRED",
- "OPTIONAL",
- "REPEATED"
-};
-const std::map<int, const char*> _FieldRepetitionType_VALUES_TO_NAMES(::apache::thrift::TEnumIterator(3, _kFieldRepetitionTypeValues, _kFieldRepetitionTypeNames), ::apache::thrift::TEnumIterator(-1, NULL, NULL));
-
-std::ostream& operator<<(std::ostream& out, const FieldRepetitionType::type& val) {
- std::map<int, const char*>::const_iterator it = _FieldRepetitionType_VALUES_TO_NAMES.find(val);
- if (it != _FieldRepetitionType_VALUES_TO_NAMES.end()) {
- out << it->second;
- } else {
- out << static_cast<int>(val);
- }
- return out;
-}
-
-std::string to_string(const FieldRepetitionType::type& val) {
- std::map<int, const char*>::const_iterator it = _FieldRepetitionType_VALUES_TO_NAMES.find(val);
- if (it != _FieldRepetitionType_VALUES_TO_NAMES.end()) {
- return std::string(it->second);
- } else {
- return std::to_string(static_cast<int>(val));
- }
-}
-
-int _kEncodingValues[] = {
- Encoding::PLAIN,
- Encoding::PLAIN_DICTIONARY,
- Encoding::RLE,
- Encoding::BIT_PACKED,
- Encoding::DELTA_BINARY_PACKED,
- Encoding::DELTA_LENGTH_BYTE_ARRAY,
- Encoding::DELTA_BYTE_ARRAY,
- Encoding::RLE_DICTIONARY,
- Encoding::BYTE_STREAM_SPLIT
-};
-const char* _kEncodingNames[] = {
- "PLAIN",
- "PLAIN_DICTIONARY",
- "RLE",
- "BIT_PACKED",
- "DELTA_BINARY_PACKED",
- "DELTA_LENGTH_BYTE_ARRAY",
- "DELTA_BYTE_ARRAY",
- "RLE_DICTIONARY",
- "BYTE_STREAM_SPLIT"
-};
-const std::map<int, const char*> _Encoding_VALUES_TO_NAMES(::apache::thrift::TEnumIterator(9, _kEncodingValues, _kEncodingNames), ::apache::thrift::TEnumIterator(-1, NULL, NULL));
-
-std::ostream& operator<<(std::ostream& out, const Encoding::type& val) {
- std::map<int, const char*>::const_iterator it = _Encoding_VALUES_TO_NAMES.find(val);
- if (it != _Encoding_VALUES_TO_NAMES.end()) {
- out << it->second;
- } else {
- out << static_cast<int>(val);
- }
- return out;
-}
-
-std::string to_string(const Encoding::type& val) {
- std::map<int, const char*>::const_iterator it = _Encoding_VALUES_TO_NAMES.find(val);
- if (it != _Encoding_VALUES_TO_NAMES.end()) {
- return std::string(it->second);
- } else {
- return std::to_string(static_cast<int>(val));
- }
-}
-
-int _kCompressionCodecValues[] = {
- CompressionCodec::UNCOMPRESSED,
- CompressionCodec::SNAPPY,
- CompressionCodec::GZIP,
- CompressionCodec::LZO,
- CompressionCodec::BROTLI,
- CompressionCodec::LZ4,
- CompressionCodec::ZSTD,
- CompressionCodec::LZ4_RAW
-};
-const char* _kCompressionCodecNames[] = {
- "UNCOMPRESSED",
- "SNAPPY",
- "GZIP",
- "LZO",
- "BROTLI",
- "LZ4",
- "ZSTD",
- "LZ4_RAW"
-};
-const std::map<int, const char*> _CompressionCodec_VALUES_TO_NAMES(::apache::thrift::TEnumIterator(8, _kCompressionCodecValues, _kCompressionCodecNames), ::apache::thrift::TEnumIterator(-1, NULL, NULL));
-
-std::ostream& operator<<(std::ostream& out, const CompressionCodec::type& val) {
- std::map<int, const char*>::const_iterator it = _CompressionCodec_VALUES_TO_NAMES.find(val);
- if (it != _CompressionCodec_VALUES_TO_NAMES.end()) {
- out << it->second;
- } else {
- out << static_cast<int>(val);
- }
- return out;
-}
-
-std::string to_string(const CompressionCodec::type& val) {
- std::map<int, const char*>::const_iterator it = _CompressionCodec_VALUES_TO_NAMES.find(val);
- if (it != _CompressionCodec_VALUES_TO_NAMES.end()) {
- return std::string(it->second);
- } else {
- return std::to_string(static_cast<int>(val));
- }
-}
-
-int _kPageTypeValues[] = {
- PageType::DATA_PAGE,
- PageType::INDEX_PAGE,
- PageType::DICTIONARY_PAGE,
- PageType::DATA_PAGE_V2
-};
-const char* _kPageTypeNames[] = {
- "DATA_PAGE",
- "INDEX_PAGE",
- "DICTIONARY_PAGE",
- "DATA_PAGE_V2"
-};
-const std::map<int, const char*> _PageType_VALUES_TO_NAMES(::apache::thrift::TEnumIterator(4, _kPageTypeValues, _kPageTypeNames), ::apache::thrift::TEnumIterator(-1, NULL, NULL));
-
-std::ostream& operator<<(std::ostream& out, const PageType::type& val) {
- std::map<int, const char*>::const_iterator it = _PageType_VALUES_TO_NAMES.find(val);
- if (it != _PageType_VALUES_TO_NAMES.end()) {
- out << it->second;
- } else {
- out << static_cast<int>(val);
- }
- return out;
-}
-
-std::string to_string(const PageType::type& val) {
- std::map<int, const char*>::const_iterator it = _PageType_VALUES_TO_NAMES.find(val);
- if (it != _PageType_VALUES_TO_NAMES.end()) {
- return std::string(it->second);
- } else {
- return std::to_string(static_cast<int>(val));
- }
-}
-
-int _kBoundaryOrderValues[] = {
- BoundaryOrder::UNORDERED,
- BoundaryOrder::ASCENDING,
- BoundaryOrder::DESCENDING
-};
-const char* _kBoundaryOrderNames[] = {
- "UNORDERED",
- "ASCENDING",
- "DESCENDING"
-};
-const std::map<int, const char*> _BoundaryOrder_VALUES_TO_NAMES(::apache::thrift::TEnumIterator(3, _kBoundaryOrderValues, _kBoundaryOrderNames), ::apache::thrift::TEnumIterator(-1, NULL, NULL));
-
-std::ostream& operator<<(std::ostream& out, const BoundaryOrder::type& val) {
- std::map<int, const char*>::const_iterator it = _BoundaryOrder_VALUES_TO_NAMES.find(val);
- if (it != _BoundaryOrder_VALUES_TO_NAMES.end()) {
- out << it->second;
- } else {
- out << static_cast<int>(val);
- }
- return out;
-}
-
-std::string to_string(const BoundaryOrder::type& val) {
- std::map<int, const char*>::const_iterator it = _BoundaryOrder_VALUES_TO_NAMES.find(val);
- if (it != _BoundaryOrder_VALUES_TO_NAMES.end()) {
- return std::string(it->second);
- } else {
- return std::to_string(static_cast<int>(val));
- }
-}
-
-
-Statistics::~Statistics() noexcept {
-}
-
-
-void Statistics::__set_max(const std::string& val) {
- this->max = val;
-__isset.max = true;
-}
-
-void Statistics::__set_min(const std::string& val) {
- this->min = val;
-__isset.min = true;
-}
-
-void Statistics::__set_null_count(const int64_t val) {
- this->null_count = val;
-__isset.null_count = true;
-}
-
-void Statistics::__set_distinct_count(const int64_t val) {
- this->distinct_count = val;
-__isset.distinct_count = true;
-}
-
-void Statistics::__set_max_value(const std::string& val) {
- this->max_value = val;
-__isset.max_value = true;
-}
-
-void Statistics::__set_min_value(const std::string& val) {
- this->min_value = val;
-__isset.min_value = true;
-}
-std::ostream& operator<<(std::ostream& out, const Statistics& obj)
-{
- obj.printTo(out);
- return out;
-}
-
-
-uint32_t Statistics::read(::apache::thrift::protocol::TProtocol* iprot) {
-
- ::apache::thrift::protocol::TInputRecursionTracker tracker(*iprot);
- uint32_t xfer = 0;
- std::string fname;
- ::apache::thrift::protocol::TType ftype;
- int16_t fid;
-
- xfer += iprot->readStructBegin(fname);
-
- using ::apache::thrift::protocol::TProtocolException;
-
-
- while (true)
- {
- xfer += iprot->readFieldBegin(fname, ftype, fid);
- if (ftype == ::apache::thrift::protocol::T_STOP) {
- break;
- }
- switch (fid)
- {
- case 1:
- if (ftype == ::apache::thrift::protocol::T_STRING) {
- xfer += iprot->readBinary(this->max);
- this->__isset.max = true;
- } else {
- xfer += iprot->skip(ftype);
- }
- break;
- case 2:
- if (ftype == ::apache::thrift::protocol::T_STRING) {
- xfer += iprot->readBinary(this->min);
- this->__isset.min = true;
- } else {
- xfer += iprot->skip(ftype);
- }
- break;
- case 3:
- if (ftype == ::apache::thrift::protocol::T_I64) {
- xfer += iprot->readI64(this->null_count);
- this->__isset.null_count = true;
- } else {
- xfer += iprot->skip(ftype);
- }
- break;
- case 4:
- if (ftype == ::apache::thrift::protocol::T_I64) {
- xfer += iprot->readI64(this->distinct_count);
- this->__isset.distinct_count = true;
- } else {
- xfer += iprot->skip(ftype);
- }
- break;
- case 5:
- if (ftype == ::apache::thrift::protocol::T_STRING) {
- xfer += iprot->readBinary(this->max_value);
- this->__isset.max_value = true;
- } else {
- xfer += iprot->skip(ftype);
- }
- break;
- case 6:
- if (ftype == ::apache::thrift::protocol::T_STRING) {
- xfer += iprot->readBinary(this->min_value);
- this->__isset.min_value = true;
- } else {
- xfer += iprot->skip(ftype);
- }
- break;
- default:
- xfer += iprot->skip(ftype);
- break;
- }
- xfer += iprot->readFieldEnd();
- }
-
- xfer += iprot->readStructEnd();
-
- return xfer;
-}
-
-uint32_t Statistics::write(::apache::thrift::protocol::TProtocol* oprot) const {
- uint32_t xfer = 0;
- ::apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot);
- xfer += oprot->writeStructBegin("Statistics");
-
- if (this->__isset.max) {
- xfer += oprot->writeFieldBegin("max", ::apache::thrift::protocol::T_STRING, 1);
- xfer += oprot->writeBinary(this->max);
- xfer += oprot->writeFieldEnd();
- }
- if (this->__isset.min) {
- xfer += oprot->writeFieldBegin("min", ::apache::thrift::protocol::T_STRING, 2);
- xfer += oprot->writeBinary(this->min);
- xfer += oprot->writeFieldEnd();
- }
- if (this->__isset.null_count) {
- xfer += oprot->writeFieldBegin("null_count", ::apache::thrift::protocol::T_I64, 3);
- xfer += oprot->writeI64(this->null_count);
- xfer += oprot->writeFieldEnd();
- }
- if (this->__isset.distinct_count) {
- xfer += oprot->writeFieldBegin("distinct_count", ::apache::thrift::protocol::T_I64, 4);
- xfer += oprot->writeI64(this->distinct_count);
- xfer += oprot->writeFieldEnd();
- }
- if (this->__isset.max_value) {
- xfer += oprot->writeFieldBegin("max_value", ::apache::thrift::protocol::T_STRING, 5);
- xfer += oprot->writeBinary(this->max_value);
- xfer += oprot->writeFieldEnd();
- }
- if (this->__isset.min_value) {
- xfer += oprot->writeFieldBegin("min_value", ::apache::thrift::protocol::T_STRING, 6);
- xfer += oprot->writeBinary(this->min_value);
- xfer += oprot->writeFieldEnd();
- }
- xfer += oprot->writeFieldStop();
- xfer += oprot->writeStructEnd();
- return xfer;
-}
-
-void swap(Statistics &a, Statistics &b) {
- using ::std::swap;
- swap(a.max, b.max);
- swap(a.min, b.min);
- swap(a.null_count, b.null_count);
- swap(a.distinct_count, b.distinct_count);
- swap(a.max_value, b.max_value);
- swap(a.min_value, b.min_value);
- swap(a.__isset, b.__isset);
-}
-
-Statistics::Statistics(const Statistics& other0) {
- max = other0.max;
- min = other0.min;
- null_count = other0.null_count;
- distinct_count = other0.distinct_count;
- max_value = other0.max_value;
- min_value = other0.min_value;
- __isset = other0.__isset;
-}
-Statistics& Statistics::operator=(const Statistics& other1) {
- max = other1.max;
- min = other1.min;
- null_count = other1.null_count;
- distinct_count = other1.distinct_count;
- max_value = other1.max_value;
- min_value = other1.min_value;
- __isset = other1.__isset;
- return *this;
-}
-void Statistics::printTo(std::ostream& out) const {
- using ::apache::thrift::to_string;
- out << "Statistics(";
- out << "max="; (__isset.max ? (out << to_string(max)) : (out << "<null>"));
- out << ", " << "min="; (__isset.min ? (out << to_string(min)) : (out << "<null>"));
- out << ", " << "null_count="; (__isset.null_count ? (out << to_string(null_count)) : (out << "<null>"));
- out << ", " << "distinct_count="; (__isset.distinct_count ? (out << to_string(distinct_count)) : (out << "<null>"));
- out << ", " << "max_value="; (__isset.max_value ? (out << to_string(max_value)) : (out << "<null>"));
- out << ", " << "min_value="; (__isset.min_value ? (out << to_string(min_value)) : (out << "<null>"));
- out << ")";
-}
-
-
-StringType::~StringType() noexcept {
-}
-
-std::ostream& operator<<(std::ostream& out, const StringType& obj)
-{
- obj.printTo(out);
- return out;
-}
-
-
-uint32_t StringType::read(::apache::thrift::protocol::TProtocol* iprot) {
-
- ::apache::thrift::protocol::TInputRecursionTracker tracker(*iprot);
- uint32_t xfer = 0;
- std::string fname;
- ::apache::thrift::protocol::TType ftype;
- int16_t fid;
-
- xfer += iprot->readStructBegin(fname);
-
- using ::apache::thrift::protocol::TProtocolException;
-
-
- while (true)
- {
- xfer += iprot->readFieldBegin(fname, ftype, fid);
- if (ftype == ::apache::thrift::protocol::T_STOP) {
- break;
- }
- xfer += iprot->skip(ftype);
- xfer += iprot->readFieldEnd();
- }
-
- xfer += iprot->readStructEnd();
-
- return xfer;
-}
-
-uint32_t StringType::write(::apache::thrift::protocol::TProtocol* oprot) const {
- uint32_t xfer = 0;
- ::apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot);
- xfer += oprot->writeStructBegin("StringType");
-
- xfer += oprot->writeFieldStop();
- xfer += oprot->writeStructEnd();
- return xfer;
-}
-
-void swap(StringType &a, StringType &b) {
- using ::std::swap;
- (void) a;
- (void) b;
-}
-
-StringType::StringType(const StringType& other2) {
- (void) other2;
-}
-StringType& StringType::operator=(const StringType& other3) {
- (void) other3;
- return *this;
-}
-void StringType::printTo(std::ostream& out) const {
- using ::apache::thrift::to_string;
- out << "StringType(";
- out << ")";
-}
-
-
-UUIDType::~UUIDType() noexcept {
-}
-
-std::ostream& operator<<(std::ostream& out, const UUIDType& obj)
-{
- obj.printTo(out);
- return out;
-}
-
-
-uint32_t UUIDType::read(::apache::thrift::protocol::TProtocol* iprot) {
-
- ::apache::thrift::protocol::TInputRecursionTracker tracker(*iprot);
- uint32_t xfer = 0;
- std::string fname;
- ::apache::thrift::protocol::TType ftype;
- int16_t fid;
-
- xfer += iprot->readStructBegin(fname);
-
- using ::apache::thrift::protocol::TProtocolException;
-
-
- while (true)
- {
- xfer += iprot->readFieldBegin(fname, ftype, fid);
- if (ftype == ::apache::thrift::protocol::T_STOP) {
- break;
- }
- xfer += iprot->skip(ftype);
- xfer += iprot->readFieldEnd();
- }
-
- xfer += iprot->readStructEnd();
-
- return xfer;
-}
-
-uint32_t UUIDType::write(::apache::thrift::protocol::TProtocol* oprot) const {
- uint32_t xfer = 0;
- ::apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot);
- xfer += oprot->writeStructBegin("UUIDType");
-
- xfer += oprot->writeFieldStop();
- xfer += oprot->writeStructEnd();
- return xfer;
-}
-
-void swap(UUIDType &a, UUIDType &b) {
- using ::std::swap;
- (void) a;
- (void) b;
-}
-
-UUIDType::UUIDType(const UUIDType& other4) {
- (void) other4;
-}
-UUIDType& UUIDType::operator=(const UUIDType& other5) {
- (void) other5;
- return *this;
-}
-void UUIDType::printTo(std::ostream& out) const {
- using ::apache::thrift::to_string;
- out << "UUIDType(";
- out << ")";
-}
-
-
-MapType::~MapType() noexcept {
-}
-
-std::ostream& operator<<(std::ostream& out, const MapType& obj)
-{
- obj.printTo(out);
- return out;
-}
-
-
-uint32_t MapType::read(::apache::thrift::protocol::TProtocol* iprot) {
-
- ::apache::thrift::protocol::TInputRecursionTracker tracker(*iprot);
- uint32_t xfer = 0;
- std::string fname;
- ::apache::thrift::protocol::TType ftype;
- int16_t fid;
-
- xfer += iprot->readStructBegin(fname);
-
- using ::apache::thrift::protocol::TProtocolException;
-
-
- while (true)
- {
- xfer += iprot->readFieldBegin(fname, ftype, fid);
- if (ftype == ::apache::thrift::protocol::T_STOP) {
- break;
- }
- xfer += iprot->skip(ftype);
- xfer += iprot->readFieldEnd();
- }
-
- xfer += iprot->readStructEnd();
-
- return xfer;
-}
-
-uint32_t MapType::write(::apache::thrift::protocol::TProtocol* oprot) const {
- uint32_t xfer = 0;
- ::apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot);
- xfer += oprot->writeStructBegin("MapType");
-
- xfer += oprot->writeFieldStop();
- xfer += oprot->writeStructEnd();
- return xfer;
-}
-
-void swap(MapType &a, MapType &b) {
- using ::std::swap;
- (void) a;
- (void) b;
-}
-
-MapType::MapType(const MapType& other6) {
- (void) other6;
-}
-MapType& MapType::operator=(const MapType& other7) {
- (void) other7;
- return *this;
-}
-void MapType::printTo(std::ostream& out) const {
- using ::apache::thrift::to_string;
- out << "MapType(";
- out << ")";
-}
-
-
-ListType::~ListType() noexcept {
-}
-
-std::ostream& operator<<(std::ostream& out, const ListType& obj)
-{
- obj.printTo(out);
- return out;
-}
-
-
-uint32_t ListType::read(::apache::thrift::protocol::TProtocol* iprot) {
-
- ::apache::thrift::protocol::TInputRecursionTracker tracker(*iprot);
- uint32_t xfer = 0;
- std::string fname;
- ::apache::thrift::protocol::TType ftype;
- int16_t fid;
-
- xfer += iprot->readStructBegin(fname);
-
- using ::apache::thrift::protocol::TProtocolException;
-
-
- while (true)
- {
- xfer += iprot->readFieldBegin(fname, ftype, fid);
- if (ftype == ::apache::thrift::protocol::T_STOP) {
- break;
- }
- xfer += iprot->skip(ftype);
- xfer += iprot->readFieldEnd();
- }
-
- xfer += iprot->readStructEnd();
-
- return xfer;
-}
-
-uint32_t ListType::write(::apache::thrift::protocol::TProtocol* oprot) const {
- uint32_t xfer = 0;
- ::apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot);
- xfer += oprot->writeStructBegin("ListType");
-
- xfer += oprot->writeFieldStop();
- xfer += oprot->writeStructEnd();
- return xfer;
-}
-
-void swap(ListType &a, ListType &b) {
- using ::std::swap;
- (void) a;
- (void) b;
-}
-
-ListType::ListType(const ListType& other8) {
- (void) other8;
-}
-ListType& ListType::operator=(const ListType& other9) {
- (void) other9;
- return *this;
-}
-void ListType::printTo(std::ostream& out) const {
- using ::apache::thrift::to_string;
- out << "ListType(";
- out << ")";
-}
-
-
-EnumType::~EnumType() noexcept {
-}
-
-std::ostream& operator<<(std::ostream& out, const EnumType& obj)
-{
- obj.printTo(out);
- return out;
-}
-
-
-uint32_t EnumType::read(::apache::thrift::protocol::TProtocol* iprot) {
-
- ::apache::thrift::protocol::TInputRecursionTracker tracker(*iprot);
- uint32_t xfer = 0;
- std::string fname;
- ::apache::thrift::protocol::TType ftype;
- int16_t fid;
-
- xfer += iprot->readStructBegin(fname);
-
- using ::apache::thrift::protocol::TProtocolException;
-
-
- while (true)
- {
- xfer += iprot->readFieldBegin(fname, ftype, fid);
- if (ftype == ::apache::thrift::protocol::T_STOP) {
- break;
- }
- xfer += iprot->skip(ftype);
- xfer += iprot->readFieldEnd();
- }
-
- xfer += iprot->readStructEnd();
-
- return xfer;
-}
-
-uint32_t EnumType::write(::apache::thrift::protocol::TProtocol* oprot) const {
- uint32_t xfer = 0;
- ::apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot);
- xfer += oprot->writeStructBegin("EnumType");
-
- xfer += oprot->writeFieldStop();
- xfer += oprot->writeStructEnd();
- return xfer;
-}
-
-void swap(EnumType &a, EnumType &b) {
- using ::std::swap;
- (void) a;
- (void) b;
-}
-
-EnumType::EnumType(const EnumType& other10) {
- (void) other10;
-}
-EnumType& EnumType::operator=(const EnumType& other11) {
- (void) other11;
- return *this;
-}
-void EnumType::printTo(std::ostream& out) const {
- using ::apache::thrift::to_string;
- out << "EnumType(";
- out << ")";
-}
-
-
-DateType::~DateType() noexcept {
-}
-
-std::ostream& operator<<(std::ostream& out, const DateType& obj)
-{
- obj.printTo(out);
- return out;
-}
-
-
-uint32_t DateType::read(::apache::thrift::protocol::TProtocol* iprot) {
-
- ::apache::thrift::protocol::TInputRecursionTracker tracker(*iprot);
- uint32_t xfer = 0;
- std::string fname;
- ::apache::thrift::protocol::TType ftype;
- int16_t fid;
-
- xfer += iprot->readStructBegin(fname);
-
- using ::apache::thrift::protocol::TProtocolException;
-
-
- while (true)
- {
- xfer += iprot->readFieldBegin(fname, ftype, fid);
- if (ftype == ::apache::thrift::protocol::T_STOP) {
- break;
- }
- xfer += iprot->skip(ftype);
- xfer += iprot->readFieldEnd();
- }
-
- xfer += iprot->readStructEnd();
-
- return xfer;
-}
-
-uint32_t DateType::write(::apache::thrift::protocol::TProtocol* oprot) const {
- uint32_t xfer = 0;
- ::apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot);
- xfer += oprot->writeStructBegin("DateType");
-
- xfer += oprot->writeFieldStop();
- xfer += oprot->writeStructEnd();
- return xfer;
-}
-
-void swap(DateType &a, DateType &b) {
- using ::std::swap;
- (void) a;
- (void) b;
-}
-
-DateType::DateType(const DateType& other12) {
- (void) other12;
-}
-DateType& DateType::operator=(const DateType& other13) {
- (void) other13;
- return *this;
-}
-void DateType::printTo(std::ostream& out) const {
- using ::apache::thrift::to_string;
- out << "DateType(";
- out << ")";
-}
-
-
-NullType::~NullType() noexcept {
-}
-
-std::ostream& operator<<(std::ostream& out, const NullType& obj)
-{
- obj.printTo(out);
- return out;
-}
-
-
-uint32_t NullType::read(::apache::thrift::protocol::TProtocol* iprot) {
-
- ::apache::thrift::protocol::TInputRecursionTracker tracker(*iprot);
- uint32_t xfer = 0;
- std::string fname;
- ::apache::thrift::protocol::TType ftype;
- int16_t fid;
-
- xfer += iprot->readStructBegin(fname);
-
- using ::apache::thrift::protocol::TProtocolException;
-
-
- while (true)
- {
- xfer += iprot->readFieldBegin(fname, ftype, fid);
- if (ftype == ::apache::thrift::protocol::T_STOP) {
- break;
- }
- xfer += iprot->skip(ftype);
- xfer += iprot->readFieldEnd();
- }
-
- xfer += iprot->readStructEnd();
-
- return xfer;
-}
-
-uint32_t NullType::write(::apache::thrift::protocol::TProtocol* oprot) const {
- uint32_t xfer = 0;
- ::apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot);
- xfer += oprot->writeStructBegin("NullType");
-
- xfer += oprot->writeFieldStop();
- xfer += oprot->writeStructEnd();
- return xfer;
-}
-
-void swap(NullType &a, NullType &b) {
- using ::std::swap;
- (void) a;
- (void) b;
-}
-
-NullType::NullType(const NullType& other14) {
- (void) other14;
-}
-NullType& NullType::operator=(const NullType& other15) {
- (void) other15;
- return *this;
-}
-void NullType::printTo(std::ostream& out) const {
- using ::apache::thrift::to_string;
- out << "NullType(";
- out << ")";
-}
-
-
-DecimalType::~DecimalType() noexcept {
-}
-
-
-void DecimalType::__set_scale(const int32_t val) {
- this->scale = val;
-}
-
-void DecimalType::__set_precision(const int32_t val) {
- this->precision = val;
-}
-std::ostream& operator<<(std::ostream& out, const DecimalType& obj)
-{
- obj.printTo(out);
- return out;
-}
-
-
-uint32_t DecimalType::read(::apache::thrift::protocol::TProtocol* iprot) {
-
- ::apache::thrift::protocol::TInputRecursionTracker tracker(*iprot);
- uint32_t xfer = 0;
- std::string fname;
- ::apache::thrift::protocol::TType ftype;
- int16_t fid;
-
- xfer += iprot->readStructBegin(fname);
-
- using ::apache::thrift::protocol::TProtocolException;
-
- bool isset_scale = false;
- bool isset_precision = false;
-
- while (true)
- {
- xfer += iprot->readFieldBegin(fname, ftype, fid);
- if (ftype == ::apache::thrift::protocol::T_STOP) {
- break;
- }
- switch (fid)
- {
- case 1:
- if (ftype == ::apache::thrift::protocol::T_I32) {
- xfer += iprot->readI32(this->scale);
- isset_scale = true;
- } else {
- xfer += iprot->skip(ftype);
- }
- break;
- case 2:
- if (ftype == ::apache::thrift::protocol::T_I32) {
- xfer += iprot->readI32(this->precision);
- isset_precision = true;
- } else {
- xfer += iprot->skip(ftype);
- }
- break;
- default:
- xfer += iprot->skip(ftype);
- break;
- }
- xfer += iprot->readFieldEnd();
- }
-
- xfer += iprot->readStructEnd();
-
- if (!isset_scale)
- throw TProtocolException(TProtocolException::INVALID_DATA);
- if (!isset_precision)
- throw TProtocolException(TProtocolException::INVALID_DATA);
- return xfer;
-}
-
-uint32_t DecimalType::write(::apache::thrift::protocol::TProtocol* oprot) const {
- uint32_t xfer = 0;
- ::apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot);
- xfer += oprot->writeStructBegin("DecimalType");
-
- xfer += oprot->writeFieldBegin("scale", ::apache::thrift::protocol::T_I32, 1);
- xfer += oprot->writeI32(this->scale);
- xfer += oprot->writeFieldEnd();
-
- xfer += oprot->writeFieldBegin("precision", ::apache::thrift::protocol::T_I32, 2);
- xfer += oprot->writeI32(this->precision);
- xfer += oprot->writeFieldEnd();
-
- xfer += oprot->writeFieldStop();
- xfer += oprot->writeStructEnd();
- return xfer;
-}
-
-void swap(DecimalType &a, DecimalType &b) {
- using ::std::swap;
- swap(a.scale, b.scale);
- swap(a.precision, b.precision);
-}
-
-DecimalType::DecimalType(const DecimalType& other16) {
- scale = other16.scale;
- precision = other16.precision;
-}
-DecimalType& DecimalType::operator=(const DecimalType& other17) {
- scale = other17.scale;
- precision = other17.precision;
- return *this;
-}
-void DecimalType::printTo(std::ostream& out) const {
- using ::apache::thrift::to_string;
- out << "DecimalType(";
- out << "scale=" << to_string(scale);
- out << ", " << "precision=" << to_string(precision);
- out << ")";
-}
-
-
-MilliSeconds::~MilliSeconds() noexcept {
-}
-
-std::ostream& operator<<(std::ostream& out, const MilliSeconds& obj)
-{
- obj.printTo(out);
- return out;
-}
-
-
-uint32_t MilliSeconds::read(::apache::thrift::protocol::TProtocol* iprot) {
-
- ::apache::thrift::protocol::TInputRecursionTracker tracker(*iprot);
- uint32_t xfer = 0;
- std::string fname;
- ::apache::thrift::protocol::TType ftype;
- int16_t fid;
-
- xfer += iprot->readStructBegin(fname);
-
- using ::apache::thrift::protocol::TProtocolException;
-
-
- while (true)
- {
- xfer += iprot->readFieldBegin(fname, ftype, fid);
- if (ftype == ::apache::thrift::protocol::T_STOP) {
- break;
- }
- xfer += iprot->skip(ftype);
- xfer += iprot->readFieldEnd();
- }
-
- xfer += iprot->readStructEnd();
-
- return xfer;
-}
-
-uint32_t MilliSeconds::write(::apache::thrift::protocol::TProtocol* oprot) const {
- uint32_t xfer = 0;
- ::apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot);
- xfer += oprot->writeStructBegin("MilliSeconds");
-
- xfer += oprot->writeFieldStop();
- xfer += oprot->writeStructEnd();
- return xfer;
-}
-
-void swap(MilliSeconds &a, MilliSeconds &b) {
- using ::std::swap;
- (void) a;
- (void) b;
-}
-
-MilliSeconds::MilliSeconds(const MilliSeconds& other18) {
- (void) other18;
-}
-MilliSeconds& MilliSeconds::operator=(const MilliSeconds& other19) {
- (void) other19;
- return *this;
-}
-void MilliSeconds::printTo(std::ostream& out) const {
- using ::apache::thrift::to_string;
- out << "MilliSeconds(";
- out << ")";
-}
-
-
-MicroSeconds::~MicroSeconds() noexcept {
-}
-
-std::ostream& operator<<(std::ostream& out, const MicroSeconds& obj)
-{
- obj.printTo(out);
- return out;
-}
-
-
-uint32_t MicroSeconds::read(::apache::thrift::protocol::TProtocol* iprot) {
-
- ::apache::thrift::protocol::TInputRecursionTracker tracker(*iprot);
- uint32_t xfer = 0;
- std::string fname;
- ::apache::thrift::protocol::TType ftype;
- int16_t fid;
-
- xfer += iprot->readStructBegin(fname);
-
- using ::apache::thrift::protocol::TProtocolException;
-
-
- while (true)
- {
- xfer += iprot->readFieldBegin(fname, ftype, fid);
- if (ftype == ::apache::thrift::protocol::T_STOP) {
- break;
- }
- xfer += iprot->skip(ftype);
- xfer += iprot->readFieldEnd();
- }
-
- xfer += iprot->readStructEnd();
-
- return xfer;
-}
-
-uint32_t MicroSeconds::write(::apache::thrift::protocol::TProtocol* oprot) const {
- uint32_t xfer = 0;
- ::apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot);
- xfer += oprot->writeStructBegin("MicroSeconds");
-
- xfer += oprot->writeFieldStop();
- xfer += oprot->writeStructEnd();
- return xfer;
-}
-
-void swap(MicroSeconds &a, MicroSeconds &b) {
- using ::std::swap;
- (void) a;
- (void) b;
-}
-
-MicroSeconds::MicroSeconds(const MicroSeconds& other20) {
- (void) other20;
-}
-MicroSeconds& MicroSeconds::operator=(const MicroSeconds& other21) {
- (void) other21;
- return *this;
-}
-void MicroSeconds::printTo(std::ostream& out) const {
- using ::apache::thrift::to_string;
- out << "MicroSeconds(";
- out << ")";
-}
-
-
-NanoSeconds::~NanoSeconds() noexcept {
-}
-
-std::ostream& operator<<(std::ostream& out, const NanoSeconds& obj)
-{
- obj.printTo(out);
- return out;
-}
-
-
-uint32_t NanoSeconds::read(::apache::thrift::protocol::TProtocol* iprot) {
-
- ::apache::thrift::protocol::TInputRecursionTracker tracker(*iprot);
- uint32_t xfer = 0;
- std::string fname;
- ::apache::thrift::protocol::TType ftype;
- int16_t fid;
-
- xfer += iprot->readStructBegin(fname);
-
- using ::apache::thrift::protocol::TProtocolException;
-
-
- while (true)
- {
- xfer += iprot->readFieldBegin(fname, ftype, fid);
- if (ftype == ::apache::thrift::protocol::T_STOP) {
- break;
- }
- xfer += iprot->skip(ftype);
- xfer += iprot->readFieldEnd();
- }
-
- xfer += iprot->readStructEnd();
-
- return xfer;
-}
-
-uint32_t NanoSeconds::write(::apache::thrift::protocol::TProtocol* oprot) const {
- uint32_t xfer = 0;
- ::apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot);
- xfer += oprot->writeStructBegin("NanoSeconds");
-
- xfer += oprot->writeFieldStop();
- xfer += oprot->writeStructEnd();
- return xfer;
-}
-
-void swap(NanoSeconds &a, NanoSeconds &b) {
- using ::std::swap;
- (void) a;
- (void) b;
-}
-
-NanoSeconds::NanoSeconds(const NanoSeconds& other22) {
- (void) other22;
-}
-NanoSeconds& NanoSeconds::operator=(const NanoSeconds& other23) {
- (void) other23;
- return *this;
-}
-void NanoSeconds::printTo(std::ostream& out) const {
- using ::apache::thrift::to_string;
- out << "NanoSeconds(";
- out << ")";
-}
-
-
-TimeUnit::~TimeUnit() noexcept {
-}
-
-
-void TimeUnit::__set_MILLIS(const MilliSeconds& val) {
- this->MILLIS = val;
-__isset.MILLIS = true;
-}
-
-void TimeUnit::__set_MICROS(const MicroSeconds& val) {
- this->MICROS = val;
-__isset.MICROS = true;
-}
-
-void TimeUnit::__set_NANOS(const NanoSeconds& val) {
- this->NANOS = val;
-__isset.NANOS = true;
-}
-std::ostream& operator<<(std::ostream& out, const TimeUnit& obj)
-{
- obj.printTo(out);
- return out;
-}
-
-
-uint32_t TimeUnit::read(::apache::thrift::protocol::TProtocol* iprot) {
-
- ::apache::thrift::protocol::TInputRecursionTracker tracker(*iprot);
- uint32_t xfer = 0;
- std::string fname;
- ::apache::thrift::protocol::TType ftype;
- int16_t fid;
-
- xfer += iprot->readStructBegin(fname);
-
- using ::apache::thrift::protocol::TProtocolException;
-
-
- while (true)
- {
- xfer += iprot->readFieldBegin(fname, ftype, fid);
- if (ftype == ::apache::thrift::protocol::T_STOP) {
- break;
- }
- switch (fid)
- {
- case 1:
- if (ftype == ::apache::thrift::protocol::T_STRUCT) {
- xfer += this->MILLIS.read(iprot);
- this->__isset.MILLIS = true;
- } else {
- xfer += iprot->skip(ftype);
- }
- break;
- case 2:
- if (ftype == ::apache::thrift::protocol::T_STRUCT) {
- xfer += this->MICROS.read(iprot);
- this->__isset.MICROS = true;
- } else {
- xfer += iprot->skip(ftype);
- }
- break;
- case 3:
- if (ftype == ::apache::thrift::protocol::T_STRUCT) {
- xfer += this->NANOS.read(iprot);
- this->__isset.NANOS = true;
- } else {
- xfer += iprot->skip(ftype);
- }
- break;
- default:
- xfer += iprot->skip(ftype);
- break;
- }
- xfer += iprot->readFieldEnd();
- }
-
- xfer += iprot->readStructEnd();
-
- return xfer;
-}
-
-uint32_t TimeUnit::write(::apache::thrift::protocol::TProtocol* oprot) const {
- uint32_t xfer = 0;
- ::apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot);
- xfer += oprot->writeStructBegin("TimeUnit");
-
- if (this->__isset.MILLIS) {
- xfer += oprot->writeFieldBegin("MILLIS", ::apache::thrift::protocol::T_STRUCT, 1);
- xfer += this->MILLIS.write(oprot);
- xfer += oprot->writeFieldEnd();
- }
- if (this->__isset.MICROS) {
- xfer += oprot->writeFieldBegin("MICROS", ::apache::thrift::protocol::T_STRUCT, 2);
- xfer += this->MICROS.write(oprot);
- xfer += oprot->writeFieldEnd();
- }
- if (this->__isset.NANOS) {
- xfer += oprot->writeFieldBegin("NANOS", ::apache::thrift::protocol::T_STRUCT, 3);
- xfer += this->NANOS.write(oprot);
- xfer += oprot->writeFieldEnd();
- }
- xfer += oprot->writeFieldStop();
- xfer += oprot->writeStructEnd();
- return xfer;
-}
-
-void swap(TimeUnit &a, TimeUnit &b) {
- using ::std::swap;
- swap(a.MILLIS, b.MILLIS);
- swap(a.MICROS, b.MICROS);
- swap(a.NANOS, b.NANOS);
- swap(a.__isset, b.__isset);
-}
-
-TimeUnit::TimeUnit(const TimeUnit& other24) {
- MILLIS = other24.MILLIS;
- MICROS = other24.MICROS;
- NANOS = other24.NANOS;
- __isset = other24.__isset;
-}
-TimeUnit& TimeUnit::operator=(const TimeUnit& other25) {
- MILLIS = other25.MILLIS;
- MICROS = other25.MICROS;
- NANOS = other25.NANOS;
- __isset = other25.__isset;
- return *this;
-}
-void TimeUnit::printTo(std::ostream& out) const {
- using ::apache::thrift::to_string;
- out << "TimeUnit(";
- out << "MILLIS="; (__isset.MILLIS ? (out << to_string(MILLIS)) : (out << "<null>"));
- out << ", " << "MICROS="; (__isset.MICROS ? (out << to_string(MICROS)) : (out << "<null>"));
- out << ", " << "NANOS="; (__isset.NANOS ? (out << to_string(NANOS)) : (out << "<null>"));
- out << ")";
-}
-
-
-TimestampType::~TimestampType() noexcept {
-}
-
-
-void TimestampType::__set_isAdjustedToUTC(const bool val) {
- this->isAdjustedToUTC = val;
-}
-
-void TimestampType::__set_unit(const TimeUnit& val) {
- this->unit = val;
-}
-std::ostream& operator<<(std::ostream& out, const TimestampType& obj)
-{
- obj.printTo(out);
- return out;
-}
-
-
-uint32_t TimestampType::read(::apache::thrift::protocol::TProtocol* iprot) {
-
- ::apache::thrift::protocol::TInputRecursionTracker tracker(*iprot);
- uint32_t xfer = 0;
- std::string fname;
- ::apache::thrift::protocol::TType ftype;
- int16_t fid;
-
- xfer += iprot->readStructBegin(fname);
-
- using ::apache::thrift::protocol::TProtocolException;
-
- bool isset_isAdjustedToUTC = false;
- bool isset_unit = false;
-
- while (true)
- {
- xfer += iprot->readFieldBegin(fname, ftype, fid);
- if (ftype == ::apache::thrift::protocol::T_STOP) {
- break;
- }
- switch (fid)
- {
- case 1:
- if (ftype == ::apache::thrift::protocol::T_BOOL) {
- xfer += iprot->readBool(this->isAdjustedToUTC);
- isset_isAdjustedToUTC = true;
- } else {
- xfer += iprot->skip(ftype);
- }
- break;
- case 2:
- if (ftype == ::apache::thrift::protocol::T_STRUCT) {
- xfer += this->unit.read(iprot);
- isset_unit = true;
- } else {
- xfer += iprot->skip(ftype);
- }
- break;
- default:
- xfer += iprot->skip(ftype);
- break;
- }
- xfer += iprot->readFieldEnd();
- }
-
- xfer += iprot->readStructEnd();
-
- if (!isset_isAdjustedToUTC)
- throw TProtocolException(TProtocolException::INVALID_DATA);
- if (!isset_unit)
- throw TProtocolException(TProtocolException::INVALID_DATA);
- return xfer;
-}
-
-uint32_t TimestampType::write(::apache::thrift::protocol::TProtocol* oprot) const {
- uint32_t xfer = 0;
- ::apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot);
- xfer += oprot->writeStructBegin("TimestampType");
-
- xfer += oprot->writeFieldBegin("isAdjustedToUTC", ::apache::thrift::protocol::T_BOOL, 1);
- xfer += oprot->writeBool(this->isAdjustedToUTC);
- xfer += oprot->writeFieldEnd();
-
- xfer += oprot->writeFieldBegin("unit", ::apache::thrift::protocol::T_STRUCT, 2);
- xfer += this->unit.write(oprot);
- xfer += oprot->writeFieldEnd();
-
- xfer += oprot->writeFieldStop();
- xfer += oprot->writeStructEnd();
- return xfer;
-}
-
-void swap(TimestampType &a, TimestampType &b) {
- using ::std::swap;
- swap(a.isAdjustedToUTC, b.isAdjustedToUTC);
- swap(a.unit, b.unit);
-}
-
-TimestampType::TimestampType(const TimestampType& other26) {
- isAdjustedToUTC = other26.isAdjustedToUTC;
- unit = other26.unit;
-}
-TimestampType& TimestampType::operator=(const TimestampType& other27) {
- isAdjustedToUTC = other27.isAdjustedToUTC;
- unit = other27.unit;
- return *this;
-}
-void TimestampType::printTo(std::ostream& out) const {
- using ::apache::thrift::to_string;
- out << "TimestampType(";
- out << "isAdjustedToUTC=" << to_string(isAdjustedToUTC);
- out << ", " << "unit=" << to_string(unit);
- out << ")";
-}
-
-
-TimeType::~TimeType() noexcept {
-}
-
-
-void TimeType::__set_isAdjustedToUTC(const bool val) {
- this->isAdjustedToUTC = val;
-}
-
-void TimeType::__set_unit(const TimeUnit& val) {
- this->unit = val;
-}
-std::ostream& operator<<(std::ostream& out, const TimeType& obj)
-{
- obj.printTo(out);
- return out;
-}
-
-
-uint32_t TimeType::read(::apache::thrift::protocol::TProtocol* iprot) {
-
- ::apache::thrift::protocol::TInputRecursionTracker tracker(*iprot);
- uint32_t xfer = 0;
- std::string fname;
- ::apache::thrift::protocol::TType ftype;
- int16_t fid;
-
- xfer += iprot->readStructBegin(fname);
-
- using ::apache::thrift::protocol::TProtocolException;
-
- bool isset_isAdjustedToUTC = false;
- bool isset_unit = false;
-
- while (true)
- {
- xfer += iprot->readFieldBegin(fname, ftype, fid);
- if (ftype == ::apache::thrift::protocol::T_STOP) {
- break;
- }
- switch (fid)
- {
- case 1:
- if (ftype == ::apache::thrift::protocol::T_BOOL) {
- xfer += iprot->readBool(this->isAdjustedToUTC);
- isset_isAdjustedToUTC = true;
- } else {
- xfer += iprot->skip(ftype);
- }
- break;
- case 2:
- if (ftype == ::apache::thrift::protocol::T_STRUCT) {
- xfer += this->unit.read(iprot);
- isset_unit = true;
- } else {
- xfer += iprot->skip(ftype);
- }
- break;
- default:
- xfer += iprot->skip(ftype);
- break;
- }
- xfer += iprot->readFieldEnd();
- }
-
- xfer += iprot->readStructEnd();
-
- if (!isset_isAdjustedToUTC)
- throw TProtocolException(TProtocolException::INVALID_DATA);
- if (!isset_unit)
- throw TProtocolException(TProtocolException::INVALID_DATA);
- return xfer;
-}
-
-uint32_t TimeType::write(::apache::thrift::protocol::TProtocol* oprot) const {
- uint32_t xfer = 0;
- ::apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot);
- xfer += oprot->writeStructBegin("TimeType");
-
- xfer += oprot->writeFieldBegin("isAdjustedToUTC", ::apache::thrift::protocol::T_BOOL, 1);
- xfer += oprot->writeBool(this->isAdjustedToUTC);
- xfer += oprot->writeFieldEnd();
-
- xfer += oprot->writeFieldBegin("unit", ::apache::thrift::protocol::T_STRUCT, 2);
- xfer += this->unit.write(oprot);
- xfer += oprot->writeFieldEnd();
-
- xfer += oprot->writeFieldStop();
- xfer += oprot->writeStructEnd();
- return xfer;
-}
-
-void swap(TimeType &a, TimeType &b) {
- using ::std::swap;
- swap(a.isAdjustedToUTC, b.isAdjustedToUTC);
- swap(a.unit, b.unit);
-}
-
-TimeType::TimeType(const TimeType& other28) {
- isAdjustedToUTC = other28.isAdjustedToUTC;
- unit = other28.unit;
-}
-TimeType& TimeType::operator=(const TimeType& other29) {
- isAdjustedToUTC = other29.isAdjustedToUTC;
- unit = other29.unit;
- return *this;
-}
-void TimeType::printTo(std::ostream& out) const {
- using ::apache::thrift::to_string;
- out << "TimeType(";
- out << "isAdjustedToUTC=" << to_string(isAdjustedToUTC);
- out << ", " << "unit=" << to_string(unit);
- out << ")";
-}
-
-
-IntType::~IntType() noexcept {
-}
-
-
-void IntType::__set_bitWidth(const int8_t val) {
- this->bitWidth = val;
-}
-
-void IntType::__set_isSigned(const bool val) {
- this->isSigned = val;
-}
-std::ostream& operator<<(std::ostream& out, const IntType& obj)
-{
- obj.printTo(out);
- return out;
-}
-
-
-uint32_t IntType::read(::apache::thrift::protocol::TProtocol* iprot) {
-
- ::apache::thrift::protocol::TInputRecursionTracker tracker(*iprot);
- uint32_t xfer = 0;
- std::string fname;
- ::apache::thrift::protocol::TType ftype;
- int16_t fid;
-
- xfer += iprot->readStructBegin(fname);
-
- using ::apache::thrift::protocol::TProtocolException;
-
- bool isset_bitWidth = false;
- bool isset_isSigned = false;
-
- while (true)
- {
- xfer += iprot->readFieldBegin(fname, ftype, fid);
- if (ftype == ::apache::thrift::protocol::T_STOP) {
- break;
- }
- switch (fid)
- {
- case 1:
- if (ftype == ::apache::thrift::protocol::T_BYTE) {
- xfer += iprot->readByte(this->bitWidth);
- isset_bitWidth = true;
- } else {
- xfer += iprot->skip(ftype);
- }
- break;
- case 2:
- if (ftype == ::apache::thrift::protocol::T_BOOL) {
- xfer += iprot->readBool(this->isSigned);
- isset_isSigned = true;
- } else {
- xfer += iprot->skip(ftype);
- }
- break;
- default:
- xfer += iprot->skip(ftype);
- break;
- }
- xfer += iprot->readFieldEnd();
- }
-
- xfer += iprot->readStructEnd();
-
- if (!isset_bitWidth)
- throw TProtocolException(TProtocolException::INVALID_DATA);
- if (!isset_isSigned)
- throw TProtocolException(TProtocolException::INVALID_DATA);
- return xfer;
-}
-
-uint32_t IntType::write(::apache::thrift::protocol::TProtocol* oprot) const {
- uint32_t xfer = 0;
- ::apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot);
- xfer += oprot->writeStructBegin("IntType");
-
- xfer += oprot->writeFieldBegin("bitWidth", ::apache::thrift::protocol::T_BYTE, 1);
- xfer += oprot->writeByte(this->bitWidth);
- xfer += oprot->writeFieldEnd();
-
- xfer += oprot->writeFieldBegin("isSigned", ::apache::thrift::protocol::T_BOOL, 2);
- xfer += oprot->writeBool(this->isSigned);
- xfer += oprot->writeFieldEnd();
-
- xfer += oprot->writeFieldStop();
- xfer += oprot->writeStructEnd();
- return xfer;
-}
-
-void swap(IntType &a, IntType &b) {
- using ::std::swap;
- swap(a.bitWidth, b.bitWidth);
- swap(a.isSigned, b.isSigned);
-}
-
-IntType::IntType(const IntType& other30) {
- bitWidth = other30.bitWidth;
- isSigned = other30.isSigned;
-}
-IntType& IntType::operator=(const IntType& other31) {
- bitWidth = other31.bitWidth;
- isSigned = other31.isSigned;
- return *this;
-}
-void IntType::printTo(std::ostream& out) const {
- using ::apache::thrift::to_string;
- out << "IntType(";
- out << "bitWidth=" << to_string(bitWidth);
- out << ", " << "isSigned=" << to_string(isSigned);
- out << ")";
-}
-
-
-JsonType::~JsonType() noexcept {
-}
-
-std::ostream& operator<<(std::ostream& out, const JsonType& obj)
-{
- obj.printTo(out);
- return out;
-}
-
-
-uint32_t JsonType::read(::apache::thrift::protocol::TProtocol* iprot) {
-
- ::apache::thrift::protocol::TInputRecursionTracker tracker(*iprot);
- uint32_t xfer = 0;
- std::string fname;
- ::apache::thrift::protocol::TType ftype;
- int16_t fid;
-
- xfer += iprot->readStructBegin(fname);
-
- using ::apache::thrift::protocol::TProtocolException;
-
-
- while (true)
- {
- xfer += iprot->readFieldBegin(fname, ftype, fid);
- if (ftype == ::apache::thrift::protocol::T_STOP) {
- break;
- }
- xfer += iprot->skip(ftype);
- xfer += iprot->readFieldEnd();
- }
-
- xfer += iprot->readStructEnd();
-
- return xfer;
-}
-
-uint32_t JsonType::write(::apache::thrift::protocol::TProtocol* oprot) const {
- uint32_t xfer = 0;
- ::apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot);
- xfer += oprot->writeStructBegin("JsonType");
-
- xfer += oprot->writeFieldStop();
- xfer += oprot->writeStructEnd();
- return xfer;
-}
-
-void swap(JsonType &a, JsonType &b) {
- using ::std::swap;
- (void) a;
- (void) b;
-}
-
-JsonType::JsonType(const JsonType& other32) {
- (void) other32;
-}
-JsonType& JsonType::operator=(const JsonType& other33) {
- (void) other33;
- return *this;
-}
-void JsonType::printTo(std::ostream& out) const {
- using ::apache::thrift::to_string;
- out << "JsonType(";
- out << ")";
-}
-
-
-BsonType::~BsonType() noexcept {
-}
-
-std::ostream& operator<<(std::ostream& out, const BsonType& obj)
-{
- obj.printTo(out);
- return out;
-}
-
-
-uint32_t BsonType::read(::apache::thrift::protocol::TProtocol* iprot) {
-
- ::apache::thrift::protocol::TInputRecursionTracker tracker(*iprot);
- uint32_t xfer = 0;
- std::string fname;
- ::apache::thrift::protocol::TType ftype;
- int16_t fid;
-
- xfer += iprot->readStructBegin(fname);
-
- using ::apache::thrift::protocol::TProtocolException;
-
-
- while (true)
- {
- xfer += iprot->readFieldBegin(fname, ftype, fid);
- if (ftype == ::apache::thrift::protocol::T_STOP) {
- break;
- }
- xfer += iprot->skip(ftype);
- xfer += iprot->readFieldEnd();
- }
-
- xfer += iprot->readStructEnd();
-
- return xfer;
-}
-
-uint32_t BsonType::write(::apache::thrift::protocol::TProtocol* oprot) const {
- uint32_t xfer = 0;
- ::apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot);
- xfer += oprot->writeStructBegin("BsonType");
-
- xfer += oprot->writeFieldStop();
- xfer += oprot->writeStructEnd();
- return xfer;
-}
-
-void swap(BsonType &a, BsonType &b) {
- using ::std::swap;
- (void) a;
- (void) b;
-}
-
-BsonType::BsonType(const BsonType& other34) {
- (void) other34;
-}
-BsonType& BsonType::operator=(const BsonType& other35) {
- (void) other35;
- return *this;
-}
-void BsonType::printTo(std::ostream& out) const {
- using ::apache::thrift::to_string;
- out << "BsonType(";
- out << ")";
-}
-
-
-LogicalType::~LogicalType() noexcept {
-}
-
-
-void LogicalType::__set_STRING(const StringType& val) {
- this->STRING = val;
-__isset.STRING = true;
-}
-
-void LogicalType::__set_MAP(const MapType& val) {
- this->MAP = val;
-__isset.MAP = true;
-}
-
-void LogicalType::__set_LIST(const ListType& val) {
- this->LIST = val;
-__isset.LIST = true;
-}
-
-void LogicalType::__set_ENUM(const EnumType& val) {
- this->ENUM = val;
-__isset.ENUM = true;
-}
-
-void LogicalType::__set_DECIMAL(const DecimalType& val) {
- this->DECIMAL = val;
-__isset.DECIMAL = true;
-}
-
-void LogicalType::__set_DATE(const DateType& val) {
- this->DATE = val;
-__isset.DATE = true;
-}
-
-void LogicalType::__set_TIME(const TimeType& val) {
- this->TIME = val;
-__isset.TIME = true;
-}
-
-void LogicalType::__set_TIMESTAMP(const TimestampType& val) {
- this->TIMESTAMP = val;
-__isset.TIMESTAMP = true;
-}
-
-void LogicalType::__set_INTEGER(const IntType& val) {
- this->INTEGER = val;
-__isset.INTEGER = true;
-}
-
-void LogicalType::__set_UNKNOWN(const NullType& val) {
- this->UNKNOWN = val;
-__isset.UNKNOWN = true;
-}
-
-void LogicalType::__set_JSON(const JsonType& val) {
- this->JSON = val;
-__isset.JSON = true;
-}
-
-void LogicalType::__set_BSON(const BsonType& val) {
- this->BSON = val;
-__isset.BSON = true;
-}
-
-void LogicalType::__set_UUID(const UUIDType& val) {
- this->UUID = val;
-__isset.UUID = true;
-}
-std::ostream& operator<<(std::ostream& out, const LogicalType& obj)
-{
- obj.printTo(out);
- return out;
-}
-
-
-uint32_t LogicalType::read(::apache::thrift::protocol::TProtocol* iprot) {
-
- ::apache::thrift::protocol::TInputRecursionTracker tracker(*iprot);
- uint32_t xfer = 0;
- std::string fname;
- ::apache::thrift::protocol::TType ftype;
- int16_t fid;
-
- xfer += iprot->readStructBegin(fname);
-
- using ::apache::thrift::protocol::TProtocolException;
-
-
- while (true)
- {
- xfer += iprot->readFieldBegin(fname, ftype, fid);
- if (ftype == ::apache::thrift::protocol::T_STOP) {
- break;
- }
- switch (fid)
- {
- case 1:
- if (ftype == ::apache::thrift::protocol::T_STRUCT) {
- xfer += this->STRING.read(iprot);
- this->__isset.STRING = true;
- } else {
- xfer += iprot->skip(ftype);
- }
- break;
- case 2:
- if (ftype == ::apache::thrift::protocol::T_STRUCT) {
- xfer += this->MAP.read(iprot);
- this->__isset.MAP = true;
- } else {
- xfer += iprot->skip(ftype);
- }
- break;
- case 3:
- if (ftype == ::apache::thrift::protocol::T_STRUCT) {
- xfer += this->LIST.read(iprot);
- this->__isset.LIST = true;
- } else {
- xfer += iprot->skip(ftype);
- }
- break;
- case 4:
- if (ftype == ::apache::thrift::protocol::T_STRUCT) {
- xfer += this->ENUM.read(iprot);
- this->__isset.ENUM = true;
- } else {
- xfer += iprot->skip(ftype);
- }
- break;
- case 5:
- if (ftype == ::apache::thrift::protocol::T_STRUCT) {
- xfer += this->DECIMAL.read(iprot);
- this->__isset.DECIMAL = true;
- } else {
- xfer += iprot->skip(ftype);
- }
- break;
- case 6:
- if (ftype == ::apache::thrift::protocol::T_STRUCT) {
- xfer += this->DATE.read(iprot);
- this->__isset.DATE = true;
- } else {
- xfer += iprot->skip(ftype);
- }
- break;
- case 7:
- if (ftype == ::apache::thrift::protocol::T_STRUCT) {
- xfer += this->TIME.read(iprot);
- this->__isset.TIME = true;
- } else {
- xfer += iprot->skip(ftype);
- }
- break;
- case 8:
- if (ftype == ::apache::thrift::protocol::T_STRUCT) {
- xfer += this->TIMESTAMP.read(iprot);
- this->__isset.TIMESTAMP = true;
- } else {
- xfer += iprot->skip(ftype);
- }
- break;
- case 10:
- if (ftype == ::apache::thrift::protocol::T_STRUCT) {
- xfer += this->INTEGER.read(iprot);
- this->__isset.INTEGER = true;
- } else {
- xfer += iprot->skip(ftype);
- }
- break;
- case 11:
- if (ftype == ::apache::thrift::protocol::T_STRUCT) {
- xfer += this->UNKNOWN.read(iprot);
- this->__isset.UNKNOWN = true;
- } else {
- xfer += iprot->skip(ftype);
- }
- break;
- case 12:
- if (ftype == ::apache::thrift::protocol::T_STRUCT) {
- xfer += this->JSON.read(iprot);
- this->__isset.JSON = true;
- } else {
- xfer += iprot->skip(ftype);
- }
- break;
- case 13:
- if (ftype == ::apache::thrift::protocol::T_STRUCT) {
- xfer += this->BSON.read(iprot);
- this->__isset.BSON = true;
- } else {
- xfer += iprot->skip(ftype);
- }
- break;
- case 14:
- if (ftype == ::apache::thrift::protocol::T_STRUCT) {
- xfer += this->UUID.read(iprot);
- this->__isset.UUID = true;
- } else {
- xfer += iprot->skip(ftype);
- }
- break;
- default:
- xfer += iprot->skip(ftype);
- break;
- }
- xfer += iprot->readFieldEnd();
- }
-
- xfer += iprot->readStructEnd();
-
- return xfer;
-}
-
-uint32_t LogicalType::write(::apache::thrift::protocol::TProtocol* oprot) const {
- uint32_t xfer = 0;
- ::apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot);
- xfer += oprot->writeStructBegin("LogicalType");
-
- if (this->__isset.STRING) {
- xfer += oprot->writeFieldBegin("STRING", ::apache::thrift::protocol::T_STRUCT, 1);
- xfer += this->STRING.write(oprot);
- xfer += oprot->writeFieldEnd();
- }
- if (this->__isset.MAP) {
- xfer += oprot->writeFieldBegin("MAP", ::apache::thrift::protocol::T_STRUCT, 2);
- xfer += this->MAP.write(oprot);
- xfer += oprot->writeFieldEnd();
- }
- if (this->__isset.LIST) {
- xfer += oprot->writeFieldBegin("LIST", ::apache::thrift::protocol::T_STRUCT, 3);
- xfer += this->LIST.write(oprot);
- xfer += oprot->writeFieldEnd();
- }
- if (this->__isset.ENUM) {
- xfer += oprot->writeFieldBegin("ENUM", ::apache::thrift::protocol::T_STRUCT, 4);
- xfer += this->ENUM.write(oprot);
- xfer += oprot->writeFieldEnd();
- }
- if (this->__isset.DECIMAL) {
- xfer += oprot->writeFieldBegin("DECIMAL", ::apache::thrift::protocol::T_STRUCT, 5);
- xfer += this->DECIMAL.write(oprot);
- xfer += oprot->writeFieldEnd();
- }
- if (this->__isset.DATE) {
- xfer += oprot->writeFieldBegin("DATE", ::apache::thrift::protocol::T_STRUCT, 6);
- xfer += this->DATE.write(oprot);
- xfer += oprot->writeFieldEnd();
- }
- if (this->__isset.TIME) {
- xfer += oprot->writeFieldBegin("TIME", ::apache::thrift::protocol::T_STRUCT, 7);
- xfer += this->TIME.write(oprot);
- xfer += oprot->writeFieldEnd();
- }
- if (this->__isset.TIMESTAMP) {
- xfer += oprot->writeFieldBegin("TIMESTAMP", ::apache::thrift::protocol::T_STRUCT, 8);
- xfer += this->TIMESTAMP.write(oprot);
- xfer += oprot->writeFieldEnd();
- }
- if (this->__isset.INTEGER) {
- xfer += oprot->writeFieldBegin("INTEGER", ::apache::thrift::protocol::T_STRUCT, 10);
- xfer += this->INTEGER.write(oprot);
- xfer += oprot->writeFieldEnd();
- }
- if (this->__isset.UNKNOWN) {
- xfer += oprot->writeFieldBegin("UNKNOWN", ::apache::thrift::protocol::T_STRUCT, 11);
- xfer += this->UNKNOWN.write(oprot);
- xfer += oprot->writeFieldEnd();
- }
- if (this->__isset.JSON) {
- xfer += oprot->writeFieldBegin("JSON", ::apache::thrift::protocol::T_STRUCT, 12);
- xfer += this->JSON.write(oprot);
- xfer += oprot->writeFieldEnd();
- }
- if (this->__isset.BSON) {
- xfer += oprot->writeFieldBegin("BSON", ::apache::thrift::protocol::T_STRUCT, 13);
- xfer += this->BSON.write(oprot);
- xfer += oprot->writeFieldEnd();
- }
- if (this->__isset.UUID) {
- xfer += oprot->writeFieldBegin("UUID", ::apache::thrift::protocol::T_STRUCT, 14);
- xfer += this->UUID.write(oprot);
- xfer += oprot->writeFieldEnd();
- }
- xfer += oprot->writeFieldStop();
- xfer += oprot->writeStructEnd();
- return xfer;
-}
-
-void swap(LogicalType &a, LogicalType &b) {
- using ::std::swap;
- swap(a.STRING, b.STRING);
- swap(a.MAP, b.MAP);
- swap(a.LIST, b.LIST);
- swap(a.ENUM, b.ENUM);
- swap(a.DECIMAL, b.DECIMAL);
- swap(a.DATE, b.DATE);
- swap(a.TIME, b.TIME);
- swap(a.TIMESTAMP, b.TIMESTAMP);
- swap(a.INTEGER, b.INTEGER);
- swap(a.UNKNOWN, b.UNKNOWN);
- swap(a.JSON, b.JSON);
- swap(a.BSON, b.BSON);
- swap(a.UUID, b.UUID);
- swap(a.__isset, b.__isset);
-}
-
-LogicalType::LogicalType(const LogicalType& other36) {
- STRING = other36.STRING;
- MAP = other36.MAP;
- LIST = other36.LIST;
- ENUM = other36.ENUM;
- DECIMAL = other36.DECIMAL;
- DATE = other36.DATE;
- TIME = other36.TIME;
- TIMESTAMP = other36.TIMESTAMP;
- INTEGER = other36.INTEGER;
- UNKNOWN = other36.UNKNOWN;
- JSON = other36.JSON;
- BSON = other36.BSON;
- UUID = other36.UUID;
- __isset = other36.__isset;
-}
-LogicalType& LogicalType::operator=(const LogicalType& other37) {
- STRING = other37.STRING;
- MAP = other37.MAP;
- LIST = other37.LIST;
- ENUM = other37.ENUM;
- DECIMAL = other37.DECIMAL;
- DATE = other37.DATE;
- TIME = other37.TIME;
- TIMESTAMP = other37.TIMESTAMP;
- INTEGER = other37.INTEGER;
- UNKNOWN = other37.UNKNOWN;
- JSON = other37.JSON;
- BSON = other37.BSON;
- UUID = other37.UUID;
- __isset = other37.__isset;
- return *this;
-}
-void LogicalType::printTo(std::ostream& out) const {
- using ::apache::thrift::to_string;
- out << "LogicalType(";
- out << "STRING="; (__isset.STRING ? (out << to_string(STRING)) : (out << "<null>"));
- out << ", " << "MAP="; (__isset.MAP ? (out << to_string(MAP)) : (out << "<null>"));
- out << ", " << "LIST="; (__isset.LIST ? (out << to_string(LIST)) : (out << "<null>"));
- out << ", " << "ENUM="; (__isset.ENUM ? (out << to_string(ENUM)) : (out << "<null>"));
- out << ", " << "DECIMAL="; (__isset.DECIMAL ? (out << to_string(DECIMAL)) : (out << "<null>"));
- out << ", " << "DATE="; (__isset.DATE ? (out << to_string(DATE)) : (out << "<null>"));
- out << ", " << "TIME="; (__isset.TIME ? (out << to_string(TIME)) : (out << "<null>"));
- out << ", " << "TIMESTAMP="; (__isset.TIMESTAMP ? (out << to_string(TIMESTAMP)) : (out << "<null>"));
- out << ", " << "INTEGER="; (__isset.INTEGER ? (out << to_string(INTEGER)) : (out << "<null>"));
- out << ", " << "UNKNOWN="; (__isset.UNKNOWN ? (out << to_string(UNKNOWN)) : (out << "<null>"));
- out << ", " << "JSON="; (__isset.JSON ? (out << to_string(JSON)) : (out << "<null>"));
- out << ", " << "BSON="; (__isset.BSON ? (out << to_string(BSON)) : (out << "<null>"));
- out << ", " << "UUID="; (__isset.UUID ? (out << to_string(UUID)) : (out << "<null>"));
- out << ")";
-}
-
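The generated LogicalType above behaves like a Thrift union: write() only emits the members whose __isset flag is set, and printTo() renders every unset member as <null>, so callers are expected to set exactly one alternative. A minimal usage sketch, assuming the generated header is "parquet_types.h" and the parquet::format namespace used by Arrow's vendored copy (both assumptions, not shown in this diff):

// Sketch only; header name and namespace are assumptions.
#include <iostream>
#include "parquet_types.h"

int main() {
  parquet::format::LogicalType t;
  t.__set_STRING(parquet::format::StringType());  // also flips __isset.STRING
  std::cout << t << std::endl;  // STRING=..., every other member prints <null>
}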
-
-SchemaElement::~SchemaElement() noexcept {
-}
-
-
-void SchemaElement::__set_type(const Type::type val) {
- this->type = val;
- __isset.type = true;
-}
-
-void SchemaElement::__set_type_length(const int32_t val) {
- this->type_length = val;
- __isset.type_length = true;
-}
-
-void SchemaElement::__set_repetition_type(const FieldRepetitionType::type val) {
- this->repetition_type = val;
- __isset.repetition_type = true;
-}
-
-void SchemaElement::__set_name(const std::string& val) {
- this->name = val;
-}
-
-void SchemaElement::__set_num_children(const int32_t val) {
- this->num_children = val;
- __isset.num_children = true;
-}
-
-void SchemaElement::__set_converted_type(const ConvertedType::type val) {
- this->converted_type = val;
- __isset.converted_type = true;
-}
-
-void SchemaElement::__set_scale(const int32_t val) {
- this->scale = val;
- __isset.scale = true;
-}
-
-void SchemaElement::__set_precision(const int32_t val) {
- this->precision = val;
- __isset.precision = true;
-}
-
-void SchemaElement::__set_field_id(const int32_t val) {
- this->field_id = val;
- __isset.field_id = true;
-}
-
-void SchemaElement::__set_logicalType(const LogicalType& val) {
- this->logicalType = val;
- __isset.logicalType = true;
-}
-std::ostream& operator<<(std::ostream& out, const SchemaElement& obj)
-{
- obj.printTo(out);
- return out;
-}
-
-
-uint32_t SchemaElement::read(::apache::thrift::protocol::TProtocol* iprot) {
-
- ::apache::thrift::protocol::TInputRecursionTracker tracker(*iprot);
- uint32_t xfer = 0;
- std::string fname;
- ::apache::thrift::protocol::TType ftype;
- int16_t fid;
-
- xfer += iprot->readStructBegin(fname);
-
- using ::apache::thrift::protocol::TProtocolException;
-
- bool isset_name = false;
-
- while (true)
- {
- xfer += iprot->readFieldBegin(fname, ftype, fid);
- if (ftype == ::apache::thrift::protocol::T_STOP) {
- break;
- }
- switch (fid)
- {
- case 1:
- if (ftype == ::apache::thrift::protocol::T_I32) {
- int32_t ecast38;
- xfer += iprot->readI32(ecast38);
- this->type = (Type::type)ecast38;
- this->__isset.type = true;
- } else {
- xfer += iprot->skip(ftype);
- }
- break;
- case 2:
- if (ftype == ::apache::thrift::protocol::T_I32) {
- xfer += iprot->readI32(this->type_length);
- this->__isset.type_length = true;
- } else {
- xfer += iprot->skip(ftype);
- }
- break;
- case 3:
- if (ftype == ::apache::thrift::protocol::T_I32) {
- int32_t ecast39;
- xfer += iprot->readI32(ecast39);
- this->repetition_type = (FieldRepetitionType::type)ecast39;
- this->__isset.repetition_type = true;
- } else {
- xfer += iprot->skip(ftype);
- }
- break;
- case 4:
- if (ftype == ::apache::thrift::protocol::T_STRING) {
- xfer += iprot->readString(this->name);
- isset_name = true;
- } else {
- xfer += iprot->skip(ftype);
- }
- break;
- case 5:
- if (ftype == ::apache::thrift::protocol::T_I32) {
- xfer += iprot->readI32(this->num_children);
- this->__isset.num_children = true;
- } else {
- xfer += iprot->skip(ftype);
- }
- break;
- case 6:
- if (ftype == ::apache::thrift::protocol::T_I32) {
- int32_t ecast40;
- xfer += iprot->readI32(ecast40);
- this->converted_type = (ConvertedType::type)ecast40;
- this->__isset.converted_type = true;
- } else {
- xfer += iprot->skip(ftype);
- }
- break;
- case 7:
- if (ftype == ::apache::thrift::protocol::T_I32) {
- xfer += iprot->readI32(this->scale);
- this->__isset.scale = true;
- } else {
- xfer += iprot->skip(ftype);
- }
- break;
- case 8:
- if (ftype == ::apache::thrift::protocol::T_I32) {
- xfer += iprot->readI32(this->precision);
- this->__isset.precision = true;
- } else {
- xfer += iprot->skip(ftype);
- }
- break;
- case 9:
- if (ftype == ::apache::thrift::protocol::T_I32) {
- xfer += iprot->readI32(this->field_id);
- this->__isset.field_id = true;
- } else {
- xfer += iprot->skip(ftype);
- }
- break;
- case 10:
- if (ftype == ::apache::thrift::protocol::T_STRUCT) {
- xfer += this->logicalType.read(iprot);
- this->__isset.logicalType = true;
- } else {
- xfer += iprot->skip(ftype);
- }
- break;
- default:
- xfer += iprot->skip(ftype);
- break;
- }
- xfer += iprot->readFieldEnd();
- }
-
- xfer += iprot->readStructEnd();
-
- if (!isset_name)
- throw TProtocolException(TProtocolException::INVALID_DATA);
- return xfer;
-}
-
-uint32_t SchemaElement::write(::apache::thrift::protocol::TProtocol* oprot) const {
- uint32_t xfer = 0;
- ::apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot);
- xfer += oprot->writeStructBegin("SchemaElement");
-
- if (this->__isset.type) {
- xfer += oprot->writeFieldBegin("type", ::apache::thrift::protocol::T_I32, 1);
- xfer += oprot->writeI32((int32_t)this->type);
- xfer += oprot->writeFieldEnd();
- }
- if (this->__isset.type_length) {
- xfer += oprot->writeFieldBegin("type_length", ::apache::thrift::protocol::T_I32, 2);
- xfer += oprot->writeI32(this->type_length);
- xfer += oprot->writeFieldEnd();
- }
- if (this->__isset.repetition_type) {
- xfer += oprot->writeFieldBegin("repetition_type", ::apache::thrift::protocol::T_I32, 3);
- xfer += oprot->writeI32((int32_t)this->repetition_type);
- xfer += oprot->writeFieldEnd();
- }
- xfer += oprot->writeFieldBegin("name", ::apache::thrift::protocol::T_STRING, 4);
- xfer += oprot->writeString(this->name);
- xfer += oprot->writeFieldEnd();
-
- if (this->__isset.num_children) {
- xfer += oprot->writeFieldBegin("num_children", ::apache::thrift::protocol::T_I32, 5);
- xfer += oprot->writeI32(this->num_children);
- xfer += oprot->writeFieldEnd();
- }
- if (this->__isset.converted_type) {
- xfer += oprot->writeFieldBegin("converted_type", ::apache::thrift::protocol::T_I32, 6);
- xfer += oprot->writeI32((int32_t)this->converted_type);
- xfer += oprot->writeFieldEnd();
- }
- if (this->__isset.scale) {
- xfer += oprot->writeFieldBegin("scale", ::apache::thrift::protocol::T_I32, 7);
- xfer += oprot->writeI32(this->scale);
- xfer += oprot->writeFieldEnd();
- }
- if (this->__isset.precision) {
- xfer += oprot->writeFieldBegin("precision", ::apache::thrift::protocol::T_I32, 8);
- xfer += oprot->writeI32(this->precision);
- xfer += oprot->writeFieldEnd();
- }
- if (this->__isset.field_id) {
- xfer += oprot->writeFieldBegin("field_id", ::apache::thrift::protocol::T_I32, 9);
- xfer += oprot->writeI32(this->field_id);
- xfer += oprot->writeFieldEnd();
- }
- if (this->__isset.logicalType) {
- xfer += oprot->writeFieldBegin("logicalType", ::apache::thrift::protocol::T_STRUCT, 10);
- xfer += this->logicalType.write(oprot);
- xfer += oprot->writeFieldEnd();
- }
- xfer += oprot->writeFieldStop();
- xfer += oprot->writeStructEnd();
- return xfer;
-}
-
-void swap(SchemaElement &a, SchemaElement &b) {
- using ::std::swap;
- swap(a.type, b.type);
- swap(a.type_length, b.type_length);
- swap(a.repetition_type, b.repetition_type);
- swap(a.name, b.name);
- swap(a.num_children, b.num_children);
- swap(a.converted_type, b.converted_type);
- swap(a.scale, b.scale);
- swap(a.precision, b.precision);
- swap(a.field_id, b.field_id);
- swap(a.logicalType, b.logicalType);
- swap(a.__isset, b.__isset);
-}
-
-SchemaElement::SchemaElement(const SchemaElement& other41) {
- type = other41.type;
- type_length = other41.type_length;
- repetition_type = other41.repetition_type;
- name = other41.name;
- num_children = other41.num_children;
- converted_type = other41.converted_type;
- scale = other41.scale;
- precision = other41.precision;
- field_id = other41.field_id;
- logicalType = other41.logicalType;
- __isset = other41.__isset;
-}
-SchemaElement& SchemaElement::operator=(const SchemaElement& other42) {
- type = other42.type;
- type_length = other42.type_length;
- repetition_type = other42.repetition_type;
- name = other42.name;
- num_children = other42.num_children;
- converted_type = other42.converted_type;
- scale = other42.scale;
- precision = other42.precision;
- field_id = other42.field_id;
- logicalType = other42.logicalType;
- __isset = other42.__isset;
- return *this;
-}
-void SchemaElement::printTo(std::ostream& out) const {
- using ::apache::thrift::to_string;
- out << "SchemaElement(";
- out << "type="; (__isset.type ? (out << to_string(type)) : (out << "<null>"));
- out << ", " << "type_length="; (__isset.type_length ? (out << to_string(type_length)) : (out << "<null>"));
- out << ", " << "repetition_type="; (__isset.repetition_type ? (out << to_string(repetition_type)) : (out << "<null>"));
- out << ", " << "name=" << to_string(name);
- out << ", " << "num_children="; (__isset.num_children ? (out << to_string(num_children)) : (out << "<null>"));
- out << ", " << "converted_type="; (__isset.converted_type ? (out << to_string(converted_type)) : (out << "<null>"));
- out << ", " << "scale="; (__isset.scale ? (out << to_string(scale)) : (out << "<null>"));
- out << ", " << "precision="; (__isset.precision ? (out << to_string(precision)) : (out << "<null>"));
- out << ", " << "field_id="; (__isset.field_id ? (out << to_string(field_id)) : (out << "<null>"));
- out << ", " << "logicalType="; (__isset.logicalType ? (out << to_string(logicalType)) : (out << "<null>"));
- out << ")";
-}
-
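In the SchemaElement code above, name (field 4) is the only required field: write() emits it unconditionally and read() throws TProtocolException(INVALID_DATA) when it is missing, while every other field is gated by its __isset flag. A round-trip sketch under the same header/namespace assumptions, additionally assuming a Thrift release with std::shared_ptr-based transports (0.11 or later):

// Sketch only; namespace, header name, and Thrift version are assumptions.
#include <memory>
#include <thrift/protocol/TCompactProtocol.h>
#include <thrift/transport/TBufferTransports.h>
#include "parquet_types.h"

int main() {
  using apache::thrift::protocol::TCompactProtocol;
  using apache::thrift::transport::TMemoryBuffer;

  parquet::format::SchemaElement elem;
  elem.__set_name("my_column");                   // required: always written
  elem.__set_type(parquet::format::Type::INT32);  // optional: gated by __isset

  auto buf = std::make_shared<TMemoryBuffer>();
  TCompactProtocol proto(buf);
  elem.write(&proto);    // returns the number of bytes transferred

  parquet::format::SchemaElement decoded;
  decoded.read(&proto);  // would throw TProtocolException if 'name' were absent
  return decoded.name == elem.name ? 0 : 1;
}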
-
-DataPageHeader::~DataPageHeader() noexcept {
-}
-
-
-void DataPageHeader::__set_num_values(const int32_t val) {
- this->num_values = val;
-}
-
-void DataPageHeader::__set_encoding(const Encoding::type val) {
- this->encoding = val;
-}
-
-void DataPageHeader::__set_definition_level_encoding(const Encoding::type val) {
- this->definition_level_encoding = val;
-}
-
-void DataPageHeader::__set_repetition_level_encoding(const Encoding::type val) {
- this->repetition_level_encoding = val;
-}
-
-void DataPageHeader::__set_statistics(const Statistics& val) {
- this->statistics = val;
- __isset.statistics = true;
-}
-std::ostream& operator<<(std::ostream& out, const DataPageHeader& obj)
-{
- obj.printTo(out);
- return out;
-}
-
-
-uint32_t DataPageHeader::read(::apache::thrift::protocol::TProtocol* iprot) {
-
- ::apache::thrift::protocol::TInputRecursionTracker tracker(*iprot);
- uint32_t xfer = 0;
- std::string fname;
- ::apache::thrift::protocol::TType ftype;
- int16_t fid;
-
- xfer += iprot->readStructBegin(fname);
-
- using ::apache::thrift::protocol::TProtocolException;
-
- bool isset_num_values = false;
- bool isset_encoding = false;
- bool isset_definition_level_encoding = false;
- bool isset_repetition_level_encoding = false;
-
- while (true)
- {
- xfer += iprot->readFieldBegin(fname, ftype, fid);
- if (ftype == ::apache::thrift::protocol::T_STOP) {
- break;
- }
- switch (fid)
- {
- case 1:
- if (ftype == ::apache::thrift::protocol::T_I32) {
- xfer += iprot->readI32(this->num_values);
- isset_num_values = true;
- } else {
- xfer += iprot->skip(ftype);
- }
- break;
- case 2:
- if (ftype == ::apache::thrift::protocol::T_I32) {
- int32_t ecast43;
- xfer += iprot->readI32(ecast43);
- this->encoding = (Encoding::type)ecast43;
- isset_encoding = true;
- } else {
- xfer += iprot->skip(ftype);
- }
- break;
- case 3:
- if (ftype == ::apache::thrift::protocol::T_I32) {
- int32_t ecast44;
- xfer += iprot->readI32(ecast44);
- this->definition_level_encoding = (Encoding::type)ecast44;
- isset_definition_level_encoding = true;
- } else {
- xfer += iprot->skip(ftype);
- }
- break;
- case 4:
- if (ftype == ::apache::thrift::protocol::T_I32) {
- int32_t ecast45;
- xfer += iprot->readI32(ecast45);
- this->repetition_level_encoding = (Encoding::type)ecast45;
- isset_repetition_level_encoding = true;
- } else {
- xfer += iprot->skip(ftype);
- }
- break;
- case 5:
- if (ftype == ::apache::thrift::protocol::T_STRUCT) {
- xfer += this->statistics.read(iprot);
- this->__isset.statistics = true;
- } else {
- xfer += iprot->skip(ftype);
- }
- break;
- default:
- xfer += iprot->skip(ftype);
- break;
- }
- xfer += iprot->readFieldEnd();
- }
-
- xfer += iprot->readStructEnd();
-
- if (!isset_num_values)
- throw TProtocolException(TProtocolException::INVALID_DATA);
- if (!isset_encoding)
- throw TProtocolException(TProtocolException::INVALID_DATA);
- if (!isset_definition_level_encoding)
- throw TProtocolException(TProtocolException::INVALID_DATA);
- if (!isset_repetition_level_encoding)
- throw TProtocolException(TProtocolException::INVALID_DATA);
- return xfer;
-}
-
-uint32_t DataPageHeader::write(::apache::thrift::protocol::TProtocol* oprot) const {
- uint32_t xfer = 0;
- ::apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot);
- xfer += oprot->writeStructBegin("DataPageHeader");
-
- xfer += oprot->writeFieldBegin("num_values", ::apache::thrift::protocol::T_I32, 1);
- xfer += oprot->writeI32(this->num_values);
- xfer += oprot->writeFieldEnd();
-
- xfer += oprot->writeFieldBegin("encoding", ::apache::thrift::protocol::T_I32, 2);
- xfer += oprot->writeI32((int32_t)this->encoding);
- xfer += oprot->writeFieldEnd();
-
- xfer += oprot->writeFieldBegin("definition_level_encoding", ::apache::thrift::protocol::T_I32, 3);
- xfer += oprot->writeI32((int32_t)this->definition_level_encoding);
- xfer += oprot->writeFieldEnd();
-
- xfer += oprot->writeFieldBegin("repetition_level_encoding", ::apache::thrift::protocol::T_I32, 4);
- xfer += oprot->writeI32((int32_t)this->repetition_level_encoding);
- xfer += oprot->writeFieldEnd();
-
- if (this->__isset.statistics) {
- xfer += oprot->writeFieldBegin("statistics", ::apache::thrift::protocol::T_STRUCT, 5);
- xfer += this->statistics.write(oprot);
- xfer += oprot->writeFieldEnd();
- }
- xfer += oprot->writeFieldStop();
- xfer += oprot->writeStructEnd();
- return xfer;
-}
-
-void swap(DataPageHeader &a, DataPageHeader &b) {
- using ::std::swap;
- swap(a.num_values, b.num_values);
- swap(a.encoding, b.encoding);
- swap(a.definition_level_encoding, b.definition_level_encoding);
- swap(a.repetition_level_encoding, b.repetition_level_encoding);
- swap(a.statistics, b.statistics);
- swap(a.__isset, b.__isset);
-}
-
-DataPageHeader::DataPageHeader(const DataPageHeader& other46) {
- num_values = other46.num_values;
- encoding = other46.encoding;
- definition_level_encoding = other46.definition_level_encoding;
- repetition_level_encoding = other46.repetition_level_encoding;
- statistics = other46.statistics;
- __isset = other46.__isset;
-}
-DataPageHeader& DataPageHeader::operator=(const DataPageHeader& other47) {
- num_values = other47.num_values;
- encoding = other47.encoding;
- definition_level_encoding = other47.definition_level_encoding;
- repetition_level_encoding = other47.repetition_level_encoding;
- statistics = other47.statistics;
- __isset = other47.__isset;
- return *this;
-}
-void DataPageHeader::printTo(std::ostream& out) const {
- using ::apache::thrift::to_string;
- out << "DataPageHeader(";
- out << "num_values=" << to_string(num_values);
- out << ", " << "encoding=" << to_string(encoding);
- out << ", " << "definition_level_encoding=" << to_string(definition_level_encoding);
- out << ", " << "repetition_level_encoding=" << to_string(repetition_level_encoding);
- out << ", " << "statistics="; (__isset.statistics ? (out << to_string(statistics)) : (out << "<null>"));
- out << ")";
-}
-
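DataPageHeader validates its four required fields with local isset_* booleans during read(), while the __isset bitfield tracks only the optional statistics; because write() emits the required fields unconditionally, the INVALID_DATA throw can only fire on truncated or foreign input. A round-trip sketch under the same assumptions as above:

// Sketch only; namespace, header name, and Thrift version are assumptions.
#include <memory>
#include <thrift/protocol/TCompactProtocol.h>
#include <thrift/transport/TBufferTransports.h>
#include "parquet_types.h"

int main() {
  using namespace apache::thrift;
  namespace pf = parquet::format;

  pf::DataPageHeader header;
  header.__set_num_values(100);
  header.__set_encoding(pf::Encoding::PLAIN);
  header.__set_definition_level_encoding(pf::Encoding::RLE);
  header.__set_repetition_level_encoding(pf::Encoding::RLE);
  // statistics left unset: the optional field is simply skipped on the wire

  auto buf = std::make_shared<transport::TMemoryBuffer>();
  protocol::TCompactProtocol proto(buf);
  header.write(&proto);

  pf::DataPageHeader back;
  back.read(&proto);  // all four required fields present, so no throw
  return back.num_values == 100 ? 0 : 1;
}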
-
-IndexPageHeader::~IndexPageHeader() noexcept {
-}
-
-std::ostream& operator<<(std::ostream& out, const IndexPageHeader& obj)
-{
- obj.printTo(out);
- return out;
-}
-
-
-uint32_t IndexPageHeader::read(::apache::thrift::protocol::TProtocol* iprot) {
-
- ::apache::thrift::protocol::TInputRecursionTracker tracker(*iprot);
- uint32_t xfer = 0;
- std::string fname;
- ::apache::thrift::protocol::TType ftype;
- int16_t fid;
-
- xfer += iprot->readStructBegin(fname);
-
- using ::apache::thrift::protocol::TProtocolException;
-
-
- while (true)
- {
- xfer += iprot->readFieldBegin(fname, ftype, fid);
- if (ftype == ::apache::thrift::protocol::T_STOP) {
- break;
- }
- xfer += iprot->skip(ftype);
- xfer += iprot->readFieldEnd();
- }
-
- xfer += iprot->readStructEnd();
-
- return xfer;
-}
-
-uint32_t IndexPageHeader::write(::apache::thrift::protocol::TProtocol* oprot) const {
- uint32_t xfer = 0;
- ::apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot);
- xfer += oprot->writeStructBegin("IndexPageHeader");
-
- xfer += oprot->writeFieldStop();
- xfer += oprot->writeStructEnd();
- return xfer;
-}
-
-void swap(IndexPageHeader &a, IndexPageHeader &b) {
- using ::std::swap;
- (void) a;
- (void) b;
-}
-
-IndexPageHeader::IndexPageHeader(const IndexPageHeader& other48) {
- (void) other48;
-}
-IndexPageHeader& IndexPageHeader::operator=(const IndexPageHeader& other49) {
- (void) other49;
- return *this;
-}
-void IndexPageHeader::printTo(std::ostream& out) const {
- using ::apache::thrift::to_string;
- out << "IndexPageHeader(";
- out << ")";
-}
-
-
-DictionaryPageHeader::~DictionaryPageHeader() noexcept {
-}
-
-
-void DictionaryPageHeader::__set_num_values(const int32_t val) {
- this->num_values = val;
-}
-
-void DictionaryPageHeader::__set_encoding(const Encoding::type val) {
- this->encoding = val;
-}
-
-void DictionaryPageHeader::__set_is_sorted(const bool val) {
- this->is_sorted = val;
- __isset.is_sorted = true;
-}
-std::ostream& operator<<(std::ostream& out, const DictionaryPageHeader& obj)
-{
- obj.printTo(out);
- return out;
-}
-
-
-uint32_t DictionaryPageHeader::read(::apache::thrift::protocol::TProtocol* iprot) {
-
- ::apache::thrift::protocol::TInputRecursionTracker tracker(*iprot);
- uint32_t xfer = 0;
- std::string fname;
- ::apache::thrift::protocol::TType ftype;
- int16_t fid;
-
- xfer += iprot->readStructBegin(fname);
-
- using ::apache::thrift::protocol::TProtocolException;
-
- bool isset_num_values = false;
- bool isset_encoding = false;
-
- while (true)
- {
- xfer += iprot->readFieldBegin(fname, ftype, fid);
- if (ftype == ::apache::thrift::protocol::T_STOP) {
- break;
- }
- switch (fid)
- {
- case 1:
- if (ftype == ::apache::thrift::protocol::T_I32) {
- xfer += iprot->readI32(this->num_values);
- isset_num_values = true;
- } else {
- xfer += iprot->skip(ftype);
- }
- break;
- case 2:
- if (ftype == ::apache::thrift::protocol::T_I32) {
- int32_t ecast50;
- xfer += iprot->readI32(ecast50);
- this->encoding = (Encoding::type)ecast50;
- isset_encoding = true;
- } else {
- xfer += iprot->skip(ftype);
- }
- break;
- case 3:
- if (ftype == ::apache::thrift::protocol::T_BOOL) {
- xfer += iprot->readBool(this->is_sorted);
- this->__isset.is_sorted = true;
- } else {
- xfer += iprot->skip(ftype);
- }
- break;
- default:
- xfer += iprot->skip(ftype);
- break;
- }
- xfer += iprot->readFieldEnd();
- }
-
- xfer += iprot->readStructEnd();
-
- if (!isset_num_values)
- throw TProtocolException(TProtocolException::INVALID_DATA);
- if (!isset_encoding)
- throw TProtocolException(TProtocolException::INVALID_DATA);
- return xfer;
-}
-
-uint32_t DictionaryPageHeader::write(::apache::thrift::protocol::TProtocol* oprot) const {
- uint32_t xfer = 0;
- ::apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot);
- xfer += oprot->writeStructBegin("DictionaryPageHeader");
-
- xfer += oprot->writeFieldBegin("num_values", ::apache::thrift::protocol::T_I32, 1);
- xfer += oprot->writeI32(this->num_values);
- xfer += oprot->writeFieldEnd();
-
- xfer += oprot->writeFieldBegin("encoding", ::apache::thrift::protocol::T_I32, 2);
- xfer += oprot->writeI32((int32_t)this->encoding);
- xfer += oprot->writeFieldEnd();
-
- if (this->__isset.is_sorted) {
- xfer += oprot->writeFieldBegin("is_sorted", ::apache::thrift::protocol::T_BOOL, 3);
- xfer += oprot->writeBool(this->is_sorted);
- xfer += oprot->writeFieldEnd();
- }
- xfer += oprot->writeFieldStop();
- xfer += oprot->writeStructEnd();
- return xfer;
-}
-
-void swap(DictionaryPageHeader &a, DictionaryPageHeader &b) {
- using ::std::swap;
- swap(a.num_values, b.num_values);
- swap(a.encoding, b.encoding);
- swap(a.is_sorted, b.is_sorted);
- swap(a.__isset, b.__isset);
-}
-
-DictionaryPageHeader::DictionaryPageHeader(const DictionaryPageHeader& other51) {
- num_values = other51.num_values;
- encoding = other51.encoding;
- is_sorted = other51.is_sorted;
- __isset = other51.__isset;
-}
-DictionaryPageHeader& DictionaryPageHeader::operator=(const DictionaryPageHeader& other52) {
- num_values = other52.num_values;
- encoding = other52.encoding;
- is_sorted = other52.is_sorted;
- __isset = other52.__isset;
- return *this;
-}
-void DictionaryPageHeader::printTo(std::ostream& out) const {
- using ::apache::thrift::to_string;
- out << "DictionaryPageHeader(";
- out << "num_values=" << to_string(num_values);
- out << ", " << "encoding=" << to_string(encoding);
- out << ", " << "is_sorted="; (__isset.is_sorted ? (out << to_string(is_sorted)) : (out << "<null>"));
- out << ")";
-}
-
-
-DataPageHeaderV2::~DataPageHeaderV2() noexcept {
-}
-
-
-void DataPageHeaderV2::__set_num_values(const int32_t val) {
- this->num_values = val;
-}
-
-void DataPageHeaderV2::__set_num_nulls(const int32_t val) {
- this->num_nulls = val;
-}
-
-void DataPageHeaderV2::__set_num_rows(const int32_t val) {
- this->num_rows = val;
-}
-
-void DataPageHeaderV2::__set_encoding(const Encoding::type val) {
- this->encoding = val;
-}
-
-void DataPageHeaderV2::__set_definition_levels_byte_length(const int32_t val) {
- this->definition_levels_byte_length = val;
-}
-
-void DataPageHeaderV2::__set_repetition_levels_byte_length(const int32_t val) {
- this->repetition_levels_byte_length = val;
-}
-
-void DataPageHeaderV2::__set_is_compressed(const bool val) {
- this->is_compressed = val;
- __isset.is_compressed = true;
-}
-
-void DataPageHeaderV2::__set_statistics(const Statistics& val) {
- this->statistics = val;
- __isset.statistics = true;
-}
-std::ostream& operator<<(std::ostream& out, const DataPageHeaderV2& obj)
-{
- obj.printTo(out);
- return out;
-}
-
-
-uint32_t DataPageHeaderV2::read(::apache::thrift::protocol::TProtocol* iprot) {
-
- ::apache::thrift::protocol::TInputRecursionTracker tracker(*iprot);
- uint32_t xfer = 0;
- std::string fname;
- ::apache::thrift::protocol::TType ftype;
- int16_t fid;
-
- xfer += iprot->readStructBegin(fname);
-
- using ::apache::thrift::protocol::TProtocolException;
-
- bool isset_num_values = false;
- bool isset_num_nulls = false;
- bool isset_num_rows = false;
- bool isset_encoding = false;
- bool isset_definition_levels_byte_length = false;
- bool isset_repetition_levels_byte_length = false;
-
- while (true)
- {
- xfer += iprot->readFieldBegin(fname, ftype, fid);
- if (ftype == ::apache::thrift::protocol::T_STOP) {
- break;
- }
- switch (fid)
- {
- case 1:
- if (ftype == ::apache::thrift::protocol::T_I32) {
- xfer += iprot->readI32(this->num_values);
- isset_num_values = true;
- } else {
- xfer += iprot->skip(ftype);
- }
- break;
- case 2:
- if (ftype == ::apache::thrift::protocol::T_I32) {
- xfer += iprot->readI32(this->num_nulls);
- isset_num_nulls = true;
- } else {
- xfer += iprot->skip(ftype);
- }
- break;
- case 3:
- if (ftype == ::apache::thrift::protocol::T_I32) {
- xfer += iprot->readI32(this->num_rows);
- isset_num_rows = true;
- } else {
- xfer += iprot->skip(ftype);
- }
- break;
- case 4:
- if (ftype == ::apache::thrift::protocol::T_I32) {
- int32_t ecast53;
- xfer += iprot->readI32(ecast53);
- this->encoding = (Encoding::type)ecast53;
- isset_encoding = true;
- } else {
- xfer += iprot->skip(ftype);
- }
- break;
- case 5:
- if (ftype == ::apache::thrift::protocol::T_I32) {
- xfer += iprot->readI32(this->definition_levels_byte_length);
- isset_definition_levels_byte_length = true;
- } else {
- xfer += iprot->skip(ftype);
- }
- break;
- case 6:
- if (ftype == ::apache::thrift::protocol::T_I32) {
- xfer += iprot->readI32(this->repetition_levels_byte_length);
- isset_repetition_levels_byte_length = true;
- } else {
- xfer += iprot->skip(ftype);
- }
- break;
- case 7:
- if (ftype == ::apache::thrift::protocol::T_BOOL) {
- xfer += iprot->readBool(this->is_compressed);
- this->__isset.is_compressed = true;
- } else {
- xfer += iprot->skip(ftype);
- }
- break;
- case 8:
- if (ftype == ::apache::thrift::protocol::T_STRUCT) {
- xfer += this->statistics.read(iprot);
- this->__isset.statistics = true;
- } else {
- xfer += iprot->skip(ftype);
- }
- break;
- default:
- xfer += iprot->skip(ftype);
- break;
- }
- xfer += iprot->readFieldEnd();
- }
-
- xfer += iprot->readStructEnd();
-
- if (!isset_num_values)
- throw TProtocolException(TProtocolException::INVALID_DATA);
- if (!isset_num_nulls)
- throw TProtocolException(TProtocolException::INVALID_DATA);
- if (!isset_num_rows)
- throw TProtocolException(TProtocolException::INVALID_DATA);
- if (!isset_encoding)
- throw TProtocolException(TProtocolException::INVALID_DATA);
- if (!isset_definition_levels_byte_length)
- throw TProtocolException(TProtocolException::INVALID_DATA);
- if (!isset_repetition_levels_byte_length)
- throw TProtocolException(TProtocolException::INVALID_DATA);
- return xfer;
-}
-
-uint32_t DataPageHeaderV2::write(::apache::thrift::protocol::TProtocol* oprot) const {
- uint32_t xfer = 0;
- ::apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot);
- xfer += oprot->writeStructBegin("DataPageHeaderV2");
-
- xfer += oprot->writeFieldBegin("num_values", ::apache::thrift::protocol::T_I32, 1);
- xfer += oprot->writeI32(this->num_values);
- xfer += oprot->writeFieldEnd();
-
- xfer += oprot->writeFieldBegin("num_nulls", ::apache::thrift::protocol::T_I32, 2);
- xfer += oprot->writeI32(this->num_nulls);
- xfer += oprot->writeFieldEnd();
-
- xfer += oprot->writeFieldBegin("num_rows", ::apache::thrift::protocol::T_I32, 3);
- xfer += oprot->writeI32(this->num_rows);
- xfer += oprot->writeFieldEnd();
-
- xfer += oprot->writeFieldBegin("encoding", ::apache::thrift::protocol::T_I32, 4);
- xfer += oprot->writeI32((int32_t)this->encoding);
- xfer += oprot->writeFieldEnd();
-
- xfer += oprot->writeFieldBegin("definition_levels_byte_length", ::apache::thrift::protocol::T_I32, 5);
- xfer += oprot->writeI32(this->definition_levels_byte_length);
- xfer += oprot->writeFieldEnd();
-
- xfer += oprot->writeFieldBegin("repetition_levels_byte_length", ::apache::thrift::protocol::T_I32, 6);
- xfer += oprot->writeI32(this->repetition_levels_byte_length);
- xfer += oprot->writeFieldEnd();
-
- if (this->__isset.is_compressed) {
- xfer += oprot->writeFieldBegin("is_compressed", ::apache::thrift::protocol::T_BOOL, 7);
- xfer += oprot->writeBool(this->is_compressed);
- xfer += oprot->writeFieldEnd();
- }
- if (this->__isset.statistics) {
- xfer += oprot->writeFieldBegin("statistics", ::apache::thrift::protocol::T_STRUCT, 8);
- xfer += this->statistics.write(oprot);
- xfer += oprot->writeFieldEnd();
- }
- xfer += oprot->writeFieldStop();
- xfer += oprot->writeStructEnd();
- return xfer;
-}
-
-void swap(DataPageHeaderV2 &a, DataPageHeaderV2 &b) {
- using ::std::swap;
- swap(a.num_values, b.num_values);
- swap(a.num_nulls, b.num_nulls);
- swap(a.num_rows, b.num_rows);
- swap(a.encoding, b.encoding);
- swap(a.definition_levels_byte_length, b.definition_levels_byte_length);
- swap(a.repetition_levels_byte_length, b.repetition_levels_byte_length);
- swap(a.is_compressed, b.is_compressed);
- swap(a.statistics, b.statistics);
- swap(a.__isset, b.__isset);
-}
-
-DataPageHeaderV2::DataPageHeaderV2(const DataPageHeaderV2& other54) {
- num_values = other54.num_values;
- num_nulls = other54.num_nulls;
- num_rows = other54.num_rows;
- encoding = other54.encoding;
- definition_levels_byte_length = other54.definition_levels_byte_length;
- repetition_levels_byte_length = other54.repetition_levels_byte_length;
- is_compressed = other54.is_compressed;
- statistics = other54.statistics;
- __isset = other54.__isset;
-}
-DataPageHeaderV2& DataPageHeaderV2::operator=(const DataPageHeaderV2& other55) {
- num_values = other55.num_values;
- num_nulls = other55.num_nulls;
- num_rows = other55.num_rows;
- encoding = other55.encoding;
- definition_levels_byte_length = other55.definition_levels_byte_length;
- repetition_levels_byte_length = other55.repetition_levels_byte_length;
- is_compressed = other55.is_compressed;
- statistics = other55.statistics;
- __isset = other55.__isset;
- return *this;
-}
-void DataPageHeaderV2::printTo(std::ostream& out) const {
- using ::apache::thrift::to_string;
- out << "DataPageHeaderV2(";
- out << "num_values=" << to_string(num_values);
- out << ", " << "num_nulls=" << to_string(num_nulls);
- out << ", " << "num_rows=" << to_string(num_rows);
- out << ", " << "encoding=" << to_string(encoding);
- out << ", " << "definition_levels_byte_length=" << to_string(definition_levels_byte_length);
- out << ", " << "repetition_levels_byte_length=" << to_string(repetition_levels_byte_length);
- out << ", " << "is_compressed="; (__isset.is_compressed ? (out << to_string(is_compressed)) : (out << "<null>"));
- out << ", " << "statistics="; (__isset.statistics ? (out << to_string(statistics)) : (out << "<null>"));
- out << ")";
-}
-
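Relative to DataPageHeader, the V2 header above adds null and row counts and records the byte lengths of the repetition and definition level runs, which the Parquet format stores uncompressed at the front of the page even when the values are compressed (is_compressed is optional and defaults to true in parquet.thrift). A sketch of the offset arithmetic this enables, assuming the spec's V2 page layout of repetition levels, then definition levels, then values:

// Sketch; assumes the Parquet-spec V2 page layout described above.
#include <cstdint>
#include "parquet_types.h"  // header name is an assumption

// Offset of the encoded value bytes inside a V2 data page buffer.
int32_t ValueBytesOffset(const parquet::format::DataPageHeaderV2& h) {
  return h.repetition_levels_byte_length + h.definition_levels_byte_length;
}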
-
-SplitBlockAlgorithm::~SplitBlockAlgorithm() noexcept {
-}
-
-std::ostream& operator<<(std::ostream& out, const SplitBlockAlgorithm& obj)
-{
- obj.printTo(out);
- return out;
-}
-
-
-uint32_t SplitBlockAlgorithm::read(::apache::thrift::protocol::TProtocol* iprot) {
-
- ::apache::thrift::protocol::TInputRecursionTracker tracker(*iprot);
- uint32_t xfer = 0;
- std::string fname;
- ::apache::thrift::protocol::TType ftype;
- int16_t fid;
-
- xfer += iprot->readStructBegin(fname);
-
- using ::apache::thrift::protocol::TProtocolException;
-
-
- while (true)
- {
- xfer += iprot->readFieldBegin(fname, ftype, fid);
- if (ftype == ::apache::thrift::protocol::T_STOP) {
- break;
- }
- xfer += iprot->skip(ftype);
- xfer += iprot->readFieldEnd();
- }
-
- xfer += iprot->readStructEnd();
-
- return xfer;
-}
-
-uint32_t SplitBlockAlgorithm::write(::apache::thrift::protocol::TProtocol* oprot) const {
- uint32_t xfer = 0;
- ::apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot);
- xfer += oprot->writeStructBegin("SplitBlockAlgorithm");
-
- xfer += oprot->writeFieldStop();
- xfer += oprot->writeStructEnd();
- return xfer;
-}
-
-void swap(SplitBlockAlgorithm &a, SplitBlockAlgorithm &b) {
- using ::std::swap;
- (void) a;
- (void) b;
-}
-
-SplitBlockAlgorithm::SplitBlockAlgorithm(const SplitBlockAlgorithm& other56) {
- (void) other56;
-}
-SplitBlockAlgorithm& SplitBlockAlgorithm::operator=(const SplitBlockAlgorithm& other57) {
- (void) other57;
- return *this;
-}
-void SplitBlockAlgorithm::printTo(std::ostream& out) const {
- using ::apache::thrift::to_string;
- out << "SplitBlockAlgorithm(";
- out << ")";
-}
-
-
-BloomFilterAlgorithm::~BloomFilterAlgorithm() noexcept {
-}
-
-
-void BloomFilterAlgorithm::__set_BLOCK(const SplitBlockAlgorithm& val) {
- this->BLOCK = val;
- __isset.BLOCK = true;
-}
-std::ostream& operator<<(std::ostream& out, const BloomFilterAlgorithm& obj)
-{
- obj.printTo(out);
- return out;
-}
-
-
-uint32_t BloomFilterAlgorithm::read(::apache::thrift::protocol::TProtocol* iprot) {
-
- ::apache::thrift::protocol::TInputRecursionTracker tracker(*iprot);
- uint32_t xfer = 0;
- std::string fname;
- ::apache::thrift::protocol::TType ftype;
- int16_t fid;
-
- xfer += iprot->readStructBegin(fname);
-
- using ::apache::thrift::protocol::TProtocolException;
-
-
- while (true)
- {
- xfer += iprot->readFieldBegin(fname, ftype, fid);
- if (ftype == ::apache::thrift::protocol::T_STOP) {
- break;
- }
- switch (fid)
- {
- case 1:
- if (ftype == ::apache::thrift::protocol::T_STRUCT) {
- xfer += this->BLOCK.read(iprot);
- this->__isset.BLOCK = true;
- } else {
- xfer += iprot->skip(ftype);
- }
- break;
- default:
- xfer += iprot->skip(ftype);
- break;
- }
- xfer += iprot->readFieldEnd();
- }
-
- xfer += iprot->readStructEnd();
-
- return xfer;
-}
-
-uint32_t BloomFilterAlgorithm::write(::apache::thrift::protocol::TProtocol* oprot) const {
- uint32_t xfer = 0;
- ::apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot);
- xfer += oprot->writeStructBegin("BloomFilterAlgorithm");
-
- if (this->__isset.BLOCK) {
- xfer += oprot->writeFieldBegin("BLOCK", ::apache::thrift::protocol::T_STRUCT, 1);
- xfer += this->BLOCK.write(oprot);
- xfer += oprot->writeFieldEnd();
- }
- xfer += oprot->writeFieldStop();
- xfer += oprot->writeStructEnd();
- return xfer;
-}
-
-void swap(BloomFilterAlgorithm &a, BloomFilterAlgorithm &b) {
- using ::std::swap;
- swap(a.BLOCK, b.BLOCK);
- swap(a.__isset, b.__isset);
-}
-
-BloomFilterAlgorithm::BloomFilterAlgorithm(const BloomFilterAlgorithm& other58) {
- BLOCK = other58.BLOCK;
- __isset = other58.__isset;
-}
-BloomFilterAlgorithm& BloomFilterAlgorithm::operator=(const BloomFilterAlgorithm& other59) {
- BLOCK = other59.BLOCK;
- __isset = other59.__isset;
- return *this;
-}
-void BloomFilterAlgorithm::printTo(std::ostream& out) const {
- using ::apache::thrift::to_string;
- out << "BloomFilterAlgorithm(";
- out << "BLOCK="; (__isset.BLOCK ? (out << to_string(BLOCK)) : (out << "<null>"));
- out << ")";
-}
-
-
-XxHash::~XxHash() noexcept {
-}
-
-std::ostream& operator<<(std::ostream& out, const XxHash& obj)
-{
- obj.printTo(out);
- return out;
-}
-
-
-uint32_t XxHash::read(::apache::thrift::protocol::TProtocol* iprot) {
-
- ::apache::thrift::protocol::TInputRecursionTracker tracker(*iprot);
- uint32_t xfer = 0;
- std::string fname;
- ::apache::thrift::protocol::TType ftype;
- int16_t fid;
-
- xfer += iprot->readStructBegin(fname);
-
- using ::apache::thrift::protocol::TProtocolException;
-
-
- while (true)
- {
- xfer += iprot->readFieldBegin(fname, ftype, fid);
- if (ftype == ::apache::thrift::protocol::T_STOP) {
- break;
- }
- xfer += iprot->skip(ftype);
- xfer += iprot->readFieldEnd();
- }
-
- xfer += iprot->readStructEnd();
-
- return xfer;
-}
-
-uint32_t XxHash::write(::apache::thrift::protocol::TProtocol* oprot) const {
- uint32_t xfer = 0;
- ::apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot);
- xfer += oprot->writeStructBegin("XxHash");
-
- xfer += oprot->writeFieldStop();
- xfer += oprot->writeStructEnd();
- return xfer;
-}
-
-void swap(XxHash &a, XxHash &b) {
- using ::std::swap;
- (void) a;
- (void) b;
-}
-
-XxHash::XxHash(const XxHash& other60) {
- (void) other60;
-}
-XxHash& XxHash::operator=(const XxHash& other61) {
- (void) other61;
- return *this;
-}
-void XxHash::printTo(std::ostream& out) const {
- using ::apache::thrift::to_string;
- out << "XxHash(";
- out << ")";
-}
-
-
-BloomFilterHash::~BloomFilterHash() noexcept {
-}
-
-
-void BloomFilterHash::__set_XXHASH(const XxHash& val) {
- this->XXHASH = val;
- __isset.XXHASH = true;
-}
-std::ostream& operator<<(std::ostream& out, const BloomFilterHash& obj)
-{
- obj.printTo(out);
- return out;
-}
-
-
-uint32_t BloomFilterHash::read(::apache::thrift::protocol::TProtocol* iprot) {
-
- ::apache::thrift::protocol::TInputRecursionTracker tracker(*iprot);
- uint32_t xfer = 0;
- std::string fname;
- ::apache::thrift::protocol::TType ftype;
- int16_t fid;
-
- xfer += iprot->readStructBegin(fname);
-
- using ::apache::thrift::protocol::TProtocolException;
-
-
- while (true)
- {
- xfer += iprot->readFieldBegin(fname, ftype, fid);
- if (ftype == ::apache::thrift::protocol::T_STOP) {
- break;
- }
- switch (fid)
- {
- case 1:
- if (ftype == ::apache::thrift::protocol::T_STRUCT) {
- xfer += this->XXHASH.read(iprot);
- this->__isset.XXHASH = true;
- } else {
- xfer += iprot->skip(ftype);
- }
- break;
- default:
- xfer += iprot->skip(ftype);
- break;
- }
- xfer += iprot->readFieldEnd();
- }
-
- xfer += iprot->readStructEnd();
-
- return xfer;
-}
-
-uint32_t BloomFilterHash::write(::apache::thrift::protocol::TProtocol* oprot) const {
- uint32_t xfer = 0;
- ::apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot);
- xfer += oprot->writeStructBegin("BloomFilterHash");
-
- if (this->__isset.XXHASH) {
- xfer += oprot->writeFieldBegin("XXHASH", ::apache::thrift::protocol::T_STRUCT, 1);
- xfer += this->XXHASH.write(oprot);
- xfer += oprot->writeFieldEnd();
- }
- xfer += oprot->writeFieldStop();
- xfer += oprot->writeStructEnd();
- return xfer;
-}
-
-void swap(BloomFilterHash &a, BloomFilterHash &b) {
- using ::std::swap;
- swap(a.XXHASH, b.XXHASH);
- swap(a.__isset, b.__isset);
-}
-
-BloomFilterHash::BloomFilterHash(const BloomFilterHash& other62) {
- XXHASH = other62.XXHASH;
- __isset = other62.__isset;
-}
-BloomFilterHash& BloomFilterHash::operator=(const BloomFilterHash& other63) {
- XXHASH = other63.XXHASH;
- __isset = other63.__isset;
- return *this;
-}
-void BloomFilterHash::printTo(std::ostream& out) const {
- using ::apache::thrift::to_string;
- out << "BloomFilterHash(";
- out << "XXHASH="; (__isset.XXHASH ? (out << to_string(XXHASH)) : (out << "<null>"));
- out << ")";
-}
-
-
-Uncompressed::~Uncompressed() noexcept {
-}
-
-std::ostream& operator<<(std::ostream& out, const Uncompressed& obj)
-{
- obj.printTo(out);
- return out;
-}
-
-
-uint32_t Uncompressed::read(::apache::thrift::protocol::TProtocol* iprot) {
-
- ::apache::thrift::protocol::TInputRecursionTracker tracker(*iprot);
- uint32_t xfer = 0;
- std::string fname;
- ::apache::thrift::protocol::TType ftype;
- int16_t fid;
-
- xfer += iprot->readStructBegin(fname);
-
- using ::apache::thrift::protocol::TProtocolException;
-
-
- while (true)
- {
- xfer += iprot->readFieldBegin(fname, ftype, fid);
- if (ftype == ::apache::thrift::protocol::T_STOP) {
- break;
- }
- xfer += iprot->skip(ftype);
- xfer += iprot->readFieldEnd();
- }
-
- xfer += iprot->readStructEnd();
-
- return xfer;
-}
-
-uint32_t Uncompressed::write(::apache::thrift::protocol::TProtocol* oprot) const {
- uint32_t xfer = 0;
- ::apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot);
- xfer += oprot->writeStructBegin("Uncompressed");
-
- xfer += oprot->writeFieldStop();
- xfer += oprot->writeStructEnd();
- return xfer;
-}
-
-void swap(Uncompressed &a, Uncompressed &b) {
- using ::std::swap;
- (void) a;
- (void) b;
-}
-
-Uncompressed::Uncompressed(const Uncompressed& other64) {
- (void) other64;
-}
-Uncompressed& Uncompressed::operator=(const Uncompressed& other65) {
- (void) other65;
- return *this;
-}
-void Uncompressed::printTo(std::ostream& out) const {
- using ::apache::thrift::to_string;
- out << "Uncompressed(";
- out << ")";
-}
-
-
-BloomFilterCompression::~BloomFilterCompression() noexcept {
-}
-
-
-void BloomFilterCompression::__set_UNCOMPRESSED(const Uncompressed& val) {
- this->UNCOMPRESSED = val;
- __isset.UNCOMPRESSED = true;
-}
-std::ostream& operator<<(std::ostream& out, const BloomFilterCompression& obj)
-{
- obj.printTo(out);
- return out;
-}
-
-
-uint32_t BloomFilterCompression::read(::apache::thrift::protocol::TProtocol* iprot) {
-
- ::apache::thrift::protocol::TInputRecursionTracker tracker(*iprot);
- uint32_t xfer = 0;
- std::string fname;
- ::apache::thrift::protocol::TType ftype;
- int16_t fid;
-
- xfer += iprot->readStructBegin(fname);
-
- using ::apache::thrift::protocol::TProtocolException;
-
-
- while (true)
- {
- xfer += iprot->readFieldBegin(fname, ftype, fid);
- if (ftype == ::apache::thrift::protocol::T_STOP) {
- break;
- }
- switch (fid)
- {
- case 1:
- if (ftype == ::apache::thrift::protocol::T_STRUCT) {
- xfer += this->UNCOMPRESSED.read(iprot);
- this->__isset.UNCOMPRESSED = true;
- } else {
- xfer += iprot->skip(ftype);
- }
- break;
- default:
- xfer += iprot->skip(ftype);
- break;
- }
- xfer += iprot->readFieldEnd();
- }
-
- xfer += iprot->readStructEnd();
-
- return xfer;
-}
-
-uint32_t BloomFilterCompression::write(::apache::thrift::protocol::TProtocol* oprot) const {
- uint32_t xfer = 0;
- ::apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot);
- xfer += oprot->writeStructBegin("BloomFilterCompression");
-
- if (this->__isset.UNCOMPRESSED) {
- xfer += oprot->writeFieldBegin("UNCOMPRESSED", ::apache::thrift::protocol::T_STRUCT, 1);
- xfer += this->UNCOMPRESSED.write(oprot);
- xfer += oprot->writeFieldEnd();
- }
- xfer += oprot->writeFieldStop();
- xfer += oprot->writeStructEnd();
- return xfer;
-}
-
-void swap(BloomFilterCompression &a, BloomFilterCompression &b) {
- using ::std::swap;
- swap(a.UNCOMPRESSED, b.UNCOMPRESSED);
- swap(a.__isset, b.__isset);
-}
-
-BloomFilterCompression::BloomFilterCompression(const BloomFilterCompression& other66) {
- UNCOMPRESSED = other66.UNCOMPRESSED;
- __isset = other66.__isset;
-}
-BloomFilterCompression& BloomFilterCompression::operator=(const BloomFilterCompression& other67) {
- UNCOMPRESSED = other67.UNCOMPRESSED;
- __isset = other67.__isset;
- return *this;
-}
-void BloomFilterCompression::printTo(std::ostream& out) const {
- using ::apache::thrift::to_string;
- out << "BloomFilterCompression(";
- out << "UNCOMPRESSED="; (__isset.UNCOMPRESSED ? (out << to_string(UNCOMPRESSED)) : (out << "<null>"));
- out << ")";
-}
-
-
-BloomFilterHeader::~BloomFilterHeader() noexcept {
-}
-
-
-void BloomFilterHeader::__set_numBytes(const int32_t val) {
- this->numBytes = val;
-}
-
-void BloomFilterHeader::__set_algorithm(const BloomFilterAlgorithm& val) {
- this->algorithm = val;
-}
-
-void BloomFilterHeader::__set_hash(const BloomFilterHash& val) {
- this->hash = val;
-}
-
-void BloomFilterHeader::__set_compression(const BloomFilterCompression& val) {
- this->compression = val;
-}
-std::ostream& operator<<(std::ostream& out, const BloomFilterHeader& obj)
-{
- obj.printTo(out);
- return out;
-}
-
-
-uint32_t BloomFilterHeader::read(::apache::thrift::protocol::TProtocol* iprot) {
-
- ::apache::thrift::protocol::TInputRecursionTracker tracker(*iprot);
- uint32_t xfer = 0;
- std::string fname;
- ::apache::thrift::protocol::TType ftype;
- int16_t fid;
-
- xfer += iprot->readStructBegin(fname);
-
- using ::apache::thrift::protocol::TProtocolException;
-
- bool isset_numBytes = false;
- bool isset_algorithm = false;
- bool isset_hash = false;
- bool isset_compression = false;
-
- while (true)
- {
- xfer += iprot->readFieldBegin(fname, ftype, fid);
- if (ftype == ::apache::thrift::protocol::T_STOP) {
- break;
- }
- switch (fid)
- {
- case 1:
- if (ftype == ::apache::thrift::protocol::T_I32) {
- xfer += iprot->readI32(this->numBytes);
- isset_numBytes = true;
- } else {
- xfer += iprot->skip(ftype);
- }
- break;
- case 2:
- if (ftype == ::apache::thrift::protocol::T_STRUCT) {
- xfer += this->algorithm.read(iprot);
- isset_algorithm = true;
- } else {
- xfer += iprot->skip(ftype);
- }
- break;
- case 3:
- if (ftype == ::apache::thrift::protocol::T_STRUCT) {
- xfer += this->hash.read(iprot);
- isset_hash = true;
- } else {
- xfer += iprot->skip(ftype);
- }
- break;
- case 4:
- if (ftype == ::apache::thrift::protocol::T_STRUCT) {
- xfer += this->compression.read(iprot);
- isset_compression = true;
- } else {
- xfer += iprot->skip(ftype);
- }
- break;
- default:
- xfer += iprot->skip(ftype);
- break;
- }
- xfer += iprot->readFieldEnd();
- }
-
- xfer += iprot->readStructEnd();
-
- if (!isset_numBytes)
- throw TProtocolException(TProtocolException::INVALID_DATA);
- if (!isset_algorithm)
- throw TProtocolException(TProtocolException::INVALID_DATA);
- if (!isset_hash)
- throw TProtocolException(TProtocolException::INVALID_DATA);
- if (!isset_compression)
- throw TProtocolException(TProtocolException::INVALID_DATA);
- return xfer;
-}
-
-uint32_t BloomFilterHeader::write(::apache::thrift::protocol::TProtocol* oprot) const {
- uint32_t xfer = 0;
- ::apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot);
- xfer += oprot->writeStructBegin("BloomFilterHeader");
-
- xfer += oprot->writeFieldBegin("numBytes", ::apache::thrift::protocol::T_I32, 1);
- xfer += oprot->writeI32(this->numBytes);
- xfer += oprot->writeFieldEnd();
-
- xfer += oprot->writeFieldBegin("algorithm", ::apache::thrift::protocol::T_STRUCT, 2);
- xfer += this->algorithm.write(oprot);
- xfer += oprot->writeFieldEnd();
-
- xfer += oprot->writeFieldBegin("hash", ::apache::thrift::protocol::T_STRUCT, 3);
- xfer += this->hash.write(oprot);
- xfer += oprot->writeFieldEnd();
-
- xfer += oprot->writeFieldBegin("compression", ::apache::thrift::protocol::T_STRUCT, 4);
- xfer += this->compression.write(oprot);
- xfer += oprot->writeFieldEnd();
-
- xfer += oprot->writeFieldStop();
- xfer += oprot->writeStructEnd();
- return xfer;
-}
-
-void swap(BloomFilterHeader &a, BloomFilterHeader &b) {
- using ::std::swap;
- swap(a.numBytes, b.numBytes);
- swap(a.algorithm, b.algorithm);
- swap(a.hash, b.hash);
- swap(a.compression, b.compression);
-}
-
-BloomFilterHeader::BloomFilterHeader(const BloomFilterHeader& other68) {
- numBytes = other68.numBytes;
- algorithm = other68.algorithm;
- hash = other68.hash;
- compression = other68.compression;
-}
-BloomFilterHeader& BloomFilterHeader::operator=(const BloomFilterHeader& other69) {
- numBytes = other69.numBytes;
- algorithm = other69.algorithm;
- hash = other69.hash;
- compression = other69.compression;
- return *this;
-}
-void BloomFilterHeader::printTo(std::ostream& out) const {
- using ::apache::thrift::to_string;
- out << "BloomFilterHeader(";
- out << "numBytes=" << to_string(numBytes);
- out << ", " << "algorithm=" << to_string(algorithm);
- out << ", " << "hash=" << to_string(hash);
- out << ", " << "compression=" << to_string(compression);
- out << ")";
-}
-
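SplitBlockAlgorithm, XxHash, and Uncompressed above are empty marker structs, which is why their generated swap and copy bodies are no-ops; BloomFilterAlgorithm, BloomFilterHash, and BloomFilterCompression wrap them as one-member unions, and BloomFilterHeader requires all four of its fields, so it carries no __isset bitfield at all. A population sketch using only the __set_ helpers shown above (namespace and header remain assumptions):

// Sketch only; uses the __set_ helpers from the generated code above.
#include <cstdint>
#include "parquet_types.h"

parquet::format::BloomFilterHeader MakeHeader(int32_t num_bytes) {
  parquet::format::BloomFilterHeader h;
  h.__set_numBytes(num_bytes);
  parquet::format::BloomFilterAlgorithm alg;
  alg.__set_BLOCK(parquet::format::SplitBlockAlgorithm());
  h.__set_algorithm(alg);
  parquet::format::BloomFilterHash hash;
  hash.__set_XXHASH(parquet::format::XxHash());
  h.__set_hash(hash);
  parquet::format::BloomFilterCompression comp;
  comp.__set_UNCOMPRESSED(parquet::format::Uncompressed());
  h.__set_compression(comp);
  return h;
}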
-
-PageHeader::~PageHeader() noexcept {
-}
-
-
-void PageHeader::__set_type(const PageType::type val) {
- this->type = val;
-}
-
-void PageHeader::__set_uncompressed_page_size(const int32_t val) {
- this->uncompressed_page_size = val;
-}
-
-void PageHeader::__set_compressed_page_size(const int32_t val) {
- this->compressed_page_size = val;
-}
-
-void PageHeader::__set_crc(const int32_t val) {
- this->crc = val;
- __isset.crc = true;
-}
-
-void PageHeader::__set_data_page_header(const DataPageHeader& val) {
- this->data_page_header = val;
- __isset.data_page_header = true;
-}
-
-void PageHeader::__set_index_page_header(const IndexPageHeader& val) {
- this->index_page_header = val;
- __isset.index_page_header = true;
-}
-
-void PageHeader::__set_dictionary_page_header(const DictionaryPageHeader& val) {
- this->dictionary_page_header = val;
- __isset.dictionary_page_header = true;
-}
-
-void PageHeader::__set_data_page_header_v2(const DataPageHeaderV2& val) {
- this->data_page_header_v2 = val;
- __isset.data_page_header_v2 = true;
-}
-std::ostream& operator<<(std::ostream& out, const PageHeader& obj)
-{
- obj.printTo(out);
- return out;
-}
-
-
-uint32_t PageHeader::read(::apache::thrift::protocol::TProtocol* iprot) {
-
- ::apache::thrift::protocol::TInputRecursionTracker tracker(*iprot);
- uint32_t xfer = 0;
- std::string fname;
- ::apache::thrift::protocol::TType ftype;
- int16_t fid;
-
- xfer += iprot->readStructBegin(fname);
-
- using ::apache::thrift::protocol::TProtocolException;
-
- bool isset_type = false;
- bool isset_uncompressed_page_size = false;
- bool isset_compressed_page_size = false;
-
- while (true)
- {
- xfer += iprot->readFieldBegin(fname, ftype, fid);
- if (ftype == ::apache::thrift::protocol::T_STOP) {
- break;
- }
- switch (fid)
- {
- case 1:
- if (ftype == ::apache::thrift::protocol::T_I32) {
- int32_t ecast70;
- xfer += iprot->readI32(ecast70);
- this->type = (PageType::type)ecast70;
- isset_type = true;
- } else {
- xfer += iprot->skip(ftype);
- }
- break;
- case 2:
- if (ftype == ::apache::thrift::protocol::T_I32) {
- xfer += iprot->readI32(this->uncompressed_page_size);
- isset_uncompressed_page_size = true;
- } else {
- xfer += iprot->skip(ftype);
- }
- break;
- case 3:
- if (ftype == ::apache::thrift::protocol::T_I32) {
- xfer += iprot->readI32(this->compressed_page_size);
- isset_compressed_page_size = true;
- } else {
- xfer += iprot->skip(ftype);
- }
- break;
- case 4:
- if (ftype == ::apache::thrift::protocol::T_I32) {
- xfer += iprot->readI32(this->crc);
- this->__isset.crc = true;
- } else {
- xfer += iprot->skip(ftype);
- }
- break;
- case 5:
- if (ftype == ::apache::thrift::protocol::T_STRUCT) {
- xfer += this->data_page_header.read(iprot);
- this->__isset.data_page_header = true;
- } else {
- xfer += iprot->skip(ftype);
- }
- break;
- case 6:
- if (ftype == ::apache::thrift::protocol::T_STRUCT) {
- xfer += this->index_page_header.read(iprot);
- this->__isset.index_page_header = true;
- } else {
- xfer += iprot->skip(ftype);
- }
- break;
- case 7:
- if (ftype == ::apache::thrift::protocol::T_STRUCT) {
- xfer += this->dictionary_page_header.read(iprot);
- this->__isset.dictionary_page_header = true;
- } else {
- xfer += iprot->skip(ftype);
- }
- break;
- case 8:
- if (ftype == ::apache::thrift::protocol::T_STRUCT) {
- xfer += this->data_page_header_v2.read(iprot);
- this->__isset.data_page_header_v2 = true;
- } else {
- xfer += iprot->skip(ftype);
- }
- break;
- default:
- xfer += iprot->skip(ftype);
- break;
- }
- xfer += iprot->readFieldEnd();
- }
-
- xfer += iprot->readStructEnd();
-
- if (!isset_type)
- throw TProtocolException(TProtocolException::INVALID_DATA);
- if (!isset_uncompressed_page_size)
- throw TProtocolException(TProtocolException::INVALID_DATA);
- if (!isset_compressed_page_size)
- throw TProtocolException(TProtocolException::INVALID_DATA);
- return xfer;
-}
-
-uint32_t PageHeader::write(::apache::thrift::protocol::TProtocol* oprot) const {
- uint32_t xfer = 0;
- ::apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot);
- xfer += oprot->writeStructBegin("PageHeader");
-
- xfer += oprot->writeFieldBegin("type", ::apache::thrift::protocol::T_I32, 1);
- xfer += oprot->writeI32((int32_t)this->type);
- xfer += oprot->writeFieldEnd();
-
- xfer += oprot->writeFieldBegin("uncompressed_page_size", ::apache::thrift::protocol::T_I32, 2);
- xfer += oprot->writeI32(this->uncompressed_page_size);
- xfer += oprot->writeFieldEnd();
-
- xfer += oprot->writeFieldBegin("compressed_page_size", ::apache::thrift::protocol::T_I32, 3);
- xfer += oprot->writeI32(this->compressed_page_size);
- xfer += oprot->writeFieldEnd();
-
- if (this->__isset.crc) {
- xfer += oprot->writeFieldBegin("crc", ::apache::thrift::protocol::T_I32, 4);
- xfer += oprot->writeI32(this->crc);
- xfer += oprot->writeFieldEnd();
- }
- if (this->__isset.data_page_header) {
- xfer += oprot->writeFieldBegin("data_page_header", ::apache::thrift::protocol::T_STRUCT, 5);
- xfer += this->data_page_header.write(oprot);
- xfer += oprot->writeFieldEnd();
- }
- if (this->__isset.index_page_header) {
- xfer += oprot->writeFieldBegin("index_page_header", ::apache::thrift::protocol::T_STRUCT, 6);
- xfer += this->index_page_header.write(oprot);
- xfer += oprot->writeFieldEnd();
- }
- if (this->__isset.dictionary_page_header) {
- xfer += oprot->writeFieldBegin("dictionary_page_header", ::apache::thrift::protocol::T_STRUCT, 7);
- xfer += this->dictionary_page_header.write(oprot);
- xfer += oprot->writeFieldEnd();
- }
- if (this->__isset.data_page_header_v2) {
- xfer += oprot->writeFieldBegin("data_page_header_v2", ::apache::thrift::protocol::T_STRUCT, 8);
- xfer += this->data_page_header_v2.write(oprot);
- xfer += oprot->writeFieldEnd();
- }
- xfer += oprot->writeFieldStop();
- xfer += oprot->writeStructEnd();
- return xfer;
-}
-
-void swap(PageHeader &a, PageHeader &b) {
- using ::std::swap;
- swap(a.type, b.type);
- swap(a.uncompressed_page_size, b.uncompressed_page_size);
- swap(a.compressed_page_size, b.compressed_page_size);
- swap(a.crc, b.crc);
- swap(a.data_page_header, b.data_page_header);
- swap(a.index_page_header, b.index_page_header);
- swap(a.dictionary_page_header, b.dictionary_page_header);
- swap(a.data_page_header_v2, b.data_page_header_v2);
- swap(a.__isset, b.__isset);
-}
-
-PageHeader::PageHeader(const PageHeader& other71) {
- type = other71.type;
- uncompressed_page_size = other71.uncompressed_page_size;
- compressed_page_size = other71.compressed_page_size;
- crc = other71.crc;
- data_page_header = other71.data_page_header;
- index_page_header = other71.index_page_header;
- dictionary_page_header = other71.dictionary_page_header;
- data_page_header_v2 = other71.data_page_header_v2;
- __isset = other71.__isset;
-}
-PageHeader& PageHeader::operator=(const PageHeader& other72) {
- type = other72.type;
- uncompressed_page_size = other72.uncompressed_page_size;
- compressed_page_size = other72.compressed_page_size;
- crc = other72.crc;
- data_page_header = other72.data_page_header;
- index_page_header = other72.index_page_header;
- dictionary_page_header = other72.dictionary_page_header;
- data_page_header_v2 = other72.data_page_header_v2;
- __isset = other72.__isset;
- return *this;
-}
-void PageHeader::printTo(std::ostream& out) const {
- using ::apache::thrift::to_string;
- out << "PageHeader(";
- out << "type=" << to_string(type);
- out << ", " << "uncompressed_page_size=" << to_string(uncompressed_page_size);
- out << ", " << "compressed_page_size=" << to_string(compressed_page_size);
- out << ", " << "crc="; (__isset.crc ? (out << to_string(crc)) : (out << "<null>"));
- out << ", " << "data_page_header="; (__isset.data_page_header ? (out << to_string(data_page_header)) : (out << "<null>"));
- out << ", " << "index_page_header="; (__isset.index_page_header ? (out << to_string(index_page_header)) : (out << "<null>"));
- out << ", " << "dictionary_page_header="; (__isset.dictionary_page_header ? (out << to_string(dictionary_page_header)) : (out << "<null>"));
- out << ", " << "data_page_header_v2="; (__isset.data_page_header_v2 ? (out << to_string(data_page_header_v2)) : (out << "<null>"));
- out << ")";
-}
-
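PageHeader above is the envelope decoded ahead of every page: type and the two sizes are required, crc is optional, and exactly one of the four variant headers is expected to be set to match type, though the generated code does not enforce that pairing. A dispatch sketch under the same namespace assumption:

// Sketch; assumes callers keep 'type' consistent with which variant is set,
// which the generated code itself does not check.
#include <stdexcept>
#include "parquet_types.h"

int32_t NumValues(const parquet::format::PageHeader& h) {
  switch (h.type) {
    case parquet::format::PageType::DATA_PAGE:
      return h.data_page_header.num_values;
    case parquet::format::PageType::DATA_PAGE_V2:
      return h.data_page_header_v2.num_values;
    case parquet::format::PageType::DICTIONARY_PAGE:
      return h.dictionary_page_header.num_values;
    default:
      throw std::runtime_error("page type carries no value count");
  }
}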
-
-KeyValue::~KeyValue() noexcept {
-}
-
-
-void KeyValue::__set_key(const std::string& val) {
- this->key = val;
-}
-
-void KeyValue::__set_value(const std::string& val) {
- this->value = val;
- __isset.value = true;
-}
-std::ostream& operator<<(std::ostream& out, const KeyValue& obj)
-{
- obj.printTo(out);
- return out;
-}
-
-
-uint32_t KeyValue::read(::apache::thrift::protocol::TProtocol* iprot) {
-
- ::apache::thrift::protocol::TInputRecursionTracker tracker(*iprot);
- uint32_t xfer = 0;
- std::string fname;
- ::apache::thrift::protocol::TType ftype;
- int16_t fid;
-
- xfer += iprot->readStructBegin(fname);
-
- using ::apache::thrift::protocol::TProtocolException;
-
- bool isset_key = false;
-
- while (true)
- {
- xfer += iprot->readFieldBegin(fname, ftype, fid);
- if (ftype == ::apache::thrift::protocol::T_STOP) {
- break;
- }
- switch (fid)
- {
- case 1:
- if (ftype == ::apache::thrift::protocol::T_STRING) {
- xfer += iprot->readString(this->key);
- isset_key = true;
- } else {
- xfer += iprot->skip(ftype);
- }
- break;
- case 2:
- if (ftype == ::apache::thrift::protocol::T_STRING) {
- xfer += iprot->readString(this->value);
- this->__isset.value = true;
- } else {
- xfer += iprot->skip(ftype);
- }
- break;
- default:
- xfer += iprot->skip(ftype);
- break;
- }
- xfer += iprot->readFieldEnd();
- }
-
- xfer += iprot->readStructEnd();
-
- if (!isset_key)
- throw TProtocolException(TProtocolException::INVALID_DATA);
- return xfer;
-}
-
-uint32_t KeyValue::write(::apache::thrift::protocol::TProtocol* oprot) const {
- uint32_t xfer = 0;
- ::apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot);
- xfer += oprot->writeStructBegin("KeyValue");
-
- xfer += oprot->writeFieldBegin("key", ::apache::thrift::protocol::T_STRING, 1);
- xfer += oprot->writeString(this->key);
- xfer += oprot->writeFieldEnd();
-
- if (this->__isset.value) {
- xfer += oprot->writeFieldBegin("value", ::apache::thrift::protocol::T_STRING, 2);
- xfer += oprot->writeString(this->value);
- xfer += oprot->writeFieldEnd();
- }
- xfer += oprot->writeFieldStop();
- xfer += oprot->writeStructEnd();
- return xfer;
-}
-
-void swap(KeyValue &a, KeyValue &b) {
- using ::std::swap;
- swap(a.key, b.key);
- swap(a.value, b.value);
- swap(a.__isset, b.__isset);
-}
-
-KeyValue::KeyValue(const KeyValue& other73) {
- key = other73.key;
- value = other73.value;
- __isset = other73.__isset;
-}
-KeyValue& KeyValue::operator=(const KeyValue& other74) {
- key = other74.key;
- value = other74.value;
- __isset = other74.__isset;
- return *this;
-}
-void KeyValue::printTo(std::ostream& out) const {
- using ::apache::thrift::to_string;
- out << "KeyValue(";
- out << "key=" << to_string(key);
- out << ", " << "value="; (__isset.value ? (out << to_string(value)) : (out << "<null>"));
- out << ")";
-}
-
-
-SortingColumn::~SortingColumn() noexcept {
-}
-
-
-void SortingColumn::__set_column_idx(const int32_t val) {
- this->column_idx = val;
-}
-
-void SortingColumn::__set_descending(const bool val) {
- this->descending = val;
-}
-
-void SortingColumn::__set_nulls_first(const bool val) {
- this->nulls_first = val;
-}
-std::ostream& operator<<(std::ostream& out, const SortingColumn& obj)
-{
- obj.printTo(out);
- return out;
-}
-
-
-uint32_t SortingColumn::read(::apache::thrift::protocol::TProtocol* iprot) {
-
- ::apache::thrift::protocol::TInputRecursionTracker tracker(*iprot);
- uint32_t xfer = 0;
- std::string fname;
- ::apache::thrift::protocol::TType ftype;
- int16_t fid;
-
- xfer += iprot->readStructBegin(fname);
-
- using ::apache::thrift::protocol::TProtocolException;
-
- bool isset_column_idx = false;
- bool isset_descending = false;
- bool isset_nulls_first = false;
-
- while (true)
- {
- xfer += iprot->readFieldBegin(fname, ftype, fid);
- if (ftype == ::apache::thrift::protocol::T_STOP) {
- break;
- }
- switch (fid)
- {
- case 1:
- if (ftype == ::apache::thrift::protocol::T_I32) {
- xfer += iprot->readI32(this->column_idx);
- isset_column_idx = true;
- } else {
- xfer += iprot->skip(ftype);
- }
- break;
- case 2:
- if (ftype == ::apache::thrift::protocol::T_BOOL) {
- xfer += iprot->readBool(this->descending);
- isset_descending = true;
- } else {
- xfer += iprot->skip(ftype);
- }
- break;
- case 3:
- if (ftype == ::apache::thrift::protocol::T_BOOL) {
- xfer += iprot->readBool(this->nulls_first);
- isset_nulls_first = true;
- } else {
- xfer += iprot->skip(ftype);
- }
- break;
- default:
- xfer += iprot->skip(ftype);
- break;
- }
- xfer += iprot->readFieldEnd();
- }
-
- xfer += iprot->readStructEnd();
-
- if (!isset_column_idx)
- throw TProtocolException(TProtocolException::INVALID_DATA);
- if (!isset_descending)
- throw TProtocolException(TProtocolException::INVALID_DATA);
- if (!isset_nulls_first)
- throw TProtocolException(TProtocolException::INVALID_DATA);
- return xfer;
-}
-
-uint32_t SortingColumn::write(::apache::thrift::protocol::TProtocol* oprot) const {
- uint32_t xfer = 0;
- ::apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot);
- xfer += oprot->writeStructBegin("SortingColumn");
-
- xfer += oprot->writeFieldBegin("column_idx", ::apache::thrift::protocol::T_I32, 1);
- xfer += oprot->writeI32(this->column_idx);
- xfer += oprot->writeFieldEnd();
-
- xfer += oprot->writeFieldBegin("descending", ::apache::thrift::protocol::T_BOOL, 2);
- xfer += oprot->writeBool(this->descending);
- xfer += oprot->writeFieldEnd();
-
- xfer += oprot->writeFieldBegin("nulls_first", ::apache::thrift::protocol::T_BOOL, 3);
- xfer += oprot->writeBool(this->nulls_first);
- xfer += oprot->writeFieldEnd();
-
- xfer += oprot->writeFieldStop();
- xfer += oprot->writeStructEnd();
- return xfer;
-}
-
-void swap(SortingColumn &a, SortingColumn &b) {
- using ::std::swap;
- swap(a.column_idx, b.column_idx);
- swap(a.descending, b.descending);
- swap(a.nulls_first, b.nulls_first);
-}
-
-SortingColumn::SortingColumn(const SortingColumn& other75) {
- column_idx = other75.column_idx;
- descending = other75.descending;
- nulls_first = other75.nulls_first;
-}
-SortingColumn& SortingColumn::operator=(const SortingColumn& other76) {
- column_idx = other76.column_idx;
- descending = other76.descending;
- nulls_first = other76.nulls_first;
- return *this;
-}
-void SortingColumn::printTo(std::ostream& out) const {
- using ::apache::thrift::to_string;
- out << "SortingColumn(";
- out << "column_idx=" << to_string(column_idx);
- out << ", " << "descending=" << to_string(descending);
- out << ", " << "nulls_first=" << to_string(nulls_first);
- out << ")";
-}
-
-
-PageEncodingStats::~PageEncodingStats() noexcept {
-}
-
-
-void PageEncodingStats::__set_page_type(const PageType::type val) {
- this->page_type = val;
-}
-
-void PageEncodingStats::__set_encoding(const Encoding::type val) {
- this->encoding = val;
-}
-
-void PageEncodingStats::__set_count(const int32_t val) {
- this->count = val;
-}
-std::ostream& operator<<(std::ostream& out, const PageEncodingStats& obj)
-{
- obj.printTo(out);
- return out;
-}
-
-
-uint32_t PageEncodingStats::read(::apache::thrift::protocol::TProtocol* iprot) {
-
- ::apache::thrift::protocol::TInputRecursionTracker tracker(*iprot);
- uint32_t xfer = 0;
- std::string fname;
- ::apache::thrift::protocol::TType ftype;
- int16_t fid;
-
- xfer += iprot->readStructBegin(fname);
-
- using ::apache::thrift::protocol::TProtocolException;
-
- bool isset_page_type = false;
- bool isset_encoding = false;
- bool isset_count = false;
-
- while (true)
- {
- xfer += iprot->readFieldBegin(fname, ftype, fid);
- if (ftype == ::apache::thrift::protocol::T_STOP) {
- break;
- }
- switch (fid)
- {
- case 1:
- if (ftype == ::apache::thrift::protocol::T_I32) {
- int32_t ecast77;
- xfer += iprot->readI32(ecast77);
- this->page_type = (PageType::type)ecast77;
- isset_page_type = true;
- } else {
- xfer += iprot->skip(ftype);
- }
- break;
- case 2:
- if (ftype == ::apache::thrift::protocol::T_I32) {
- int32_t ecast78;
- xfer += iprot->readI32(ecast78);
- this->encoding = (Encoding::type)ecast78;
- isset_encoding = true;
- } else {
- xfer += iprot->skip(ftype);
- }
- break;
- case 3:
- if (ftype == ::apache::thrift::protocol::T_I32) {
- xfer += iprot->readI32(this->count);
- isset_count = true;
- } else {
- xfer += iprot->skip(ftype);
- }
- break;
- default:
- xfer += iprot->skip(ftype);
- break;
- }
- xfer += iprot->readFieldEnd();
- }
-
- xfer += iprot->readStructEnd();
-
- if (!isset_page_type)
- throw TProtocolException(TProtocolException::INVALID_DATA);
- if (!isset_encoding)
- throw TProtocolException(TProtocolException::INVALID_DATA);
- if (!isset_count)
- throw TProtocolException(TProtocolException::INVALID_DATA);
- return xfer;
-}
-
-uint32_t PageEncodingStats::write(::apache::thrift::protocol::TProtocol* oprot) const {
- uint32_t xfer = 0;
- ::apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot);
- xfer += oprot->writeStructBegin("PageEncodingStats");
-
- xfer += oprot->writeFieldBegin("page_type", ::apache::thrift::protocol::T_I32, 1);
- xfer += oprot->writeI32((int32_t)this->page_type);
- xfer += oprot->writeFieldEnd();
-
- xfer += oprot->writeFieldBegin("encoding", ::apache::thrift::protocol::T_I32, 2);
- xfer += oprot->writeI32((int32_t)this->encoding);
- xfer += oprot->writeFieldEnd();
-
- xfer += oprot->writeFieldBegin("count", ::apache::thrift::protocol::T_I32, 3);
- xfer += oprot->writeI32(this->count);
- xfer += oprot->writeFieldEnd();
-
- xfer += oprot->writeFieldStop();
- xfer += oprot->writeStructEnd();
- return xfer;
-}
-
-void swap(PageEncodingStats &a, PageEncodingStats &b) {
- using ::std::swap;
- swap(a.page_type, b.page_type);
- swap(a.encoding, b.encoding);
- swap(a.count, b.count);
-}
-
-PageEncodingStats::PageEncodingStats(const PageEncodingStats& other79) {
- page_type = other79.page_type;
- encoding = other79.encoding;
- count = other79.count;
-}
-PageEncodingStats& PageEncodingStats::operator=(const PageEncodingStats& other80) {
- page_type = other80.page_type;
- encoding = other80.encoding;
- count = other80.count;
- return *this;
-}
-void PageEncodingStats::printTo(std::ostream& out) const {
- using ::apache::thrift::to_string;
- out << "PageEncodingStats(";
- out << "page_type=" << to_string(page_type);
- out << ", " << "encoding=" << to_string(encoding);
- out << ", " << "count=" << to_string(count);
- out << ")";
-}
-
-
-ColumnMetaData::~ColumnMetaData() noexcept {
-}
-
-
-void ColumnMetaData::__set_type(const Type::type val) {
- this->type = val;
-}
-
-void ColumnMetaData::__set_encodings(const std::vector<Encoding::type> & val) {
- this->encodings = val;
-}
-
-void ColumnMetaData::__set_path_in_schema(const std::vector<std::string> & val) {
- this->path_in_schema = val;
-}
-
-void ColumnMetaData::__set_codec(const CompressionCodec::type val) {
- this->codec = val;
-}
-
-void ColumnMetaData::__set_num_values(const int64_t val) {
- this->num_values = val;
-}
-
-void ColumnMetaData::__set_total_uncompressed_size(const int64_t val) {
- this->total_uncompressed_size = val;
-}
-
-void ColumnMetaData::__set_total_compressed_size(const int64_t val) {
- this->total_compressed_size = val;
-}
-
-void ColumnMetaData::__set_key_value_metadata(const std::vector<KeyValue> & val) {
- this->key_value_metadata = val;
-__isset.key_value_metadata = true;
-}
-
-void ColumnMetaData::__set_data_page_offset(const int64_t val) {
- this->data_page_offset = val;
-}
-
-void ColumnMetaData::__set_index_page_offset(const int64_t val) {
- this->index_page_offset = val;
-__isset.index_page_offset = true;
-}
-
-void ColumnMetaData::__set_dictionary_page_offset(const int64_t val) {
- this->dictionary_page_offset = val;
-__isset.dictionary_page_offset = true;
-}
-
-void ColumnMetaData::__set_statistics(const Statistics& val) {
- this->statistics = val;
-__isset.statistics = true;
-}
-
-void ColumnMetaData::__set_encoding_stats(const std::vector<PageEncodingStats> & val) {
- this->encoding_stats = val;
-__isset.encoding_stats = true;
-}
-
-void ColumnMetaData::__set_bloom_filter_offset(const int64_t val) {
- this->bloom_filter_offset = val;
-__isset.bloom_filter_offset = true;
-}
-std::ostream& operator<<(std::ostream& out, const ColumnMetaData& obj)
-{
- obj.printTo(out);
- return out;
-}
-
-
-uint32_t ColumnMetaData::read(::apache::thrift::protocol::TProtocol* iprot) {
-
- ::apache::thrift::protocol::TInputRecursionTracker tracker(*iprot);
- uint32_t xfer = 0;
- std::string fname;
- ::apache::thrift::protocol::TType ftype;
- int16_t fid;
-
- xfer += iprot->readStructBegin(fname);
-
- using ::apache::thrift::protocol::TProtocolException;
-
- bool isset_type = false;
- bool isset_encodings = false;
- bool isset_path_in_schema = false;
- bool isset_codec = false;
- bool isset_num_values = false;
- bool isset_total_uncompressed_size = false;
- bool isset_total_compressed_size = false;
- bool isset_data_page_offset = false;
-
- while (true)
- {
- xfer += iprot->readFieldBegin(fname, ftype, fid);
- if (ftype == ::apache::thrift::protocol::T_STOP) {
- break;
- }
- switch (fid)
- {
- case 1:
- if (ftype == ::apache::thrift::protocol::T_I32) {
- int32_t ecast81;
- xfer += iprot->readI32(ecast81);
- this->type = (Type::type)ecast81;
- isset_type = true;
- } else {
- xfer += iprot->skip(ftype);
- }
- break;
- case 2:
- if (ftype == ::apache::thrift::protocol::T_LIST) {
- {
- this->encodings.clear();
- uint32_t _size82;
- ::apache::thrift::protocol::TType _etype85;
- xfer += iprot->readListBegin(_etype85, _size82);
- this->encodings.resize(_size82);
- uint32_t _i86;
- for (_i86 = 0; _i86 < _size82; ++_i86)
- {
- int32_t ecast87;
- xfer += iprot->readI32(ecast87);
- this->encodings[_i86] = (Encoding::type)ecast87;
- }
- xfer += iprot->readListEnd();
- }
- isset_encodings = true;
- } else {
- xfer += iprot->skip(ftype);
- }
- break;
- case 3:
- if (ftype == ::apache::thrift::protocol::T_LIST) {
- {
- this->path_in_schema.clear();
- uint32_t _size88;
- ::apache::thrift::protocol::TType _etype91;
- xfer += iprot->readListBegin(_etype91, _size88);
- this->path_in_schema.resize(_size88);
- uint32_t _i92;
- for (_i92 = 0; _i92 < _size88; ++_i92)
- {
- xfer += iprot->readString(this->path_in_schema[_i92]);
- }
- xfer += iprot->readListEnd();
- }
- isset_path_in_schema = true;
- } else {
- xfer += iprot->skip(ftype);
- }
- break;
- case 4:
- if (ftype == ::apache::thrift::protocol::T_I32) {
- int32_t ecast93;
- xfer += iprot->readI32(ecast93);
- this->codec = (CompressionCodec::type)ecast93;
- isset_codec = true;
- } else {
- xfer += iprot->skip(ftype);
- }
- break;
- case 5:
- if (ftype == ::apache::thrift::protocol::T_I64) {
- xfer += iprot->readI64(this->num_values);
- isset_num_values = true;
- } else {
- xfer += iprot->skip(ftype);
- }
- break;
- case 6:
- if (ftype == ::apache::thrift::protocol::T_I64) {
- xfer += iprot->readI64(this->total_uncompressed_size);
- isset_total_uncompressed_size = true;
- } else {
- xfer += iprot->skip(ftype);
- }
- break;
- case 7:
- if (ftype == ::apache::thrift::protocol::T_I64) {
- xfer += iprot->readI64(this->total_compressed_size);
- isset_total_compressed_size = true;
- } else {
- xfer += iprot->skip(ftype);
- }
- break;
- case 8:
- if (ftype == ::apache::thrift::protocol::T_LIST) {
- {
- this->key_value_metadata.clear();
- uint32_t _size94;
- ::apache::thrift::protocol::TType _etype97;
- xfer += iprot->readListBegin(_etype97, _size94);
- this->key_value_metadata.resize(_size94);
- uint32_t _i98;
- for (_i98 = 0; _i98 < _size94; ++_i98)
- {
- xfer += this->key_value_metadata[_i98].read(iprot);
- }
- xfer += iprot->readListEnd();
- }
- this->__isset.key_value_metadata = true;
- } else {
- xfer += iprot->skip(ftype);
- }
- break;
- case 9:
- if (ftype == ::apache::thrift::protocol::T_I64) {
- xfer += iprot->readI64(this->data_page_offset);
- isset_data_page_offset = true;
- } else {
- xfer += iprot->skip(ftype);
- }
- break;
- case 10:
- if (ftype == ::apache::thrift::protocol::T_I64) {
- xfer += iprot->readI64(this->index_page_offset);
- this->__isset.index_page_offset = true;
- } else {
- xfer += iprot->skip(ftype);
- }
- break;
- case 11:
- if (ftype == ::apache::thrift::protocol::T_I64) {
- xfer += iprot->readI64(this->dictionary_page_offset);
- this->__isset.dictionary_page_offset = true;
- } else {
- xfer += iprot->skip(ftype);
- }
- break;
- case 12:
- if (ftype == ::apache::thrift::protocol::T_STRUCT) {
- xfer += this->statistics.read(iprot);
- this->__isset.statistics = true;
- } else {
- xfer += iprot->skip(ftype);
- }
- break;
- case 13:
- if (ftype == ::apache::thrift::protocol::T_LIST) {
- {
- this->encoding_stats.clear();
- uint32_t _size99;
- ::apache::thrift::protocol::TType _etype102;
- xfer += iprot->readListBegin(_etype102, _size99);
- this->encoding_stats.resize(_size99);
- uint32_t _i103;
- for (_i103 = 0; _i103 < _size99; ++_i103)
- {
- xfer += this->encoding_stats[_i103].read(iprot);
- }
- xfer += iprot->readListEnd();
- }
- this->__isset.encoding_stats = true;
- } else {
- xfer += iprot->skip(ftype);
- }
- break;
- case 14:
- if (ftype == ::apache::thrift::protocol::T_I64) {
- xfer += iprot->readI64(this->bloom_filter_offset);
- this->__isset.bloom_filter_offset = true;
- } else {
- xfer += iprot->skip(ftype);
- }
- break;
- default:
- xfer += iprot->skip(ftype);
- break;
- }
- xfer += iprot->readFieldEnd();
- }
-
- xfer += iprot->readStructEnd();
-
- if (!isset_type)
- throw TProtocolException(TProtocolException::INVALID_DATA);
- if (!isset_encodings)
- throw TProtocolException(TProtocolException::INVALID_DATA);
- if (!isset_path_in_schema)
- throw TProtocolException(TProtocolException::INVALID_DATA);
- if (!isset_codec)
- throw TProtocolException(TProtocolException::INVALID_DATA);
- if (!isset_num_values)
- throw TProtocolException(TProtocolException::INVALID_DATA);
- if (!isset_total_uncompressed_size)
- throw TProtocolException(TProtocolException::INVALID_DATA);
- if (!isset_total_compressed_size)
- throw TProtocolException(TProtocolException::INVALID_DATA);
- if (!isset_data_page_offset)
- throw TProtocolException(TProtocolException::INVALID_DATA);
- return xfer;
-}
-
-uint32_t ColumnMetaData::write(::apache::thrift::protocol::TProtocol* oprot) const {
- uint32_t xfer = 0;
- ::apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot);
- xfer += oprot->writeStructBegin("ColumnMetaData");
-
- xfer += oprot->writeFieldBegin("type", ::apache::thrift::protocol::T_I32, 1);
- xfer += oprot->writeI32((int32_t)this->type);
- xfer += oprot->writeFieldEnd();
-
- xfer += oprot->writeFieldBegin("encodings", ::apache::thrift::protocol::T_LIST, 2);
- {
- xfer += oprot->writeListBegin(::apache::thrift::protocol::T_I32, static_cast<uint32_t>(this->encodings.size()));
- std::vector<Encoding::type> ::const_iterator _iter104;
- for (_iter104 = this->encodings.begin(); _iter104 != this->encodings.end(); ++_iter104)
- {
- xfer += oprot->writeI32((int32_t)(*_iter104));
- }
- xfer += oprot->writeListEnd();
- }
- xfer += oprot->writeFieldEnd();
-
- xfer += oprot->writeFieldBegin("path_in_schema", ::apache::thrift::protocol::T_LIST, 3);
- {
- xfer += oprot->writeListBegin(::apache::thrift::protocol::T_STRING, static_cast<uint32_t>(this->path_in_schema.size()));
- std::vector<std::string> ::const_iterator _iter105;
- for (_iter105 = this->path_in_schema.begin(); _iter105 != this->path_in_schema.end(); ++_iter105)
- {
- xfer += oprot->writeString((*_iter105));
- }
- xfer += oprot->writeListEnd();
- }
- xfer += oprot->writeFieldEnd();
-
- xfer += oprot->writeFieldBegin("codec", ::apache::thrift::protocol::T_I32, 4);
- xfer += oprot->writeI32((int32_t)this->codec);
- xfer += oprot->writeFieldEnd();
-
- xfer += oprot->writeFieldBegin("num_values", ::apache::thrift::protocol::T_I64, 5);
- xfer += oprot->writeI64(this->num_values);
- xfer += oprot->writeFieldEnd();
-
- xfer += oprot->writeFieldBegin("total_uncompressed_size", ::apache::thrift::protocol::T_I64, 6);
- xfer += oprot->writeI64(this->total_uncompressed_size);
- xfer += oprot->writeFieldEnd();
-
- xfer += oprot->writeFieldBegin("total_compressed_size", ::apache::thrift::protocol::T_I64, 7);
- xfer += oprot->writeI64(this->total_compressed_size);
- xfer += oprot->writeFieldEnd();
-
- if (this->__isset.key_value_metadata) {
- xfer += oprot->writeFieldBegin("key_value_metadata", ::apache::thrift::protocol::T_LIST, 8);
- {
- xfer += oprot->writeListBegin(::apache::thrift::protocol::T_STRUCT, static_cast<uint32_t>(this->key_value_metadata.size()));
- std::vector<KeyValue> ::const_iterator _iter106;
- for (_iter106 = this->key_value_metadata.begin(); _iter106 != this->key_value_metadata.end(); ++_iter106)
- {
- xfer += (*_iter106).write(oprot);
- }
- xfer += oprot->writeListEnd();
- }
- xfer += oprot->writeFieldEnd();
- }
- xfer += oprot->writeFieldBegin("data_page_offset", ::apache::thrift::protocol::T_I64, 9);
- xfer += oprot->writeI64(this->data_page_offset);
- xfer += oprot->writeFieldEnd();
-
- if (this->__isset.index_page_offset) {
- xfer += oprot->writeFieldBegin("index_page_offset", ::apache::thrift::protocol::T_I64, 10);
- xfer += oprot->writeI64(this->index_page_offset);
- xfer += oprot->writeFieldEnd();
- }
- if (this->__isset.dictionary_page_offset) {
- xfer += oprot->writeFieldBegin("dictionary_page_offset", ::apache::thrift::protocol::T_I64, 11);
- xfer += oprot->writeI64(this->dictionary_page_offset);
- xfer += oprot->writeFieldEnd();
- }
- if (this->__isset.statistics) {
- xfer += oprot->writeFieldBegin("statistics", ::apache::thrift::protocol::T_STRUCT, 12);
- xfer += this->statistics.write(oprot);
- xfer += oprot->writeFieldEnd();
- }
- if (this->__isset.encoding_stats) {
- xfer += oprot->writeFieldBegin("encoding_stats", ::apache::thrift::protocol::T_LIST, 13);
- {
- xfer += oprot->writeListBegin(::apache::thrift::protocol::T_STRUCT, static_cast<uint32_t>(this->encoding_stats.size()));
- std::vector<PageEncodingStats> ::const_iterator _iter107;
- for (_iter107 = this->encoding_stats.begin(); _iter107 != this->encoding_stats.end(); ++_iter107)
- {
- xfer += (*_iter107).write(oprot);
- }
- xfer += oprot->writeListEnd();
- }
- xfer += oprot->writeFieldEnd();
- }
- if (this->__isset.bloom_filter_offset) {
- xfer += oprot->writeFieldBegin("bloom_filter_offset", ::apache::thrift::protocol::T_I64, 14);
- xfer += oprot->writeI64(this->bloom_filter_offset);
- xfer += oprot->writeFieldEnd();
- }
- xfer += oprot->writeFieldStop();
- xfer += oprot->writeStructEnd();
- return xfer;
-}
-
-void swap(ColumnMetaData &a, ColumnMetaData &b) {
- using ::std::swap;
- swap(a.type, b.type);
- swap(a.encodings, b.encodings);
- swap(a.path_in_schema, b.path_in_schema);
- swap(a.codec, b.codec);
- swap(a.num_values, b.num_values);
- swap(a.total_uncompressed_size, b.total_uncompressed_size);
- swap(a.total_compressed_size, b.total_compressed_size);
- swap(a.key_value_metadata, b.key_value_metadata);
- swap(a.data_page_offset, b.data_page_offset);
- swap(a.index_page_offset, b.index_page_offset);
- swap(a.dictionary_page_offset, b.dictionary_page_offset);
- swap(a.statistics, b.statistics);
- swap(a.encoding_stats, b.encoding_stats);
- swap(a.bloom_filter_offset, b.bloom_filter_offset);
- swap(a.__isset, b.__isset);
-}
-
-ColumnMetaData::ColumnMetaData(const ColumnMetaData& other108) {
- type = other108.type;
- encodings = other108.encodings;
- path_in_schema = other108.path_in_schema;
- codec = other108.codec;
- num_values = other108.num_values;
- total_uncompressed_size = other108.total_uncompressed_size;
- total_compressed_size = other108.total_compressed_size;
- key_value_metadata = other108.key_value_metadata;
- data_page_offset = other108.data_page_offset;
- index_page_offset = other108.index_page_offset;
- dictionary_page_offset = other108.dictionary_page_offset;
- statistics = other108.statistics;
- encoding_stats = other108.encoding_stats;
- bloom_filter_offset = other108.bloom_filter_offset;
- __isset = other108.__isset;
-}
-ColumnMetaData& ColumnMetaData::operator=(const ColumnMetaData& other109) {
- type = other109.type;
- encodings = other109.encodings;
- path_in_schema = other109.path_in_schema;
- codec = other109.codec;
- num_values = other109.num_values;
- total_uncompressed_size = other109.total_uncompressed_size;
- total_compressed_size = other109.total_compressed_size;
- key_value_metadata = other109.key_value_metadata;
- data_page_offset = other109.data_page_offset;
- index_page_offset = other109.index_page_offset;
- dictionary_page_offset = other109.dictionary_page_offset;
- statistics = other109.statistics;
- encoding_stats = other109.encoding_stats;
- bloom_filter_offset = other109.bloom_filter_offset;
- __isset = other109.__isset;
- return *this;
-}
-void ColumnMetaData::printTo(std::ostream& out) const {
- using ::apache::thrift::to_string;
- out << "ColumnMetaData(";
- out << "type=" << to_string(type);
- out << ", " << "encodings=" << to_string(encodings);
- out << ", " << "path_in_schema=" << to_string(path_in_schema);
- out << ", " << "codec=" << to_string(codec);
- out << ", " << "num_values=" << to_string(num_values);
- out << ", " << "total_uncompressed_size=" << to_string(total_uncompressed_size);
- out << ", " << "total_compressed_size=" << to_string(total_compressed_size);
- out << ", " << "key_value_metadata="; (__isset.key_value_metadata ? (out << to_string(key_value_metadata)) : (out << "<null>"));
- out << ", " << "data_page_offset=" << to_string(data_page_offset);
- out << ", " << "index_page_offset="; (__isset.index_page_offset ? (out << to_string(index_page_offset)) : (out << "<null>"));
- out << ", " << "dictionary_page_offset="; (__isset.dictionary_page_offset ? (out << to_string(dictionary_page_offset)) : (out << "<null>"));
- out << ", " << "statistics="; (__isset.statistics ? (out << to_string(statistics)) : (out << "<null>"));
- out << ", " << "encoding_stats="; (__isset.encoding_stats ? (out << to_string(encoding_stats)) : (out << "<null>"));
- out << ", " << "bloom_filter_offset="; (__isset.bloom_filter_offset ? (out << to_string(bloom_filter_offset)) : (out << "<null>"));
- out << ")";
-}
-
-
-EncryptionWithFooterKey::~EncryptionWithFooterKey() noexcept {
-}
-
-std::ostream& operator<<(std::ostream& out, const EncryptionWithFooterKey& obj)
-{
- obj.printTo(out);
- return out;
-}
-
-
-uint32_t EncryptionWithFooterKey::read(::apache::thrift::protocol::TProtocol* iprot) {
-
- ::apache::thrift::protocol::TInputRecursionTracker tracker(*iprot);
- uint32_t xfer = 0;
- std::string fname;
- ::apache::thrift::protocol::TType ftype;
- int16_t fid;
-
- xfer += iprot->readStructBegin(fname);
-
- using ::apache::thrift::protocol::TProtocolException;
-
-
- while (true)
- {
- xfer += iprot->readFieldBegin(fname, ftype, fid);
- if (ftype == ::apache::thrift::protocol::T_STOP) {
- break;
- }
- xfer += iprot->skip(ftype);
- xfer += iprot->readFieldEnd();
- }
-
- xfer += iprot->readStructEnd();
-
- return xfer;
-}
-
-uint32_t EncryptionWithFooterKey::write(::apache::thrift::protocol::TProtocol* oprot) const {
- uint32_t xfer = 0;
- ::apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot);
- xfer += oprot->writeStructBegin("EncryptionWithFooterKey");
-
- xfer += oprot->writeFieldStop();
- xfer += oprot->writeStructEnd();
- return xfer;
-}
-
-void swap(EncryptionWithFooterKey &a, EncryptionWithFooterKey &b) {
- using ::std::swap;
- (void) a;
- (void) b;
-}
-
-EncryptionWithFooterKey::EncryptionWithFooterKey(const EncryptionWithFooterKey& other110) {
- (void) other110;
-}
-EncryptionWithFooterKey& EncryptionWithFooterKey::operator=(const EncryptionWithFooterKey& other111) {
- (void) other111;
- return *this;
-}
-void EncryptionWithFooterKey::printTo(std::ostream& out) const {
- using ::apache::thrift::to_string;
- out << "EncryptionWithFooterKey(";
- out << ")";
-}
-
-
-EncryptionWithColumnKey::~EncryptionWithColumnKey() noexcept {
-}
-
-
-void EncryptionWithColumnKey::__set_path_in_schema(const std::vector<std::string> & val) {
- this->path_in_schema = val;
-}
-
-void EncryptionWithColumnKey::__set_key_metadata(const std::string& val) {
- this->key_metadata = val;
-__isset.key_metadata = true;
-}
-std::ostream& operator<<(std::ostream& out, const EncryptionWithColumnKey& obj)
-{
- obj.printTo(out);
- return out;
-}
-
-
-uint32_t EncryptionWithColumnKey::read(::apache::thrift::protocol::TProtocol* iprot) {
-
- ::apache::thrift::protocol::TInputRecursionTracker tracker(*iprot);
- uint32_t xfer = 0;
- std::string fname;
- ::apache::thrift::protocol::TType ftype;
- int16_t fid;
-
- xfer += iprot->readStructBegin(fname);
-
- using ::apache::thrift::protocol::TProtocolException;
-
- bool isset_path_in_schema = false;
-
- while (true)
- {
- xfer += iprot->readFieldBegin(fname, ftype, fid);
- if (ftype == ::apache::thrift::protocol::T_STOP) {
- break;
- }
- switch (fid)
- {
- case 1:
- if (ftype == ::apache::thrift::protocol::T_LIST) {
- {
- this->path_in_schema.clear();
- uint32_t _size112;
- ::apache::thrift::protocol::TType _etype115;
- xfer += iprot->readListBegin(_etype115, _size112);
- this->path_in_schema.resize(_size112);
- uint32_t _i116;
- for (_i116 = 0; _i116 < _size112; ++_i116)
- {
- xfer += iprot->readString(this->path_in_schema[_i116]);
- }
- xfer += iprot->readListEnd();
- }
- isset_path_in_schema = true;
- } else {
- xfer += iprot->skip(ftype);
- }
- break;
- case 2:
- if (ftype == ::apache::thrift::protocol::T_STRING) {
- xfer += iprot->readBinary(this->key_metadata);
- this->__isset.key_metadata = true;
- } else {
- xfer += iprot->skip(ftype);
- }
- break;
- default:
- xfer += iprot->skip(ftype);
- break;
- }
- xfer += iprot->readFieldEnd();
- }
-
- xfer += iprot->readStructEnd();
-
- if (!isset_path_in_schema)
- throw TProtocolException(TProtocolException::INVALID_DATA);
- return xfer;
-}
-
-uint32_t EncryptionWithColumnKey::write(::apache::thrift::protocol::TProtocol* oprot) const {
- uint32_t xfer = 0;
- ::apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot);
- xfer += oprot->writeStructBegin("EncryptionWithColumnKey");
-
- xfer += oprot->writeFieldBegin("path_in_schema", ::apache::thrift::protocol::T_LIST, 1);
- {
- xfer += oprot->writeListBegin(::apache::thrift::protocol::T_STRING, static_cast<uint32_t>(this->path_in_schema.size()));
- std::vector<std::string> ::const_iterator _iter117;
- for (_iter117 = this->path_in_schema.begin(); _iter117 != this->path_in_schema.end(); ++_iter117)
- {
- xfer += oprot->writeString((*_iter117));
- }
- xfer += oprot->writeListEnd();
- }
- xfer += oprot->writeFieldEnd();
-
- if (this->__isset.key_metadata) {
- xfer += oprot->writeFieldBegin("key_metadata", ::apache::thrift::protocol::T_STRING, 2);
- xfer += oprot->writeBinary(this->key_metadata);
- xfer += oprot->writeFieldEnd();
- }
- xfer += oprot->writeFieldStop();
- xfer += oprot->writeStructEnd();
- return xfer;
-}
-
-void swap(EncryptionWithColumnKey &a, EncryptionWithColumnKey &b) {
- using ::std::swap;
- swap(a.path_in_schema, b.path_in_schema);
- swap(a.key_metadata, b.key_metadata);
- swap(a.__isset, b.__isset);
-}
-
-EncryptionWithColumnKey::EncryptionWithColumnKey(const EncryptionWithColumnKey& other118) {
- path_in_schema = other118.path_in_schema;
- key_metadata = other118.key_metadata;
- __isset = other118.__isset;
-}
-EncryptionWithColumnKey& EncryptionWithColumnKey::operator=(const EncryptionWithColumnKey& other119) {
- path_in_schema = other119.path_in_schema;
- key_metadata = other119.key_metadata;
- __isset = other119.__isset;
- return *this;
-}
-void EncryptionWithColumnKey::printTo(std::ostream& out) const {
- using ::apache::thrift::to_string;
- out << "EncryptionWithColumnKey(";
- out << "path_in_schema=" << to_string(path_in_schema);
- out << ", " << "key_metadata="; (__isset.key_metadata ? (out << to_string(key_metadata)) : (out << "<null>"));
- out << ")";
-}
-
-
-ColumnCryptoMetaData::~ColumnCryptoMetaData() noexcept {
-}
-
-
-void ColumnCryptoMetaData::__set_ENCRYPTION_WITH_FOOTER_KEY(const EncryptionWithFooterKey& val) {
- this->ENCRYPTION_WITH_FOOTER_KEY = val;
-__isset.ENCRYPTION_WITH_FOOTER_KEY = true;
-}
-
-void ColumnCryptoMetaData::__set_ENCRYPTION_WITH_COLUMN_KEY(const EncryptionWithColumnKey& val) {
- this->ENCRYPTION_WITH_COLUMN_KEY = val;
-__isset.ENCRYPTION_WITH_COLUMN_KEY = true;
-}
-std::ostream& operator<<(std::ostream& out, const ColumnCryptoMetaData& obj)
-{
- obj.printTo(out);
- return out;
-}
-
-
-uint32_t ColumnCryptoMetaData::read(::apache::thrift::protocol::TProtocol* iprot) {
-
- ::apache::thrift::protocol::TInputRecursionTracker tracker(*iprot);
- uint32_t xfer = 0;
- std::string fname;
- ::apache::thrift::protocol::TType ftype;
- int16_t fid;
-
- xfer += iprot->readStructBegin(fname);
-
- using ::apache::thrift::protocol::TProtocolException;
-
-
- while (true)
- {
- xfer += iprot->readFieldBegin(fname, ftype, fid);
- if (ftype == ::apache::thrift::protocol::T_STOP) {
- break;
- }
- switch (fid)
- {
- case 1:
- if (ftype == ::apache::thrift::protocol::T_STRUCT) {
- xfer += this->ENCRYPTION_WITH_FOOTER_KEY.read(iprot);
- this->__isset.ENCRYPTION_WITH_FOOTER_KEY = true;
- } else {
- xfer += iprot->skip(ftype);
- }
- break;
- case 2:
- if (ftype == ::apache::thrift::protocol::T_STRUCT) {
- xfer += this->ENCRYPTION_WITH_COLUMN_KEY.read(iprot);
- this->__isset.ENCRYPTION_WITH_COLUMN_KEY = true;
- } else {
- xfer += iprot->skip(ftype);
- }
- break;
- default:
- xfer += iprot->skip(ftype);
- break;
- }
- xfer += iprot->readFieldEnd();
- }
-
- xfer += iprot->readStructEnd();
-
- return xfer;
-}
-
-uint32_t ColumnCryptoMetaData::write(::apache::thrift::protocol::TProtocol* oprot) const {
- uint32_t xfer = 0;
- ::apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot);
- xfer += oprot->writeStructBegin("ColumnCryptoMetaData");
-
- if (this->__isset.ENCRYPTION_WITH_FOOTER_KEY) {
- xfer += oprot->writeFieldBegin("ENCRYPTION_WITH_FOOTER_KEY", ::apache::thrift::protocol::T_STRUCT, 1);
- xfer += this->ENCRYPTION_WITH_FOOTER_KEY.write(oprot);
- xfer += oprot->writeFieldEnd();
- }
- if (this->__isset.ENCRYPTION_WITH_COLUMN_KEY) {
- xfer += oprot->writeFieldBegin("ENCRYPTION_WITH_COLUMN_KEY", ::apache::thrift::protocol::T_STRUCT, 2);
- xfer += this->ENCRYPTION_WITH_COLUMN_KEY.write(oprot);
- xfer += oprot->writeFieldEnd();
- }
- xfer += oprot->writeFieldStop();
- xfer += oprot->writeStructEnd();
- return xfer;
-}
-
-void swap(ColumnCryptoMetaData &a, ColumnCryptoMetaData &b) {
- using ::std::swap;
- swap(a.ENCRYPTION_WITH_FOOTER_KEY, b.ENCRYPTION_WITH_FOOTER_KEY);
- swap(a.ENCRYPTION_WITH_COLUMN_KEY, b.ENCRYPTION_WITH_COLUMN_KEY);
- swap(a.__isset, b.__isset);
-}
-
-ColumnCryptoMetaData::ColumnCryptoMetaData(const ColumnCryptoMetaData& other120) {
- ENCRYPTION_WITH_FOOTER_KEY = other120.ENCRYPTION_WITH_FOOTER_KEY;
- ENCRYPTION_WITH_COLUMN_KEY = other120.ENCRYPTION_WITH_COLUMN_KEY;
- __isset = other120.__isset;
-}
-ColumnCryptoMetaData& ColumnCryptoMetaData::operator=(const ColumnCryptoMetaData& other121) {
- ENCRYPTION_WITH_FOOTER_KEY = other121.ENCRYPTION_WITH_FOOTER_KEY;
- ENCRYPTION_WITH_COLUMN_KEY = other121.ENCRYPTION_WITH_COLUMN_KEY;
- __isset = other121.__isset;
- return *this;
-}
-void ColumnCryptoMetaData::printTo(std::ostream& out) const {
- using ::apache::thrift::to_string;
- out << "ColumnCryptoMetaData(";
- out << "ENCRYPTION_WITH_FOOTER_KEY="; (__isset.ENCRYPTION_WITH_FOOTER_KEY ? (out << to_string(ENCRYPTION_WITH_FOOTER_KEY)) : (out << "<null>"));
- out << ", " << "ENCRYPTION_WITH_COLUMN_KEY="; (__isset.ENCRYPTION_WITH_COLUMN_KEY ? (out << to_string(ENCRYPTION_WITH_COLUMN_KEY)) : (out << "<null>"));
- out << ")";
-}
-
-
-ColumnChunk::~ColumnChunk() noexcept {
-}
-
-
-void ColumnChunk::__set_file_path(const std::string& val) {
- this->file_path = val;
-__isset.file_path = true;
-}
-
-void ColumnChunk::__set_file_offset(const int64_t val) {
- this->file_offset = val;
-}
-
-void ColumnChunk::__set_meta_data(const ColumnMetaData& val) {
- this->meta_data = val;
-__isset.meta_data = true;
-}
-
-void ColumnChunk::__set_offset_index_offset(const int64_t val) {
- this->offset_index_offset = val;
-__isset.offset_index_offset = true;
-}
-
-void ColumnChunk::__set_offset_index_length(const int32_t val) {
- this->offset_index_length = val;
-__isset.offset_index_length = true;
-}
-
-void ColumnChunk::__set_column_index_offset(const int64_t val) {
- this->column_index_offset = val;
-__isset.column_index_offset = true;
-}
-
-void ColumnChunk::__set_column_index_length(const int32_t val) {
- this->column_index_length = val;
-__isset.column_index_length = true;
-}
-
-void ColumnChunk::__set_crypto_metadata(const ColumnCryptoMetaData& val) {
- this->crypto_metadata = val;
-__isset.crypto_metadata = true;
-}
-
-void ColumnChunk::__set_encrypted_column_metadata(const std::string& val) {
- this->encrypted_column_metadata = val;
-__isset.encrypted_column_metadata = true;
-}
-std::ostream& operator<<(std::ostream& out, const ColumnChunk& obj)
-{
- obj.printTo(out);
- return out;
-}
-
-
-uint32_t ColumnChunk::read(::apache::thrift::protocol::TProtocol* iprot) {
-
- ::apache::thrift::protocol::TInputRecursionTracker tracker(*iprot);
- uint32_t xfer = 0;
- std::string fname;
- ::apache::thrift::protocol::TType ftype;
- int16_t fid;
-
- xfer += iprot->readStructBegin(fname);
-
- using ::apache::thrift::protocol::TProtocolException;
-
- bool isset_file_offset = false;
-
- while (true)
- {
- xfer += iprot->readFieldBegin(fname, ftype, fid);
- if (ftype == ::apache::thrift::protocol::T_STOP) {
- break;
- }
- switch (fid)
- {
- case 1:
- if (ftype == ::apache::thrift::protocol::T_STRING) {
- xfer += iprot->readString(this->file_path);
- this->__isset.file_path = true;
- } else {
- xfer += iprot->skip(ftype);
- }
- break;
- case 2:
- if (ftype == ::apache::thrift::protocol::T_I64) {
- xfer += iprot->readI64(this->file_offset);
- isset_file_offset = true;
- } else {
- xfer += iprot->skip(ftype);
- }
- break;
- case 3:
- if (ftype == ::apache::thrift::protocol::T_STRUCT) {
- xfer += this->meta_data.read(iprot);
- this->__isset.meta_data = true;
- } else {
- xfer += iprot->skip(ftype);
- }
- break;
- case 4:
- if (ftype == ::apache::thrift::protocol::T_I64) {
- xfer += iprot->readI64(this->offset_index_offset);
- this->__isset.offset_index_offset = true;
- } else {
- xfer += iprot->skip(ftype);
- }
- break;
- case 5:
- if (ftype == ::apache::thrift::protocol::T_I32) {
- xfer += iprot->readI32(this->offset_index_length);
- this->__isset.offset_index_length = true;
- } else {
- xfer += iprot->skip(ftype);
- }
- break;
- case 6:
- if (ftype == ::apache::thrift::protocol::T_I64) {
- xfer += iprot->readI64(this->column_index_offset);
- this->__isset.column_index_offset = true;
- } else {
- xfer += iprot->skip(ftype);
- }
- break;
- case 7:
- if (ftype == ::apache::thrift::protocol::T_I32) {
- xfer += iprot->readI32(this->column_index_length);
- this->__isset.column_index_length = true;
- } else {
- xfer += iprot->skip(ftype);
- }
- break;
- case 8:
- if (ftype == ::apache::thrift::protocol::T_STRUCT) {
- xfer += this->crypto_metadata.read(iprot);
- this->__isset.crypto_metadata = true;
- } else {
- xfer += iprot->skip(ftype);
- }
- break;
- case 9:
- if (ftype == ::apache::thrift::protocol::T_STRING) {
- xfer += iprot->readBinary(this->encrypted_column_metadata);
- this->__isset.encrypted_column_metadata = true;
- } else {
- xfer += iprot->skip(ftype);
- }
- break;
- default:
- xfer += iprot->skip(ftype);
- break;
- }
- xfer += iprot->readFieldEnd();
- }
-
- xfer += iprot->readStructEnd();
-
- if (!isset_file_offset)
- throw TProtocolException(TProtocolException::INVALID_DATA);
- return xfer;
-}
-
-uint32_t ColumnChunk::write(::apache::thrift::protocol::TProtocol* oprot) const {
- uint32_t xfer = 0;
- ::apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot);
- xfer += oprot->writeStructBegin("ColumnChunk");
-
- if (this->__isset.file_path) {
- xfer += oprot->writeFieldBegin("file_path", ::apache::thrift::protocol::T_STRING, 1);
- xfer += oprot->writeString(this->file_path);
- xfer += oprot->writeFieldEnd();
- }
- xfer += oprot->writeFieldBegin("file_offset", ::apache::thrift::protocol::T_I64, 2);
- xfer += oprot->writeI64(this->file_offset);
- xfer += oprot->writeFieldEnd();
-
- if (this->__isset.meta_data) {
- xfer += oprot->writeFieldBegin("meta_data", ::apache::thrift::protocol::T_STRUCT, 3);
- xfer += this->meta_data.write(oprot);
- xfer += oprot->writeFieldEnd();
- }
- if (this->__isset.offset_index_offset) {
- xfer += oprot->writeFieldBegin("offset_index_offset", ::apache::thrift::protocol::T_I64, 4);
- xfer += oprot->writeI64(this->offset_index_offset);
- xfer += oprot->writeFieldEnd();
- }
- if (this->__isset.offset_index_length) {
- xfer += oprot->writeFieldBegin("offset_index_length", ::apache::thrift::protocol::T_I32, 5);
- xfer += oprot->writeI32(this->offset_index_length);
- xfer += oprot->writeFieldEnd();
- }
- if (this->__isset.column_index_offset) {
- xfer += oprot->writeFieldBegin("column_index_offset", ::apache::thrift::protocol::T_I64, 6);
- xfer += oprot->writeI64(this->column_index_offset);
- xfer += oprot->writeFieldEnd();
- }
- if (this->__isset.column_index_length) {
- xfer += oprot->writeFieldBegin("column_index_length", ::apache::thrift::protocol::T_I32, 7);
- xfer += oprot->writeI32(this->column_index_length);
- xfer += oprot->writeFieldEnd();
- }
- if (this->__isset.crypto_metadata) {
- xfer += oprot->writeFieldBegin("crypto_metadata", ::apache::thrift::protocol::T_STRUCT, 8);
- xfer += this->crypto_metadata.write(oprot);
- xfer += oprot->writeFieldEnd();
- }
- if (this->__isset.encrypted_column_metadata) {
- xfer += oprot->writeFieldBegin("encrypted_column_metadata", ::apache::thrift::protocol::T_STRING, 9);
- xfer += oprot->writeBinary(this->encrypted_column_metadata);
- xfer += oprot->writeFieldEnd();
- }
- xfer += oprot->writeFieldStop();
- xfer += oprot->writeStructEnd();
- return xfer;
-}
-
-void swap(ColumnChunk &a, ColumnChunk &b) {
- using ::std::swap;
- swap(a.file_path, b.file_path);
- swap(a.file_offset, b.file_offset);
- swap(a.meta_data, b.meta_data);
- swap(a.offset_index_offset, b.offset_index_offset);
- swap(a.offset_index_length, b.offset_index_length);
- swap(a.column_index_offset, b.column_index_offset);
- swap(a.column_index_length, b.column_index_length);
- swap(a.crypto_metadata, b.crypto_metadata);
- swap(a.encrypted_column_metadata, b.encrypted_column_metadata);
- swap(a.__isset, b.__isset);
-}
-
-ColumnChunk::ColumnChunk(const ColumnChunk& other122) {
- file_path = other122.file_path;
- file_offset = other122.file_offset;
- meta_data = other122.meta_data;
- offset_index_offset = other122.offset_index_offset;
- offset_index_length = other122.offset_index_length;
- column_index_offset = other122.column_index_offset;
- column_index_length = other122.column_index_length;
- crypto_metadata = other122.crypto_metadata;
- encrypted_column_metadata = other122.encrypted_column_metadata;
- __isset = other122.__isset;
-}
-ColumnChunk& ColumnChunk::operator=(const ColumnChunk& other123) {
- file_path = other123.file_path;
- file_offset = other123.file_offset;
- meta_data = other123.meta_data;
- offset_index_offset = other123.offset_index_offset;
- offset_index_length = other123.offset_index_length;
- column_index_offset = other123.column_index_offset;
- column_index_length = other123.column_index_length;
- crypto_metadata = other123.crypto_metadata;
- encrypted_column_metadata = other123.encrypted_column_metadata;
- __isset = other123.__isset;
- return *this;
-}
-void ColumnChunk::printTo(std::ostream& out) const {
- using ::apache::thrift::to_string;
- out << "ColumnChunk(";
- out << "file_path="; (__isset.file_path ? (out << to_string(file_path)) : (out << "<null>"));
- out << ", " << "file_offset=" << to_string(file_offset);
- out << ", " << "meta_data="; (__isset.meta_data ? (out << to_string(meta_data)) : (out << "<null>"));
- out << ", " << "offset_index_offset="; (__isset.offset_index_offset ? (out << to_string(offset_index_offset)) : (out << "<null>"));
- out << ", " << "offset_index_length="; (__isset.offset_index_length ? (out << to_string(offset_index_length)) : (out << "<null>"));
- out << ", " << "column_index_offset="; (__isset.column_index_offset ? (out << to_string(column_index_offset)) : (out << "<null>"));
- out << ", " << "column_index_length="; (__isset.column_index_length ? (out << to_string(column_index_length)) : (out << "<null>"));
- out << ", " << "crypto_metadata="; (__isset.crypto_metadata ? (out << to_string(crypto_metadata)) : (out << "<null>"));
- out << ", " << "encrypted_column_metadata="; (__isset.encrypted_column_metadata ? (out << to_string(encrypted_column_metadata)) : (out << "<null>"));
- out << ")";
-}
-
-
-RowGroup::~RowGroup() noexcept {
-}
-
-
-void RowGroup::__set_columns(const std::vector<ColumnChunk> & val) {
- this->columns = val;
-}
-
-void RowGroup::__set_total_byte_size(const int64_t val) {
- this->total_byte_size = val;
-}
-
-void RowGroup::__set_num_rows(const int64_t val) {
- this->num_rows = val;
-}
-
-void RowGroup::__set_sorting_columns(const std::vector<SortingColumn> & val) {
- this->sorting_columns = val;
-__isset.sorting_columns = true;
-}
-
-void RowGroup::__set_file_offset(const int64_t val) {
- this->file_offset = val;
-__isset.file_offset = true;
-}
-
-void RowGroup::__set_total_compressed_size(const int64_t val) {
- this->total_compressed_size = val;
-__isset.total_compressed_size = true;
-}
-
-void RowGroup::__set_ordinal(const int16_t val) {
- this->ordinal = val;
-__isset.ordinal = true;
-}
-std::ostream& operator<<(std::ostream& out, const RowGroup& obj)
-{
- obj.printTo(out);
- return out;
-}
-
-
-uint32_t RowGroup::read(::apache::thrift::protocol::TProtocol* iprot) {
-
- ::apache::thrift::protocol::TInputRecursionTracker tracker(*iprot);
- uint32_t xfer = 0;
- std::string fname;
- ::apache::thrift::protocol::TType ftype;
- int16_t fid;
-
- xfer += iprot->readStructBegin(fname);
-
- using ::apache::thrift::protocol::TProtocolException;
-
- bool isset_columns = false;
- bool isset_total_byte_size = false;
- bool isset_num_rows = false;
-
- while (true)
- {
- xfer += iprot->readFieldBegin(fname, ftype, fid);
- if (ftype == ::apache::thrift::protocol::T_STOP) {
- break;
- }
- switch (fid)
- {
- case 1:
- if (ftype == ::apache::thrift::protocol::T_LIST) {
- {
- this->columns.clear();
- uint32_t _size124;
- ::apache::thrift::protocol::TType _etype127;
- xfer += iprot->readListBegin(_etype127, _size124);
- this->columns.resize(_size124);
- uint32_t _i128;
- for (_i128 = 0; _i128 < _size124; ++_i128)
- {
- xfer += this->columns[_i128].read(iprot);
- }
- xfer += iprot->readListEnd();
- }
- isset_columns = true;
- } else {
- xfer += iprot->skip(ftype);
- }
- break;
- case 2:
- if (ftype == ::apache::thrift::protocol::T_I64) {
- xfer += iprot->readI64(this->total_byte_size);
- isset_total_byte_size = true;
- } else {
- xfer += iprot->skip(ftype);
- }
- break;
- case 3:
- if (ftype == ::apache::thrift::protocol::T_I64) {
- xfer += iprot->readI64(this->num_rows);
- isset_num_rows = true;
- } else {
- xfer += iprot->skip(ftype);
- }
- break;
- case 4:
- if (ftype == ::apache::thrift::protocol::T_LIST) {
- {
- this->sorting_columns.clear();
- uint32_t _size129;
- ::apache::thrift::protocol::TType _etype132;
- xfer += iprot->readListBegin(_etype132, _size129);
- this->sorting_columns.resize(_size129);
- uint32_t _i133;
- for (_i133 = 0; _i133 < _size129; ++_i133)
- {
- xfer += this->sorting_columns[_i133].read(iprot);
- }
- xfer += iprot->readListEnd();
- }
- this->__isset.sorting_columns = true;
- } else {
- xfer += iprot->skip(ftype);
- }
- break;
- case 5:
- if (ftype == ::apache::thrift::protocol::T_I64) {
- xfer += iprot->readI64(this->file_offset);
- this->__isset.file_offset = true;
- } else {
- xfer += iprot->skip(ftype);
- }
- break;
- case 6:
- if (ftype == ::apache::thrift::protocol::T_I64) {
- xfer += iprot->readI64(this->total_compressed_size);
- this->__isset.total_compressed_size = true;
- } else {
- xfer += iprot->skip(ftype);
- }
- break;
- case 7:
- if (ftype == ::apache::thrift::protocol::T_I16) {
- xfer += iprot->readI16(this->ordinal);
- this->__isset.ordinal = true;
- } else {
- xfer += iprot->skip(ftype);
- }
- break;
- default:
- xfer += iprot->skip(ftype);
- break;
- }
- xfer += iprot->readFieldEnd();
- }
-
- xfer += iprot->readStructEnd();
-
- if (!isset_columns)
- throw TProtocolException(TProtocolException::INVALID_DATA);
- if (!isset_total_byte_size)
- throw TProtocolException(TProtocolException::INVALID_DATA);
- if (!isset_num_rows)
- throw TProtocolException(TProtocolException::INVALID_DATA);
- return xfer;
-}
-
-uint32_t RowGroup::write(::apache::thrift::protocol::TProtocol* oprot) const {
- uint32_t xfer = 0;
- ::apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot);
- xfer += oprot->writeStructBegin("RowGroup");
-
- xfer += oprot->writeFieldBegin("columns", ::apache::thrift::protocol::T_LIST, 1);
- {
- xfer += oprot->writeListBegin(::apache::thrift::protocol::T_STRUCT, static_cast<uint32_t>(this->columns.size()));
- std::vector<ColumnChunk> ::const_iterator _iter134;
- for (_iter134 = this->columns.begin(); _iter134 != this->columns.end(); ++_iter134)
- {
- xfer += (*_iter134).write(oprot);
- }
- xfer += oprot->writeListEnd();
- }
- xfer += oprot->writeFieldEnd();
-
- xfer += oprot->writeFieldBegin("total_byte_size", ::apache::thrift::protocol::T_I64, 2);
- xfer += oprot->writeI64(this->total_byte_size);
- xfer += oprot->writeFieldEnd();
-
- xfer += oprot->writeFieldBegin("num_rows", ::apache::thrift::protocol::T_I64, 3);
- xfer += oprot->writeI64(this->num_rows);
- xfer += oprot->writeFieldEnd();
-
- if (this->__isset.sorting_columns) {
- xfer += oprot->writeFieldBegin("sorting_columns", ::apache::thrift::protocol::T_LIST, 4);
- {
- xfer += oprot->writeListBegin(::apache::thrift::protocol::T_STRUCT, static_cast<uint32_t>(this->sorting_columns.size()));
- std::vector<SortingColumn> ::const_iterator _iter135;
- for (_iter135 = this->sorting_columns.begin(); _iter135 != this->sorting_columns.end(); ++_iter135)
- {
- xfer += (*_iter135).write(oprot);
- }
- xfer += oprot->writeListEnd();
- }
- xfer += oprot->writeFieldEnd();
- }
- if (this->__isset.file_offset) {
- xfer += oprot->writeFieldBegin("file_offset", ::apache::thrift::protocol::T_I64, 5);
- xfer += oprot->writeI64(this->file_offset);
- xfer += oprot->writeFieldEnd();
- }
- if (this->__isset.total_compressed_size) {
- xfer += oprot->writeFieldBegin("total_compressed_size", ::apache::thrift::protocol::T_I64, 6);
- xfer += oprot->writeI64(this->total_compressed_size);
- xfer += oprot->writeFieldEnd();
- }
- if (this->__isset.ordinal) {
- xfer += oprot->writeFieldBegin("ordinal", ::apache::thrift::protocol::T_I16, 7);
- xfer += oprot->writeI16(this->ordinal);
- xfer += oprot->writeFieldEnd();
- }
- xfer += oprot->writeFieldStop();
- xfer += oprot->writeStructEnd();
- return xfer;
-}
-
-void swap(RowGroup &a, RowGroup &b) {
- using ::std::swap;
- swap(a.columns, b.columns);
- swap(a.total_byte_size, b.total_byte_size);
- swap(a.num_rows, b.num_rows);
- swap(a.sorting_columns, b.sorting_columns);
- swap(a.file_offset, b.file_offset);
- swap(a.total_compressed_size, b.total_compressed_size);
- swap(a.ordinal, b.ordinal);
- swap(a.__isset, b.__isset);
-}
-
-RowGroup::RowGroup(const RowGroup& other136) {
- columns = other136.columns;
- total_byte_size = other136.total_byte_size;
- num_rows = other136.num_rows;
- sorting_columns = other136.sorting_columns;
- file_offset = other136.file_offset;
- total_compressed_size = other136.total_compressed_size;
- ordinal = other136.ordinal;
- __isset = other136.__isset;
-}
-RowGroup& RowGroup::operator=(const RowGroup& other137) {
- columns = other137.columns;
- total_byte_size = other137.total_byte_size;
- num_rows = other137.num_rows;
- sorting_columns = other137.sorting_columns;
- file_offset = other137.file_offset;
- total_compressed_size = other137.total_compressed_size;
- ordinal = other137.ordinal;
- __isset = other137.__isset;
- return *this;
-}
-void RowGroup::printTo(std::ostream& out) const {
- using ::apache::thrift::to_string;
- out << "RowGroup(";
- out << "columns=" << to_string(columns);
- out << ", " << "total_byte_size=" << to_string(total_byte_size);
- out << ", " << "num_rows=" << to_string(num_rows);
- out << ", " << "sorting_columns="; (__isset.sorting_columns ? (out << to_string(sorting_columns)) : (out << "<null>"));
- out << ", " << "file_offset="; (__isset.file_offset ? (out << to_string(file_offset)) : (out << "<null>"));
- out << ", " << "total_compressed_size="; (__isset.total_compressed_size ? (out << to_string(total_compressed_size)) : (out << "<null>"));
- out << ", " << "ordinal="; (__isset.ordinal ? (out << to_string(ordinal)) : (out << "<null>"));
- out << ")";
-}
-
-
-TypeDefinedOrder::~TypeDefinedOrder() noexcept {
-}
-
-std::ostream& operator<<(std::ostream& out, const TypeDefinedOrder& obj)
-{
- obj.printTo(out);
- return out;
-}
-
-
-uint32_t TypeDefinedOrder::read(::apache::thrift::protocol::TProtocol* iprot) {
-
- ::apache::thrift::protocol::TInputRecursionTracker tracker(*iprot);
- uint32_t xfer = 0;
- std::string fname;
- ::apache::thrift::protocol::TType ftype;
- int16_t fid;
-
- xfer += iprot->readStructBegin(fname);
-
- using ::apache::thrift::protocol::TProtocolException;
-
-
- while (true)
- {
- xfer += iprot->readFieldBegin(fname, ftype, fid);
- if (ftype == ::apache::thrift::protocol::T_STOP) {
- break;
- }
- xfer += iprot->skip(ftype);
- xfer += iprot->readFieldEnd();
- }
-
- xfer += iprot->readStructEnd();
-
- return xfer;
-}
-
-uint32_t TypeDefinedOrder::write(::apache::thrift::protocol::TProtocol* oprot) const {
- uint32_t xfer = 0;
- ::apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot);
- xfer += oprot->writeStructBegin("TypeDefinedOrder");
-
- xfer += oprot->writeFieldStop();
- xfer += oprot->writeStructEnd();
- return xfer;
-}
-
-void swap(TypeDefinedOrder &a, TypeDefinedOrder &b) {
- using ::std::swap;
- (void) a;
- (void) b;
-}
-
-TypeDefinedOrder::TypeDefinedOrder(const TypeDefinedOrder& other138) {
- (void) other138;
-}
-TypeDefinedOrder& TypeDefinedOrder::operator=(const TypeDefinedOrder& other139) {
- (void) other139;
- return *this;
-}
-void TypeDefinedOrder::printTo(std::ostream& out) const {
- using ::apache::thrift::to_string;
- out << "TypeDefinedOrder(";
- out << ")";
-}
-
-
-ColumnOrder::~ColumnOrder() noexcept {
-}
-
-
-void ColumnOrder::__set_TYPE_ORDER(const TypeDefinedOrder& val) {
- this->TYPE_ORDER = val;
-__isset.TYPE_ORDER = true;
-}
-std::ostream& operator<<(std::ostream& out, const ColumnOrder& obj)
-{
- obj.printTo(out);
- return out;
-}
-
-
-uint32_t ColumnOrder::read(::apache::thrift::protocol::TProtocol* iprot) {
-
- ::apache::thrift::protocol::TInputRecursionTracker tracker(*iprot);
- uint32_t xfer = 0;
- std::string fname;
- ::apache::thrift::protocol::TType ftype;
- int16_t fid;
-
- xfer += iprot->readStructBegin(fname);
-
- using ::apache::thrift::protocol::TProtocolException;
-
-
- while (true)
- {
- xfer += iprot->readFieldBegin(fname, ftype, fid);
- if (ftype == ::apache::thrift::protocol::T_STOP) {
- break;
- }
- switch (fid)
- {
- case 1:
- if (ftype == ::apache::thrift::protocol::T_STRUCT) {
- xfer += this->TYPE_ORDER.read(iprot);
- this->__isset.TYPE_ORDER = true;
- } else {
- xfer += iprot->skip(ftype);
- }
- break;
- default:
- xfer += iprot->skip(ftype);
- break;
- }
- xfer += iprot->readFieldEnd();
- }
-
- xfer += iprot->readStructEnd();
-
- return xfer;
-}
-
-uint32_t ColumnOrder::write(::apache::thrift::protocol::TProtocol* oprot) const {
- uint32_t xfer = 0;
- ::apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot);
- xfer += oprot->writeStructBegin("ColumnOrder");
-
- if (this->__isset.TYPE_ORDER) {
- xfer += oprot->writeFieldBegin("TYPE_ORDER", ::apache::thrift::protocol::T_STRUCT, 1);
- xfer += this->TYPE_ORDER.write(oprot);
- xfer += oprot->writeFieldEnd();
- }
- xfer += oprot->writeFieldStop();
- xfer += oprot->writeStructEnd();
- return xfer;
-}
-
-void swap(ColumnOrder &a, ColumnOrder &b) {
- using ::std::swap;
- swap(a.TYPE_ORDER, b.TYPE_ORDER);
- swap(a.__isset, b.__isset);
-}
-
-ColumnOrder::ColumnOrder(const ColumnOrder& other140) {
- TYPE_ORDER = other140.TYPE_ORDER;
- __isset = other140.__isset;
-}
-ColumnOrder& ColumnOrder::operator=(const ColumnOrder& other141) {
- TYPE_ORDER = other141.TYPE_ORDER;
- __isset = other141.__isset;
- return *this;
-}
-void ColumnOrder::printTo(std::ostream& out) const {
- using ::apache::thrift::to_string;
- out << "ColumnOrder(";
- out << "TYPE_ORDER="; (__isset.TYPE_ORDER ? (out << to_string(TYPE_ORDER)) : (out << "<null>"));
- out << ")";
-}
-
-
-PageLocation::~PageLocation() noexcept {
-}
-
-
-void PageLocation::__set_offset(const int64_t val) {
- this->offset = val;
-}
-
-void PageLocation::__set_compressed_page_size(const int32_t val) {
- this->compressed_page_size = val;
-}
-
-void PageLocation::__set_first_row_index(const int64_t val) {
- this->first_row_index = val;
-}
-std::ostream& operator<<(std::ostream& out, const PageLocation& obj)
-{
- obj.printTo(out);
- return out;
-}
-
-
-uint32_t PageLocation::read(::apache::thrift::protocol::TProtocol* iprot) {
-
- ::apache::thrift::protocol::TInputRecursionTracker tracker(*iprot);
- uint32_t xfer = 0;
- std::string fname;
- ::apache::thrift::protocol::TType ftype;
- int16_t fid;
-
- xfer += iprot->readStructBegin(fname);
-
- using ::apache::thrift::protocol::TProtocolException;
-
- bool isset_offset = false;
- bool isset_compressed_page_size = false;
- bool isset_first_row_index = false;
-
- while (true)
- {
- xfer += iprot->readFieldBegin(fname, ftype, fid);
- if (ftype == ::apache::thrift::protocol::T_STOP) {
- break;
- }
- switch (fid)
- {
- case 1:
- if (ftype == ::apache::thrift::protocol::T_I64) {
- xfer += iprot->readI64(this->offset);
- isset_offset = true;
- } else {
- xfer += iprot->skip(ftype);
- }
- break;
- case 2:
- if (ftype == ::apache::thrift::protocol::T_I32) {
- xfer += iprot->readI32(this->compressed_page_size);
- isset_compressed_page_size = true;
- } else {
- xfer += iprot->skip(ftype);
- }
- break;
- case 3:
- if (ftype == ::apache::thrift::protocol::T_I64) {
- xfer += iprot->readI64(this->first_row_index);
- isset_first_row_index = true;
- } else {
- xfer += iprot->skip(ftype);
- }
- break;
- default:
- xfer += iprot->skip(ftype);
- break;
- }
- xfer += iprot->readFieldEnd();
- }
-
- xfer += iprot->readStructEnd();
-
- if (!isset_offset)
- throw TProtocolException(TProtocolException::INVALID_DATA);
- if (!isset_compressed_page_size)
- throw TProtocolException(TProtocolException::INVALID_DATA);
- if (!isset_first_row_index)
- throw TProtocolException(TProtocolException::INVALID_DATA);
- return xfer;
-}
-
-uint32_t PageLocation::write(::apache::thrift::protocol::TProtocol* oprot) const {
- uint32_t xfer = 0;
- ::apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot);
- xfer += oprot->writeStructBegin("PageLocation");
-
- xfer += oprot->writeFieldBegin("offset", ::apache::thrift::protocol::T_I64, 1);
- xfer += oprot->writeI64(this->offset);
- xfer += oprot->writeFieldEnd();
-
- xfer += oprot->writeFieldBegin("compressed_page_size", ::apache::thrift::protocol::T_I32, 2);
- xfer += oprot->writeI32(this->compressed_page_size);
- xfer += oprot->writeFieldEnd();
-
- xfer += oprot->writeFieldBegin("first_row_index", ::apache::thrift::protocol::T_I64, 3);
- xfer += oprot->writeI64(this->first_row_index);
- xfer += oprot->writeFieldEnd();
-
- xfer += oprot->writeFieldStop();
- xfer += oprot->writeStructEnd();
- return xfer;
-}
-
-void swap(PageLocation &a, PageLocation &b) {
- using ::std::swap;
- swap(a.offset, b.offset);
- swap(a.compressed_page_size, b.compressed_page_size);
- swap(a.first_row_index, b.first_row_index);
-}
-
-PageLocation::PageLocation(const PageLocation& other142) {
- offset = other142.offset;
- compressed_page_size = other142.compressed_page_size;
- first_row_index = other142.first_row_index;
-}
-PageLocation& PageLocation::operator=(const PageLocation& other143) {
- offset = other143.offset;
- compressed_page_size = other143.compressed_page_size;
- first_row_index = other143.first_row_index;
- return *this;
-}
-void PageLocation::printTo(std::ostream& out) const {
- using ::apache::thrift::to_string;
- out << "PageLocation(";
- out << "offset=" << to_string(offset);
- out << ", " << "compressed_page_size=" << to_string(compressed_page_size);
- out << ", " << "first_row_index=" << to_string(first_row_index);
- out << ")";
-}
-
-
-OffsetIndex::~OffsetIndex() noexcept {
-}
-
-
-void OffsetIndex::__set_page_locations(const std::vector<PageLocation> & val) {
- this->page_locations = val;
-}
-std::ostream& operator<<(std::ostream& out, const OffsetIndex& obj)
-{
- obj.printTo(out);
- return out;
-}
-
-
-uint32_t OffsetIndex::read(::apache::thrift::protocol::TProtocol* iprot) {
-
- ::apache::thrift::protocol::TInputRecursionTracker tracker(*iprot);
- uint32_t xfer = 0;
- std::string fname;
- ::apache::thrift::protocol::TType ftype;
- int16_t fid;
-
- xfer += iprot->readStructBegin(fname);
-
- using ::apache::thrift::protocol::TProtocolException;
-
- bool isset_page_locations = false;
-
- while (true)
- {
- xfer += iprot->readFieldBegin(fname, ftype, fid);
- if (ftype == ::apache::thrift::protocol::T_STOP) {
- break;
- }
- switch (fid)
- {
- case 1:
- if (ftype == ::apache::thrift::protocol::T_LIST) {
- {
- this->page_locations.clear();
- uint32_t _size144;
- ::apache::thrift::protocol::TType _etype147;
- xfer += iprot->readListBegin(_etype147, _size144);
- this->page_locations.resize(_size144);
- uint32_t _i148;
- for (_i148 = 0; _i148 < _size144; ++_i148)
- {
- xfer += this->page_locations[_i148].read(iprot);
- }
- xfer += iprot->readListEnd();
- }
- isset_page_locations = true;
- } else {
- xfer += iprot->skip(ftype);
- }
- break;
- default:
- xfer += iprot->skip(ftype);
- break;
- }
- xfer += iprot->readFieldEnd();
- }
-
- xfer += iprot->readStructEnd();
-
- if (!isset_page_locations)
- throw TProtocolException(TProtocolException::INVALID_DATA);
- return xfer;
-}
-
-uint32_t OffsetIndex::write(::apache::thrift::protocol::TProtocol* oprot) const {
- uint32_t xfer = 0;
- ::apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot);
- xfer += oprot->writeStructBegin("OffsetIndex");
-
- xfer += oprot->writeFieldBegin("page_locations", ::apache::thrift::protocol::T_LIST, 1);
- {
- xfer += oprot->writeListBegin(::apache::thrift::protocol::T_STRUCT, static_cast<uint32_t>(this->page_locations.size()));
- std::vector<PageLocation> ::const_iterator _iter149;
- for (_iter149 = this->page_locations.begin(); _iter149 != this->page_locations.end(); ++_iter149)
- {
- xfer += (*_iter149).write(oprot);
- }
- xfer += oprot->writeListEnd();
- }
- xfer += oprot->writeFieldEnd();
-
- xfer += oprot->writeFieldStop();
- xfer += oprot->writeStructEnd();
- return xfer;
-}
-
-void swap(OffsetIndex &a, OffsetIndex &b) {
- using ::std::swap;
- swap(a.page_locations, b.page_locations);
-}
-
-OffsetIndex::OffsetIndex(const OffsetIndex& other150) {
- page_locations = other150.page_locations;
-}
-OffsetIndex& OffsetIndex::operator=(const OffsetIndex& other151) {
- page_locations = other151.page_locations;
- return *this;
-}
-void OffsetIndex::printTo(std::ostream& out) const {
- using ::apache::thrift::to_string;
- out << "OffsetIndex(";
- out << "page_locations=" << to_string(page_locations);
- out << ")";
-}
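// Editorial sketch: the read() loop above sizes page_locations with resize()
// and deserializes each element in place; building the same list for writing
// only needs the generated setter. Same assumptions as the PageLocation
// sketch:
parquet::format::OffsetIndex index;
parquet::format::PageLocation loc;
loc.__set_offset(4);
loc.__set_compressed_page_size(512);
loc.__set_first_row_index(0);
index.__set_page_locations({loc});      // required field 1; read() rejects its absence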
-
-
-ColumnIndex::~ColumnIndex() noexcept {
-}
-
-
-void ColumnIndex::__set_null_pages(const std::vector<bool> & val) {
- this->null_pages = val;
-}
-
-void ColumnIndex::__set_min_values(const std::vector<std::string> & val) {
- this->min_values = val;
-}
-
-void ColumnIndex::__set_max_values(const std::vector<std::string> & val) {
- this->max_values = val;
-}
-
-void ColumnIndex::__set_boundary_order(const BoundaryOrder::type val) {
- this->boundary_order = val;
-}
-
-void ColumnIndex::__set_null_counts(const std::vector<int64_t> & val) {
- this->null_counts = val;
- __isset.null_counts = true;
-}
-std::ostream& operator<<(std::ostream& out, const ColumnIndex& obj)
-{
- obj.printTo(out);
- return out;
-}
-
-
-uint32_t ColumnIndex::read(::apache::thrift::protocol::TProtocol* iprot) {
-
- ::apache::thrift::protocol::TInputRecursionTracker tracker(*iprot);
- uint32_t xfer = 0;
- std::string fname;
- ::apache::thrift::protocol::TType ftype;
- int16_t fid;
-
- xfer += iprot->readStructBegin(fname);
-
- using ::apache::thrift::protocol::TProtocolException;
-
- bool isset_null_pages = false;
- bool isset_min_values = false;
- bool isset_max_values = false;
- bool isset_boundary_order = false;
-
- while (true)
- {
- xfer += iprot->readFieldBegin(fname, ftype, fid);
- if (ftype == ::apache::thrift::protocol::T_STOP) {
- break;
- }
- switch (fid)
- {
- case 1:
- if (ftype == ::apache::thrift::protocol::T_LIST) {
- {
- this->null_pages.clear();
- uint32_t _size152;
- ::apache::thrift::protocol::TType _etype155;
- xfer += iprot->readListBegin(_etype155, _size152);
- this->null_pages.resize(_size152);
- uint32_t _i156;
- for (_i156 = 0; _i156 < _size152; ++_i156)
- {
- bool result;
- xfer += iprot->readBool(result);
- this->null_pages[_i156] = result;
- }
- xfer += iprot->readListEnd();
- }
- isset_null_pages = true;
- } else {
- xfer += iprot->skip(ftype);
- }
- break;
- case 2:
- if (ftype == ::apache::thrift::protocol::T_LIST) {
- {
- this->min_values.clear();
- uint32_t _size157;
- ::apache::thrift::protocol::TType _etype160;
- xfer += iprot->readListBegin(_etype160, _size157);
- this->min_values.resize(_size157);
- uint32_t _i161;
- for (_i161 = 0; _i161 < _size157; ++_i161)
- {
- xfer += iprot->readBinary(this->min_values[_i161]);
- }
- xfer += iprot->readListEnd();
- }
- isset_min_values = true;
- } else {
- xfer += iprot->skip(ftype);
- }
- break;
- case 3:
- if (ftype == ::apache::thrift::protocol::T_LIST) {
- {
- this->max_values.clear();
- uint32_t _size162;
- ::apache::thrift::protocol::TType _etype165;
- xfer += iprot->readListBegin(_etype165, _size162);
- this->max_values.resize(_size162);
- uint32_t _i166;
- for (_i166 = 0; _i166 < _size162; ++_i166)
- {
- xfer += iprot->readBinary(this->max_values[_i166]);
- }
- xfer += iprot->readListEnd();
- }
- isset_max_values = true;
- } else {
- xfer += iprot->skip(ftype);
- }
- break;
- case 4:
- if (ftype == ::apache::thrift::protocol::T_I32) {
- int32_t ecast167;
- xfer += iprot->readI32(ecast167);
- this->boundary_order = (BoundaryOrder::type)ecast167;
- isset_boundary_order = true;
- } else {
- xfer += iprot->skip(ftype);
- }
- break;
- case 5:
- if (ftype == ::apache::thrift::protocol::T_LIST) {
- {
- this->null_counts.clear();
- uint32_t _size168;
- ::apache::thrift::protocol::TType _etype171;
- xfer += iprot->readListBegin(_etype171, _size168);
- this->null_counts.resize(_size168);
- uint32_t _i172;
- for (_i172 = 0; _i172 < _size168; ++_i172)
- {
- xfer += iprot->readI64(this->null_counts[_i172]);
- }
- xfer += iprot->readListEnd();
- }
- this->__isset.null_counts = true;
- } else {
- xfer += iprot->skip(ftype);
- }
- break;
- default:
- xfer += iprot->skip(ftype);
- break;
- }
- xfer += iprot->readFieldEnd();
- }
-
- xfer += iprot->readStructEnd();
-
- if (!isset_null_pages)
- throw TProtocolException(TProtocolException::INVALID_DATA);
- if (!isset_min_values)
- throw TProtocolException(TProtocolException::INVALID_DATA);
- if (!isset_max_values)
- throw TProtocolException(TProtocolException::INVALID_DATA);
- if (!isset_boundary_order)
- throw TProtocolException(TProtocolException::INVALID_DATA);
- return xfer;
-}
-
-uint32_t ColumnIndex::write(::apache::thrift::protocol::TProtocol* oprot) const {
- uint32_t xfer = 0;
- ::apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot);
- xfer += oprot->writeStructBegin("ColumnIndex");
-
- xfer += oprot->writeFieldBegin("null_pages", ::apache::thrift::protocol::T_LIST, 1);
- {
- xfer += oprot->writeListBegin(::apache::thrift::protocol::T_BOOL, static_cast<uint32_t>(this->null_pages.size()));
- std::vector<bool> ::const_iterator _iter173;
- for (_iter173 = this->null_pages.begin(); _iter173 != this->null_pages.end(); ++_iter173)
- {
- xfer += oprot->writeBool((*_iter173));
- }
- xfer += oprot->writeListEnd();
- }
- xfer += oprot->writeFieldEnd();
-
- xfer += oprot->writeFieldBegin("min_values", ::apache::thrift::protocol::T_LIST, 2);
- {
- xfer += oprot->writeListBegin(::apache::thrift::protocol::T_STRING, static_cast<uint32_t>(this->min_values.size()));
- std::vector<std::string> ::const_iterator _iter174;
- for (_iter174 = this->min_values.begin(); _iter174 != this->min_values.end(); ++_iter174)
- {
- xfer += oprot->writeBinary((*_iter174));
- }
- xfer += oprot->writeListEnd();
- }
- xfer += oprot->writeFieldEnd();
-
- xfer += oprot->writeFieldBegin("max_values", ::apache::thrift::protocol::T_LIST, 3);
- {
- xfer += oprot->writeListBegin(::apache::thrift::protocol::T_STRING, static_cast<uint32_t>(this->max_values.size()));
- std::vector<std::string> ::const_iterator _iter175;
- for (_iter175 = this->max_values.begin(); _iter175 != this->max_values.end(); ++_iter175)
- {
- xfer += oprot->writeBinary((*_iter175));
- }
- xfer += oprot->writeListEnd();
- }
- xfer += oprot->writeFieldEnd();
-
- xfer += oprot->writeFieldBegin("boundary_order", ::apache::thrift::protocol::T_I32, 4);
- xfer += oprot->writeI32((int32_t)this->boundary_order);
- xfer += oprot->writeFieldEnd();
-
- if (this->__isset.null_counts) {
- xfer += oprot->writeFieldBegin("null_counts", ::apache::thrift::protocol::T_LIST, 5);
- {
- xfer += oprot->writeListBegin(::apache::thrift::protocol::T_I64, static_cast<uint32_t>(this->null_counts.size()));
- std::vector<int64_t> ::const_iterator _iter176;
- for (_iter176 = this->null_counts.begin(); _iter176 != this->null_counts.end(); ++_iter176)
- {
- xfer += oprot->writeI64((*_iter176));
- }
- xfer += oprot->writeListEnd();
- }
- xfer += oprot->writeFieldEnd();
- }
- xfer += oprot->writeFieldStop();
- xfer += oprot->writeStructEnd();
- return xfer;
-}
-
-void swap(ColumnIndex &a, ColumnIndex &b) {
- using ::std::swap;
- swap(a.null_pages, b.null_pages);
- swap(a.min_values, b.min_values);
- swap(a.max_values, b.max_values);
- swap(a.boundary_order, b.boundary_order);
- swap(a.null_counts, b.null_counts);
- swap(a.__isset, b.__isset);
-}
-
-ColumnIndex::ColumnIndex(const ColumnIndex& other177) {
- null_pages = other177.null_pages;
- min_values = other177.min_values;
- max_values = other177.max_values;
- boundary_order = other177.boundary_order;
- null_counts = other177.null_counts;
- __isset = other177.__isset;
-}
-ColumnIndex& ColumnIndex::operator=(const ColumnIndex& other178) {
- null_pages = other178.null_pages;
- min_values = other178.min_values;
- max_values = other178.max_values;
- boundary_order = other178.boundary_order;
- null_counts = other178.null_counts;
- __isset = other178.__isset;
- return *this;
-}
-void ColumnIndex::printTo(std::ostream& out) const {
- using ::apache::thrift::to_string;
- out << "ColumnIndex(";
- out << "null_pages=" << to_string(null_pages);
- out << ", " << "min_values=" << to_string(min_values);
- out << ", " << "max_values=" << to_string(max_values);
- out << ", " << "boundary_order=" << to_string(boundary_order);
- out << ", " << "null_counts="; (__isset.null_counts ? (out << to_string(null_counts)) : (out << "<null>"));
- out << ")";
-}
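// Editorial sketch: of ColumnIndex's five fields only null_counts is optional,
// so write() guards field id 5 with __isset while the other four are emitted
// unconditionally. Populating the required minimum by hand:
parquet::format::ColumnIndex ci;
ci.__set_null_pages({false});
ci.__set_min_values({std::string("a")});
ci.__set_max_values({std::string("z")});
ci.__set_boundary_order(parquet::format::BoundaryOrder::ASCENDING);
// ci.__isset.null_counts is still false here, so write() omits field id 5 and
// printTo() renders it as <null>.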
-
-
-AesGcmV1::~AesGcmV1() noexcept {
-}
-
-
-void AesGcmV1::__set_aad_prefix(const std::string& val) {
- this->aad_prefix = val;
- __isset.aad_prefix = true;
-}
-
-void AesGcmV1::__set_aad_file_unique(const std::string& val) {
- this->aad_file_unique = val;
- __isset.aad_file_unique = true;
-}
-
-void AesGcmV1::__set_supply_aad_prefix(const bool val) {
- this->supply_aad_prefix = val;
- __isset.supply_aad_prefix = true;
-}
-std::ostream& operator<<(std::ostream& out, const AesGcmV1& obj)
-{
- obj.printTo(out);
- return out;
-}
-
-
-uint32_t AesGcmV1::read(::apache::thrift::protocol::TProtocol* iprot) {
-
- ::apache::thrift::protocol::TInputRecursionTracker tracker(*iprot);
- uint32_t xfer = 0;
- std::string fname;
- ::apache::thrift::protocol::TType ftype;
- int16_t fid;
-
- xfer += iprot->readStructBegin(fname);
-
- using ::apache::thrift::protocol::TProtocolException;
-
-
- while (true)
- {
- xfer += iprot->readFieldBegin(fname, ftype, fid);
- if (ftype == ::apache::thrift::protocol::T_STOP) {
- break;
- }
- switch (fid)
- {
- case 1:
- if (ftype == ::apache::thrift::protocol::T_STRING) {
- xfer += iprot->readBinary(this->aad_prefix);
- this->__isset.aad_prefix = true;
- } else {
- xfer += iprot->skip(ftype);
- }
- break;
- case 2:
- if (ftype == ::apache::thrift::protocol::T_STRING) {
- xfer += iprot->readBinary(this->aad_file_unique);
- this->__isset.aad_file_unique = true;
- } else {
- xfer += iprot->skip(ftype);
- }
- break;
- case 3:
- if (ftype == ::apache::thrift::protocol::T_BOOL) {
- xfer += iprot->readBool(this->supply_aad_prefix);
- this->__isset.supply_aad_prefix = true;
- } else {
- xfer += iprot->skip(ftype);
- }
- break;
- default:
- xfer += iprot->skip(ftype);
- break;
- }
- xfer += iprot->readFieldEnd();
- }
-
- xfer += iprot->readStructEnd();
-
- return xfer;
-}
-
-uint32_t AesGcmV1::write(::apache::thrift::protocol::TProtocol* oprot) const {
- uint32_t xfer = 0;
- ::apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot);
- xfer += oprot->writeStructBegin("AesGcmV1");
-
- if (this->__isset.aad_prefix) {
- xfer += oprot->writeFieldBegin("aad_prefix", ::apache::thrift::protocol::T_STRING, 1);
- xfer += oprot->writeBinary(this->aad_prefix);
- xfer += oprot->writeFieldEnd();
- }
- if (this->__isset.aad_file_unique) {
- xfer += oprot->writeFieldBegin("aad_file_unique", ::apache::thrift::protocol::T_STRING, 2);
- xfer += oprot->writeBinary(this->aad_file_unique);
- xfer += oprot->writeFieldEnd();
- }
- if (this->__isset.supply_aad_prefix) {
- xfer += oprot->writeFieldBegin("supply_aad_prefix", ::apache::thrift::protocol::T_BOOL, 3);
- xfer += oprot->writeBool(this->supply_aad_prefix);
- xfer += oprot->writeFieldEnd();
- }
- xfer += oprot->writeFieldStop();
- xfer += oprot->writeStructEnd();
- return xfer;
-}
-
-void swap(AesGcmV1 &a, AesGcmV1 &b) {
- using ::std::swap;
- swap(a.aad_prefix, b.aad_prefix);
- swap(a.aad_file_unique, b.aad_file_unique);
- swap(a.supply_aad_prefix, b.supply_aad_prefix);
- swap(a.__isset, b.__isset);
-}
-
-AesGcmV1::AesGcmV1(const AesGcmV1& other179) {
- aad_prefix = other179.aad_prefix;
- aad_file_unique = other179.aad_file_unique;
- supply_aad_prefix = other179.supply_aad_prefix;
- __isset = other179.__isset;
-}
-AesGcmV1& AesGcmV1::operator=(const AesGcmV1& other180) {
- aad_prefix = other180.aad_prefix;
- aad_file_unique = other180.aad_file_unique;
- supply_aad_prefix = other180.supply_aad_prefix;
- __isset = other180.__isset;
- return *this;
-}
-void AesGcmV1::printTo(std::ostream& out) const {
- using ::apache::thrift::to_string;
- out << "AesGcmV1(";
- out << "aad_prefix="; (__isset.aad_prefix ? (out << to_string(aad_prefix)) : (out << "<null>"));
- out << ", " << "aad_file_unique="; (__isset.aad_file_unique ? (out << to_string(aad_file_unique)) : (out << "<null>"));
- out << ", " << "supply_aad_prefix="; (__isset.supply_aad_prefix ? (out << to_string(supply_aad_prefix)) : (out << "<null>"));
- out << ")";
-}
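// Editorial sketch: every AesGcmV1 field is optional, so a default-constructed
// value serializes as nothing but a field stop; each setter flips the matching
// __isset bit, which write() and printTo() both consult:
parquet::format::AesGcmV1 gcm;
gcm.__set_aad_prefix("prefix");         // __isset.aad_prefix becomes true
gcm.__set_supply_aad_prefix(true);
// gcm.__isset.aad_file_unique stays false, so write() skips field id 2.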
-
-
-AesGcmCtrV1::~AesGcmCtrV1() noexcept {
-}
-
-
-void AesGcmCtrV1::__set_aad_prefix(const std::string& val) {
- this->aad_prefix = val;
- __isset.aad_prefix = true;
-}
-
-void AesGcmCtrV1::__set_aad_file_unique(const std::string& val) {
- this->aad_file_unique = val;
- __isset.aad_file_unique = true;
-}
-
-void AesGcmCtrV1::__set_supply_aad_prefix(const bool val) {
- this->supply_aad_prefix = val;
- __isset.supply_aad_prefix = true;
-}
-std::ostream& operator<<(std::ostream& out, const AesGcmCtrV1& obj)
-{
- obj.printTo(out);
- return out;
-}
-
-
-uint32_t AesGcmCtrV1::read(::apache::thrift::protocol::TProtocol* iprot) {
-
- ::apache::thrift::protocol::TInputRecursionTracker tracker(*iprot);
- uint32_t xfer = 0;
- std::string fname;
- ::apache::thrift::protocol::TType ftype;
- int16_t fid;
-
- xfer += iprot->readStructBegin(fname);
-
- using ::apache::thrift::protocol::TProtocolException;
-
-
- while (true)
- {
- xfer += iprot->readFieldBegin(fname, ftype, fid);
- if (ftype == ::apache::thrift::protocol::T_STOP) {
- break;
- }
- switch (fid)
- {
- case 1:
- if (ftype == ::apache::thrift::protocol::T_STRING) {
- xfer += iprot->readBinary(this->aad_prefix);
- this->__isset.aad_prefix = true;
- } else {
- xfer += iprot->skip(ftype);
- }
- break;
- case 2:
- if (ftype == ::apache::thrift::protocol::T_STRING) {
- xfer += iprot->readBinary(this->aad_file_unique);
- this->__isset.aad_file_unique = true;
- } else {
- xfer += iprot->skip(ftype);
- }
- break;
- case 3:
- if (ftype == ::apache::thrift::protocol::T_BOOL) {
- xfer += iprot->readBool(this->supply_aad_prefix);
- this->__isset.supply_aad_prefix = true;
- } else {
- xfer += iprot->skip(ftype);
- }
- break;
- default:
- xfer += iprot->skip(ftype);
- break;
- }
- xfer += iprot->readFieldEnd();
- }
-
- xfer += iprot->readStructEnd();
-
- return xfer;
-}
-
-uint32_t AesGcmCtrV1::write(::apache::thrift::protocol::TProtocol* oprot) const {
- uint32_t xfer = 0;
- ::apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot);
- xfer += oprot->writeStructBegin("AesGcmCtrV1");
-
- if (this->__isset.aad_prefix) {
- xfer += oprot->writeFieldBegin("aad_prefix", ::apache::thrift::protocol::T_STRING, 1);
- xfer += oprot->writeBinary(this->aad_prefix);
- xfer += oprot->writeFieldEnd();
- }
- if (this->__isset.aad_file_unique) {
- xfer += oprot->writeFieldBegin("aad_file_unique", ::apache::thrift::protocol::T_STRING, 2);
- xfer += oprot->writeBinary(this->aad_file_unique);
- xfer += oprot->writeFieldEnd();
- }
- if (this->__isset.supply_aad_prefix) {
- xfer += oprot->writeFieldBegin("supply_aad_prefix", ::apache::thrift::protocol::T_BOOL, 3);
- xfer += oprot->writeBool(this->supply_aad_prefix);
- xfer += oprot->writeFieldEnd();
- }
- xfer += oprot->writeFieldStop();
- xfer += oprot->writeStructEnd();
- return xfer;
-}
-
-void swap(AesGcmCtrV1 &a, AesGcmCtrV1 &b) {
- using ::std::swap;
- swap(a.aad_prefix, b.aad_prefix);
- swap(a.aad_file_unique, b.aad_file_unique);
- swap(a.supply_aad_prefix, b.supply_aad_prefix);
- swap(a.__isset, b.__isset);
-}
-
-AesGcmCtrV1::AesGcmCtrV1(const AesGcmCtrV1& other181) {
- aad_prefix = other181.aad_prefix;
- aad_file_unique = other181.aad_file_unique;
- supply_aad_prefix = other181.supply_aad_prefix;
- __isset = other181.__isset;
-}
-AesGcmCtrV1& AesGcmCtrV1::operator=(const AesGcmCtrV1& other182) {
- aad_prefix = other182.aad_prefix;
- aad_file_unique = other182.aad_file_unique;
- supply_aad_prefix = other182.supply_aad_prefix;
- __isset = other182.__isset;
- return *this;
-}
-void AesGcmCtrV1::printTo(std::ostream& out) const {
- using ::apache::thrift::to_string;
- out << "AesGcmCtrV1(";
- out << "aad_prefix="; (__isset.aad_prefix ? (out << to_string(aad_prefix)) : (out << "<null>"));
- out << ", " << "aad_file_unique="; (__isset.aad_file_unique ? (out << to_string(aad_file_unique)) : (out << "<null>"));
- out << ", " << "supply_aad_prefix="; (__isset.supply_aad_prefix ? (out << to_string(supply_aad_prefix)) : (out << "<null>"));
- out << ")";
-}
-
-
-EncryptionAlgorithm::~EncryptionAlgorithm() noexcept {
-}
-
-
-void EncryptionAlgorithm::__set_AES_GCM_V1(const AesGcmV1& val) {
- this->AES_GCM_V1 = val;
- __isset.AES_GCM_V1 = true;
-}
-
-void EncryptionAlgorithm::__set_AES_GCM_CTR_V1(const AesGcmCtrV1& val) {
- this->AES_GCM_CTR_V1 = val;
- __isset.AES_GCM_CTR_V1 = true;
-}
-std::ostream& operator<<(std::ostream& out, const EncryptionAlgorithm& obj)
-{
- obj.printTo(out);
- return out;
-}
-
-
-uint32_t EncryptionAlgorithm::read(::apache::thrift::protocol::TProtocol* iprot) {
-
- ::apache::thrift::protocol::TInputRecursionTracker tracker(*iprot);
- uint32_t xfer = 0;
- std::string fname;
- ::apache::thrift::protocol::TType ftype;
- int16_t fid;
-
- xfer += iprot->readStructBegin(fname);
-
- using ::apache::thrift::protocol::TProtocolException;
-
-
- while (true)
- {
- xfer += iprot->readFieldBegin(fname, ftype, fid);
- if (ftype == ::apache::thrift::protocol::T_STOP) {
- break;
- }
- switch (fid)
- {
- case 1:
- if (ftype == ::apache::thrift::protocol::T_STRUCT) {
- xfer += this->AES_GCM_V1.read(iprot);
- this->__isset.AES_GCM_V1 = true;
- } else {
- xfer += iprot->skip(ftype);
- }
- break;
- case 2:
- if (ftype == ::apache::thrift::protocol::T_STRUCT) {
- xfer += this->AES_GCM_CTR_V1.read(iprot);
- this->__isset.AES_GCM_CTR_V1 = true;
- } else {
- xfer += iprot->skip(ftype);
- }
- break;
- default:
- xfer += iprot->skip(ftype);
- break;
- }
- xfer += iprot->readFieldEnd();
- }
-
- xfer += iprot->readStructEnd();
-
- return xfer;
-}
-
-uint32_t EncryptionAlgorithm::write(::apache::thrift::protocol::TProtocol* oprot) const {
- uint32_t xfer = 0;
- ::apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot);
- xfer += oprot->writeStructBegin("EncryptionAlgorithm");
-
- if (this->__isset.AES_GCM_V1) {
- xfer += oprot->writeFieldBegin("AES_GCM_V1", ::apache::thrift::protocol::T_STRUCT, 1);
- xfer += this->AES_GCM_V1.write(oprot);
- xfer += oprot->writeFieldEnd();
- }
- if (this->__isset.AES_GCM_CTR_V1) {
- xfer += oprot->writeFieldBegin("AES_GCM_CTR_V1", ::apache::thrift::protocol::T_STRUCT, 2);
- xfer += this->AES_GCM_CTR_V1.write(oprot);
- xfer += oprot->writeFieldEnd();
- }
- xfer += oprot->writeFieldStop();
- xfer += oprot->writeStructEnd();
- return xfer;
-}
-
-void swap(EncryptionAlgorithm &a, EncryptionAlgorithm &b) {
- using ::std::swap;
- swap(a.AES_GCM_V1, b.AES_GCM_V1);
- swap(a.AES_GCM_CTR_V1, b.AES_GCM_CTR_V1);
- swap(a.__isset, b.__isset);
-}
-
-EncryptionAlgorithm::EncryptionAlgorithm(const EncryptionAlgorithm& other183) {
- AES_GCM_V1 = other183.AES_GCM_V1;
- AES_GCM_CTR_V1 = other183.AES_GCM_CTR_V1;
- __isset = other183.__isset;
-}
-EncryptionAlgorithm& EncryptionAlgorithm::operator=(const EncryptionAlgorithm& other184) {
- AES_GCM_V1 = other184.AES_GCM_V1;
- AES_GCM_CTR_V1 = other184.AES_GCM_CTR_V1;
- __isset = other184.__isset;
- return *this;
-}
-void EncryptionAlgorithm::printTo(std::ostream& out) const {
- using ::apache::thrift::to_string;
- out << "EncryptionAlgorithm(";
- out << "AES_GCM_V1="; (__isset.AES_GCM_V1 ? (out << to_string(AES_GCM_V1)) : (out << "<null>"));
- out << ", " << "AES_GCM_CTR_V1="; (__isset.AES_GCM_CTR_V1 ? (out << to_string(AES_GCM_CTR_V1)) : (out << "<null>"));
- out << ")";
-}
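// Editorial sketch: EncryptionAlgorithm is a Thrift union rendered as a plain
// struct, so nothing here enforces exclusivity; callers are expected to set
// exactly one branch:
parquet::format::EncryptionAlgorithm alg;
parquet::format::AesGcmV1 gcm;          // as configured in the sketch above
alg.__set_AES_GCM_V1(gcm);              // sets __isset.AES_GCM_V1; the CTR branch stays unset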
-
-
-FileMetaData::~FileMetaData() noexcept {
-}
-
-
-void FileMetaData::__set_version(const int32_t val) {
- this->version = val;
-}
-
-void FileMetaData::__set_schema(const std::vector<SchemaElement> & val) {
- this->schema = val;
-}
-
-void FileMetaData::__set_num_rows(const int64_t val) {
- this->num_rows = val;
-}
-
-void FileMetaData::__set_row_groups(const std::vector<RowGroup> & val) {
- this->row_groups = val;
-}
-
-void FileMetaData::__set_key_value_metadata(const std::vector<KeyValue> & val) {
- this->key_value_metadata = val;
- __isset.key_value_metadata = true;
-}
-
-void FileMetaData::__set_created_by(const std::string& val) {
- this->created_by = val;
- __isset.created_by = true;
-}
-
-void FileMetaData::__set_column_orders(const std::vector<ColumnOrder> & val) {
- this->column_orders = val;
- __isset.column_orders = true;
-}
-
-void FileMetaData::__set_encryption_algorithm(const EncryptionAlgorithm& val) {
- this->encryption_algorithm = val;
- __isset.encryption_algorithm = true;
-}
-
-void FileMetaData::__set_footer_signing_key_metadata(const std::string& val) {
- this->footer_signing_key_metadata = val;
- __isset.footer_signing_key_metadata = true;
-}
-std::ostream& operator<<(std::ostream& out, const FileMetaData& obj)
-{
- obj.printTo(out);
- return out;
-}
-
-
-uint32_t FileMetaData::read(::apache::thrift::protocol::TProtocol* iprot) {
-
- ::apache::thrift::protocol::TInputRecursionTracker tracker(*iprot);
- uint32_t xfer = 0;
- std::string fname;
- ::apache::thrift::protocol::TType ftype;
- int16_t fid;
-
- xfer += iprot->readStructBegin(fname);
-
- using ::apache::thrift::protocol::TProtocolException;
-
- bool isset_version = false;
- bool isset_schema = false;
- bool isset_num_rows = false;
- bool isset_row_groups = false;
-
- while (true)
- {
- xfer += iprot->readFieldBegin(fname, ftype, fid);
- if (ftype == ::apache::thrift::protocol::T_STOP) {
- break;
- }
- switch (fid)
- {
- case 1:
- if (ftype == ::apache::thrift::protocol::T_I32) {
- xfer += iprot->readI32(this->version);
- isset_version = true;
- } else {
- xfer += iprot->skip(ftype);
- }
- break;
- case 2:
- if (ftype == ::apache::thrift::protocol::T_LIST) {
- {
- this->schema.clear();
- uint32_t _size185;
- ::apache::thrift::protocol::TType _etype188;
- xfer += iprot->readListBegin(_etype188, _size185);
- this->schema.resize(_size185);
- uint32_t _i189;
- for (_i189 = 0; _i189 < _size185; ++_i189)
- {
- xfer += this->schema[_i189].read(iprot);
- }
- xfer += iprot->readListEnd();
- }
- isset_schema = true;
- } else {
- xfer += iprot->skip(ftype);
- }
- break;
- case 3:
- if (ftype == ::apache::thrift::protocol::T_I64) {
- xfer += iprot->readI64(this->num_rows);
- isset_num_rows = true;
- } else {
- xfer += iprot->skip(ftype);
- }
- break;
- case 4:
- if (ftype == ::apache::thrift::protocol::T_LIST) {
- {
- this->row_groups.clear();
- uint32_t _size190;
- ::apache::thrift::protocol::TType _etype193;
- xfer += iprot->readListBegin(_etype193, _size190);
- this->row_groups.resize(_size190);
- uint32_t _i194;
- for (_i194 = 0; _i194 < _size190; ++_i194)
- {
- xfer += this->row_groups[_i194].read(iprot);
- }
- xfer += iprot->readListEnd();
- }
- isset_row_groups = true;
- } else {
- xfer += iprot->skip(ftype);
- }
- break;
- case 5:
- if (ftype == ::apache::thrift::protocol::T_LIST) {
- {
- this->key_value_metadata.clear();
- uint32_t _size195;
- ::apache::thrift::protocol::TType _etype198;
- xfer += iprot->readListBegin(_etype198, _size195);
- this->key_value_metadata.resize(_size195);
- uint32_t _i199;
- for (_i199 = 0; _i199 < _size195; ++_i199)
- {
- xfer += this->key_value_metadata[_i199].read(iprot);
- }
- xfer += iprot->readListEnd();
- }
- this->__isset.key_value_metadata = true;
- } else {
- xfer += iprot->skip(ftype);
- }
- break;
- case 6:
- if (ftype == ::apache::thrift::protocol::T_STRING) {
- xfer += iprot->readString(this->created_by);
- this->__isset.created_by = true;
- } else {
- xfer += iprot->skip(ftype);
- }
- break;
- case 7:
- if (ftype == ::apache::thrift::protocol::T_LIST) {
- {
- this->column_orders.clear();
- uint32_t _size200;
- ::apache::thrift::protocol::TType _etype203;
- xfer += iprot->readListBegin(_etype203, _size200);
- this->column_orders.resize(_size200);
- uint32_t _i204;
- for (_i204 = 0; _i204 < _size200; ++_i204)
- {
- xfer += this->column_orders[_i204].read(iprot);
- }
- xfer += iprot->readListEnd();
- }
- this->__isset.column_orders = true;
- } else {
- xfer += iprot->skip(ftype);
- }
- break;
- case 8:
- if (ftype == ::apache::thrift::protocol::T_STRUCT) {
- xfer += this->encryption_algorithm.read(iprot);
- this->__isset.encryption_algorithm = true;
- } else {
- xfer += iprot->skip(ftype);
- }
- break;
- case 9:
- if (ftype == ::apache::thrift::protocol::T_STRING) {
- xfer += iprot->readBinary(this->footer_signing_key_metadata);
- this->__isset.footer_signing_key_metadata = true;
- } else {
- xfer += iprot->skip(ftype);
- }
- break;
- default:
- xfer += iprot->skip(ftype);
- break;
- }
- xfer += iprot->readFieldEnd();
- }
-
- xfer += iprot->readStructEnd();
-
- if (!isset_version)
- throw TProtocolException(TProtocolException::INVALID_DATA);
- if (!isset_schema)
- throw TProtocolException(TProtocolException::INVALID_DATA);
- if (!isset_num_rows)
- throw TProtocolException(TProtocolException::INVALID_DATA);
- if (!isset_row_groups)
- throw TProtocolException(TProtocolException::INVALID_DATA);
- return xfer;
-}
-
-uint32_t FileMetaData::write(::apache::thrift::protocol::TProtocol* oprot) const {
- uint32_t xfer = 0;
- ::apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot);
- xfer += oprot->writeStructBegin("FileMetaData");
-
- xfer += oprot->writeFieldBegin("version", ::apache::thrift::protocol::T_I32, 1);
- xfer += oprot->writeI32(this->version);
- xfer += oprot->writeFieldEnd();
-
- xfer += oprot->writeFieldBegin("schema", ::apache::thrift::protocol::T_LIST, 2);
- {
- xfer += oprot->writeListBegin(::apache::thrift::protocol::T_STRUCT, static_cast<uint32_t>(this->schema.size()));
- std::vector<SchemaElement> ::const_iterator _iter205;
- for (_iter205 = this->schema.begin(); _iter205 != this->schema.end(); ++_iter205)
- {
- xfer += (*_iter205).write(oprot);
- }
- xfer += oprot->writeListEnd();
- }
- xfer += oprot->writeFieldEnd();
-
- xfer += oprot->writeFieldBegin("num_rows", ::apache::thrift::protocol::T_I64, 3);
- xfer += oprot->writeI64(this->num_rows);
- xfer += oprot->writeFieldEnd();
-
- xfer += oprot->writeFieldBegin("row_groups", ::apache::thrift::protocol::T_LIST, 4);
- {
- xfer += oprot->writeListBegin(::apache::thrift::protocol::T_STRUCT, static_cast<uint32_t>(this->row_groups.size()));
- std::vector<RowGroup> ::const_iterator _iter206;
- for (_iter206 = this->row_groups.begin(); _iter206 != this->row_groups.end(); ++_iter206)
- {
- xfer += (*_iter206).write(oprot);
- }
- xfer += oprot->writeListEnd();
- }
- xfer += oprot->writeFieldEnd();
-
- if (this->__isset.key_value_metadata) {
- xfer += oprot->writeFieldBegin("key_value_metadata", ::apache::thrift::protocol::T_LIST, 5);
- {
- xfer += oprot->writeListBegin(::apache::thrift::protocol::T_STRUCT, static_cast<uint32_t>(this->key_value_metadata.size()));
- std::vector<KeyValue> ::const_iterator _iter207;
- for (_iter207 = this->key_value_metadata.begin(); _iter207 != this->key_value_metadata.end(); ++_iter207)
- {
- xfer += (*_iter207).write(oprot);
- }
- xfer += oprot->writeListEnd();
- }
- xfer += oprot->writeFieldEnd();
- }
- if (this->__isset.created_by) {
- xfer += oprot->writeFieldBegin("created_by", ::apache::thrift::protocol::T_STRING, 6);
- xfer += oprot->writeString(this->created_by);
- xfer += oprot->writeFieldEnd();
- }
- if (this->__isset.column_orders) {
- xfer += oprot->writeFieldBegin("column_orders", ::apache::thrift::protocol::T_LIST, 7);
- {
- xfer += oprot->writeListBegin(::apache::thrift::protocol::T_STRUCT, static_cast<uint32_t>(this->column_orders.size()));
- std::vector<ColumnOrder> ::const_iterator _iter208;
- for (_iter208 = this->column_orders.begin(); _iter208 != this->column_orders.end(); ++_iter208)
- {
- xfer += (*_iter208).write(oprot);
- }
- xfer += oprot->writeListEnd();
- }
- xfer += oprot->writeFieldEnd();
- }
- if (this->__isset.encryption_algorithm) {
- xfer += oprot->writeFieldBegin("encryption_algorithm", ::apache::thrift::protocol::T_STRUCT, 8);
- xfer += this->encryption_algorithm.write(oprot);
- xfer += oprot->writeFieldEnd();
- }
- if (this->__isset.footer_signing_key_metadata) {
- xfer += oprot->writeFieldBegin("footer_signing_key_metadata", ::apache::thrift::protocol::T_STRING, 9);
- xfer += oprot->writeBinary(this->footer_signing_key_metadata);
- xfer += oprot->writeFieldEnd();
- }
- xfer += oprot->writeFieldStop();
- xfer += oprot->writeStructEnd();
- return xfer;
-}
-
-void swap(FileMetaData &a, FileMetaData &b) {
- using ::std::swap;
- swap(a.version, b.version);
- swap(a.schema, b.schema);
- swap(a.num_rows, b.num_rows);
- swap(a.row_groups, b.row_groups);
- swap(a.key_value_metadata, b.key_value_metadata);
- swap(a.created_by, b.created_by);
- swap(a.column_orders, b.column_orders);
- swap(a.encryption_algorithm, b.encryption_algorithm);
- swap(a.footer_signing_key_metadata, b.footer_signing_key_metadata);
- swap(a.__isset, b.__isset);
-}
-
-FileMetaData::FileMetaData(const FileMetaData& other209) {
- version = other209.version;
- schema = other209.schema;
- num_rows = other209.num_rows;
- row_groups = other209.row_groups;
- key_value_metadata = other209.key_value_metadata;
- created_by = other209.created_by;
- column_orders = other209.column_orders;
- encryption_algorithm = other209.encryption_algorithm;
- footer_signing_key_metadata = other209.footer_signing_key_metadata;
- __isset = other209.__isset;
-}
-FileMetaData& FileMetaData::operator=(const FileMetaData& other210) {
- version = other210.version;
- schema = other210.schema;
- num_rows = other210.num_rows;
- row_groups = other210.row_groups;
- key_value_metadata = other210.key_value_metadata;
- created_by = other210.created_by;
- column_orders = other210.column_orders;
- encryption_algorithm = other210.encryption_algorithm;
- footer_signing_key_metadata = other210.footer_signing_key_metadata;
- __isset = other210.__isset;
- return *this;
-}
-void FileMetaData::printTo(std::ostream& out) const {
- using ::apache::thrift::to_string;
- out << "FileMetaData(";
- out << "version=" << to_string(version);
- out << ", " << "schema=" << to_string(schema);
- out << ", " << "num_rows=" << to_string(num_rows);
- out << ", " << "row_groups=" << to_string(row_groups);
- out << ", " << "key_value_metadata="; (__isset.key_value_metadata ? (out << to_string(key_value_metadata)) : (out << "<null>"));
- out << ", " << "created_by="; (__isset.created_by ? (out << to_string(created_by)) : (out << "<null>"));
- out << ", " << "column_orders="; (__isset.column_orders ? (out << to_string(column_orders)) : (out << "<null>"));
- out << ", " << "encryption_algorithm="; (__isset.encryption_algorithm ? (out << to_string(encryption_algorithm)) : (out << "<null>"));
- out << ", " << "footer_signing_key_metadata="; (__isset.footer_signing_key_metadata ? (out << to_string(footer_signing_key_metadata)) : (out << "<null>"));
- out << ")";
-}
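// Editorial sketch: read() tracks isset_* flags for the four required fields
// (version, schema, num_rows, row_groups) and throws INVALID_DATA when any is
// absent, so a truncated footer surfaces as an exception rather than as a
// half-filled object. Reusing a protocol set up as in the PageLocation sketch:
try {
  parquet::format::FileMetaData md;
  md.read(&proto);
} catch (const apache::thrift::protocol::TProtocolException&) {
  // reject the file: required footer fields were missing or malformed
}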
-
-
-FileCryptoMetaData::~FileCryptoMetaData() noexcept {
-}
-
-
-void FileCryptoMetaData::__set_encryption_algorithm(const EncryptionAlgorithm& val) {
- this->encryption_algorithm = val;
-}
-
-void FileCryptoMetaData::__set_key_metadata(const std::string& val) {
- this->key_metadata = val;
- __isset.key_metadata = true;
-}
-std::ostream& operator<<(std::ostream& out, const FileCryptoMetaData& obj)
-{
- obj.printTo(out);
- return out;
-}
-
-
-uint32_t FileCryptoMetaData::read(::apache::thrift::protocol::TProtocol* iprot) {
-
- ::apache::thrift::protocol::TInputRecursionTracker tracker(*iprot);
- uint32_t xfer = 0;
- std::string fname;
- ::apache::thrift::protocol::TType ftype;
- int16_t fid;
-
- xfer += iprot->readStructBegin(fname);
-
- using ::apache::thrift::protocol::TProtocolException;
-
- bool isset_encryption_algorithm = false;
-
- while (true)
- {
- xfer += iprot->readFieldBegin(fname, ftype, fid);
- if (ftype == ::apache::thrift::protocol::T_STOP) {
- break;
- }
- switch (fid)
- {
- case 1:
- if (ftype == ::apache::thrift::protocol::T_STRUCT) {
- xfer += this->encryption_algorithm.read(iprot);
- isset_encryption_algorithm = true;
- } else {
- xfer += iprot->skip(ftype);
- }
- break;
- case 2:
- if (ftype == ::apache::thrift::protocol::T_STRING) {
- xfer += iprot->readBinary(this->key_metadata);
- this->__isset.key_metadata = true;
- } else {
- xfer += iprot->skip(ftype);
- }
- break;
- default:
- xfer += iprot->skip(ftype);
- break;
- }
- xfer += iprot->readFieldEnd();
- }
-
- xfer += iprot->readStructEnd();
-
- if (!isset_encryption_algorithm)
- throw TProtocolException(TProtocolException::INVALID_DATA);
- return xfer;
-}
-
-uint32_t FileCryptoMetaData::write(::apache::thrift::protocol::TProtocol* oprot) const {
- uint32_t xfer = 0;
- ::apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot);
- xfer += oprot->writeStructBegin("FileCryptoMetaData");
-
- xfer += oprot->writeFieldBegin("encryption_algorithm", ::apache::thrift::protocol::T_STRUCT, 1);
- xfer += this->encryption_algorithm.write(oprot);
- xfer += oprot->writeFieldEnd();
-
- if (this->__isset.key_metadata) {
- xfer += oprot->writeFieldBegin("key_metadata", ::apache::thrift::protocol::T_STRING, 2);
- xfer += oprot->writeBinary(this->key_metadata);
- xfer += oprot->writeFieldEnd();
- }
- xfer += oprot->writeFieldStop();
- xfer += oprot->writeStructEnd();
- return xfer;
-}
-
-void swap(FileCryptoMetaData &a, FileCryptoMetaData &b) {
- using ::std::swap;
- swap(a.encryption_algorithm, b.encryption_algorithm);
- swap(a.key_metadata, b.key_metadata);
- swap(a.__isset, b.__isset);
-}
-
-FileCryptoMetaData::FileCryptoMetaData(const FileCryptoMetaData& other211) {
- encryption_algorithm = other211.encryption_algorithm;
- key_metadata = other211.key_metadata;
- __isset = other211.__isset;
-}
-FileCryptoMetaData& FileCryptoMetaData::operator=(const FileCryptoMetaData& other212) {
- encryption_algorithm = other212.encryption_algorithm;
- key_metadata = other212.key_metadata;
- __isset = other212.__isset;
- return *this;
-}
-void FileCryptoMetaData::printTo(std::ostream& out) const {
- using ::apache::thrift::to_string;
- out << "FileCryptoMetaData(";
- out << "encryption_algorithm=" << to_string(encryption_algorithm);
- out << ", " << "key_metadata="; (__isset.key_metadata ? (out << to_string(key_metadata)) : (out << "<null>"));
- out << ")";
-}
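// Editorial sketch: per the Parquet encryption layout, a file with an
// encrypted footer stores this struct in plaintext so readers can learn the
// algorithm and key metadata needed to decrypt FileMetaData itself;
// encryption_algorithm is required, key_metadata optional:
parquet::format::FileCryptoMetaData crypto;
crypto.read(&proto);                    // throws if encryption_algorithm is absent
const bool has_key_md = crypto.__isset.key_metadata;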
-
-}} // namespace
+/**
+ * Autogenerated by Thrift Compiler (0.13.0)
+ *
+ * DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING
+ * @generated
+ */
+#include "parquet_types.h"
+
+#include <algorithm>
+#include <ostream>
+
+#include <thrift/TToString.h>
+
+namespace parquet { namespace format {
+
+int _kTypeValues[] = {
+ Type::BOOLEAN,
+ Type::INT32,
+ Type::INT64,
+ Type::INT96,
+ Type::FLOAT,
+ Type::DOUBLE,
+ Type::BYTE_ARRAY,
+ Type::FIXED_LEN_BYTE_ARRAY
+};
+const char* _kTypeNames[] = {
+ "BOOLEAN",
+ "INT32",
+ "INT64",
+ "INT96",
+ "FLOAT",
+ "DOUBLE",
+ "BYTE_ARRAY",
+ "FIXED_LEN_BYTE_ARRAY"
+};
+const std::map<int, const char*> _Type_VALUES_TO_NAMES(::apache::thrift::TEnumIterator(8, _kTypeValues, _kTypeNames), ::apache::thrift::TEnumIterator(-1, NULL, NULL));
+
+std::ostream& operator<<(std::ostream& out, const Type::type& val) {
+ std::map<int, const char*>::const_iterator it = _Type_VALUES_TO_NAMES.find(val);
+ if (it != _Type_VALUES_TO_NAMES.end()) {
+ out << it->second;
+ } else {
+ out << static_cast<int>(val);
+ }
+ return out;
+}
+
+std::string to_string(const Type::type& val) {
+ std::map<int, const char*>::const_iterator it = _Type_VALUES_TO_NAMES.find(val);
+ if (it != _Type_VALUES_TO_NAMES.end()) {
+ return std::string(it->second);
+ } else {
+ return std::to_string(static_cast<int>(val));
+ }
+}
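// Editorial sketch: the TEnumIterator pair materializes a value-to-name map
// once; operator<< and to_string both fall back to the raw number for values
// outside the map, e.g. enum members added by a newer writer:
std::string known = parquet::format::to_string(parquet::format::Type::BYTE_ARRAY);               // "BYTE_ARRAY"
std::string unknown = parquet::format::to_string(static_cast<parquet::format::Type::type>(42));  // "42"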
+
+int _kConvertedTypeValues[] = {
+ ConvertedType::UTF8,
+ ConvertedType::MAP,
+ ConvertedType::MAP_KEY_VALUE,
+ ConvertedType::LIST,
+ ConvertedType::ENUM,
+ ConvertedType::DECIMAL,
+ ConvertedType::DATE,
+ ConvertedType::TIME_MILLIS,
+ ConvertedType::TIME_MICROS,
+ ConvertedType::TIMESTAMP_MILLIS,
+ ConvertedType::TIMESTAMP_MICROS,
+ ConvertedType::UINT_8,
+ ConvertedType::UINT_16,
+ ConvertedType::UINT_32,
+ ConvertedType::UINT_64,
+ ConvertedType::INT_8,
+ ConvertedType::INT_16,
+ ConvertedType::INT_32,
+ ConvertedType::INT_64,
+ ConvertedType::JSON,
+ ConvertedType::BSON,
+ ConvertedType::INTERVAL
+};
+const char* _kConvertedTypeNames[] = {
+ "UTF8",
+ "MAP",
+ "MAP_KEY_VALUE",
+ "LIST",
+ "ENUM",
+ "DECIMAL",
+ "DATE",
+ "TIME_MILLIS",
+ "TIME_MICROS",
+ "TIMESTAMP_MILLIS",
+ "TIMESTAMP_MICROS",
+ "UINT_8",
+ "UINT_16",
+ "UINT_32",
+ "UINT_64",
+ "INT_8",
+ "INT_16",
+ "INT_32",
+ "INT_64",
+ "JSON",
+ "BSON",
+ "INTERVAL"
+};
+const std::map<int, const char*> _ConvertedType_VALUES_TO_NAMES(::apache::thrift::TEnumIterator(22, _kConvertedTypeValues, _kConvertedTypeNames), ::apache::thrift::TEnumIterator(-1, NULL, NULL));
+
+std::ostream& operator<<(std::ostream& out, const ConvertedType::type& val) {
+ std::map<int, const char*>::const_iterator it = _ConvertedType_VALUES_TO_NAMES.find(val);
+ if (it != _ConvertedType_VALUES_TO_NAMES.end()) {
+ out << it->second;
+ } else {
+ out << static_cast<int>(val);
+ }
+ return out;
+}
+
+std::string to_string(const ConvertedType::type& val) {
+ std::map<int, const char*>::const_iterator it = _ConvertedType_VALUES_TO_NAMES.find(val);
+ if (it != _ConvertedType_VALUES_TO_NAMES.end()) {
+ return std::string(it->second);
+ } else {
+ return std::to_string(static_cast<int>(val));
+ }
+}
+
+int _kFieldRepetitionTypeValues[] = {
+ FieldRepetitionType::REQUIRED,
+ FieldRepetitionType::OPTIONAL,
+ FieldRepetitionType::REPEATED
+};
+const char* _kFieldRepetitionTypeNames[] = {
+ "REQUIRED",
+ "OPTIONAL",
+ "REPEATED"
+};
+const std::map<int, const char*> _FieldRepetitionType_VALUES_TO_NAMES(::apache::thrift::TEnumIterator(3, _kFieldRepetitionTypeValues, _kFieldRepetitionTypeNames), ::apache::thrift::TEnumIterator(-1, NULL, NULL));
+
+std::ostream& operator<<(std::ostream& out, const FieldRepetitionType::type& val) {
+ std::map<int, const char*>::const_iterator it = _FieldRepetitionType_VALUES_TO_NAMES.find(val);
+ if (it != _FieldRepetitionType_VALUES_TO_NAMES.end()) {
+ out << it->second;
+ } else {
+ out << static_cast<int>(val);
+ }
+ return out;
+}
+
+std::string to_string(const FieldRepetitionType::type& val) {
+ std::map<int, const char*>::const_iterator it = _FieldRepetitionType_VALUES_TO_NAMES.find(val);
+ if (it != _FieldRepetitionType_VALUES_TO_NAMES.end()) {
+ return std::string(it->second);
+ } else {
+ return std::to_string(static_cast<int>(val));
+ }
+}
+
+int _kEncodingValues[] = {
+ Encoding::PLAIN,
+ Encoding::PLAIN_DICTIONARY,
+ Encoding::RLE,
+ Encoding::BIT_PACKED,
+ Encoding::DELTA_BINARY_PACKED,
+ Encoding::DELTA_LENGTH_BYTE_ARRAY,
+ Encoding::DELTA_BYTE_ARRAY,
+ Encoding::RLE_DICTIONARY,
+ Encoding::BYTE_STREAM_SPLIT
+};
+const char* _kEncodingNames[] = {
+ "PLAIN",
+ "PLAIN_DICTIONARY",
+ "RLE",
+ "BIT_PACKED",
+ "DELTA_BINARY_PACKED",
+ "DELTA_LENGTH_BYTE_ARRAY",
+ "DELTA_BYTE_ARRAY",
+ "RLE_DICTIONARY",
+ "BYTE_STREAM_SPLIT"
+};
+const std::map<int, const char*> _Encoding_VALUES_TO_NAMES(::apache::thrift::TEnumIterator(9, _kEncodingValues, _kEncodingNames), ::apache::thrift::TEnumIterator(-1, NULL, NULL));
+
+std::ostream& operator<<(std::ostream& out, const Encoding::type& val) {
+ std::map<int, const char*>::const_iterator it = _Encoding_VALUES_TO_NAMES.find(val);
+ if (it != _Encoding_VALUES_TO_NAMES.end()) {
+ out << it->second;
+ } else {
+ out << static_cast<int>(val);
+ }
+ return out;
+}
+
+std::string to_string(const Encoding::type& val) {
+ std::map<int, const char*>::const_iterator it = _Encoding_VALUES_TO_NAMES.find(val);
+ if (it != _Encoding_VALUES_TO_NAMES.end()) {
+ return std::string(it->second);
+ } else {
+ return std::to_string(static_cast<int>(val));
+ }
+}
+
+int _kCompressionCodecValues[] = {
+ CompressionCodec::UNCOMPRESSED,
+ CompressionCodec::SNAPPY,
+ CompressionCodec::GZIP,
+ CompressionCodec::LZO,
+ CompressionCodec::BROTLI,
+ CompressionCodec::LZ4,
+ CompressionCodec::ZSTD,
+ CompressionCodec::LZ4_RAW
+};
+const char* _kCompressionCodecNames[] = {
+ "UNCOMPRESSED",
+ "SNAPPY",
+ "GZIP",
+ "LZO",
+ "BROTLI",
+ "LZ4",
+ "ZSTD",
+ "LZ4_RAW"
+};
+const std::map<int, const char*> _CompressionCodec_VALUES_TO_NAMES(::apache::thrift::TEnumIterator(8, _kCompressionCodecValues, _kCompressionCodecNames), ::apache::thrift::TEnumIterator(-1, NULL, NULL));
+
+std::ostream& operator<<(std::ostream& out, const CompressionCodec::type& val) {
+ std::map<int, const char*>::const_iterator it = _CompressionCodec_VALUES_TO_NAMES.find(val);
+ if (it != _CompressionCodec_VALUES_TO_NAMES.end()) {
+ out << it->second;
+ } else {
+ out << static_cast<int>(val);
+ }
+ return out;
+}
+
+std::string to_string(const CompressionCodec::type& val) {
+ std::map<int, const char*>::const_iterator it = _CompressionCodec_VALUES_TO_NAMES.find(val);
+ if (it != _CompressionCodec_VALUES_TO_NAMES.end()) {
+ return std::string(it->second);
+ } else {
+ return std::to_string(static_cast<int>(val));
+ }
+}
+
+int _kPageTypeValues[] = {
+ PageType::DATA_PAGE,
+ PageType::INDEX_PAGE,
+ PageType::DICTIONARY_PAGE,
+ PageType::DATA_PAGE_V2
+};
+const char* _kPageTypeNames[] = {
+ "DATA_PAGE",
+ "INDEX_PAGE",
+ "DICTIONARY_PAGE",
+ "DATA_PAGE_V2"
+};
+const std::map<int, const char*> _PageType_VALUES_TO_NAMES(::apache::thrift::TEnumIterator(4, _kPageTypeValues, _kPageTypeNames), ::apache::thrift::TEnumIterator(-1, NULL, NULL));
+
+std::ostream& operator<<(std::ostream& out, const PageType::type& val) {
+ std::map<int, const char*>::const_iterator it = _PageType_VALUES_TO_NAMES.find(val);
+ if (it != _PageType_VALUES_TO_NAMES.end()) {
+ out << it->second;
+ } else {
+ out << static_cast<int>(val);
+ }
+ return out;
+}
+
+std::string to_string(const PageType::type& val) {
+ std::map<int, const char*>::const_iterator it = _PageType_VALUES_TO_NAMES.find(val);
+ if (it != _PageType_VALUES_TO_NAMES.end()) {
+ return std::string(it->second);
+ } else {
+ return std::to_string(static_cast<int>(val));
+ }
+}
+
+int _kBoundaryOrderValues[] = {
+ BoundaryOrder::UNORDERED,
+ BoundaryOrder::ASCENDING,
+ BoundaryOrder::DESCENDING
+};
+const char* _kBoundaryOrderNames[] = {
+ "UNORDERED",
+ "ASCENDING",
+ "DESCENDING"
+};
+const std::map<int, const char*> _BoundaryOrder_VALUES_TO_NAMES(::apache::thrift::TEnumIterator(3, _kBoundaryOrderValues, _kBoundaryOrderNames), ::apache::thrift::TEnumIterator(-1, NULL, NULL));
+
+std::ostream& operator<<(std::ostream& out, const BoundaryOrder::type& val) {
+ std::map<int, const char*>::const_iterator it = _BoundaryOrder_VALUES_TO_NAMES.find(val);
+ if (it != _BoundaryOrder_VALUES_TO_NAMES.end()) {
+ out << it->second;
+ } else {
+ out << static_cast<int>(val);
+ }
+ return out;
+}
+
+std::string to_string(const BoundaryOrder::type& val) {
+ std::map<int, const char*>::const_iterator it = _BoundaryOrder_VALUES_TO_NAMES.find(val);
+ if (it != _BoundaryOrder_VALUES_TO_NAMES.end()) {
+ return std::string(it->second);
+ } else {
+ return std::to_string(static_cast<int>(val));
+ }
+}
+
+
+Statistics::~Statistics() noexcept {
+}
+
+
+void Statistics::__set_max(const std::string& val) {
+ this->max = val;
+ __isset.max = true;
+}
+
+void Statistics::__set_min(const std::string& val) {
+ this->min = val;
+ __isset.min = true;
+}
+
+void Statistics::__set_null_count(const int64_t val) {
+ this->null_count = val;
+ __isset.null_count = true;
+}
+
+void Statistics::__set_distinct_count(const int64_t val) {
+ this->distinct_count = val;
+ __isset.distinct_count = true;
+}
+
+void Statistics::__set_max_value(const std::string& val) {
+ this->max_value = val;
+ __isset.max_value = true;
+}
+
+void Statistics::__set_min_value(const std::string& val) {
+ this->min_value = val;
+ __isset.min_value = true;
+}
+std::ostream& operator<<(std::ostream& out, const Statistics& obj)
+{
+ obj.printTo(out);
+ return out;
+}
+
+
+uint32_t Statistics::read(::apache::thrift::protocol::TProtocol* iprot) {
+
+ ::apache::thrift::protocol::TInputRecursionTracker tracker(*iprot);
+ uint32_t xfer = 0;
+ std::string fname;
+ ::apache::thrift::protocol::TType ftype;
+ int16_t fid;
+
+ xfer += iprot->readStructBegin(fname);
+
+ using ::apache::thrift::protocol::TProtocolException;
+
+
+ while (true)
+ {
+ xfer += iprot->readFieldBegin(fname, ftype, fid);
+ if (ftype == ::apache::thrift::protocol::T_STOP) {
+ break;
+ }
+ switch (fid)
+ {
+ case 1:
+ if (ftype == ::apache::thrift::protocol::T_STRING) {
+ xfer += iprot->readBinary(this->max);
+ this->__isset.max = true;
+ } else {
+ xfer += iprot->skip(ftype);
+ }
+ break;
+ case 2:
+ if (ftype == ::apache::thrift::protocol::T_STRING) {
+ xfer += iprot->readBinary(this->min);
+ this->__isset.min = true;
+ } else {
+ xfer += iprot->skip(ftype);
+ }
+ break;
+ case 3:
+ if (ftype == ::apache::thrift::protocol::T_I64) {
+ xfer += iprot->readI64(this->null_count);
+ this->__isset.null_count = true;
+ } else {
+ xfer += iprot->skip(ftype);
+ }
+ break;
+ case 4:
+ if (ftype == ::apache::thrift::protocol::T_I64) {
+ xfer += iprot->readI64(this->distinct_count);
+ this->__isset.distinct_count = true;
+ } else {
+ xfer += iprot->skip(ftype);
+ }
+ break;
+ case 5:
+ if (ftype == ::apache::thrift::protocol::T_STRING) {
+ xfer += iprot->readBinary(this->max_value);
+ this->__isset.max_value = true;
+ } else {
+ xfer += iprot->skip(ftype);
+ }
+ break;
+ case 6:
+ if (ftype == ::apache::thrift::protocol::T_STRING) {
+ xfer += iprot->readBinary(this->min_value);
+ this->__isset.min_value = true;
+ } else {
+ xfer += iprot->skip(ftype);
+ }
+ break;
+ default:
+ xfer += iprot->skip(ftype);
+ break;
+ }
+ xfer += iprot->readFieldEnd();
+ }
+
+ xfer += iprot->readStructEnd();
+
+ return xfer;
+}
+
+uint32_t Statistics::write(::apache::thrift::protocol::TProtocol* oprot) const {
+ uint32_t xfer = 0;
+ ::apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot);
+ xfer += oprot->writeStructBegin("Statistics");
+
+ if (this->__isset.max) {
+ xfer += oprot->writeFieldBegin("max", ::apache::thrift::protocol::T_STRING, 1);
+ xfer += oprot->writeBinary(this->max);
+ xfer += oprot->writeFieldEnd();
+ }
+ if (this->__isset.min) {
+ xfer += oprot->writeFieldBegin("min", ::apache::thrift::protocol::T_STRING, 2);
+ xfer += oprot->writeBinary(this->min);
+ xfer += oprot->writeFieldEnd();
+ }
+ if (this->__isset.null_count) {
+ xfer += oprot->writeFieldBegin("null_count", ::apache::thrift::protocol::T_I64, 3);
+ xfer += oprot->writeI64(this->null_count);
+ xfer += oprot->writeFieldEnd();
+ }
+ if (this->__isset.distinct_count) {
+ xfer += oprot->writeFieldBegin("distinct_count", ::apache::thrift::protocol::T_I64, 4);
+ xfer += oprot->writeI64(this->distinct_count);
+ xfer += oprot->writeFieldEnd();
+ }
+ if (this->__isset.max_value) {
+ xfer += oprot->writeFieldBegin("max_value", ::apache::thrift::protocol::T_STRING, 5);
+ xfer += oprot->writeBinary(this->max_value);
+ xfer += oprot->writeFieldEnd();
+ }
+ if (this->__isset.min_value) {
+ xfer += oprot->writeFieldBegin("min_value", ::apache::thrift::protocol::T_STRING, 6);
+ xfer += oprot->writeBinary(this->min_value);
+ xfer += oprot->writeFieldEnd();
+ }
+ xfer += oprot->writeFieldStop();
+ xfer += oprot->writeStructEnd();
+ return xfer;
+}
+
+void swap(Statistics &a, Statistics &b) {
+ using ::std::swap;
+ swap(a.max, b.max);
+ swap(a.min, b.min);
+ swap(a.null_count, b.null_count);
+ swap(a.distinct_count, b.distinct_count);
+ swap(a.max_value, b.max_value);
+ swap(a.min_value, b.min_value);
+ swap(a.__isset, b.__isset);
+}
+
+Statistics::Statistics(const Statistics& other0) {
+ max = other0.max;
+ min = other0.min;
+ null_count = other0.null_count;
+ distinct_count = other0.distinct_count;
+ max_value = other0.max_value;
+ min_value = other0.min_value;
+ __isset = other0.__isset;
+}
+Statistics& Statistics::operator=(const Statistics& other1) {
+ max = other1.max;
+ min = other1.min;
+ null_count = other1.null_count;
+ distinct_count = other1.distinct_count;
+ max_value = other1.max_value;
+ min_value = other1.min_value;
+ __isset = other1.__isset;
+ return *this;
+}
+void Statistics::printTo(std::ostream& out) const {
+ using ::apache::thrift::to_string;
+ out << "Statistics(";
+ out << "max="; (__isset.max ? (out << to_string(max)) : (out << "<null>"));
+ out << ", " << "min="; (__isset.min ? (out << to_string(min)) : (out << "<null>"));
+ out << ", " << "null_count="; (__isset.null_count ? (out << to_string(null_count)) : (out << "<null>"));
+ out << ", " << "distinct_count="; (__isset.distinct_count ? (out << to_string(distinct_count)) : (out << "<null>"));
+ out << ", " << "max_value="; (__isset.max_value ? (out << to_string(max_value)) : (out << "<null>"));
+ out << ", " << "min_value="; (__isset.min_value ? (out << to_string(min_value)) : (out << "<null>"));
+ out << ")";
+}
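// Editorial sketch: all six Statistics fields are optional. min_value/max_value
// (fields 5 and 6) are the pair current Parquet writers populate; the older
// min/max (fields 1 and 2) survive for legacy readers. Values are raw bytes,
// matching the readBinary/writeBinary calls above:
parquet::format::Statistics st;
st.__set_min_value(std::string("\x01", 1));  // binary encoding of the column's minimum
st.__set_max_value(std::string("\x7f", 1));
st.__set_null_count(0);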
+
+
+StringType::~StringType() noexcept {
+}
+
+std::ostream& operator<<(std::ostream& out, const StringType& obj)
+{
+ obj.printTo(out);
+ return out;
+}
+
+
+uint32_t StringType::read(::apache::thrift::protocol::TProtocol* iprot) {
+
+ ::apache::thrift::protocol::TInputRecursionTracker tracker(*iprot);
+ uint32_t xfer = 0;
+ std::string fname;
+ ::apache::thrift::protocol::TType ftype;
+ int16_t fid;
+
+ xfer += iprot->readStructBegin(fname);
+
+ using ::apache::thrift::protocol::TProtocolException;
+
+
+ while (true)
+ {
+ xfer += iprot->readFieldBegin(fname, ftype, fid);
+ if (ftype == ::apache::thrift::protocol::T_STOP) {
+ break;
+ }
+ xfer += iprot->skip(ftype);
+ xfer += iprot->readFieldEnd();
+ }
+
+ xfer += iprot->readStructEnd();
+
+ return xfer;
+}
+
+uint32_t StringType::write(::apache::thrift::protocol::TProtocol* oprot) const {
+ uint32_t xfer = 0;
+ ::apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot);
+ xfer += oprot->writeStructBegin("StringType");
+
+ xfer += oprot->writeFieldStop();
+ xfer += oprot->writeStructEnd();
+ return xfer;
+}
+
+void swap(StringType &a, StringType &b) {
+ using ::std::swap;
+ (void) a;
+ (void) b;
+}
+
+StringType::StringType(const StringType& other2) {
+ (void) other2;
+}
+StringType& StringType::operator=(const StringType& other3) {
+ (void) other3;
+ return *this;
+}
+void StringType::printTo(std::ostream& out) const {
+ using ::apache::thrift::to_string;
+ out << "StringType(";
+ out << ")";
+}
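// Editorial sketch: the empty logical-type markers (StringType, UUIDType,
// MapType, and the rest below) carry no fields, so write() emits only a field
// stop (a single zero byte under TCompactProtocol) and read() merely skips
// until T_STOP:
parquet::format::StringType marker;
marker.write(&proto);                   // one 0x00 stop byte on the wire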
+
+
+UUIDType::~UUIDType() noexcept {
+}
+
+std::ostream& operator<<(std::ostream& out, const UUIDType& obj)
+{
+ obj.printTo(out);
+ return out;
+}
+
+
+uint32_t UUIDType::read(::apache::thrift::protocol::TProtocol* iprot) {
+
+ ::apache::thrift::protocol::TInputRecursionTracker tracker(*iprot);
+ uint32_t xfer = 0;
+ std::string fname;
+ ::apache::thrift::protocol::TType ftype;
+ int16_t fid;
+
+ xfer += iprot->readStructBegin(fname);
+
+ using ::apache::thrift::protocol::TProtocolException;
+
+
+ while (true)
+ {
+ xfer += iprot->readFieldBegin(fname, ftype, fid);
+ if (ftype == ::apache::thrift::protocol::T_STOP) {
+ break;
+ }
+ xfer += iprot->skip(ftype);
+ xfer += iprot->readFieldEnd();
+ }
+
+ xfer += iprot->readStructEnd();
+
+ return xfer;
+}
+
+uint32_t UUIDType::write(::apache::thrift::protocol::TProtocol* oprot) const {
+ uint32_t xfer = 0;
+ ::apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot);
+ xfer += oprot->writeStructBegin("UUIDType");
+
+ xfer += oprot->writeFieldStop();
+ xfer += oprot->writeStructEnd();
+ return xfer;
+}
+
+void swap(UUIDType &a, UUIDType &b) {
+ using ::std::swap;
+ (void) a;
+ (void) b;
+}
+
+UUIDType::UUIDType(const UUIDType& other4) {
+ (void) other4;
+}
+UUIDType& UUIDType::operator=(const UUIDType& other5) {
+ (void) other5;
+ return *this;
+}
+void UUIDType::printTo(std::ostream& out) const {
+ using ::apache::thrift::to_string;
+ out << "UUIDType(";
+ out << ")";
+}
+
+
+MapType::~MapType() noexcept {
+}
+
+std::ostream& operator<<(std::ostream& out, const MapType& obj)
+{
+ obj.printTo(out);
+ return out;
+}
+
+
+uint32_t MapType::read(::apache::thrift::protocol::TProtocol* iprot) {
+
+ ::apache::thrift::protocol::TInputRecursionTracker tracker(*iprot);
+ uint32_t xfer = 0;
+ std::string fname;
+ ::apache::thrift::protocol::TType ftype;
+ int16_t fid;
+
+ xfer += iprot->readStructBegin(fname);
+
+ using ::apache::thrift::protocol::TProtocolException;
+
+
+ while (true)
+ {
+ xfer += iprot->readFieldBegin(fname, ftype, fid);
+ if (ftype == ::apache::thrift::protocol::T_STOP) {
+ break;
+ }
+ xfer += iprot->skip(ftype);
+ xfer += iprot->readFieldEnd();
+ }
+
+ xfer += iprot->readStructEnd();
+
+ return xfer;
+}
+
+uint32_t MapType::write(::apache::thrift::protocol::TProtocol* oprot) const {
+ uint32_t xfer = 0;
+ ::apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot);
+ xfer += oprot->writeStructBegin("MapType");
+
+ xfer += oprot->writeFieldStop();
+ xfer += oprot->writeStructEnd();
+ return xfer;
+}
+
+void swap(MapType &a, MapType &b) {
+ using ::std::swap;
+ (void) a;
+ (void) b;
+}
+
+MapType::MapType(const MapType& other6) {
+ (void) other6;
+}
+MapType& MapType::operator=(const MapType& other7) {
+ (void) other7;
+ return *this;
+}
+void MapType::printTo(std::ostream& out) const {
+ using ::apache::thrift::to_string;
+ out << "MapType(";
+ out << ")";
+}
+
+
+ListType::~ListType() noexcept {
+}
+
+std::ostream& operator<<(std::ostream& out, const ListType& obj)
+{
+ obj.printTo(out);
+ return out;
+}
+
+
+uint32_t ListType::read(::apache::thrift::protocol::TProtocol* iprot) {
+
+ ::apache::thrift::protocol::TInputRecursionTracker tracker(*iprot);
+ uint32_t xfer = 0;
+ std::string fname;
+ ::apache::thrift::protocol::TType ftype;
+ int16_t fid;
+
+ xfer += iprot->readStructBegin(fname);
+
+ using ::apache::thrift::protocol::TProtocolException;
+
+
+ while (true)
+ {
+ xfer += iprot->readFieldBegin(fname, ftype, fid);
+ if (ftype == ::apache::thrift::protocol::T_STOP) {
+ break;
+ }
+ xfer += iprot->skip(ftype);
+ xfer += iprot->readFieldEnd();
+ }
+
+ xfer += iprot->readStructEnd();
+
+ return xfer;
+}
+
+uint32_t ListType::write(::apache::thrift::protocol::TProtocol* oprot) const {
+ uint32_t xfer = 0;
+ ::apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot);
+ xfer += oprot->writeStructBegin("ListType");
+
+ xfer += oprot->writeFieldStop();
+ xfer += oprot->writeStructEnd();
+ return xfer;
+}
+
+void swap(ListType &a, ListType &b) {
+ using ::std::swap;
+ (void) a;
+ (void) b;
+}
+
+ListType::ListType(const ListType& other8) {
+ (void) other8;
+}
+ListType& ListType::operator=(const ListType& other9) {
+ (void) other9;
+ return *this;
+}
+void ListType::printTo(std::ostream& out) const {
+ using ::apache::thrift::to_string;
+ out << "ListType(";
+ out << ")";
+}
+
+
+EnumType::~EnumType() noexcept {
+}
+
+std::ostream& operator<<(std::ostream& out, const EnumType& obj)
+{
+ obj.printTo(out);
+ return out;
+}
+
+
+uint32_t EnumType::read(::apache::thrift::protocol::TProtocol* iprot) {
+
+ ::apache::thrift::protocol::TInputRecursionTracker tracker(*iprot);
+ uint32_t xfer = 0;
+ std::string fname;
+ ::apache::thrift::protocol::TType ftype;
+ int16_t fid;
+
+ xfer += iprot->readStructBegin(fname);
+
+ using ::apache::thrift::protocol::TProtocolException;
+
+
+ while (true)
+ {
+ xfer += iprot->readFieldBegin(fname, ftype, fid);
+ if (ftype == ::apache::thrift::protocol::T_STOP) {
+ break;
+ }
+ xfer += iprot->skip(ftype);
+ xfer += iprot->readFieldEnd();
+ }
+
+ xfer += iprot->readStructEnd();
+
+ return xfer;
+}
+
+uint32_t EnumType::write(::apache::thrift::protocol::TProtocol* oprot) const {
+ uint32_t xfer = 0;
+ ::apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot);
+ xfer += oprot->writeStructBegin("EnumType");
+
+ xfer += oprot->writeFieldStop();
+ xfer += oprot->writeStructEnd();
+ return xfer;
+}
+
+void swap(EnumType &a, EnumType &b) {
+ using ::std::swap;
+ (void) a;
+ (void) b;
+}
+
+EnumType::EnumType(const EnumType& other10) {
+ (void) other10;
+}
+EnumType& EnumType::operator=(const EnumType& other11) {
+ (void) other11;
+ return *this;
+}
+void EnumType::printTo(std::ostream& out) const {
+ using ::apache::thrift::to_string;
+ out << "EnumType(";
+ out << ")";
+}
+
+
+DateType::~DateType() noexcept {
+}
+
+std::ostream& operator<<(std::ostream& out, const DateType& obj)
+{
+ obj.printTo(out);
+ return out;
+}
+
+
+uint32_t DateType::read(::apache::thrift::protocol::TProtocol* iprot) {
+
+ ::apache::thrift::protocol::TInputRecursionTracker tracker(*iprot);
+ uint32_t xfer = 0;
+ std::string fname;
+ ::apache::thrift::protocol::TType ftype;
+ int16_t fid;
+
+ xfer += iprot->readStructBegin(fname);
+
+ using ::apache::thrift::protocol::TProtocolException;
+
+
+ while (true)
+ {
+ xfer += iprot->readFieldBegin(fname, ftype, fid);
+ if (ftype == ::apache::thrift::protocol::T_STOP) {
+ break;
+ }
+ xfer += iprot->skip(ftype);
+ xfer += iprot->readFieldEnd();
+ }
+
+ xfer += iprot->readStructEnd();
+
+ return xfer;
+}
+
+uint32_t DateType::write(::apache::thrift::protocol::TProtocol* oprot) const {
+ uint32_t xfer = 0;
+ ::apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot);
+ xfer += oprot->writeStructBegin("DateType");
+
+ xfer += oprot->writeFieldStop();
+ xfer += oprot->writeStructEnd();
+ return xfer;
+}
+
+void swap(DateType &a, DateType &b) {
+ using ::std::swap;
+ (void) a;
+ (void) b;
+}
+
+DateType::DateType(const DateType& other12) {
+ (void) other12;
+}
+DateType& DateType::operator=(const DateType& other13) {
+ (void) other13;
+ return *this;
+}
+void DateType::printTo(std::ostream& out) const {
+ using ::apache::thrift::to_string;
+ out << "DateType(";
+ out << ")";
+}
+
+
+NullType::~NullType() noexcept {
+}
+
+std::ostream& operator<<(std::ostream& out, const NullType& obj)
+{
+ obj.printTo(out);
+ return out;
+}
+
+
+uint32_t NullType::read(::apache::thrift::protocol::TProtocol* iprot) {
+
+ ::apache::thrift::protocol::TInputRecursionTracker tracker(*iprot);
+ uint32_t xfer = 0;
+ std::string fname;
+ ::apache::thrift::protocol::TType ftype;
+ int16_t fid;
+
+ xfer += iprot->readStructBegin(fname);
+
+ using ::apache::thrift::protocol::TProtocolException;
+
+
+ while (true)
+ {
+ xfer += iprot->readFieldBegin(fname, ftype, fid);
+ if (ftype == ::apache::thrift::protocol::T_STOP) {
+ break;
+ }
+ xfer += iprot->skip(ftype);
+ xfer += iprot->readFieldEnd();
+ }
+
+ xfer += iprot->readStructEnd();
+
+ return xfer;
+}
+
+uint32_t NullType::write(::apache::thrift::protocol::TProtocol* oprot) const {
+ uint32_t xfer = 0;
+ ::apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot);
+ xfer += oprot->writeStructBegin("NullType");
+
+ xfer += oprot->writeFieldStop();
+ xfer += oprot->writeStructEnd();
+ return xfer;
+}
+
+void swap(NullType &a, NullType &b) {
+ using ::std::swap;
+ (void) a;
+ (void) b;
+}
+
+NullType::NullType(const NullType& other14) {
+ (void) other14;
+}
+NullType& NullType::operator=(const NullType& other15) {
+ (void) other15;
+ return *this;
+}
+void NullType::printTo(std::ostream& out) const {
+ using ::apache::thrift::to_string;
+ out << "NullType(";
+ out << ")";
+}
+
+
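+// DecimalType carries the two required fields of a decimal annotation;
+// read() rejects a payload missing either scale or precision with
+// INVALID_DATA.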
+DecimalType::~DecimalType() noexcept {
+}
+
+
+void DecimalType::__set_scale(const int32_t val) {
+ this->scale = val;
+}
+
+void DecimalType::__set_precision(const int32_t val) {
+ this->precision = val;
+}
+std::ostream& operator<<(std::ostream& out, const DecimalType& obj)
+{
+ obj.printTo(out);
+ return out;
+}
+
+
+uint32_t DecimalType::read(::apache::thrift::protocol::TProtocol* iprot) {
+
+ ::apache::thrift::protocol::TInputRecursionTracker tracker(*iprot);
+ uint32_t xfer = 0;
+ std::string fname;
+ ::apache::thrift::protocol::TType ftype;
+ int16_t fid;
+
+ xfer += iprot->readStructBegin(fname);
+
+ using ::apache::thrift::protocol::TProtocolException;
+
+ bool isset_scale = false;
+ bool isset_precision = false;
+
+ while (true)
+ {
+ xfer += iprot->readFieldBegin(fname, ftype, fid);
+ if (ftype == ::apache::thrift::protocol::T_STOP) {
+ break;
+ }
+ switch (fid)
+ {
+ case 1:
+ if (ftype == ::apache::thrift::protocol::T_I32) {
+ xfer += iprot->readI32(this->scale);
+ isset_scale = true;
+ } else {
+ xfer += iprot->skip(ftype);
+ }
+ break;
+ case 2:
+ if (ftype == ::apache::thrift::protocol::T_I32) {
+ xfer += iprot->readI32(this->precision);
+ isset_precision = true;
+ } else {
+ xfer += iprot->skip(ftype);
+ }
+ break;
+ default:
+ xfer += iprot->skip(ftype);
+ break;
+ }
+ xfer += iprot->readFieldEnd();
+ }
+
+ xfer += iprot->readStructEnd();
+
+ if (!isset_scale)
+ throw TProtocolException(TProtocolException::INVALID_DATA);
+ if (!isset_precision)
+ throw TProtocolException(TProtocolException::INVALID_DATA);
+ return xfer;
+}
+
+uint32_t DecimalType::write(::apache::thrift::protocol::TProtocol* oprot) const {
+ uint32_t xfer = 0;
+ ::apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot);
+ xfer += oprot->writeStructBegin("DecimalType");
+
+ xfer += oprot->writeFieldBegin("scale", ::apache::thrift::protocol::T_I32, 1);
+ xfer += oprot->writeI32(this->scale);
+ xfer += oprot->writeFieldEnd();
+
+ xfer += oprot->writeFieldBegin("precision", ::apache::thrift::protocol::T_I32, 2);
+ xfer += oprot->writeI32(this->precision);
+ xfer += oprot->writeFieldEnd();
+
+ xfer += oprot->writeFieldStop();
+ xfer += oprot->writeStructEnd();
+ return xfer;
+}
+
+void swap(DecimalType &a, DecimalType &b) {
+ using ::std::swap;
+ swap(a.scale, b.scale);
+ swap(a.precision, b.precision);
+}
+
+DecimalType::DecimalType(const DecimalType& other16) {
+ scale = other16.scale;
+ precision = other16.precision;
+}
+DecimalType& DecimalType::operator=(const DecimalType& other17) {
+ scale = other17.scale;
+ precision = other17.precision;
+ return *this;
+}
+void DecimalType::printTo(std::ostream& out) const {
+ using ::apache::thrift::to_string;
+ out << "DecimalType(";
+ out << "scale=" << to_string(scale);
+ out << ", " << "precision=" << to_string(precision);
+ out << ")";
+}
+
+
+MilliSeconds::~MilliSeconds() noexcept {
+}
+
+std::ostream& operator<<(std::ostream& out, const MilliSeconds& obj)
+{
+ obj.printTo(out);
+ return out;
+}
+
+
+uint32_t MilliSeconds::read(::apache::thrift::protocol::TProtocol* iprot) {
+
+ ::apache::thrift::protocol::TInputRecursionTracker tracker(*iprot);
+ uint32_t xfer = 0;
+ std::string fname;
+ ::apache::thrift::protocol::TType ftype;
+ int16_t fid;
+
+ xfer += iprot->readStructBegin(fname);
+
+ using ::apache::thrift::protocol::TProtocolException;
+
+
+ while (true)
+ {
+ xfer += iprot->readFieldBegin(fname, ftype, fid);
+ if (ftype == ::apache::thrift::protocol::T_STOP) {
+ break;
+ }
+ xfer += iprot->skip(ftype);
+ xfer += iprot->readFieldEnd();
+ }
+
+ xfer += iprot->readStructEnd();
+
+ return xfer;
+}
+
+uint32_t MilliSeconds::write(::apache::thrift::protocol::TProtocol* oprot) const {
+ uint32_t xfer = 0;
+ ::apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot);
+ xfer += oprot->writeStructBegin("MilliSeconds");
+
+ xfer += oprot->writeFieldStop();
+ xfer += oprot->writeStructEnd();
+ return xfer;
+}
+
+void swap(MilliSeconds &a, MilliSeconds &b) {
+ using ::std::swap;
+ (void) a;
+ (void) b;
+}
+
+MilliSeconds::MilliSeconds(const MilliSeconds& other18) {
+ (void) other18;
+}
+MilliSeconds& MilliSeconds::operator=(const MilliSeconds& other19) {
+ (void) other19;
+ return *this;
+}
+void MilliSeconds::printTo(std::ostream& out) const {
+ using ::apache::thrift::to_string;
+ out << "MilliSeconds(";
+ out << ")";
+}
+
+
+MicroSeconds::~MicroSeconds() noexcept {
+}
+
+std::ostream& operator<<(std::ostream& out, const MicroSeconds& obj)
+{
+ obj.printTo(out);
+ return out;
+}
+
+
+uint32_t MicroSeconds::read(::apache::thrift::protocol::TProtocol* iprot) {
+
+ ::apache::thrift::protocol::TInputRecursionTracker tracker(*iprot);
+ uint32_t xfer = 0;
+ std::string fname;
+ ::apache::thrift::protocol::TType ftype;
+ int16_t fid;
+
+ xfer += iprot->readStructBegin(fname);
+
+ using ::apache::thrift::protocol::TProtocolException;
+
+
+ while (true)
+ {
+ xfer += iprot->readFieldBegin(fname, ftype, fid);
+ if (ftype == ::apache::thrift::protocol::T_STOP) {
+ break;
+ }
+ xfer += iprot->skip(ftype);
+ xfer += iprot->readFieldEnd();
+ }
+
+ xfer += iprot->readStructEnd();
+
+ return xfer;
+}
+
+uint32_t MicroSeconds::write(::apache::thrift::protocol::TProtocol* oprot) const {
+ uint32_t xfer = 0;
+ ::apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot);
+ xfer += oprot->writeStructBegin("MicroSeconds");
+
+ xfer += oprot->writeFieldStop();
+ xfer += oprot->writeStructEnd();
+ return xfer;
+}
+
+void swap(MicroSeconds &a, MicroSeconds &b) {
+ using ::std::swap;
+ (void) a;
+ (void) b;
+}
+
+MicroSeconds::MicroSeconds(const MicroSeconds& other20) {
+ (void) other20;
+}
+MicroSeconds& MicroSeconds::operator=(const MicroSeconds& other21) {
+ (void) other21;
+ return *this;
+}
+void MicroSeconds::printTo(std::ostream& out) const {
+ using ::apache::thrift::to_string;
+ out << "MicroSeconds(";
+ out << ")";
+}
+
+
+NanoSeconds::~NanoSeconds() noexcept {
+}
+
+std::ostream& operator<<(std::ostream& out, const NanoSeconds& obj)
+{
+ obj.printTo(out);
+ return out;
+}
+
+
+uint32_t NanoSeconds::read(::apache::thrift::protocol::TProtocol* iprot) {
+
+ ::apache::thrift::protocol::TInputRecursionTracker tracker(*iprot);
+ uint32_t xfer = 0;
+ std::string fname;
+ ::apache::thrift::protocol::TType ftype;
+ int16_t fid;
+
+ xfer += iprot->readStructBegin(fname);
+
+ using ::apache::thrift::protocol::TProtocolException;
+
+
+ while (true)
+ {
+ xfer += iprot->readFieldBegin(fname, ftype, fid);
+ if (ftype == ::apache::thrift::protocol::T_STOP) {
+ break;
+ }
+ xfer += iprot->skip(ftype);
+ xfer += iprot->readFieldEnd();
+ }
+
+ xfer += iprot->readStructEnd();
+
+ return xfer;
+}
+
+uint32_t NanoSeconds::write(::apache::thrift::protocol::TProtocol* oprot) const {
+ uint32_t xfer = 0;
+ ::apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot);
+ xfer += oprot->writeStructBegin("NanoSeconds");
+
+ xfer += oprot->writeFieldStop();
+ xfer += oprot->writeStructEnd();
+ return xfer;
+}
+
+void swap(NanoSeconds &a, NanoSeconds &b) {
+ using ::std::swap;
+ (void) a;
+ (void) b;
+}
+
+NanoSeconds::NanoSeconds(const NanoSeconds& other22) {
+ (void) other22;
+}
+NanoSeconds& NanoSeconds::operator=(const NanoSeconds& other23) {
+ (void) other23;
+ return *this;
+}
+void NanoSeconds::printTo(std::ostream& out) const {
+ using ::apache::thrift::to_string;
+ out << "NanoSeconds(";
+ out << ")";
+}
+
+
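+// TimeUnit is generated from a Thrift union: exactly one of MILLIS, MICROS
+// or NANOS is expected to be set, and __isset records which arm was read.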
+TimeUnit::~TimeUnit() noexcept {
+}
+
+
+void TimeUnit::__set_MILLIS(const MilliSeconds& val) {
+ this->MILLIS = val;
+  __isset.MILLIS = true;
+}
+
+void TimeUnit::__set_MICROS(const MicroSeconds& val) {
+ this->MICROS = val;
+  __isset.MICROS = true;
+}
+
+void TimeUnit::__set_NANOS(const NanoSeconds& val) {
+ this->NANOS = val;
+  __isset.NANOS = true;
+}
+std::ostream& operator<<(std::ostream& out, const TimeUnit& obj)
+{
+ obj.printTo(out);
+ return out;
+}
+
+
+uint32_t TimeUnit::read(::apache::thrift::protocol::TProtocol* iprot) {
+
+ ::apache::thrift::protocol::TInputRecursionTracker tracker(*iprot);
+ uint32_t xfer = 0;
+ std::string fname;
+ ::apache::thrift::protocol::TType ftype;
+ int16_t fid;
+
+ xfer += iprot->readStructBegin(fname);
+
+ using ::apache::thrift::protocol::TProtocolException;
+
+
+ while (true)
+ {
+ xfer += iprot->readFieldBegin(fname, ftype, fid);
+ if (ftype == ::apache::thrift::protocol::T_STOP) {
+ break;
+ }
+ switch (fid)
+ {
+ case 1:
+ if (ftype == ::apache::thrift::protocol::T_STRUCT) {
+ xfer += this->MILLIS.read(iprot);
+ this->__isset.MILLIS = true;
+ } else {
+ xfer += iprot->skip(ftype);
+ }
+ break;
+ case 2:
+ if (ftype == ::apache::thrift::protocol::T_STRUCT) {
+ xfer += this->MICROS.read(iprot);
+ this->__isset.MICROS = true;
+ } else {
+ xfer += iprot->skip(ftype);
+ }
+ break;
+ case 3:
+ if (ftype == ::apache::thrift::protocol::T_STRUCT) {
+ xfer += this->NANOS.read(iprot);
+ this->__isset.NANOS = true;
+ } else {
+ xfer += iprot->skip(ftype);
+ }
+ break;
+ default:
+ xfer += iprot->skip(ftype);
+ break;
+ }
+ xfer += iprot->readFieldEnd();
+ }
+
+ xfer += iprot->readStructEnd();
+
+ return xfer;
+}
+
+uint32_t TimeUnit::write(::apache::thrift::protocol::TProtocol* oprot) const {
+ uint32_t xfer = 0;
+ ::apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot);
+ xfer += oprot->writeStructBegin("TimeUnit");
+
+ if (this->__isset.MILLIS) {
+ xfer += oprot->writeFieldBegin("MILLIS", ::apache::thrift::protocol::T_STRUCT, 1);
+ xfer += this->MILLIS.write(oprot);
+ xfer += oprot->writeFieldEnd();
+ }
+ if (this->__isset.MICROS) {
+ xfer += oprot->writeFieldBegin("MICROS", ::apache::thrift::protocol::T_STRUCT, 2);
+ xfer += this->MICROS.write(oprot);
+ xfer += oprot->writeFieldEnd();
+ }
+ if (this->__isset.NANOS) {
+ xfer += oprot->writeFieldBegin("NANOS", ::apache::thrift::protocol::T_STRUCT, 3);
+ xfer += this->NANOS.write(oprot);
+ xfer += oprot->writeFieldEnd();
+ }
+ xfer += oprot->writeFieldStop();
+ xfer += oprot->writeStructEnd();
+ return xfer;
+}
+
+void swap(TimeUnit &a, TimeUnit &b) {
+ using ::std::swap;
+ swap(a.MILLIS, b.MILLIS);
+ swap(a.MICROS, b.MICROS);
+ swap(a.NANOS, b.NANOS);
+ swap(a.__isset, b.__isset);
+}
+
+TimeUnit::TimeUnit(const TimeUnit& other24) {
+ MILLIS = other24.MILLIS;
+ MICROS = other24.MICROS;
+ NANOS = other24.NANOS;
+ __isset = other24.__isset;
+}
+TimeUnit& TimeUnit::operator=(const TimeUnit& other25) {
+ MILLIS = other25.MILLIS;
+ MICROS = other25.MICROS;
+ NANOS = other25.NANOS;
+ __isset = other25.__isset;
+ return *this;
+}
+void TimeUnit::printTo(std::ostream& out) const {
+ using ::apache::thrift::to_string;
+ out << "TimeUnit(";
+ out << "MILLIS="; (__isset.MILLIS ? (out << to_string(MILLIS)) : (out << "<null>"));
+ out << ", " << "MICROS="; (__isset.MICROS ? (out << to_string(MICROS)) : (out << "<null>"));
+ out << ", " << "NANOS="; (__isset.NANOS ? (out << to_string(NANOS)) : (out << "<null>"));
+ out << ")";
+}
+
+
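+// TimestampType: a required UTC-adjustment flag plus a required TimeUnit.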
+TimestampType::~TimestampType() noexcept {
+}
+
+
+void TimestampType::__set_isAdjustedToUTC(const bool val) {
+ this->isAdjustedToUTC = val;
+}
+
+void TimestampType::__set_unit(const TimeUnit& val) {
+ this->unit = val;
+}
+std::ostream& operator<<(std::ostream& out, const TimestampType& obj)
+{
+ obj.printTo(out);
+ return out;
+}
+
+
+uint32_t TimestampType::read(::apache::thrift::protocol::TProtocol* iprot) {
+
+ ::apache::thrift::protocol::TInputRecursionTracker tracker(*iprot);
+ uint32_t xfer = 0;
+ std::string fname;
+ ::apache::thrift::protocol::TType ftype;
+ int16_t fid;
+
+ xfer += iprot->readStructBegin(fname);
+
+ using ::apache::thrift::protocol::TProtocolException;
+
+ bool isset_isAdjustedToUTC = false;
+ bool isset_unit = false;
+
+ while (true)
+ {
+ xfer += iprot->readFieldBegin(fname, ftype, fid);
+ if (ftype == ::apache::thrift::protocol::T_STOP) {
+ break;
+ }
+ switch (fid)
+ {
+ case 1:
+ if (ftype == ::apache::thrift::protocol::T_BOOL) {
+ xfer += iprot->readBool(this->isAdjustedToUTC);
+ isset_isAdjustedToUTC = true;
+ } else {
+ xfer += iprot->skip(ftype);
+ }
+ break;
+ case 2:
+ if (ftype == ::apache::thrift::protocol::T_STRUCT) {
+ xfer += this->unit.read(iprot);
+ isset_unit = true;
+ } else {
+ xfer += iprot->skip(ftype);
+ }
+ break;
+ default:
+ xfer += iprot->skip(ftype);
+ break;
+ }
+ xfer += iprot->readFieldEnd();
+ }
+
+ xfer += iprot->readStructEnd();
+
+ if (!isset_isAdjustedToUTC)
+ throw TProtocolException(TProtocolException::INVALID_DATA);
+ if (!isset_unit)
+ throw TProtocolException(TProtocolException::INVALID_DATA);
+ return xfer;
+}
+
+uint32_t TimestampType::write(::apache::thrift::protocol::TProtocol* oprot) const {
+ uint32_t xfer = 0;
+ ::apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot);
+ xfer += oprot->writeStructBegin("TimestampType");
+
+ xfer += oprot->writeFieldBegin("isAdjustedToUTC", ::apache::thrift::protocol::T_BOOL, 1);
+ xfer += oprot->writeBool(this->isAdjustedToUTC);
+ xfer += oprot->writeFieldEnd();
+
+ xfer += oprot->writeFieldBegin("unit", ::apache::thrift::protocol::T_STRUCT, 2);
+ xfer += this->unit.write(oprot);
+ xfer += oprot->writeFieldEnd();
+
+ xfer += oprot->writeFieldStop();
+ xfer += oprot->writeStructEnd();
+ return xfer;
+}
+
+void swap(TimestampType &a, TimestampType &b) {
+ using ::std::swap;
+ swap(a.isAdjustedToUTC, b.isAdjustedToUTC);
+ swap(a.unit, b.unit);
+}
+
+TimestampType::TimestampType(const TimestampType& other26) {
+ isAdjustedToUTC = other26.isAdjustedToUTC;
+ unit = other26.unit;
+}
+TimestampType& TimestampType::operator=(const TimestampType& other27) {
+ isAdjustedToUTC = other27.isAdjustedToUTC;
+ unit = other27.unit;
+ return *this;
+}
+void TimestampType::printTo(std::ostream& out) const {
+ using ::apache::thrift::to_string;
+ out << "TimestampType(";
+ out << "isAdjustedToUTC=" << to_string(isAdjustedToUTC);
+ out << ", " << "unit=" << to_string(unit);
+ out << ")";
+}
+
+
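+// TimeType mirrors TimestampType: required isAdjustedToUTC and unit fields.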
+TimeType::~TimeType() noexcept {
+}
+
+
+void TimeType::__set_isAdjustedToUTC(const bool val) {
+ this->isAdjustedToUTC = val;
+}
+
+void TimeType::__set_unit(const TimeUnit& val) {
+ this->unit = val;
+}
+std::ostream& operator<<(std::ostream& out, const TimeType& obj)
+{
+ obj.printTo(out);
+ return out;
+}
+
+
+uint32_t TimeType::read(::apache::thrift::protocol::TProtocol* iprot) {
+
+ ::apache::thrift::protocol::TInputRecursionTracker tracker(*iprot);
+ uint32_t xfer = 0;
+ std::string fname;
+ ::apache::thrift::protocol::TType ftype;
+ int16_t fid;
+
+ xfer += iprot->readStructBegin(fname);
+
+ using ::apache::thrift::protocol::TProtocolException;
+
+ bool isset_isAdjustedToUTC = false;
+ bool isset_unit = false;
+
+ while (true)
+ {
+ xfer += iprot->readFieldBegin(fname, ftype, fid);
+ if (ftype == ::apache::thrift::protocol::T_STOP) {
+ break;
+ }
+ switch (fid)
+ {
+ case 1:
+ if (ftype == ::apache::thrift::protocol::T_BOOL) {
+ xfer += iprot->readBool(this->isAdjustedToUTC);
+ isset_isAdjustedToUTC = true;
+ } else {
+ xfer += iprot->skip(ftype);
+ }
+ break;
+ case 2:
+ if (ftype == ::apache::thrift::protocol::T_STRUCT) {
+ xfer += this->unit.read(iprot);
+ isset_unit = true;
+ } else {
+ xfer += iprot->skip(ftype);
+ }
+ break;
+ default:
+ xfer += iprot->skip(ftype);
+ break;
+ }
+ xfer += iprot->readFieldEnd();
+ }
+
+ xfer += iprot->readStructEnd();
+
+ if (!isset_isAdjustedToUTC)
+ throw TProtocolException(TProtocolException::INVALID_DATA);
+ if (!isset_unit)
+ throw TProtocolException(TProtocolException::INVALID_DATA);
+ return xfer;
+}
+
+uint32_t TimeType::write(::apache::thrift::protocol::TProtocol* oprot) const {
+ uint32_t xfer = 0;
+ ::apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot);
+ xfer += oprot->writeStructBegin("TimeType");
+
+ xfer += oprot->writeFieldBegin("isAdjustedToUTC", ::apache::thrift::protocol::T_BOOL, 1);
+ xfer += oprot->writeBool(this->isAdjustedToUTC);
+ xfer += oprot->writeFieldEnd();
+
+ xfer += oprot->writeFieldBegin("unit", ::apache::thrift::protocol::T_STRUCT, 2);
+ xfer += this->unit.write(oprot);
+ xfer += oprot->writeFieldEnd();
+
+ xfer += oprot->writeFieldStop();
+ xfer += oprot->writeStructEnd();
+ return xfer;
+}
+
+void swap(TimeType &a, TimeType &b) {
+ using ::std::swap;
+ swap(a.isAdjustedToUTC, b.isAdjustedToUTC);
+ swap(a.unit, b.unit);
+}
+
+TimeType::TimeType(const TimeType& other28) {
+ isAdjustedToUTC = other28.isAdjustedToUTC;
+ unit = other28.unit;
+}
+TimeType& TimeType::operator=(const TimeType& other29) {
+ isAdjustedToUTC = other29.isAdjustedToUTC;
+ unit = other29.unit;
+ return *this;
+}
+void TimeType::printTo(std::ostream& out) const {
+ using ::apache::thrift::to_string;
+ out << "TimeType(";
+ out << "isAdjustedToUTC=" << to_string(isAdjustedToUTC);
+ out << ", " << "unit=" << to_string(unit);
+ out << ")";
+}
+
+
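+// IntType describes an integer annotation: bitWidth travels as a single
+// byte (T_BYTE) and isSigned as a bool; both fields are required.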
+IntType::~IntType() noexcept {
+}
+
+
+void IntType::__set_bitWidth(const int8_t val) {
+ this->bitWidth = val;
+}
+
+void IntType::__set_isSigned(const bool val) {
+ this->isSigned = val;
+}
+std::ostream& operator<<(std::ostream& out, const IntType& obj)
+{
+ obj.printTo(out);
+ return out;
+}
+
+
+uint32_t IntType::read(::apache::thrift::protocol::TProtocol* iprot) {
+
+ ::apache::thrift::protocol::TInputRecursionTracker tracker(*iprot);
+ uint32_t xfer = 0;
+ std::string fname;
+ ::apache::thrift::protocol::TType ftype;
+ int16_t fid;
+
+ xfer += iprot->readStructBegin(fname);
+
+ using ::apache::thrift::protocol::TProtocolException;
+
+ bool isset_bitWidth = false;
+ bool isset_isSigned = false;
+
+ while (true)
+ {
+ xfer += iprot->readFieldBegin(fname, ftype, fid);
+ if (ftype == ::apache::thrift::protocol::T_STOP) {
+ break;
+ }
+ switch (fid)
+ {
+ case 1:
+ if (ftype == ::apache::thrift::protocol::T_BYTE) {
+ xfer += iprot->readByte(this->bitWidth);
+ isset_bitWidth = true;
+ } else {
+ xfer += iprot->skip(ftype);
+ }
+ break;
+ case 2:
+ if (ftype == ::apache::thrift::protocol::T_BOOL) {
+ xfer += iprot->readBool(this->isSigned);
+ isset_isSigned = true;
+ } else {
+ xfer += iprot->skip(ftype);
+ }
+ break;
+ default:
+ xfer += iprot->skip(ftype);
+ break;
+ }
+ xfer += iprot->readFieldEnd();
+ }
+
+ xfer += iprot->readStructEnd();
+
+ if (!isset_bitWidth)
+ throw TProtocolException(TProtocolException::INVALID_DATA);
+ if (!isset_isSigned)
+ throw TProtocolException(TProtocolException::INVALID_DATA);
+ return xfer;
+}
+
+uint32_t IntType::write(::apache::thrift::protocol::TProtocol* oprot) const {
+ uint32_t xfer = 0;
+ ::apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot);
+ xfer += oprot->writeStructBegin("IntType");
+
+ xfer += oprot->writeFieldBegin("bitWidth", ::apache::thrift::protocol::T_BYTE, 1);
+ xfer += oprot->writeByte(this->bitWidth);
+ xfer += oprot->writeFieldEnd();
+
+ xfer += oprot->writeFieldBegin("isSigned", ::apache::thrift::protocol::T_BOOL, 2);
+ xfer += oprot->writeBool(this->isSigned);
+ xfer += oprot->writeFieldEnd();
+
+ xfer += oprot->writeFieldStop();
+ xfer += oprot->writeStructEnd();
+ return xfer;
+}
+
+void swap(IntType &a, IntType &b) {
+ using ::std::swap;
+ swap(a.bitWidth, b.bitWidth);
+ swap(a.isSigned, b.isSigned);
+}
+
+IntType::IntType(const IntType& other30) {
+ bitWidth = other30.bitWidth;
+ isSigned = other30.isSigned;
+}
+IntType& IntType::operator=(const IntType& other31) {
+ bitWidth = other31.bitWidth;
+ isSigned = other31.isSigned;
+ return *this;
+}
+void IntType::printTo(std::ostream& out) const {
+ using ::apache::thrift::to_string;
+ out << "IntType(";
+ out << "bitWidth=" << to_string(bitWidth);
+ out << ", " << "isSigned=" << to_string(isSigned);
+ out << ")";
+}
+
+
+JsonType::~JsonType() noexcept {
+}
+
+std::ostream& operator<<(std::ostream& out, const JsonType& obj)
+{
+ obj.printTo(out);
+ return out;
+}
+
+
+uint32_t JsonType::read(::apache::thrift::protocol::TProtocol* iprot) {
+
+ ::apache::thrift::protocol::TInputRecursionTracker tracker(*iprot);
+ uint32_t xfer = 0;
+ std::string fname;
+ ::apache::thrift::protocol::TType ftype;
+ int16_t fid;
+
+ xfer += iprot->readStructBegin(fname);
+
+ using ::apache::thrift::protocol::TProtocolException;
+
+
+ while (true)
+ {
+ xfer += iprot->readFieldBegin(fname, ftype, fid);
+ if (ftype == ::apache::thrift::protocol::T_STOP) {
+ break;
+ }
+ xfer += iprot->skip(ftype);
+ xfer += iprot->readFieldEnd();
+ }
+
+ xfer += iprot->readStructEnd();
+
+ return xfer;
+}
+
+uint32_t JsonType::write(::apache::thrift::protocol::TProtocol* oprot) const {
+ uint32_t xfer = 0;
+ ::apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot);
+ xfer += oprot->writeStructBegin("JsonType");
+
+ xfer += oprot->writeFieldStop();
+ xfer += oprot->writeStructEnd();
+ return xfer;
+}
+
+void swap(JsonType &a, JsonType &b) {
+ using ::std::swap;
+ (void) a;
+ (void) b;
+}
+
+JsonType::JsonType(const JsonType& other32) {
+ (void) other32;
+}
+JsonType& JsonType::operator=(const JsonType& other33) {
+ (void) other33;
+ return *this;
+}
+void JsonType::printTo(std::ostream& out) const {
+ using ::apache::thrift::to_string;
+ out << "JsonType(";
+ out << ")";
+}
+
+
+BsonType::~BsonType() noexcept {
+}
+
+std::ostream& operator<<(std::ostream& out, const BsonType& obj)
+{
+ obj.printTo(out);
+ return out;
+}
+
+
+uint32_t BsonType::read(::apache::thrift::protocol::TProtocol* iprot) {
+
+ ::apache::thrift::protocol::TInputRecursionTracker tracker(*iprot);
+ uint32_t xfer = 0;
+ std::string fname;
+ ::apache::thrift::protocol::TType ftype;
+ int16_t fid;
+
+ xfer += iprot->readStructBegin(fname);
+
+ using ::apache::thrift::protocol::TProtocolException;
+
+
+ while (true)
+ {
+ xfer += iprot->readFieldBegin(fname, ftype, fid);
+ if (ftype == ::apache::thrift::protocol::T_STOP) {
+ break;
+ }
+ xfer += iprot->skip(ftype);
+ xfer += iprot->readFieldEnd();
+ }
+
+ xfer += iprot->readStructEnd();
+
+ return xfer;
+}
+
+uint32_t BsonType::write(::apache::thrift::protocol::TProtocol* oprot) const {
+ uint32_t xfer = 0;
+ ::apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot);
+ xfer += oprot->writeStructBegin("BsonType");
+
+ xfer += oprot->writeFieldStop();
+ xfer += oprot->writeStructEnd();
+ return xfer;
+}
+
+void swap(BsonType &a, BsonType &b) {
+ using ::std::swap;
+ (void) a;
+ (void) b;
+}
+
+BsonType::BsonType(const BsonType& other34) {
+ (void) other34;
+}
+BsonType& BsonType::operator=(const BsonType& other35) {
+ (void) other35;
+ return *this;
+}
+void BsonType::printTo(std::ostream& out) const {
+ using ::apache::thrift::to_string;
+ out << "BsonType(";
+ out << ")";
+}
+
+
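+// LogicalType is the Thrift union of all logical type annotations. Field
+// ids run 1-8 and 10-14; id 9 is absent (reserved in the Parquet Thrift
+// definition), hence the jump from TIMESTAMP (8) to INTEGER (10) below.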
+LogicalType::~LogicalType() noexcept {
+}
+
+
+void LogicalType::__set_STRING(const StringType& val) {
+ this->STRING = val;
+  __isset.STRING = true;
+}
+
+void LogicalType::__set_MAP(const MapType& val) {
+ this->MAP = val;
+  __isset.MAP = true;
+}
+
+void LogicalType::__set_LIST(const ListType& val) {
+ this->LIST = val;
+  __isset.LIST = true;
+}
+
+void LogicalType::__set_ENUM(const EnumType& val) {
+ this->ENUM = val;
+  __isset.ENUM = true;
+}
+
+void LogicalType::__set_DECIMAL(const DecimalType& val) {
+ this->DECIMAL = val;
+  __isset.DECIMAL = true;
+}
+
+void LogicalType::__set_DATE(const DateType& val) {
+ this->DATE = val;
+  __isset.DATE = true;
+}
+
+void LogicalType::__set_TIME(const TimeType& val) {
+ this->TIME = val;
+  __isset.TIME = true;
+}
+
+void LogicalType::__set_TIMESTAMP(const TimestampType& val) {
+ this->TIMESTAMP = val;
+  __isset.TIMESTAMP = true;
+}
+
+void LogicalType::__set_INTEGER(const IntType& val) {
+ this->INTEGER = val;
+  __isset.INTEGER = true;
+}
+
+void LogicalType::__set_UNKNOWN(const NullType& val) {
+ this->UNKNOWN = val;
+  __isset.UNKNOWN = true;
+}
+
+void LogicalType::__set_JSON(const JsonType& val) {
+ this->JSON = val;
+  __isset.JSON = true;
+}
+
+void LogicalType::__set_BSON(const BsonType& val) {
+ this->BSON = val;
+  __isset.BSON = true;
+}
+
+void LogicalType::__set_UUID(const UUIDType& val) {
+ this->UUID = val;
+  __isset.UUID = true;
+}
+std::ostream& operator<<(std::ostream& out, const LogicalType& obj)
+{
+ obj.printTo(out);
+ return out;
+}
+
+
+uint32_t LogicalType::read(::apache::thrift::protocol::TProtocol* iprot) {
+
+ ::apache::thrift::protocol::TInputRecursionTracker tracker(*iprot);
+ uint32_t xfer = 0;
+ std::string fname;
+ ::apache::thrift::protocol::TType ftype;
+ int16_t fid;
+
+ xfer += iprot->readStructBegin(fname);
+
+ using ::apache::thrift::protocol::TProtocolException;
+
+
+ while (true)
+ {
+ xfer += iprot->readFieldBegin(fname, ftype, fid);
+ if (ftype == ::apache::thrift::protocol::T_STOP) {
+ break;
+ }
+ switch (fid)
+ {
+ case 1:
+ if (ftype == ::apache::thrift::protocol::T_STRUCT) {
+ xfer += this->STRING.read(iprot);
+ this->__isset.STRING = true;
+ } else {
+ xfer += iprot->skip(ftype);
+ }
+ break;
+ case 2:
+ if (ftype == ::apache::thrift::protocol::T_STRUCT) {
+ xfer += this->MAP.read(iprot);
+ this->__isset.MAP = true;
+ } else {
+ xfer += iprot->skip(ftype);
+ }
+ break;
+ case 3:
+ if (ftype == ::apache::thrift::protocol::T_STRUCT) {
+ xfer += this->LIST.read(iprot);
+ this->__isset.LIST = true;
+ } else {
+ xfer += iprot->skip(ftype);
+ }
+ break;
+ case 4:
+ if (ftype == ::apache::thrift::protocol::T_STRUCT) {
+ xfer += this->ENUM.read(iprot);
+ this->__isset.ENUM = true;
+ } else {
+ xfer += iprot->skip(ftype);
+ }
+ break;
+ case 5:
+ if (ftype == ::apache::thrift::protocol::T_STRUCT) {
+ xfer += this->DECIMAL.read(iprot);
+ this->__isset.DECIMAL = true;
+ } else {
+ xfer += iprot->skip(ftype);
+ }
+ break;
+ case 6:
+ if (ftype == ::apache::thrift::protocol::T_STRUCT) {
+ xfer += this->DATE.read(iprot);
+ this->__isset.DATE = true;
+ } else {
+ xfer += iprot->skip(ftype);
+ }
+ break;
+ case 7:
+ if (ftype == ::apache::thrift::protocol::T_STRUCT) {
+ xfer += this->TIME.read(iprot);
+ this->__isset.TIME = true;
+ } else {
+ xfer += iprot->skip(ftype);
+ }
+ break;
+ case 8:
+ if (ftype == ::apache::thrift::protocol::T_STRUCT) {
+ xfer += this->TIMESTAMP.read(iprot);
+ this->__isset.TIMESTAMP = true;
+ } else {
+ xfer += iprot->skip(ftype);
+ }
+ break;
+ case 10:
+ if (ftype == ::apache::thrift::protocol::T_STRUCT) {
+ xfer += this->INTEGER.read(iprot);
+ this->__isset.INTEGER = true;
+ } else {
+ xfer += iprot->skip(ftype);
+ }
+ break;
+ case 11:
+ if (ftype == ::apache::thrift::protocol::T_STRUCT) {
+ xfer += this->UNKNOWN.read(iprot);
+ this->__isset.UNKNOWN = true;
+ } else {
+ xfer += iprot->skip(ftype);
+ }
+ break;
+ case 12:
+ if (ftype == ::apache::thrift::protocol::T_STRUCT) {
+ xfer += this->JSON.read(iprot);
+ this->__isset.JSON = true;
+ } else {
+ xfer += iprot->skip(ftype);
+ }
+ break;
+ case 13:
+ if (ftype == ::apache::thrift::protocol::T_STRUCT) {
+ xfer += this->BSON.read(iprot);
+ this->__isset.BSON = true;
+ } else {
+ xfer += iprot->skip(ftype);
+ }
+ break;
+ case 14:
+ if (ftype == ::apache::thrift::protocol::T_STRUCT) {
+ xfer += this->UUID.read(iprot);
+ this->__isset.UUID = true;
+ } else {
+ xfer += iprot->skip(ftype);
+ }
+ break;
+ default:
+ xfer += iprot->skip(ftype);
+ break;
+ }
+ xfer += iprot->readFieldEnd();
+ }
+
+ xfer += iprot->readStructEnd();
+
+ return xfer;
+}
+
+uint32_t LogicalType::write(::apache::thrift::protocol::TProtocol* oprot) const {
+ uint32_t xfer = 0;
+ ::apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot);
+ xfer += oprot->writeStructBegin("LogicalType");
+
+ if (this->__isset.STRING) {
+ xfer += oprot->writeFieldBegin("STRING", ::apache::thrift::protocol::T_STRUCT, 1);
+ xfer += this->STRING.write(oprot);
+ xfer += oprot->writeFieldEnd();
+ }
+ if (this->__isset.MAP) {
+ xfer += oprot->writeFieldBegin("MAP", ::apache::thrift::protocol::T_STRUCT, 2);
+ xfer += this->MAP.write(oprot);
+ xfer += oprot->writeFieldEnd();
+ }
+ if (this->__isset.LIST) {
+ xfer += oprot->writeFieldBegin("LIST", ::apache::thrift::protocol::T_STRUCT, 3);
+ xfer += this->LIST.write(oprot);
+ xfer += oprot->writeFieldEnd();
+ }
+ if (this->__isset.ENUM) {
+ xfer += oprot->writeFieldBegin("ENUM", ::apache::thrift::protocol::T_STRUCT, 4);
+ xfer += this->ENUM.write(oprot);
+ xfer += oprot->writeFieldEnd();
+ }
+ if (this->__isset.DECIMAL) {
+ xfer += oprot->writeFieldBegin("DECIMAL", ::apache::thrift::protocol::T_STRUCT, 5);
+ xfer += this->DECIMAL.write(oprot);
+ xfer += oprot->writeFieldEnd();
+ }
+ if (this->__isset.DATE) {
+ xfer += oprot->writeFieldBegin("DATE", ::apache::thrift::protocol::T_STRUCT, 6);
+ xfer += this->DATE.write(oprot);
+ xfer += oprot->writeFieldEnd();
+ }
+ if (this->__isset.TIME) {
+ xfer += oprot->writeFieldBegin("TIME", ::apache::thrift::protocol::T_STRUCT, 7);
+ xfer += this->TIME.write(oprot);
+ xfer += oprot->writeFieldEnd();
+ }
+ if (this->__isset.TIMESTAMP) {
+ xfer += oprot->writeFieldBegin("TIMESTAMP", ::apache::thrift::protocol::T_STRUCT, 8);
+ xfer += this->TIMESTAMP.write(oprot);
+ xfer += oprot->writeFieldEnd();
+ }
+ if (this->__isset.INTEGER) {
+ xfer += oprot->writeFieldBegin("INTEGER", ::apache::thrift::protocol::T_STRUCT, 10);
+ xfer += this->INTEGER.write(oprot);
+ xfer += oprot->writeFieldEnd();
+ }
+ if (this->__isset.UNKNOWN) {
+ xfer += oprot->writeFieldBegin("UNKNOWN", ::apache::thrift::protocol::T_STRUCT, 11);
+ xfer += this->UNKNOWN.write(oprot);
+ xfer += oprot->writeFieldEnd();
+ }
+ if (this->__isset.JSON) {
+ xfer += oprot->writeFieldBegin("JSON", ::apache::thrift::protocol::T_STRUCT, 12);
+ xfer += this->JSON.write(oprot);
+ xfer += oprot->writeFieldEnd();
+ }
+ if (this->__isset.BSON) {
+ xfer += oprot->writeFieldBegin("BSON", ::apache::thrift::protocol::T_STRUCT, 13);
+ xfer += this->BSON.write(oprot);
+ xfer += oprot->writeFieldEnd();
+ }
+ if (this->__isset.UUID) {
+ xfer += oprot->writeFieldBegin("UUID", ::apache::thrift::protocol::T_STRUCT, 14);
+ xfer += this->UUID.write(oprot);
+ xfer += oprot->writeFieldEnd();
+ }
+ xfer += oprot->writeFieldStop();
+ xfer += oprot->writeStructEnd();
+ return xfer;
+}
+
+void swap(LogicalType &a, LogicalType &b) {
+ using ::std::swap;
+ swap(a.STRING, b.STRING);
+ swap(a.MAP, b.MAP);
+ swap(a.LIST, b.LIST);
+ swap(a.ENUM, b.ENUM);
+ swap(a.DECIMAL, b.DECIMAL);
+ swap(a.DATE, b.DATE);
+ swap(a.TIME, b.TIME);
+ swap(a.TIMESTAMP, b.TIMESTAMP);
+ swap(a.INTEGER, b.INTEGER);
+ swap(a.UNKNOWN, b.UNKNOWN);
+ swap(a.JSON, b.JSON);
+ swap(a.BSON, b.BSON);
+ swap(a.UUID, b.UUID);
+ swap(a.__isset, b.__isset);
+}
+
+LogicalType::LogicalType(const LogicalType& other36) {
+ STRING = other36.STRING;
+ MAP = other36.MAP;
+ LIST = other36.LIST;
+ ENUM = other36.ENUM;
+ DECIMAL = other36.DECIMAL;
+ DATE = other36.DATE;
+ TIME = other36.TIME;
+ TIMESTAMP = other36.TIMESTAMP;
+ INTEGER = other36.INTEGER;
+ UNKNOWN = other36.UNKNOWN;
+ JSON = other36.JSON;
+ BSON = other36.BSON;
+ UUID = other36.UUID;
+ __isset = other36.__isset;
+}
+LogicalType& LogicalType::operator=(const LogicalType& other37) {
+ STRING = other37.STRING;
+ MAP = other37.MAP;
+ LIST = other37.LIST;
+ ENUM = other37.ENUM;
+ DECIMAL = other37.DECIMAL;
+ DATE = other37.DATE;
+ TIME = other37.TIME;
+ TIMESTAMP = other37.TIMESTAMP;
+ INTEGER = other37.INTEGER;
+ UNKNOWN = other37.UNKNOWN;
+ JSON = other37.JSON;
+ BSON = other37.BSON;
+ UUID = other37.UUID;
+ __isset = other37.__isset;
+ return *this;
+}
+void LogicalType::printTo(std::ostream& out) const {
+ using ::apache::thrift::to_string;
+ out << "LogicalType(";
+ out << "STRING="; (__isset.STRING ? (out << to_string(STRING)) : (out << "<null>"));
+ out << ", " << "MAP="; (__isset.MAP ? (out << to_string(MAP)) : (out << "<null>"));
+ out << ", " << "LIST="; (__isset.LIST ? (out << to_string(LIST)) : (out << "<null>"));
+ out << ", " << "ENUM="; (__isset.ENUM ? (out << to_string(ENUM)) : (out << "<null>"));
+ out << ", " << "DECIMAL="; (__isset.DECIMAL ? (out << to_string(DECIMAL)) : (out << "<null>"));
+ out << ", " << "DATE="; (__isset.DATE ? (out << to_string(DATE)) : (out << "<null>"));
+ out << ", " << "TIME="; (__isset.TIME ? (out << to_string(TIME)) : (out << "<null>"));
+ out << ", " << "TIMESTAMP="; (__isset.TIMESTAMP ? (out << to_string(TIMESTAMP)) : (out << "<null>"));
+ out << ", " << "INTEGER="; (__isset.INTEGER ? (out << to_string(INTEGER)) : (out << "<null>"));
+ out << ", " << "UNKNOWN="; (__isset.UNKNOWN ? (out << to_string(UNKNOWN)) : (out << "<null>"));
+ out << ", " << "JSON="; (__isset.JSON ? (out << to_string(JSON)) : (out << "<null>"));
+ out << ", " << "BSON="; (__isset.BSON ? (out << to_string(BSON)) : (out << "<null>"));
+ out << ", " << "UUID="; (__isset.UUID ? (out << to_string(UUID)) : (out << "<null>"));
+ out << ")";
+}
+
+
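+// SchemaElement is one node of the depth-first flattened Parquet schema
+// tree. Only `name` is required; every other field is optional and its
+// presence is tracked through __isset.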
+SchemaElement::~SchemaElement() noexcept {
+}
+
+
+void SchemaElement::__set_type(const Type::type val) {
+ this->type = val;
+  __isset.type = true;
+}
+
+void SchemaElement::__set_type_length(const int32_t val) {
+ this->type_length = val;
+  __isset.type_length = true;
+}
+
+void SchemaElement::__set_repetition_type(const FieldRepetitionType::type val) {
+ this->repetition_type = val;
+  __isset.repetition_type = true;
+}
+
+void SchemaElement::__set_name(const std::string& val) {
+ this->name = val;
+}
+
+void SchemaElement::__set_num_children(const int32_t val) {
+ this->num_children = val;
+  __isset.num_children = true;
+}
+
+void SchemaElement::__set_converted_type(const ConvertedType::type val) {
+ this->converted_type = val;
+  __isset.converted_type = true;
+}
+
+void SchemaElement::__set_scale(const int32_t val) {
+ this->scale = val;
+  __isset.scale = true;
+}
+
+void SchemaElement::__set_precision(const int32_t val) {
+ this->precision = val;
+  __isset.precision = true;
+}
+
+void SchemaElement::__set_field_id(const int32_t val) {
+ this->field_id = val;
+  __isset.field_id = true;
+}
+
+void SchemaElement::__set_logicalType(const LogicalType& val) {
+ this->logicalType = val;
+  __isset.logicalType = true;
+}
+std::ostream& operator<<(std::ostream& out, const SchemaElement& obj)
+{
+ obj.printTo(out);
+ return out;
+}
+
+
+uint32_t SchemaElement::read(::apache::thrift::protocol::TProtocol* iprot) {
+
+ ::apache::thrift::protocol::TInputRecursionTracker tracker(*iprot);
+ uint32_t xfer = 0;
+ std::string fname;
+ ::apache::thrift::protocol::TType ftype;
+ int16_t fid;
+
+ xfer += iprot->readStructBegin(fname);
+
+ using ::apache::thrift::protocol::TProtocolException;
+
+ bool isset_name = false;
+
+ while (true)
+ {
+ xfer += iprot->readFieldBegin(fname, ftype, fid);
+ if (ftype == ::apache::thrift::protocol::T_STOP) {
+ break;
+ }
+ switch (fid)
+ {
+ case 1:
+ if (ftype == ::apache::thrift::protocol::T_I32) {
+ int32_t ecast38;
+ xfer += iprot->readI32(ecast38);
+ this->type = (Type::type)ecast38;
+ this->__isset.type = true;
+ } else {
+ xfer += iprot->skip(ftype);
+ }
+ break;
+ case 2:
+ if (ftype == ::apache::thrift::protocol::T_I32) {
+ xfer += iprot->readI32(this->type_length);
+ this->__isset.type_length = true;
+ } else {
+ xfer += iprot->skip(ftype);
+ }
+ break;
+ case 3:
+ if (ftype == ::apache::thrift::protocol::T_I32) {
+ int32_t ecast39;
+ xfer += iprot->readI32(ecast39);
+ this->repetition_type = (FieldRepetitionType::type)ecast39;
+ this->__isset.repetition_type = true;
+ } else {
+ xfer += iprot->skip(ftype);
+ }
+ break;
+ case 4:
+ if (ftype == ::apache::thrift::protocol::T_STRING) {
+ xfer += iprot->readString(this->name);
+ isset_name = true;
+ } else {
+ xfer += iprot->skip(ftype);
+ }
+ break;
+ case 5:
+ if (ftype == ::apache::thrift::protocol::T_I32) {
+ xfer += iprot->readI32(this->num_children);
+ this->__isset.num_children = true;
+ } else {
+ xfer += iprot->skip(ftype);
+ }
+ break;
+ case 6:
+ if (ftype == ::apache::thrift::protocol::T_I32) {
+ int32_t ecast40;
+ xfer += iprot->readI32(ecast40);
+ this->converted_type = (ConvertedType::type)ecast40;
+ this->__isset.converted_type = true;
+ } else {
+ xfer += iprot->skip(ftype);
+ }
+ break;
+ case 7:
+ if (ftype == ::apache::thrift::protocol::T_I32) {
+ xfer += iprot->readI32(this->scale);
+ this->__isset.scale = true;
+ } else {
+ xfer += iprot->skip(ftype);
+ }
+ break;
+ case 8:
+ if (ftype == ::apache::thrift::protocol::T_I32) {
+ xfer += iprot->readI32(this->precision);
+ this->__isset.precision = true;
+ } else {
+ xfer += iprot->skip(ftype);
+ }
+ break;
+ case 9:
+ if (ftype == ::apache::thrift::protocol::T_I32) {
+ xfer += iprot->readI32(this->field_id);
+ this->__isset.field_id = true;
+ } else {
+ xfer += iprot->skip(ftype);
+ }
+ break;
+ case 10:
+ if (ftype == ::apache::thrift::protocol::T_STRUCT) {
+ xfer += this->logicalType.read(iprot);
+ this->__isset.logicalType = true;
+ } else {
+ xfer += iprot->skip(ftype);
+ }
+ break;
+ default:
+ xfer += iprot->skip(ftype);
+ break;
+ }
+ xfer += iprot->readFieldEnd();
+ }
+
+ xfer += iprot->readStructEnd();
+
+ if (!isset_name)
+ throw TProtocolException(TProtocolException::INVALID_DATA);
+ return xfer;
+}
+
+uint32_t SchemaElement::write(::apache::thrift::protocol::TProtocol* oprot) const {
+ uint32_t xfer = 0;
+ ::apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot);
+ xfer += oprot->writeStructBegin("SchemaElement");
+
+ if (this->__isset.type) {
+ xfer += oprot->writeFieldBegin("type", ::apache::thrift::protocol::T_I32, 1);
+ xfer += oprot->writeI32((int32_t)this->type);
+ xfer += oprot->writeFieldEnd();
+ }
+ if (this->__isset.type_length) {
+ xfer += oprot->writeFieldBegin("type_length", ::apache::thrift::protocol::T_I32, 2);
+ xfer += oprot->writeI32(this->type_length);
+ xfer += oprot->writeFieldEnd();
+ }
+ if (this->__isset.repetition_type) {
+ xfer += oprot->writeFieldBegin("repetition_type", ::apache::thrift::protocol::T_I32, 3);
+ xfer += oprot->writeI32((int32_t)this->repetition_type);
+ xfer += oprot->writeFieldEnd();
+ }
+ xfer += oprot->writeFieldBegin("name", ::apache::thrift::protocol::T_STRING, 4);
+ xfer += oprot->writeString(this->name);
+ xfer += oprot->writeFieldEnd();
+
+ if (this->__isset.num_children) {
+ xfer += oprot->writeFieldBegin("num_children", ::apache::thrift::protocol::T_I32, 5);
+ xfer += oprot->writeI32(this->num_children);
+ xfer += oprot->writeFieldEnd();
+ }
+ if (this->__isset.converted_type) {
+ xfer += oprot->writeFieldBegin("converted_type", ::apache::thrift::protocol::T_I32, 6);
+ xfer += oprot->writeI32((int32_t)this->converted_type);
+ xfer += oprot->writeFieldEnd();
+ }
+ if (this->__isset.scale) {
+ xfer += oprot->writeFieldBegin("scale", ::apache::thrift::protocol::T_I32, 7);
+ xfer += oprot->writeI32(this->scale);
+ xfer += oprot->writeFieldEnd();
+ }
+ if (this->__isset.precision) {
+ xfer += oprot->writeFieldBegin("precision", ::apache::thrift::protocol::T_I32, 8);
+ xfer += oprot->writeI32(this->precision);
+ xfer += oprot->writeFieldEnd();
+ }
+ if (this->__isset.field_id) {
+ xfer += oprot->writeFieldBegin("field_id", ::apache::thrift::protocol::T_I32, 9);
+ xfer += oprot->writeI32(this->field_id);
+ xfer += oprot->writeFieldEnd();
+ }
+ if (this->__isset.logicalType) {
+ xfer += oprot->writeFieldBegin("logicalType", ::apache::thrift::protocol::T_STRUCT, 10);
+ xfer += this->logicalType.write(oprot);
+ xfer += oprot->writeFieldEnd();
+ }
+ xfer += oprot->writeFieldStop();
+ xfer += oprot->writeStructEnd();
+ return xfer;
+}
+
+void swap(SchemaElement &a, SchemaElement &b) {
+ using ::std::swap;
+ swap(a.type, b.type);
+ swap(a.type_length, b.type_length);
+ swap(a.repetition_type, b.repetition_type);
+ swap(a.name, b.name);
+ swap(a.num_children, b.num_children);
+ swap(a.converted_type, b.converted_type);
+ swap(a.scale, b.scale);
+ swap(a.precision, b.precision);
+ swap(a.field_id, b.field_id);
+ swap(a.logicalType, b.logicalType);
+ swap(a.__isset, b.__isset);
+}
+
+SchemaElement::SchemaElement(const SchemaElement& other41) {
+ type = other41.type;
+ type_length = other41.type_length;
+ repetition_type = other41.repetition_type;
+ name = other41.name;
+ num_children = other41.num_children;
+ converted_type = other41.converted_type;
+ scale = other41.scale;
+ precision = other41.precision;
+ field_id = other41.field_id;
+ logicalType = other41.logicalType;
+ __isset = other41.__isset;
+}
+SchemaElement& SchemaElement::operator=(const SchemaElement& other42) {
+ type = other42.type;
+ type_length = other42.type_length;
+ repetition_type = other42.repetition_type;
+ name = other42.name;
+ num_children = other42.num_children;
+ converted_type = other42.converted_type;
+ scale = other42.scale;
+ precision = other42.precision;
+ field_id = other42.field_id;
+ logicalType = other42.logicalType;
+ __isset = other42.__isset;
+ return *this;
+}
+void SchemaElement::printTo(std::ostream& out) const {
+ using ::apache::thrift::to_string;
+ out << "SchemaElement(";
+ out << "type="; (__isset.type ? (out << to_string(type)) : (out << "<null>"));
+ out << ", " << "type_length="; (__isset.type_length ? (out << to_string(type_length)) : (out << "<null>"));
+ out << ", " << "repetition_type="; (__isset.repetition_type ? (out << to_string(repetition_type)) : (out << "<null>"));
+ out << ", " << "name=" << to_string(name);
+ out << ", " << "num_children="; (__isset.num_children ? (out << to_string(num_children)) : (out << "<null>"));
+ out << ", " << "converted_type="; (__isset.converted_type ? (out << to_string(converted_type)) : (out << "<null>"));
+ out << ", " << "scale="; (__isset.scale ? (out << to_string(scale)) : (out << "<null>"));
+ out << ", " << "precision="; (__isset.precision ? (out << to_string(precision)) : (out << "<null>"));
+ out << ", " << "field_id="; (__isset.field_id ? (out << to_string(field_id)) : (out << "<null>"));
+ out << ", " << "logicalType="; (__isset.logicalType ? (out << to_string(logicalType)) : (out << "<null>"));
+ out << ")";
+}
+
+
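+// DataPageHeader (V1 pages): num_values, the value encoding and both level
+// encodings are required; statistics are optional.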
+DataPageHeader::~DataPageHeader() noexcept {
+}
+
+
+void DataPageHeader::__set_num_values(const int32_t val) {
+ this->num_values = val;
+}
+
+void DataPageHeader::__set_encoding(const Encoding::type val) {
+ this->encoding = val;
+}
+
+void DataPageHeader::__set_definition_level_encoding(const Encoding::type val) {
+ this->definition_level_encoding = val;
+}
+
+void DataPageHeader::__set_repetition_level_encoding(const Encoding::type val) {
+ this->repetition_level_encoding = val;
+}
+
+void DataPageHeader::__set_statistics(const Statistics& val) {
+ this->statistics = val;
+  __isset.statistics = true;
+}
+std::ostream& operator<<(std::ostream& out, const DataPageHeader& obj)
+{
+ obj.printTo(out);
+ return out;
+}
+
+
+uint32_t DataPageHeader::read(::apache::thrift::protocol::TProtocol* iprot) {
+
+ ::apache::thrift::protocol::TInputRecursionTracker tracker(*iprot);
+ uint32_t xfer = 0;
+ std::string fname;
+ ::apache::thrift::protocol::TType ftype;
+ int16_t fid;
+
+ xfer += iprot->readStructBegin(fname);
+
+ using ::apache::thrift::protocol::TProtocolException;
+
+ bool isset_num_values = false;
+ bool isset_encoding = false;
+ bool isset_definition_level_encoding = false;
+ bool isset_repetition_level_encoding = false;
+
+ while (true)
+ {
+ xfer += iprot->readFieldBegin(fname, ftype, fid);
+ if (ftype == ::apache::thrift::protocol::T_STOP) {
+ break;
+ }
+ switch (fid)
+ {
+ case 1:
+ if (ftype == ::apache::thrift::protocol::T_I32) {
+ xfer += iprot->readI32(this->num_values);
+ isset_num_values = true;
+ } else {
+ xfer += iprot->skip(ftype);
+ }
+ break;
+ case 2:
+ if (ftype == ::apache::thrift::protocol::T_I32) {
+ int32_t ecast43;
+ xfer += iprot->readI32(ecast43);
+ this->encoding = (Encoding::type)ecast43;
+ isset_encoding = true;
+ } else {
+ xfer += iprot->skip(ftype);
+ }
+ break;
+ case 3:
+ if (ftype == ::apache::thrift::protocol::T_I32) {
+ int32_t ecast44;
+ xfer += iprot->readI32(ecast44);
+ this->definition_level_encoding = (Encoding::type)ecast44;
+ isset_definition_level_encoding = true;
+ } else {
+ xfer += iprot->skip(ftype);
+ }
+ break;
+ case 4:
+ if (ftype == ::apache::thrift::protocol::T_I32) {
+ int32_t ecast45;
+ xfer += iprot->readI32(ecast45);
+ this->repetition_level_encoding = (Encoding::type)ecast45;
+ isset_repetition_level_encoding = true;
+ } else {
+ xfer += iprot->skip(ftype);
+ }
+ break;
+ case 5:
+ if (ftype == ::apache::thrift::protocol::T_STRUCT) {
+ xfer += this->statistics.read(iprot);
+ this->__isset.statistics = true;
+ } else {
+ xfer += iprot->skip(ftype);
+ }
+ break;
+ default:
+ xfer += iprot->skip(ftype);
+ break;
+ }
+ xfer += iprot->readFieldEnd();
+ }
+
+ xfer += iprot->readStructEnd();
+
+ if (!isset_num_values)
+ throw TProtocolException(TProtocolException::INVALID_DATA);
+ if (!isset_encoding)
+ throw TProtocolException(TProtocolException::INVALID_DATA);
+ if (!isset_definition_level_encoding)
+ throw TProtocolException(TProtocolException::INVALID_DATA);
+ if (!isset_repetition_level_encoding)
+ throw TProtocolException(TProtocolException::INVALID_DATA);
+ return xfer;
+}
+
+uint32_t DataPageHeader::write(::apache::thrift::protocol::TProtocol* oprot) const {
+ uint32_t xfer = 0;
+ ::apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot);
+ xfer += oprot->writeStructBegin("DataPageHeader");
+
+ xfer += oprot->writeFieldBegin("num_values", ::apache::thrift::protocol::T_I32, 1);
+ xfer += oprot->writeI32(this->num_values);
+ xfer += oprot->writeFieldEnd();
+
+ xfer += oprot->writeFieldBegin("encoding", ::apache::thrift::protocol::T_I32, 2);
+ xfer += oprot->writeI32((int32_t)this->encoding);
+ xfer += oprot->writeFieldEnd();
+
+ xfer += oprot->writeFieldBegin("definition_level_encoding", ::apache::thrift::protocol::T_I32, 3);
+ xfer += oprot->writeI32((int32_t)this->definition_level_encoding);
+ xfer += oprot->writeFieldEnd();
+
+ xfer += oprot->writeFieldBegin("repetition_level_encoding", ::apache::thrift::protocol::T_I32, 4);
+ xfer += oprot->writeI32((int32_t)this->repetition_level_encoding);
+ xfer += oprot->writeFieldEnd();
+
+ if (this->__isset.statistics) {
+ xfer += oprot->writeFieldBegin("statistics", ::apache::thrift::protocol::T_STRUCT, 5);
+ xfer += this->statistics.write(oprot);
+ xfer += oprot->writeFieldEnd();
+ }
+ xfer += oprot->writeFieldStop();
+ xfer += oprot->writeStructEnd();
+ return xfer;
+}
+
+void swap(DataPageHeader &a, DataPageHeader &b) {
+ using ::std::swap;
+ swap(a.num_values, b.num_values);
+ swap(a.encoding, b.encoding);
+ swap(a.definition_level_encoding, b.definition_level_encoding);
+ swap(a.repetition_level_encoding, b.repetition_level_encoding);
+ swap(a.statistics, b.statistics);
+ swap(a.__isset, b.__isset);
+}
+
+DataPageHeader::DataPageHeader(const DataPageHeader& other46) {
+ num_values = other46.num_values;
+ encoding = other46.encoding;
+ definition_level_encoding = other46.definition_level_encoding;
+ repetition_level_encoding = other46.repetition_level_encoding;
+ statistics = other46.statistics;
+ __isset = other46.__isset;
+}
+DataPageHeader& DataPageHeader::operator=(const DataPageHeader& other47) {
+ num_values = other47.num_values;
+ encoding = other47.encoding;
+ definition_level_encoding = other47.definition_level_encoding;
+ repetition_level_encoding = other47.repetition_level_encoding;
+ statistics = other47.statistics;
+ __isset = other47.__isset;
+ return *this;
+}
+void DataPageHeader::printTo(std::ostream& out) const {
+ using ::apache::thrift::to_string;
+ out << "DataPageHeader(";
+ out << "num_values=" << to_string(num_values);
+ out << ", " << "encoding=" << to_string(encoding);
+ out << ", " << "definition_level_encoding=" << to_string(definition_level_encoding);
+ out << ", " << "repetition_level_encoding=" << to_string(repetition_level_encoding);
+ out << ", " << "statistics="; (__isset.statistics ? (out << to_string(statistics)) : (out << "<null>"));
+ out << ")";
+}
+
+
+IndexPageHeader::~IndexPageHeader() noexcept {
+}
+
+std::ostream& operator<<(std::ostream& out, const IndexPageHeader& obj)
+{
+ obj.printTo(out);
+ return out;
+}
+
+
+uint32_t IndexPageHeader::read(::apache::thrift::protocol::TProtocol* iprot) {
+
+ ::apache::thrift::protocol::TInputRecursionTracker tracker(*iprot);
+ uint32_t xfer = 0;
+ std::string fname;
+ ::apache::thrift::protocol::TType ftype;
+ int16_t fid;
+
+ xfer += iprot->readStructBegin(fname);
+
+ using ::apache::thrift::protocol::TProtocolException;
+
+
+ while (true)
+ {
+ xfer += iprot->readFieldBegin(fname, ftype, fid);
+ if (ftype == ::apache::thrift::protocol::T_STOP) {
+ break;
+ }
+ xfer += iprot->skip(ftype);
+ xfer += iprot->readFieldEnd();
+ }
+
+ xfer += iprot->readStructEnd();
+
+ return xfer;
+}
+
+uint32_t IndexPageHeader::write(::apache::thrift::protocol::TProtocol* oprot) const {
+ uint32_t xfer = 0;
+ ::apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot);
+ xfer += oprot->writeStructBegin("IndexPageHeader");
+
+ xfer += oprot->writeFieldStop();
+ xfer += oprot->writeStructEnd();
+ return xfer;
+}
+
+void swap(IndexPageHeader &a, IndexPageHeader &b) {
+ using ::std::swap;
+ (void) a;
+ (void) b;
+}
+
+IndexPageHeader::IndexPageHeader(const IndexPageHeader& other48) {
+ (void) other48;
+}
+IndexPageHeader& IndexPageHeader::operator=(const IndexPageHeader& other49) {
+ (void) other49;
+ return *this;
+}
+void IndexPageHeader::printTo(std::ostream& out) const {
+ using ::apache::thrift::to_string;
+ out << "IndexPageHeader(";
+ out << ")";
+}
+
+
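+// DictionaryPageHeader: num_values and encoding are required; is_sorted is
+// optional and only serialized when explicitly set.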
+DictionaryPageHeader::~DictionaryPageHeader() noexcept {
+}
+
+
+void DictionaryPageHeader::__set_num_values(const int32_t val) {
+ this->num_values = val;
+}
+
+void DictionaryPageHeader::__set_encoding(const Encoding::type val) {
+ this->encoding = val;
+}
+
+void DictionaryPageHeader::__set_is_sorted(const bool val) {
+ this->is_sorted = val;
+  __isset.is_sorted = true;
+}
+std::ostream& operator<<(std::ostream& out, const DictionaryPageHeader& obj)
+{
+ obj.printTo(out);
+ return out;
+}
+
+
+uint32_t DictionaryPageHeader::read(::apache::thrift::protocol::TProtocol* iprot) {
+
+ ::apache::thrift::protocol::TInputRecursionTracker tracker(*iprot);
+ uint32_t xfer = 0;
+ std::string fname;
+ ::apache::thrift::protocol::TType ftype;
+ int16_t fid;
+
+ xfer += iprot->readStructBegin(fname);
+
+ using ::apache::thrift::protocol::TProtocolException;
+
+ bool isset_num_values = false;
+ bool isset_encoding = false;
+
+ while (true)
+ {
+ xfer += iprot->readFieldBegin(fname, ftype, fid);
+ if (ftype == ::apache::thrift::protocol::T_STOP) {
+ break;
+ }
+ switch (fid)
+ {
+ case 1:
+ if (ftype == ::apache::thrift::protocol::T_I32) {
+ xfer += iprot->readI32(this->num_values);
+ isset_num_values = true;
+ } else {
+ xfer += iprot->skip(ftype);
+ }
+ break;
+ case 2:
+ if (ftype == ::apache::thrift::protocol::T_I32) {
+ int32_t ecast50;
+ xfer += iprot->readI32(ecast50);
+ this->encoding = (Encoding::type)ecast50;
+ isset_encoding = true;
+ } else {
+ xfer += iprot->skip(ftype);
+ }
+ break;
+ case 3:
+ if (ftype == ::apache::thrift::protocol::T_BOOL) {
+ xfer += iprot->readBool(this->is_sorted);
+ this->__isset.is_sorted = true;
+ } else {
+ xfer += iprot->skip(ftype);
+ }
+ break;
+ default:
+ xfer += iprot->skip(ftype);
+ break;
+ }
+ xfer += iprot->readFieldEnd();
+ }
+
+ xfer += iprot->readStructEnd();
+
+ if (!isset_num_values)
+ throw TProtocolException(TProtocolException::INVALID_DATA);
+ if (!isset_encoding)
+ throw TProtocolException(TProtocolException::INVALID_DATA);
+ return xfer;
+}
+
+uint32_t DictionaryPageHeader::write(::apache::thrift::protocol::TProtocol* oprot) const {
+ uint32_t xfer = 0;
+ ::apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot);
+ xfer += oprot->writeStructBegin("DictionaryPageHeader");
+
+ xfer += oprot->writeFieldBegin("num_values", ::apache::thrift::protocol::T_I32, 1);
+ xfer += oprot->writeI32(this->num_values);
+ xfer += oprot->writeFieldEnd();
+
+ xfer += oprot->writeFieldBegin("encoding", ::apache::thrift::protocol::T_I32, 2);
+ xfer += oprot->writeI32((int32_t)this->encoding);
+ xfer += oprot->writeFieldEnd();
+
+ if (this->__isset.is_sorted) {
+ xfer += oprot->writeFieldBegin("is_sorted", ::apache::thrift::protocol::T_BOOL, 3);
+ xfer += oprot->writeBool(this->is_sorted);
+ xfer += oprot->writeFieldEnd();
+ }
+ xfer += oprot->writeFieldStop();
+ xfer += oprot->writeStructEnd();
+ return xfer;
+}
+
+void swap(DictionaryPageHeader &a, DictionaryPageHeader &b) {
+ using ::std::swap;
+ swap(a.num_values, b.num_values);
+ swap(a.encoding, b.encoding);
+ swap(a.is_sorted, b.is_sorted);
+ swap(a.__isset, b.__isset);
+}
+
+DictionaryPageHeader::DictionaryPageHeader(const DictionaryPageHeader& other51) {
+ num_values = other51.num_values;
+ encoding = other51.encoding;
+ is_sorted = other51.is_sorted;
+ __isset = other51.__isset;
+}
+DictionaryPageHeader& DictionaryPageHeader::operator=(const DictionaryPageHeader& other52) {
+ num_values = other52.num_values;
+ encoding = other52.encoding;
+ is_sorted = other52.is_sorted;
+ __isset = other52.__isset;
+ return *this;
+}
+void DictionaryPageHeader::printTo(std::ostream& out) const {
+ using ::apache::thrift::to_string;
+ out << "DictionaryPageHeader(";
+ out << "num_values=" << to_string(num_values);
+ out << ", " << "encoding=" << to_string(encoding);
+ out << ", " << "is_sorted="; (__isset.is_sorted ? (out << to_string(is_sorted)) : (out << "<null>"));
+ out << ")";
+}
+
+
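+// DataPageHeaderV2 extends the V1 header with null/row counts and the byte
+// lengths of the definition/repetition level sections (levels are stored
+// uncompressed in V2 pages); is_compressed and statistics stay optional.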
+DataPageHeaderV2::~DataPageHeaderV2() noexcept {
+}
+
+
+void DataPageHeaderV2::__set_num_values(const int32_t val) {
+ this->num_values = val;
+}
+
+void DataPageHeaderV2::__set_num_nulls(const int32_t val) {
+ this->num_nulls = val;
+}
+
+void DataPageHeaderV2::__set_num_rows(const int32_t val) {
+ this->num_rows = val;
+}
+
+void DataPageHeaderV2::__set_encoding(const Encoding::type val) {
+ this->encoding = val;
+}
+
+void DataPageHeaderV2::__set_definition_levels_byte_length(const int32_t val) {
+ this->definition_levels_byte_length = val;
+}
+
+void DataPageHeaderV2::__set_repetition_levels_byte_length(const int32_t val) {
+ this->repetition_levels_byte_length = val;
+}
+
+void DataPageHeaderV2::__set_is_compressed(const bool val) {
+ this->is_compressed = val;
+ __isset.is_compressed = true;
+}
+
+void DataPageHeaderV2::__set_statistics(const Statistics& val) {
+ this->statistics = val;
+ __isset.statistics = true;
+}
+std::ostream& operator<<(std::ostream& out, const DataPageHeaderV2& obj)
+{
+ obj.printTo(out);
+ return out;
+}
+
+
+uint32_t DataPageHeaderV2::read(::apache::thrift::protocol::TProtocol* iprot) {
+
+ ::apache::thrift::protocol::TInputRecursionTracker tracker(*iprot);
+ uint32_t xfer = 0;
+ std::string fname;
+ ::apache::thrift::protocol::TType ftype;
+ int16_t fid;
+
+ xfer += iprot->readStructBegin(fname);
+
+ using ::apache::thrift::protocol::TProtocolException;
+
+ bool isset_num_values = false;
+ bool isset_num_nulls = false;
+ bool isset_num_rows = false;
+ bool isset_encoding = false;
+ bool isset_definition_levels_byte_length = false;
+ bool isset_repetition_levels_byte_length = false;
+
+ while (true)
+ {
+ xfer += iprot->readFieldBegin(fname, ftype, fid);
+ if (ftype == ::apache::thrift::protocol::T_STOP) {
+ break;
+ }
+ switch (fid)
+ {
+ case 1:
+ if (ftype == ::apache::thrift::protocol::T_I32) {
+ xfer += iprot->readI32(this->num_values);
+ isset_num_values = true;
+ } else {
+ xfer += iprot->skip(ftype);
+ }
+ break;
+ case 2:
+ if (ftype == ::apache::thrift::protocol::T_I32) {
+ xfer += iprot->readI32(this->num_nulls);
+ isset_num_nulls = true;
+ } else {
+ xfer += iprot->skip(ftype);
+ }
+ break;
+ case 3:
+ if (ftype == ::apache::thrift::protocol::T_I32) {
+ xfer += iprot->readI32(this->num_rows);
+ isset_num_rows = true;
+ } else {
+ xfer += iprot->skip(ftype);
+ }
+ break;
+ case 4:
+ if (ftype == ::apache::thrift::protocol::T_I32) {
+ int32_t ecast53;
+ xfer += iprot->readI32(ecast53);
+ this->encoding = (Encoding::type)ecast53;
+ isset_encoding = true;
+ } else {
+ xfer += iprot->skip(ftype);
+ }
+ break;
+ case 5:
+ if (ftype == ::apache::thrift::protocol::T_I32) {
+ xfer += iprot->readI32(this->definition_levels_byte_length);
+ isset_definition_levels_byte_length = true;
+ } else {
+ xfer += iprot->skip(ftype);
+ }
+ break;
+ case 6:
+ if (ftype == ::apache::thrift::protocol::T_I32) {
+ xfer += iprot->readI32(this->repetition_levels_byte_length);
+ isset_repetition_levels_byte_length = true;
+ } else {
+ xfer += iprot->skip(ftype);
+ }
+ break;
+ case 7:
+ if (ftype == ::apache::thrift::protocol::T_BOOL) {
+ xfer += iprot->readBool(this->is_compressed);
+ this->__isset.is_compressed = true;
+ } else {
+ xfer += iprot->skip(ftype);
+ }
+ break;
+ case 8:
+ if (ftype == ::apache::thrift::protocol::T_STRUCT) {
+ xfer += this->statistics.read(iprot);
+ this->__isset.statistics = true;
+ } else {
+ xfer += iprot->skip(ftype);
+ }
+ break;
+ default:
+ xfer += iprot->skip(ftype);
+ break;
+ }
+ xfer += iprot->readFieldEnd();
+ }
+
+ xfer += iprot->readStructEnd();
+
+ if (!isset_num_values)
+ throw TProtocolException(TProtocolException::INVALID_DATA);
+ if (!isset_num_nulls)
+ throw TProtocolException(TProtocolException::INVALID_DATA);
+ if (!isset_num_rows)
+ throw TProtocolException(TProtocolException::INVALID_DATA);
+ if (!isset_encoding)
+ throw TProtocolException(TProtocolException::INVALID_DATA);
+ if (!isset_definition_levels_byte_length)
+ throw TProtocolException(TProtocolException::INVALID_DATA);
+ if (!isset_repetition_levels_byte_length)
+ throw TProtocolException(TProtocolException::INVALID_DATA);
+ return xfer;
+}
+
+uint32_t DataPageHeaderV2::write(::apache::thrift::protocol::TProtocol* oprot) const {
+ uint32_t xfer = 0;
+ ::apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot);
+ xfer += oprot->writeStructBegin("DataPageHeaderV2");
+
+ xfer += oprot->writeFieldBegin("num_values", ::apache::thrift::protocol::T_I32, 1);
+ xfer += oprot->writeI32(this->num_values);
+ xfer += oprot->writeFieldEnd();
+
+ xfer += oprot->writeFieldBegin("num_nulls", ::apache::thrift::protocol::T_I32, 2);
+ xfer += oprot->writeI32(this->num_nulls);
+ xfer += oprot->writeFieldEnd();
+
+ xfer += oprot->writeFieldBegin("num_rows", ::apache::thrift::protocol::T_I32, 3);
+ xfer += oprot->writeI32(this->num_rows);
+ xfer += oprot->writeFieldEnd();
+
+ xfer += oprot->writeFieldBegin("encoding", ::apache::thrift::protocol::T_I32, 4);
+ xfer += oprot->writeI32((int32_t)this->encoding);
+ xfer += oprot->writeFieldEnd();
+
+ xfer += oprot->writeFieldBegin("definition_levels_byte_length", ::apache::thrift::protocol::T_I32, 5);
+ xfer += oprot->writeI32(this->definition_levels_byte_length);
+ xfer += oprot->writeFieldEnd();
+
+ xfer += oprot->writeFieldBegin("repetition_levels_byte_length", ::apache::thrift::protocol::T_I32, 6);
+ xfer += oprot->writeI32(this->repetition_levels_byte_length);
+ xfer += oprot->writeFieldEnd();
+
+ if (this->__isset.is_compressed) {
+ xfer += oprot->writeFieldBegin("is_compressed", ::apache::thrift::protocol::T_BOOL, 7);
+ xfer += oprot->writeBool(this->is_compressed);
+ xfer += oprot->writeFieldEnd();
+ }
+ if (this->__isset.statistics) {
+ xfer += oprot->writeFieldBegin("statistics", ::apache::thrift::protocol::T_STRUCT, 8);
+ xfer += this->statistics.write(oprot);
+ xfer += oprot->writeFieldEnd();
+ }
+ xfer += oprot->writeFieldStop();
+ xfer += oprot->writeStructEnd();
+ return xfer;
+}
+
+void swap(DataPageHeaderV2 &a, DataPageHeaderV2 &b) {
+ using ::std::swap;
+ swap(a.num_values, b.num_values);
+ swap(a.num_nulls, b.num_nulls);
+ swap(a.num_rows, b.num_rows);
+ swap(a.encoding, b.encoding);
+ swap(a.definition_levels_byte_length, b.definition_levels_byte_length);
+ swap(a.repetition_levels_byte_length, b.repetition_levels_byte_length);
+ swap(a.is_compressed, b.is_compressed);
+ swap(a.statistics, b.statistics);
+ swap(a.__isset, b.__isset);
+}
+
+DataPageHeaderV2::DataPageHeaderV2(const DataPageHeaderV2& other54) {
+ num_values = other54.num_values;
+ num_nulls = other54.num_nulls;
+ num_rows = other54.num_rows;
+ encoding = other54.encoding;
+ definition_levels_byte_length = other54.definition_levels_byte_length;
+ repetition_levels_byte_length = other54.repetition_levels_byte_length;
+ is_compressed = other54.is_compressed;
+ statistics = other54.statistics;
+ __isset = other54.__isset;
+}
+DataPageHeaderV2& DataPageHeaderV2::operator=(const DataPageHeaderV2& other55) {
+ num_values = other55.num_values;
+ num_nulls = other55.num_nulls;
+ num_rows = other55.num_rows;
+ encoding = other55.encoding;
+ definition_levels_byte_length = other55.definition_levels_byte_length;
+ repetition_levels_byte_length = other55.repetition_levels_byte_length;
+ is_compressed = other55.is_compressed;
+ statistics = other55.statistics;
+ __isset = other55.__isset;
+ return *this;
+}
+void DataPageHeaderV2::printTo(std::ostream& out) const {
+ using ::apache::thrift::to_string;
+ out << "DataPageHeaderV2(";
+ out << "num_values=" << to_string(num_values);
+ out << ", " << "num_nulls=" << to_string(num_nulls);
+ out << ", " << "num_rows=" << to_string(num_rows);
+ out << ", " << "encoding=" << to_string(encoding);
+ out << ", " << "definition_levels_byte_length=" << to_string(definition_levels_byte_length);
+ out << ", " << "repetition_levels_byte_length=" << to_string(repetition_levels_byte_length);
+ out << ", " << "is_compressed="; (__isset.is_compressed ? (out << to_string(is_compressed)) : (out << "<null>"));
+ out << ", " << "statistics="; (__isset.statistics ? (out << to_string(statistics)) : (out << "<null>"));
+ out << ")";
+}
+
+
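+// SplitBlockAlgorithm carries no fields: read() skips everything until T_STOP
+// and write() emits an empty struct.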
+SplitBlockAlgorithm::~SplitBlockAlgorithm() noexcept {
+}
+
+std::ostream& operator<<(std::ostream& out, const SplitBlockAlgorithm& obj)
+{
+ obj.printTo(out);
+ return out;
+}
+
+
+uint32_t SplitBlockAlgorithm::read(::apache::thrift::protocol::TProtocol* iprot) {
+
+ ::apache::thrift::protocol::TInputRecursionTracker tracker(*iprot);
+ uint32_t xfer = 0;
+ std::string fname;
+ ::apache::thrift::protocol::TType ftype;
+ int16_t fid;
+
+ xfer += iprot->readStructBegin(fname);
+
+ using ::apache::thrift::protocol::TProtocolException;
+
+
+ while (true)
+ {
+ xfer += iprot->readFieldBegin(fname, ftype, fid);
+ if (ftype == ::apache::thrift::protocol::T_STOP) {
+ break;
+ }
+ xfer += iprot->skip(ftype);
+ xfer += iprot->readFieldEnd();
+ }
+
+ xfer += iprot->readStructEnd();
+
+ return xfer;
+}
+
+uint32_t SplitBlockAlgorithm::write(::apache::thrift::protocol::TProtocol* oprot) const {
+ uint32_t xfer = 0;
+ ::apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot);
+ xfer += oprot->writeStructBegin("SplitBlockAlgorithm");
+
+ xfer += oprot->writeFieldStop();
+ xfer += oprot->writeStructEnd();
+ return xfer;
+}
+
+void swap(SplitBlockAlgorithm &a, SplitBlockAlgorithm &b) {
+ using ::std::swap;
+ (void) a;
+ (void) b;
+}
+
+SplitBlockAlgorithm::SplitBlockAlgorithm(const SplitBlockAlgorithm& other56) {
+ (void) other56;
+}
+SplitBlockAlgorithm& SplitBlockAlgorithm::operator=(const SplitBlockAlgorithm& other57) {
+ (void) other57;
+ return *this;
+}
+void SplitBlockAlgorithm::printTo(std::ostream& out) const {
+ using ::apache::thrift::to_string;
+ out << "SplitBlockAlgorithm(";
+ out << ")";
+}
+
+
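+// BloomFilterAlgorithm behaves as a tagged union with a single variant,
+// BLOCK (1); __isset.BLOCK records whether the variant was present.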
+BloomFilterAlgorithm::~BloomFilterAlgorithm() noexcept {
+}
+
+
+void BloomFilterAlgorithm::__set_BLOCK(const SplitBlockAlgorithm& val) {
+ this->BLOCK = val;
+ __isset.BLOCK = true;
+}
+std::ostream& operator<<(std::ostream& out, const BloomFilterAlgorithm& obj)
+{
+ obj.printTo(out);
+ return out;
+}
+
+
+uint32_t BloomFilterAlgorithm::read(::apache::thrift::protocol::TProtocol* iprot) {
+
+ ::apache::thrift::protocol::TInputRecursionTracker tracker(*iprot);
+ uint32_t xfer = 0;
+ std::string fname;
+ ::apache::thrift::protocol::TType ftype;
+ int16_t fid;
+
+ xfer += iprot->readStructBegin(fname);
+
+ using ::apache::thrift::protocol::TProtocolException;
+
+
+ while (true)
+ {
+ xfer += iprot->readFieldBegin(fname, ftype, fid);
+ if (ftype == ::apache::thrift::protocol::T_STOP) {
+ break;
+ }
+ switch (fid)
+ {
+ case 1:
+ if (ftype == ::apache::thrift::protocol::T_STRUCT) {
+ xfer += this->BLOCK.read(iprot);
+ this->__isset.BLOCK = true;
+ } else {
+ xfer += iprot->skip(ftype);
+ }
+ break;
+ default:
+ xfer += iprot->skip(ftype);
+ break;
+ }
+ xfer += iprot->readFieldEnd();
+ }
+
+ xfer += iprot->readStructEnd();
+
+ return xfer;
+}
+
+uint32_t BloomFilterAlgorithm::write(::apache::thrift::protocol::TProtocol* oprot) const {
+ uint32_t xfer = 0;
+ ::apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot);
+ xfer += oprot->writeStructBegin("BloomFilterAlgorithm");
+
+ if (this->__isset.BLOCK) {
+ xfer += oprot->writeFieldBegin("BLOCK", ::apache::thrift::protocol::T_STRUCT, 1);
+ xfer += this->BLOCK.write(oprot);
+ xfer += oprot->writeFieldEnd();
+ }
+ xfer += oprot->writeFieldStop();
+ xfer += oprot->writeStructEnd();
+ return xfer;
+}
+
+void swap(BloomFilterAlgorithm &a, BloomFilterAlgorithm &b) {
+ using ::std::swap;
+ swap(a.BLOCK, b.BLOCK);
+ swap(a.__isset, b.__isset);
+}
+
+BloomFilterAlgorithm::BloomFilterAlgorithm(const BloomFilterAlgorithm& other58) {
+ BLOCK = other58.BLOCK;
+ __isset = other58.__isset;
+}
+BloomFilterAlgorithm& BloomFilterAlgorithm::operator=(const BloomFilterAlgorithm& other59) {
+ BLOCK = other59.BLOCK;
+ __isset = other59.__isset;
+ return *this;
+}
+void BloomFilterAlgorithm::printTo(std::ostream& out) const {
+ using ::apache::thrift::to_string;
+ out << "BloomFilterAlgorithm(";
+ out << "BLOCK="; (__isset.BLOCK ? (out << to_string(BLOCK)) : (out << "<null>"));
+ out << ")";
+}
+
+
+XxHash::~XxHash() noexcept {
+}
+
+std::ostream& operator<<(std::ostream& out, const XxHash& obj)
+{
+ obj.printTo(out);
+ return out;
+}
+
+
+uint32_t XxHash::read(::apache::thrift::protocol::TProtocol* iprot) {
+
+ ::apache::thrift::protocol::TInputRecursionTracker tracker(*iprot);
+ uint32_t xfer = 0;
+ std::string fname;
+ ::apache::thrift::protocol::TType ftype;
+ int16_t fid;
+
+ xfer += iprot->readStructBegin(fname);
+
+ using ::apache::thrift::protocol::TProtocolException;
+
+
+ while (true)
+ {
+ xfer += iprot->readFieldBegin(fname, ftype, fid);
+ if (ftype == ::apache::thrift::protocol::T_STOP) {
+ break;
+ }
+ xfer += iprot->skip(ftype);
+ xfer += iprot->readFieldEnd();
+ }
+
+ xfer += iprot->readStructEnd();
+
+ return xfer;
+}
+
+uint32_t XxHash::write(::apache::thrift::protocol::TProtocol* oprot) const {
+ uint32_t xfer = 0;
+ ::apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot);
+ xfer += oprot->writeStructBegin("XxHash");
+
+ xfer += oprot->writeFieldStop();
+ xfer += oprot->writeStructEnd();
+ return xfer;
+}
+
+void swap(XxHash &a, XxHash &b) {
+ using ::std::swap;
+ (void) a;
+ (void) b;
+}
+
+XxHash::XxHash(const XxHash& other60) {
+ (void) other60;
+}
+XxHash& XxHash::operator=(const XxHash& other61) {
+ (void) other61;
+ return *this;
+}
+void XxHash::printTo(std::ostream& out) const {
+ using ::apache::thrift::to_string;
+ out << "XxHash(";
+ out << ")";
+}
+
+
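+// BloomFilterHash: same single-variant union shape, with XXHASH (1).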
+BloomFilterHash::~BloomFilterHash() noexcept {
+}
+
+
+void BloomFilterHash::__set_XXHASH(const XxHash& val) {
+ this->XXHASH = val;
+ __isset.XXHASH = true;
+}
+std::ostream& operator<<(std::ostream& out, const BloomFilterHash& obj)
+{
+ obj.printTo(out);
+ return out;
+}
+
+
+uint32_t BloomFilterHash::read(::apache::thrift::protocol::TProtocol* iprot) {
+
+ ::apache::thrift::protocol::TInputRecursionTracker tracker(*iprot);
+ uint32_t xfer = 0;
+ std::string fname;
+ ::apache::thrift::protocol::TType ftype;
+ int16_t fid;
+
+ xfer += iprot->readStructBegin(fname);
+
+ using ::apache::thrift::protocol::TProtocolException;
+
+
+ while (true)
+ {
+ xfer += iprot->readFieldBegin(fname, ftype, fid);
+ if (ftype == ::apache::thrift::protocol::T_STOP) {
+ break;
+ }
+ switch (fid)
+ {
+ case 1:
+ if (ftype == ::apache::thrift::protocol::T_STRUCT) {
+ xfer += this->XXHASH.read(iprot);
+ this->__isset.XXHASH = true;
+ } else {
+ xfer += iprot->skip(ftype);
+ }
+ break;
+ default:
+ xfer += iprot->skip(ftype);
+ break;
+ }
+ xfer += iprot->readFieldEnd();
+ }
+
+ xfer += iprot->readStructEnd();
+
+ return xfer;
+}
+
+uint32_t BloomFilterHash::write(::apache::thrift::protocol::TProtocol* oprot) const {
+ uint32_t xfer = 0;
+ ::apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot);
+ xfer += oprot->writeStructBegin("BloomFilterHash");
+
+ if (this->__isset.XXHASH) {
+ xfer += oprot->writeFieldBegin("XXHASH", ::apache::thrift::protocol::T_STRUCT, 1);
+ xfer += this->XXHASH.write(oprot);
+ xfer += oprot->writeFieldEnd();
+ }
+ xfer += oprot->writeFieldStop();
+ xfer += oprot->writeStructEnd();
+ return xfer;
+}
+
+void swap(BloomFilterHash &a, BloomFilterHash &b) {
+ using ::std::swap;
+ swap(a.XXHASH, b.XXHASH);
+ swap(a.__isset, b.__isset);
+}
+
+BloomFilterHash::BloomFilterHash(const BloomFilterHash& other62) {
+ XXHASH = other62.XXHASH;
+ __isset = other62.__isset;
+}
+BloomFilterHash& BloomFilterHash::operator=(const BloomFilterHash& other63) {
+ XXHASH = other63.XXHASH;
+ __isset = other63.__isset;
+ return *this;
+}
+void BloomFilterHash::printTo(std::ostream& out) const {
+ using ::apache::thrift::to_string;
+ out << "BloomFilterHash(";
+ out << "XXHASH="; (__isset.XXHASH ? (out << to_string(XXHASH)) : (out << "<null>"));
+ out << ")";
+}
+
+
+Uncompressed::~Uncompressed() noexcept {
+}
+
+std::ostream& operator<<(std::ostream& out, const Uncompressed& obj)
+{
+ obj.printTo(out);
+ return out;
+}
+
+
+uint32_t Uncompressed::read(::apache::thrift::protocol::TProtocol* iprot) {
+
+ ::apache::thrift::protocol::TInputRecursionTracker tracker(*iprot);
+ uint32_t xfer = 0;
+ std::string fname;
+ ::apache::thrift::protocol::TType ftype;
+ int16_t fid;
+
+ xfer += iprot->readStructBegin(fname);
+
+ using ::apache::thrift::protocol::TProtocolException;
+
+
+ while (true)
+ {
+ xfer += iprot->readFieldBegin(fname, ftype, fid);
+ if (ftype == ::apache::thrift::protocol::T_STOP) {
+ break;
+ }
+ xfer += iprot->skip(ftype);
+ xfer += iprot->readFieldEnd();
+ }
+
+ xfer += iprot->readStructEnd();
+
+ return xfer;
+}
+
+uint32_t Uncompressed::write(::apache::thrift::protocol::TProtocol* oprot) const {
+ uint32_t xfer = 0;
+ ::apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot);
+ xfer += oprot->writeStructBegin("Uncompressed");
+
+ xfer += oprot->writeFieldStop();
+ xfer += oprot->writeStructEnd();
+ return xfer;
+}
+
+void swap(Uncompressed &a, Uncompressed &b) {
+ using ::std::swap;
+ (void) a;
+ (void) b;
+}
+
+Uncompressed::Uncompressed(const Uncompressed& other64) {
+ (void) other64;
+}
+Uncompressed& Uncompressed::operator=(const Uncompressed& other65) {
+ (void) other65;
+ return *this;
+}
+void Uncompressed::printTo(std::ostream& out) const {
+ using ::apache::thrift::to_string;
+ out << "Uncompressed(";
+ out << ")";
+}
+
+
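+// BloomFilterCompression: same single-variant union shape, with
+// UNCOMPRESSED (1).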
+BloomFilterCompression::~BloomFilterCompression() noexcept {
+}
+
+
+void BloomFilterCompression::__set_UNCOMPRESSED(const Uncompressed& val) {
+ this->UNCOMPRESSED = val;
+ __isset.UNCOMPRESSED = true;
+}
+std::ostream& operator<<(std::ostream& out, const BloomFilterCompression& obj)
+{
+ obj.printTo(out);
+ return out;
+}
+
+
+uint32_t BloomFilterCompression::read(::apache::thrift::protocol::TProtocol* iprot) {
+
+ ::apache::thrift::protocol::TInputRecursionTracker tracker(*iprot);
+ uint32_t xfer = 0;
+ std::string fname;
+ ::apache::thrift::protocol::TType ftype;
+ int16_t fid;
+
+ xfer += iprot->readStructBegin(fname);
+
+ using ::apache::thrift::protocol::TProtocolException;
+
+
+ while (true)
+ {
+ xfer += iprot->readFieldBegin(fname, ftype, fid);
+ if (ftype == ::apache::thrift::protocol::T_STOP) {
+ break;
+ }
+ switch (fid)
+ {
+ case 1:
+ if (ftype == ::apache::thrift::protocol::T_STRUCT) {
+ xfer += this->UNCOMPRESSED.read(iprot);
+ this->__isset.UNCOMPRESSED = true;
+ } else {
+ xfer += iprot->skip(ftype);
+ }
+ break;
+ default:
+ xfer += iprot->skip(ftype);
+ break;
+ }
+ xfer += iprot->readFieldEnd();
+ }
+
+ xfer += iprot->readStructEnd();
+
+ return xfer;
+}
+
+uint32_t BloomFilterCompression::write(::apache::thrift::protocol::TProtocol* oprot) const {
+ uint32_t xfer = 0;
+ ::apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot);
+ xfer += oprot->writeStructBegin("BloomFilterCompression");
+
+ if (this->__isset.UNCOMPRESSED) {
+ xfer += oprot->writeFieldBegin("UNCOMPRESSED", ::apache::thrift::protocol::T_STRUCT, 1);
+ xfer += this->UNCOMPRESSED.write(oprot);
+ xfer += oprot->writeFieldEnd();
+ }
+ xfer += oprot->writeFieldStop();
+ xfer += oprot->writeStructEnd();
+ return xfer;
+}
+
+void swap(BloomFilterCompression &a, BloomFilterCompression &b) {
+ using ::std::swap;
+ swap(a.UNCOMPRESSED, b.UNCOMPRESSED);
+ swap(a.__isset, b.__isset);
+}
+
+BloomFilterCompression::BloomFilterCompression(const BloomFilterCompression& other66) {
+ UNCOMPRESSED = other66.UNCOMPRESSED;
+ __isset = other66.__isset;
+}
+BloomFilterCompression& BloomFilterCompression::operator=(const BloomFilterCompression& other67) {
+ UNCOMPRESSED = other67.UNCOMPRESSED;
+ __isset = other67.__isset;
+ return *this;
+}
+void BloomFilterCompression::printTo(std::ostream& out) const {
+ using ::apache::thrift::to_string;
+ out << "BloomFilterCompression(";
+ out << "UNCOMPRESSED="; (__isset.UNCOMPRESSED ? (out << to_string(UNCOMPRESSED)) : (out << "<null>"));
+ out << ")";
+}
+
+
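+// BloomFilterHeader: all four fields (numBytes, algorithm, hash, compression)
+// are required, so there is no __isset state to copy or swap.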
+BloomFilterHeader::~BloomFilterHeader() noexcept {
+}
+
+
+void BloomFilterHeader::__set_numBytes(const int32_t val) {
+ this->numBytes = val;
+}
+
+void BloomFilterHeader::__set_algorithm(const BloomFilterAlgorithm& val) {
+ this->algorithm = val;
+}
+
+void BloomFilterHeader::__set_hash(const BloomFilterHash& val) {
+ this->hash = val;
+}
+
+void BloomFilterHeader::__set_compression(const BloomFilterCompression& val) {
+ this->compression = val;
+}
+std::ostream& operator<<(std::ostream& out, const BloomFilterHeader& obj)
+{
+ obj.printTo(out);
+ return out;
+}
+
+
+uint32_t BloomFilterHeader::read(::apache::thrift::protocol::TProtocol* iprot) {
+
+ ::apache::thrift::protocol::TInputRecursionTracker tracker(*iprot);
+ uint32_t xfer = 0;
+ std::string fname;
+ ::apache::thrift::protocol::TType ftype;
+ int16_t fid;
+
+ xfer += iprot->readStructBegin(fname);
+
+ using ::apache::thrift::protocol::TProtocolException;
+
+ bool isset_numBytes = false;
+ bool isset_algorithm = false;
+ bool isset_hash = false;
+ bool isset_compression = false;
+
+ while (true)
+ {
+ xfer += iprot->readFieldBegin(fname, ftype, fid);
+ if (ftype == ::apache::thrift::protocol::T_STOP) {
+ break;
+ }
+ switch (fid)
+ {
+ case 1:
+ if (ftype == ::apache::thrift::protocol::T_I32) {
+ xfer += iprot->readI32(this->numBytes);
+ isset_numBytes = true;
+ } else {
+ xfer += iprot->skip(ftype);
+ }
+ break;
+ case 2:
+ if (ftype == ::apache::thrift::protocol::T_STRUCT) {
+ xfer += this->algorithm.read(iprot);
+ isset_algorithm = true;
+ } else {
+ xfer += iprot->skip(ftype);
+ }
+ break;
+ case 3:
+ if (ftype == ::apache::thrift::protocol::T_STRUCT) {
+ xfer += this->hash.read(iprot);
+ isset_hash = true;
+ } else {
+ xfer += iprot->skip(ftype);
+ }
+ break;
+ case 4:
+ if (ftype == ::apache::thrift::protocol::T_STRUCT) {
+ xfer += this->compression.read(iprot);
+ isset_compression = true;
+ } else {
+ xfer += iprot->skip(ftype);
+ }
+ break;
+ default:
+ xfer += iprot->skip(ftype);
+ break;
+ }
+ xfer += iprot->readFieldEnd();
+ }
+
+ xfer += iprot->readStructEnd();
+
+ if (!isset_numBytes)
+ throw TProtocolException(TProtocolException::INVALID_DATA);
+ if (!isset_algorithm)
+ throw TProtocolException(TProtocolException::INVALID_DATA);
+ if (!isset_hash)
+ throw TProtocolException(TProtocolException::INVALID_DATA);
+ if (!isset_compression)
+ throw TProtocolException(TProtocolException::INVALID_DATA);
+ return xfer;
+}
+
+uint32_t BloomFilterHeader::write(::apache::thrift::protocol::TProtocol* oprot) const {
+ uint32_t xfer = 0;
+ ::apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot);
+ xfer += oprot->writeStructBegin("BloomFilterHeader");
+
+ xfer += oprot->writeFieldBegin("numBytes", ::apache::thrift::protocol::T_I32, 1);
+ xfer += oprot->writeI32(this->numBytes);
+ xfer += oprot->writeFieldEnd();
+
+ xfer += oprot->writeFieldBegin("algorithm", ::apache::thrift::protocol::T_STRUCT, 2);
+ xfer += this->algorithm.write(oprot);
+ xfer += oprot->writeFieldEnd();
+
+ xfer += oprot->writeFieldBegin("hash", ::apache::thrift::protocol::T_STRUCT, 3);
+ xfer += this->hash.write(oprot);
+ xfer += oprot->writeFieldEnd();
+
+ xfer += oprot->writeFieldBegin("compression", ::apache::thrift::protocol::T_STRUCT, 4);
+ xfer += this->compression.write(oprot);
+ xfer += oprot->writeFieldEnd();
+
+ xfer += oprot->writeFieldStop();
+ xfer += oprot->writeStructEnd();
+ return xfer;
+}
+
+void swap(BloomFilterHeader &a, BloomFilterHeader &b) {
+ using ::std::swap;
+ swap(a.numBytes, b.numBytes);
+ swap(a.algorithm, b.algorithm);
+ swap(a.hash, b.hash);
+ swap(a.compression, b.compression);
+}
+
+BloomFilterHeader::BloomFilterHeader(const BloomFilterHeader& other68) {
+ numBytes = other68.numBytes;
+ algorithm = other68.algorithm;
+ hash = other68.hash;
+ compression = other68.compression;
+}
+BloomFilterHeader& BloomFilterHeader::operator=(const BloomFilterHeader& other69) {
+ numBytes = other69.numBytes;
+ algorithm = other69.algorithm;
+ hash = other69.hash;
+ compression = other69.compression;
+ return *this;
+}
+void BloomFilterHeader::printTo(std::ostream& out) const {
+ using ::apache::thrift::to_string;
+ out << "BloomFilterHeader(";
+ out << "numBytes=" << to_string(numBytes);
+ out << ", " << "algorithm=" << to_string(algorithm);
+ out << ", " << "hash=" << to_string(hash);
+ out << ", " << "compression=" << to_string(compression);
+ out << ")";
+}
+
+
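+// PageHeader: type (1) and the two page sizes (2, 3) are required; crc (4) and
+// the per-page-type headers (5-8) are optional. By convention the variant that
+// is populated matches `type`.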
+PageHeader::~PageHeader() noexcept {
+}
+
+
+void PageHeader::__set_type(const PageType::type val) {
+ this->type = val;
+}
+
+void PageHeader::__set_uncompressed_page_size(const int32_t val) {
+ this->uncompressed_page_size = val;
+}
+
+void PageHeader::__set_compressed_page_size(const int32_t val) {
+ this->compressed_page_size = val;
+}
+
+void PageHeader::__set_crc(const int32_t val) {
+ this->crc = val;
+ __isset.crc = true;
+}
+
+void PageHeader::__set_data_page_header(const DataPageHeader& val) {
+ this->data_page_header = val;
+ __isset.data_page_header = true;
+}
+
+void PageHeader::__set_index_page_header(const IndexPageHeader& val) {
+ this->index_page_header = val;
+ __isset.index_page_header = true;
+}
+
+void PageHeader::__set_dictionary_page_header(const DictionaryPageHeader& val) {
+ this->dictionary_page_header = val;
+ __isset.dictionary_page_header = true;
+}
+
+void PageHeader::__set_data_page_header_v2(const DataPageHeaderV2& val) {
+ this->data_page_header_v2 = val;
+ __isset.data_page_header_v2 = true;
+}
+std::ostream& operator<<(std::ostream& out, const PageHeader& obj)
+{
+ obj.printTo(out);
+ return out;
+}
+
+
+uint32_t PageHeader::read(::apache::thrift::protocol::TProtocol* iprot) {
+
+ ::apache::thrift::protocol::TInputRecursionTracker tracker(*iprot);
+ uint32_t xfer = 0;
+ std::string fname;
+ ::apache::thrift::protocol::TType ftype;
+ int16_t fid;
+
+ xfer += iprot->readStructBegin(fname);
+
+ using ::apache::thrift::protocol::TProtocolException;
+
+ bool isset_type = false;
+ bool isset_uncompressed_page_size = false;
+ bool isset_compressed_page_size = false;
+
+ while (true)
+ {
+ xfer += iprot->readFieldBegin(fname, ftype, fid);
+ if (ftype == ::apache::thrift::protocol::T_STOP) {
+ break;
+ }
+ switch (fid)
+ {
+ case 1:
+ if (ftype == ::apache::thrift::protocol::T_I32) {
+ int32_t ecast70;
+ xfer += iprot->readI32(ecast70);
+ this->type = (PageType::type)ecast70;
+ isset_type = true;
+ } else {
+ xfer += iprot->skip(ftype);
+ }
+ break;
+ case 2:
+ if (ftype == ::apache::thrift::protocol::T_I32) {
+ xfer += iprot->readI32(this->uncompressed_page_size);
+ isset_uncompressed_page_size = true;
+ } else {
+ xfer += iprot->skip(ftype);
+ }
+ break;
+ case 3:
+ if (ftype == ::apache::thrift::protocol::T_I32) {
+ xfer += iprot->readI32(this->compressed_page_size);
+ isset_compressed_page_size = true;
+ } else {
+ xfer += iprot->skip(ftype);
+ }
+ break;
+ case 4:
+ if (ftype == ::apache::thrift::protocol::T_I32) {
+ xfer += iprot->readI32(this->crc);
+ this->__isset.crc = true;
+ } else {
+ xfer += iprot->skip(ftype);
+ }
+ break;
+ case 5:
+ if (ftype == ::apache::thrift::protocol::T_STRUCT) {
+ xfer += this->data_page_header.read(iprot);
+ this->__isset.data_page_header = true;
+ } else {
+ xfer += iprot->skip(ftype);
+ }
+ break;
+ case 6:
+ if (ftype == ::apache::thrift::protocol::T_STRUCT) {
+ xfer += this->index_page_header.read(iprot);
+ this->__isset.index_page_header = true;
+ } else {
+ xfer += iprot->skip(ftype);
+ }
+ break;
+ case 7:
+ if (ftype == ::apache::thrift::protocol::T_STRUCT) {
+ xfer += this->dictionary_page_header.read(iprot);
+ this->__isset.dictionary_page_header = true;
+ } else {
+ xfer += iprot->skip(ftype);
+ }
+ break;
+ case 8:
+ if (ftype == ::apache::thrift::protocol::T_STRUCT) {
+ xfer += this->data_page_header_v2.read(iprot);
+ this->__isset.data_page_header_v2 = true;
+ } else {
+ xfer += iprot->skip(ftype);
+ }
+ break;
+ default:
+ xfer += iprot->skip(ftype);
+ break;
+ }
+ xfer += iprot->readFieldEnd();
+ }
+
+ xfer += iprot->readStructEnd();
+
+ if (!isset_type)
+ throw TProtocolException(TProtocolException::INVALID_DATA);
+ if (!isset_uncompressed_page_size)
+ throw TProtocolException(TProtocolException::INVALID_DATA);
+ if (!isset_compressed_page_size)
+ throw TProtocolException(TProtocolException::INVALID_DATA);
+ return xfer;
+}
+
+uint32_t PageHeader::write(::apache::thrift::protocol::TProtocol* oprot) const {
+ uint32_t xfer = 0;
+ ::apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot);
+ xfer += oprot->writeStructBegin("PageHeader");
+
+ xfer += oprot->writeFieldBegin("type", ::apache::thrift::protocol::T_I32, 1);
+ xfer += oprot->writeI32((int32_t)this->type);
+ xfer += oprot->writeFieldEnd();
+
+ xfer += oprot->writeFieldBegin("uncompressed_page_size", ::apache::thrift::protocol::T_I32, 2);
+ xfer += oprot->writeI32(this->uncompressed_page_size);
+ xfer += oprot->writeFieldEnd();
+
+ xfer += oprot->writeFieldBegin("compressed_page_size", ::apache::thrift::protocol::T_I32, 3);
+ xfer += oprot->writeI32(this->compressed_page_size);
+ xfer += oprot->writeFieldEnd();
+
+ if (this->__isset.crc) {
+ xfer += oprot->writeFieldBegin("crc", ::apache::thrift::protocol::T_I32, 4);
+ xfer += oprot->writeI32(this->crc);
+ xfer += oprot->writeFieldEnd();
+ }
+ if (this->__isset.data_page_header) {
+ xfer += oprot->writeFieldBegin("data_page_header", ::apache::thrift::protocol::T_STRUCT, 5);
+ xfer += this->data_page_header.write(oprot);
+ xfer += oprot->writeFieldEnd();
+ }
+ if (this->__isset.index_page_header) {
+ xfer += oprot->writeFieldBegin("index_page_header", ::apache::thrift::protocol::T_STRUCT, 6);
+ xfer += this->index_page_header.write(oprot);
+ xfer += oprot->writeFieldEnd();
+ }
+ if (this->__isset.dictionary_page_header) {
+ xfer += oprot->writeFieldBegin("dictionary_page_header", ::apache::thrift::protocol::T_STRUCT, 7);
+ xfer += this->dictionary_page_header.write(oprot);
+ xfer += oprot->writeFieldEnd();
+ }
+ if (this->__isset.data_page_header_v2) {
+ xfer += oprot->writeFieldBegin("data_page_header_v2", ::apache::thrift::protocol::T_STRUCT, 8);
+ xfer += this->data_page_header_v2.write(oprot);
+ xfer += oprot->writeFieldEnd();
+ }
+ xfer += oprot->writeFieldStop();
+ xfer += oprot->writeStructEnd();
+ return xfer;
+}
+
+void swap(PageHeader &a, PageHeader &b) {
+ using ::std::swap;
+ swap(a.type, b.type);
+ swap(a.uncompressed_page_size, b.uncompressed_page_size);
+ swap(a.compressed_page_size, b.compressed_page_size);
+ swap(a.crc, b.crc);
+ swap(a.data_page_header, b.data_page_header);
+ swap(a.index_page_header, b.index_page_header);
+ swap(a.dictionary_page_header, b.dictionary_page_header);
+ swap(a.data_page_header_v2, b.data_page_header_v2);
+ swap(a.__isset, b.__isset);
+}
+
+PageHeader::PageHeader(const PageHeader& other71) {
+ type = other71.type;
+ uncompressed_page_size = other71.uncompressed_page_size;
+ compressed_page_size = other71.compressed_page_size;
+ crc = other71.crc;
+ data_page_header = other71.data_page_header;
+ index_page_header = other71.index_page_header;
+ dictionary_page_header = other71.dictionary_page_header;
+ data_page_header_v2 = other71.data_page_header_v2;
+ __isset = other71.__isset;
+}
+PageHeader& PageHeader::operator=(const PageHeader& other72) {
+ type = other72.type;
+ uncompressed_page_size = other72.uncompressed_page_size;
+ compressed_page_size = other72.compressed_page_size;
+ crc = other72.crc;
+ data_page_header = other72.data_page_header;
+ index_page_header = other72.index_page_header;
+ dictionary_page_header = other72.dictionary_page_header;
+ data_page_header_v2 = other72.data_page_header_v2;
+ __isset = other72.__isset;
+ return *this;
+}
+void PageHeader::printTo(std::ostream& out) const {
+ using ::apache::thrift::to_string;
+ out << "PageHeader(";
+ out << "type=" << to_string(type);
+ out << ", " << "uncompressed_page_size=" << to_string(uncompressed_page_size);
+ out << ", " << "compressed_page_size=" << to_string(compressed_page_size);
+ out << ", " << "crc="; (__isset.crc ? (out << to_string(crc)) : (out << "<null>"));
+ out << ", " << "data_page_header="; (__isset.data_page_header ? (out << to_string(data_page_header)) : (out << "<null>"));
+ out << ", " << "index_page_header="; (__isset.index_page_header ? (out << to_string(index_page_header)) : (out << "<null>"));
+ out << ", " << "dictionary_page_header="; (__isset.dictionary_page_header ? (out << to_string(dictionary_page_header)) : (out << "<null>"));
+ out << ", " << "data_page_header_v2="; (__isset.data_page_header_v2 ? (out << to_string(data_page_header_v2)) : (out << "<null>"));
+ out << ")";
+}
+
+
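+// KeyValue: application metadata pair; key (1) is required, value (2) optional.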
+KeyValue::~KeyValue() noexcept {
+}
+
+
+void KeyValue::__set_key(const std::string& val) {
+ this->key = val;
+}
+
+void KeyValue::__set_value(const std::string& val) {
+ this->value = val;
+ __isset.value = true;
+}
+std::ostream& operator<<(std::ostream& out, const KeyValue& obj)
+{
+ obj.printTo(out);
+ return out;
+}
+
+
+uint32_t KeyValue::read(::apache::thrift::protocol::TProtocol* iprot) {
+
+ ::apache::thrift::protocol::TInputRecursionTracker tracker(*iprot);
+ uint32_t xfer = 0;
+ std::string fname;
+ ::apache::thrift::protocol::TType ftype;
+ int16_t fid;
+
+ xfer += iprot->readStructBegin(fname);
+
+ using ::apache::thrift::protocol::TProtocolException;
+
+ bool isset_key = false;
+
+ while (true)
+ {
+ xfer += iprot->readFieldBegin(fname, ftype, fid);
+ if (ftype == ::apache::thrift::protocol::T_STOP) {
+ break;
+ }
+ switch (fid)
+ {
+ case 1:
+ if (ftype == ::apache::thrift::protocol::T_STRING) {
+ xfer += iprot->readString(this->key);
+ isset_key = true;
+ } else {
+ xfer += iprot->skip(ftype);
+ }
+ break;
+ case 2:
+ if (ftype == ::apache::thrift::protocol::T_STRING) {
+ xfer += iprot->readString(this->value);
+ this->__isset.value = true;
+ } else {
+ xfer += iprot->skip(ftype);
+ }
+ break;
+ default:
+ xfer += iprot->skip(ftype);
+ break;
+ }
+ xfer += iprot->readFieldEnd();
+ }
+
+ xfer += iprot->readStructEnd();
+
+ if (!isset_key)
+ throw TProtocolException(TProtocolException::INVALID_DATA);
+ return xfer;
+}
+
+uint32_t KeyValue::write(::apache::thrift::protocol::TProtocol* oprot) const {
+ uint32_t xfer = 0;
+ ::apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot);
+ xfer += oprot->writeStructBegin("KeyValue");
+
+ xfer += oprot->writeFieldBegin("key", ::apache::thrift::protocol::T_STRING, 1);
+ xfer += oprot->writeString(this->key);
+ xfer += oprot->writeFieldEnd();
+
+ if (this->__isset.value) {
+ xfer += oprot->writeFieldBegin("value", ::apache::thrift::protocol::T_STRING, 2);
+ xfer += oprot->writeString(this->value);
+ xfer += oprot->writeFieldEnd();
+ }
+ xfer += oprot->writeFieldStop();
+ xfer += oprot->writeStructEnd();
+ return xfer;
+}
+
+void swap(KeyValue &a, KeyValue &b) {
+ using ::std::swap;
+ swap(a.key, b.key);
+ swap(a.value, b.value);
+ swap(a.__isset, b.__isset);
+}
+
+KeyValue::KeyValue(const KeyValue& other73) {
+ key = other73.key;
+ value = other73.value;
+ __isset = other73.__isset;
+}
+KeyValue& KeyValue::operator=(const KeyValue& other74) {
+ key = other74.key;
+ value = other74.value;
+ __isset = other74.__isset;
+ return *this;
+}
+void KeyValue::printTo(std::ostream& out) const {
+ using ::apache::thrift::to_string;
+ out << "KeyValue(";
+ out << "key=" << to_string(key);
+ out << ", " << "value="; (__isset.value ? (out << to_string(value)) : (out << "<null>"));
+ out << ")";
+}
+
+
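+// SortingColumn: all three fields are required; swap() therefore exchanges the
+// members directly with no __isset bitset.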
+SortingColumn::~SortingColumn() noexcept {
+}
+
+
+void SortingColumn::__set_column_idx(const int32_t val) {
+ this->column_idx = val;
+}
+
+void SortingColumn::__set_descending(const bool val) {
+ this->descending = val;
+}
+
+void SortingColumn::__set_nulls_first(const bool val) {
+ this->nulls_first = val;
+}
+std::ostream& operator<<(std::ostream& out, const SortingColumn& obj)
+{
+ obj.printTo(out);
+ return out;
+}
+
+
+uint32_t SortingColumn::read(::apache::thrift::protocol::TProtocol* iprot) {
+
+ ::apache::thrift::protocol::TInputRecursionTracker tracker(*iprot);
+ uint32_t xfer = 0;
+ std::string fname;
+ ::apache::thrift::protocol::TType ftype;
+ int16_t fid;
+
+ xfer += iprot->readStructBegin(fname);
+
+ using ::apache::thrift::protocol::TProtocolException;
+
+ bool isset_column_idx = false;
+ bool isset_descending = false;
+ bool isset_nulls_first = false;
+
+ while (true)
+ {
+ xfer += iprot->readFieldBegin(fname, ftype, fid);
+ if (ftype == ::apache::thrift::protocol::T_STOP) {
+ break;
+ }
+ switch (fid)
+ {
+ case 1:
+ if (ftype == ::apache::thrift::protocol::T_I32) {
+ xfer += iprot->readI32(this->column_idx);
+ isset_column_idx = true;
+ } else {
+ xfer += iprot->skip(ftype);
+ }
+ break;
+ case 2:
+ if (ftype == ::apache::thrift::protocol::T_BOOL) {
+ xfer += iprot->readBool(this->descending);
+ isset_descending = true;
+ } else {
+ xfer += iprot->skip(ftype);
+ }
+ break;
+ case 3:
+ if (ftype == ::apache::thrift::protocol::T_BOOL) {
+ xfer += iprot->readBool(this->nulls_first);
+ isset_nulls_first = true;
+ } else {
+ xfer += iprot->skip(ftype);
+ }
+ break;
+ default:
+ xfer += iprot->skip(ftype);
+ break;
+ }
+ xfer += iprot->readFieldEnd();
+ }
+
+ xfer += iprot->readStructEnd();
+
+ if (!isset_column_idx)
+ throw TProtocolException(TProtocolException::INVALID_DATA);
+ if (!isset_descending)
+ throw TProtocolException(TProtocolException::INVALID_DATA);
+ if (!isset_nulls_first)
+ throw TProtocolException(TProtocolException::INVALID_DATA);
+ return xfer;
+}
+
+uint32_t SortingColumn::write(::apache::thrift::protocol::TProtocol* oprot) const {
+ uint32_t xfer = 0;
+ ::apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot);
+ xfer += oprot->writeStructBegin("SortingColumn");
+
+ xfer += oprot->writeFieldBegin("column_idx", ::apache::thrift::protocol::T_I32, 1);
+ xfer += oprot->writeI32(this->column_idx);
+ xfer += oprot->writeFieldEnd();
+
+ xfer += oprot->writeFieldBegin("descending", ::apache::thrift::protocol::T_BOOL, 2);
+ xfer += oprot->writeBool(this->descending);
+ xfer += oprot->writeFieldEnd();
+
+ xfer += oprot->writeFieldBegin("nulls_first", ::apache::thrift::protocol::T_BOOL, 3);
+ xfer += oprot->writeBool(this->nulls_first);
+ xfer += oprot->writeFieldEnd();
+
+ xfer += oprot->writeFieldStop();
+ xfer += oprot->writeStructEnd();
+ return xfer;
+}
+
+void swap(SortingColumn &a, SortingColumn &b) {
+ using ::std::swap;
+ swap(a.column_idx, b.column_idx);
+ swap(a.descending, b.descending);
+ swap(a.nulls_first, b.nulls_first);
+}
+
+SortingColumn::SortingColumn(const SortingColumn& other75) {
+ column_idx = other75.column_idx;
+ descending = other75.descending;
+ nulls_first = other75.nulls_first;
+}
+SortingColumn& SortingColumn::operator=(const SortingColumn& other76) {
+ column_idx = other76.column_idx;
+ descending = other76.descending;
+ nulls_first = other76.nulls_first;
+ return *this;
+}
+void SortingColumn::printTo(std::ostream& out) const {
+ using ::apache::thrift::to_string;
+ out << "SortingColumn(";
+ out << "column_idx=" << to_string(column_idx);
+ out << ", " << "descending=" << to_string(descending);
+ out << ", " << "nulls_first=" << to_string(nulls_first);
+ out << ")";
+}
+
+
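+// PageEncodingStats: required (page_type, encoding, count) triple counting
+// pages per encoding.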
+PageEncodingStats::~PageEncodingStats() noexcept {
+}
+
+
+void PageEncodingStats::__set_page_type(const PageType::type val) {
+ this->page_type = val;
+}
+
+void PageEncodingStats::__set_encoding(const Encoding::type val) {
+ this->encoding = val;
+}
+
+void PageEncodingStats::__set_count(const int32_t val) {
+ this->count = val;
+}
+std::ostream& operator<<(std::ostream& out, const PageEncodingStats& obj)
+{
+ obj.printTo(out);
+ return out;
+}
+
+
+uint32_t PageEncodingStats::read(::apache::thrift::protocol::TProtocol* iprot) {
+
+ ::apache::thrift::protocol::TInputRecursionTracker tracker(*iprot);
+ uint32_t xfer = 0;
+ std::string fname;
+ ::apache::thrift::protocol::TType ftype;
+ int16_t fid;
+
+ xfer += iprot->readStructBegin(fname);
+
+ using ::apache::thrift::protocol::TProtocolException;
+
+ bool isset_page_type = false;
+ bool isset_encoding = false;
+ bool isset_count = false;
+
+ while (true)
+ {
+ xfer += iprot->readFieldBegin(fname, ftype, fid);
+ if (ftype == ::apache::thrift::protocol::T_STOP) {
+ break;
+ }
+ switch (fid)
+ {
+ case 1:
+ if (ftype == ::apache::thrift::protocol::T_I32) {
+ int32_t ecast77;
+ xfer += iprot->readI32(ecast77);
+ this->page_type = (PageType::type)ecast77;
+ isset_page_type = true;
+ } else {
+ xfer += iprot->skip(ftype);
+ }
+ break;
+ case 2:
+ if (ftype == ::apache::thrift::protocol::T_I32) {
+ int32_t ecast78;
+ xfer += iprot->readI32(ecast78);
+ this->encoding = (Encoding::type)ecast78;
+ isset_encoding = true;
+ } else {
+ xfer += iprot->skip(ftype);
+ }
+ break;
+ case 3:
+ if (ftype == ::apache::thrift::protocol::T_I32) {
+ xfer += iprot->readI32(this->count);
+ isset_count = true;
+ } else {
+ xfer += iprot->skip(ftype);
+ }
+ break;
+ default:
+ xfer += iprot->skip(ftype);
+ break;
+ }
+ xfer += iprot->readFieldEnd();
+ }
+
+ xfer += iprot->readStructEnd();
+
+ if (!isset_page_type)
+ throw TProtocolException(TProtocolException::INVALID_DATA);
+ if (!isset_encoding)
+ throw TProtocolException(TProtocolException::INVALID_DATA);
+ if (!isset_count)
+ throw TProtocolException(TProtocolException::INVALID_DATA);
+ return xfer;
+}
+
+uint32_t PageEncodingStats::write(::apache::thrift::protocol::TProtocol* oprot) const {
+ uint32_t xfer = 0;
+ ::apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot);
+ xfer += oprot->writeStructBegin("PageEncodingStats");
+
+ xfer += oprot->writeFieldBegin("page_type", ::apache::thrift::protocol::T_I32, 1);
+ xfer += oprot->writeI32((int32_t)this->page_type);
+ xfer += oprot->writeFieldEnd();
+
+ xfer += oprot->writeFieldBegin("encoding", ::apache::thrift::protocol::T_I32, 2);
+ xfer += oprot->writeI32((int32_t)this->encoding);
+ xfer += oprot->writeFieldEnd();
+
+ xfer += oprot->writeFieldBegin("count", ::apache::thrift::protocol::T_I32, 3);
+ xfer += oprot->writeI32(this->count);
+ xfer += oprot->writeFieldEnd();
+
+ xfer += oprot->writeFieldStop();
+ xfer += oprot->writeStructEnd();
+ return xfer;
+}
+
+void swap(PageEncodingStats &a, PageEncodingStats &b) {
+ using ::std::swap;
+ swap(a.page_type, b.page_type);
+ swap(a.encoding, b.encoding);
+ swap(a.count, b.count);
+}
+
+PageEncodingStats::PageEncodingStats(const PageEncodingStats& other79) {
+ page_type = other79.page_type;
+ encoding = other79.encoding;
+ count = other79.count;
+}
+PageEncodingStats& PageEncodingStats::operator=(const PageEncodingStats& other80) {
+ page_type = other80.page_type;
+ encoding = other80.encoding;
+ count = other80.count;
+ return *this;
+}
+void PageEncodingStats::printTo(std::ostream& out) const {
+ using ::apache::thrift::to_string;
+ out << "PageEncodingStats(";
+ out << "page_type=" << to_string(page_type);
+ out << ", " << "encoding=" << to_string(encoding);
+ out << ", " << "count=" << to_string(count);
+ out << ")";
+}
+
+
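+// ColumnMetaData: fields 1-7 plus data_page_offset (9) are required; the other
+// offsets, statistics, and list fields are optional. List fields are read with
+// readListBegin/resize/readListEnd and written element by element.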
+ColumnMetaData::~ColumnMetaData() noexcept {
+}
+
+
+void ColumnMetaData::__set_type(const Type::type val) {
+ this->type = val;
+}
+
+void ColumnMetaData::__set_encodings(const std::vector<Encoding::type> & val) {
+ this->encodings = val;
+}
+
+void ColumnMetaData::__set_path_in_schema(const std::vector<std::string> & val) {
+ this->path_in_schema = val;
+}
+
+void ColumnMetaData::__set_codec(const CompressionCodec::type val) {
+ this->codec = val;
+}
+
+void ColumnMetaData::__set_num_values(const int64_t val) {
+ this->num_values = val;
+}
+
+void ColumnMetaData::__set_total_uncompressed_size(const int64_t val) {
+ this->total_uncompressed_size = val;
+}
+
+void ColumnMetaData::__set_total_compressed_size(const int64_t val) {
+ this->total_compressed_size = val;
+}
+
+void ColumnMetaData::__set_key_value_metadata(const std::vector<KeyValue> & val) {
+ this->key_value_metadata = val;
+ __isset.key_value_metadata = true;
+}
+
+void ColumnMetaData::__set_data_page_offset(const int64_t val) {
+ this->data_page_offset = val;
+}
+
+void ColumnMetaData::__set_index_page_offset(const int64_t val) {
+ this->index_page_offset = val;
+ __isset.index_page_offset = true;
+}
+
+void ColumnMetaData::__set_dictionary_page_offset(const int64_t val) {
+ this->dictionary_page_offset = val;
+ __isset.dictionary_page_offset = true;
+}
+
+void ColumnMetaData::__set_statistics(const Statistics& val) {
+ this->statistics = val;
+ __isset.statistics = true;
+}
+
+void ColumnMetaData::__set_encoding_stats(const std::vector<PageEncodingStats> & val) {
+ this->encoding_stats = val;
+ __isset.encoding_stats = true;
+}
+
+void ColumnMetaData::__set_bloom_filter_offset(const int64_t val) {
+ this->bloom_filter_offset = val;
+ __isset.bloom_filter_offset = true;
+}
+std::ostream& operator<<(std::ostream& out, const ColumnMetaData& obj)
+{
+ obj.printTo(out);
+ return out;
+}
+
+
+uint32_t ColumnMetaData::read(::apache::thrift::protocol::TProtocol* iprot) {
+
+ ::apache::thrift::protocol::TInputRecursionTracker tracker(*iprot);
+ uint32_t xfer = 0;
+ std::string fname;
+ ::apache::thrift::protocol::TType ftype;
+ int16_t fid;
+
+ xfer += iprot->readStructBegin(fname);
+
+ using ::apache::thrift::protocol::TProtocolException;
+
+ bool isset_type = false;
+ bool isset_encodings = false;
+ bool isset_path_in_schema = false;
+ bool isset_codec = false;
+ bool isset_num_values = false;
+ bool isset_total_uncompressed_size = false;
+ bool isset_total_compressed_size = false;
+ bool isset_data_page_offset = false;
+
+ while (true)
+ {
+ xfer += iprot->readFieldBegin(fname, ftype, fid);
+ if (ftype == ::apache::thrift::protocol::T_STOP) {
+ break;
+ }
+ switch (fid)
+ {
+ case 1:
+ if (ftype == ::apache::thrift::protocol::T_I32) {
+ int32_t ecast81;
+ xfer += iprot->readI32(ecast81);
+ this->type = (Type::type)ecast81;
+ isset_type = true;
+ } else {
+ xfer += iprot->skip(ftype);
+ }
+ break;
+ case 2:
+ if (ftype == ::apache::thrift::protocol::T_LIST) {
+ {
+ this->encodings.clear();
+ uint32_t _size82;
+ ::apache::thrift::protocol::TType _etype85;
+ xfer += iprot->readListBegin(_etype85, _size82);
+ this->encodings.resize(_size82);
+ uint32_t _i86;
+ for (_i86 = 0; _i86 < _size82; ++_i86)
+ {
+ int32_t ecast87;
+ xfer += iprot->readI32(ecast87);
+ this->encodings[_i86] = (Encoding::type)ecast87;
+ }
+ xfer += iprot->readListEnd();
+ }
+ isset_encodings = true;
+ } else {
+ xfer += iprot->skip(ftype);
+ }
+ break;
+ case 3:
+ if (ftype == ::apache::thrift::protocol::T_LIST) {
+ {
+ this->path_in_schema.clear();
+ uint32_t _size88;
+ ::apache::thrift::protocol::TType _etype91;
+ xfer += iprot->readListBegin(_etype91, _size88);
+ this->path_in_schema.resize(_size88);
+ uint32_t _i92;
+ for (_i92 = 0; _i92 < _size88; ++_i92)
+ {
+ xfer += iprot->readString(this->path_in_schema[_i92]);
+ }
+ xfer += iprot->readListEnd();
+ }
+ isset_path_in_schema = true;
+ } else {
+ xfer += iprot->skip(ftype);
+ }
+ break;
+ case 4:
+ if (ftype == ::apache::thrift::protocol::T_I32) {
+ int32_t ecast93;
+ xfer += iprot->readI32(ecast93);
+ this->codec = (CompressionCodec::type)ecast93;
+ isset_codec = true;
+ } else {
+ xfer += iprot->skip(ftype);
+ }
+ break;
+ case 5:
+ if (ftype == ::apache::thrift::protocol::T_I64) {
+ xfer += iprot->readI64(this->num_values);
+ isset_num_values = true;
+ } else {
+ xfer += iprot->skip(ftype);
+ }
+ break;
+ case 6:
+ if (ftype == ::apache::thrift::protocol::T_I64) {
+ xfer += iprot->readI64(this->total_uncompressed_size);
+ isset_total_uncompressed_size = true;
+ } else {
+ xfer += iprot->skip(ftype);
+ }
+ break;
+ case 7:
+ if (ftype == ::apache::thrift::protocol::T_I64) {
+ xfer += iprot->readI64(this->total_compressed_size);
+ isset_total_compressed_size = true;
+ } else {
+ xfer += iprot->skip(ftype);
+ }
+ break;
+ case 8:
+ if (ftype == ::apache::thrift::protocol::T_LIST) {
+ {
+ this->key_value_metadata.clear();
+ uint32_t _size94;
+ ::apache::thrift::protocol::TType _etype97;
+ xfer += iprot->readListBegin(_etype97, _size94);
+ this->key_value_metadata.resize(_size94);
+ uint32_t _i98;
+ for (_i98 = 0; _i98 < _size94; ++_i98)
+ {
+ xfer += this->key_value_metadata[_i98].read(iprot);
+ }
+ xfer += iprot->readListEnd();
+ }
+ this->__isset.key_value_metadata = true;
+ } else {
+ xfer += iprot->skip(ftype);
+ }
+ break;
+ case 9:
+ if (ftype == ::apache::thrift::protocol::T_I64) {
+ xfer += iprot->readI64(this->data_page_offset);
+ isset_data_page_offset = true;
+ } else {
+ xfer += iprot->skip(ftype);
+ }
+ break;
+ case 10:
+ if (ftype == ::apache::thrift::protocol::T_I64) {
+ xfer += iprot->readI64(this->index_page_offset);
+ this->__isset.index_page_offset = true;
+ } else {
+ xfer += iprot->skip(ftype);
+ }
+ break;
+ case 11:
+ if (ftype == ::apache::thrift::protocol::T_I64) {
+ xfer += iprot->readI64(this->dictionary_page_offset);
+ this->__isset.dictionary_page_offset = true;
+ } else {
+ xfer += iprot->skip(ftype);
+ }
+ break;
+ case 12:
+ if (ftype == ::apache::thrift::protocol::T_STRUCT) {
+ xfer += this->statistics.read(iprot);
+ this->__isset.statistics = true;
+ } else {
+ xfer += iprot->skip(ftype);
+ }
+ break;
+ case 13:
+ if (ftype == ::apache::thrift::protocol::T_LIST) {
+ {
+ this->encoding_stats.clear();
+ uint32_t _size99;
+ ::apache::thrift::protocol::TType _etype102;
+ xfer += iprot->readListBegin(_etype102, _size99);
+ this->encoding_stats.resize(_size99);
+ uint32_t _i103;
+ for (_i103 = 0; _i103 < _size99; ++_i103)
+ {
+ xfer += this->encoding_stats[_i103].read(iprot);
+ }
+ xfer += iprot->readListEnd();
+ }
+ this->__isset.encoding_stats = true;
+ } else {
+ xfer += iprot->skip(ftype);
+ }
+ break;
+ case 14:
+ if (ftype == ::apache::thrift::protocol::T_I64) {
+ xfer += iprot->readI64(this->bloom_filter_offset);
+ this->__isset.bloom_filter_offset = true;
+ } else {
+ xfer += iprot->skip(ftype);
+ }
+ break;
+ default:
+ xfer += iprot->skip(ftype);
+ break;
+ }
+ xfer += iprot->readFieldEnd();
+ }
+
+ xfer += iprot->readStructEnd();
+
+ if (!isset_type)
+ throw TProtocolException(TProtocolException::INVALID_DATA);
+ if (!isset_encodings)
+ throw TProtocolException(TProtocolException::INVALID_DATA);
+ if (!isset_path_in_schema)
+ throw TProtocolException(TProtocolException::INVALID_DATA);
+ if (!isset_codec)
+ throw TProtocolException(TProtocolException::INVALID_DATA);
+ if (!isset_num_values)
+ throw TProtocolException(TProtocolException::INVALID_DATA);
+ if (!isset_total_uncompressed_size)
+ throw TProtocolException(TProtocolException::INVALID_DATA);
+ if (!isset_total_compressed_size)
+ throw TProtocolException(TProtocolException::INVALID_DATA);
+ if (!isset_data_page_offset)
+ throw TProtocolException(TProtocolException::INVALID_DATA);
+ return xfer;
+}
+
+uint32_t ColumnMetaData::write(::apache::thrift::protocol::TProtocol* oprot) const {
+ uint32_t xfer = 0;
+ ::apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot);
+ xfer += oprot->writeStructBegin("ColumnMetaData");
+
+ xfer += oprot->writeFieldBegin("type", ::apache::thrift::protocol::T_I32, 1);
+ xfer += oprot->writeI32((int32_t)this->type);
+ xfer += oprot->writeFieldEnd();
+
+ xfer += oprot->writeFieldBegin("encodings", ::apache::thrift::protocol::T_LIST, 2);
+ {
+ xfer += oprot->writeListBegin(::apache::thrift::protocol::T_I32, static_cast<uint32_t>(this->encodings.size()));
+ std::vector<Encoding::type> ::const_iterator _iter104;
+ for (_iter104 = this->encodings.begin(); _iter104 != this->encodings.end(); ++_iter104)
+ {
+ xfer += oprot->writeI32((int32_t)(*_iter104));
+ }
+ xfer += oprot->writeListEnd();
+ }
+ xfer += oprot->writeFieldEnd();
+
+ xfer += oprot->writeFieldBegin("path_in_schema", ::apache::thrift::protocol::T_LIST, 3);
+ {
+ xfer += oprot->writeListBegin(::apache::thrift::protocol::T_STRING, static_cast<uint32_t>(this->path_in_schema.size()));
+ std::vector<std::string> ::const_iterator _iter105;
+ for (_iter105 = this->path_in_schema.begin(); _iter105 != this->path_in_schema.end(); ++_iter105)
+ {
+ xfer += oprot->writeString((*_iter105));
+ }
+ xfer += oprot->writeListEnd();
+ }
+ xfer += oprot->writeFieldEnd();
+
+ xfer += oprot->writeFieldBegin("codec", ::apache::thrift::protocol::T_I32, 4);
+ xfer += oprot->writeI32((int32_t)this->codec);
+ xfer += oprot->writeFieldEnd();
+
+ xfer += oprot->writeFieldBegin("num_values", ::apache::thrift::protocol::T_I64, 5);
+ xfer += oprot->writeI64(this->num_values);
+ xfer += oprot->writeFieldEnd();
+
+ xfer += oprot->writeFieldBegin("total_uncompressed_size", ::apache::thrift::protocol::T_I64, 6);
+ xfer += oprot->writeI64(this->total_uncompressed_size);
+ xfer += oprot->writeFieldEnd();
+
+ xfer += oprot->writeFieldBegin("total_compressed_size", ::apache::thrift::protocol::T_I64, 7);
+ xfer += oprot->writeI64(this->total_compressed_size);
+ xfer += oprot->writeFieldEnd();
+
+ if (this->__isset.key_value_metadata) {
+ xfer += oprot->writeFieldBegin("key_value_metadata", ::apache::thrift::protocol::T_LIST, 8);
+ {
+ xfer += oprot->writeListBegin(::apache::thrift::protocol::T_STRUCT, static_cast<uint32_t>(this->key_value_metadata.size()));
+ std::vector<KeyValue> ::const_iterator _iter106;
+ for (_iter106 = this->key_value_metadata.begin(); _iter106 != this->key_value_metadata.end(); ++_iter106)
+ {
+ xfer += (*_iter106).write(oprot);
+ }
+ xfer += oprot->writeListEnd();
+ }
+ xfer += oprot->writeFieldEnd();
+ }
+ xfer += oprot->writeFieldBegin("data_page_offset", ::apache::thrift::protocol::T_I64, 9);
+ xfer += oprot->writeI64(this->data_page_offset);
+ xfer += oprot->writeFieldEnd();
+
+ if (this->__isset.index_page_offset) {
+ xfer += oprot->writeFieldBegin("index_page_offset", ::apache::thrift::protocol::T_I64, 10);
+ xfer += oprot->writeI64(this->index_page_offset);
+ xfer += oprot->writeFieldEnd();
+ }
+ if (this->__isset.dictionary_page_offset) {
+ xfer += oprot->writeFieldBegin("dictionary_page_offset", ::apache::thrift::protocol::T_I64, 11);
+ xfer += oprot->writeI64(this->dictionary_page_offset);
+ xfer += oprot->writeFieldEnd();
+ }
+ if (this->__isset.statistics) {
+ xfer += oprot->writeFieldBegin("statistics", ::apache::thrift::protocol::T_STRUCT, 12);
+ xfer += this->statistics.write(oprot);
+ xfer += oprot->writeFieldEnd();
+ }
+ if (this->__isset.encoding_stats) {
+ xfer += oprot->writeFieldBegin("encoding_stats", ::apache::thrift::protocol::T_LIST, 13);
+ {
+ xfer += oprot->writeListBegin(::apache::thrift::protocol::T_STRUCT, static_cast<uint32_t>(this->encoding_stats.size()));
+ std::vector<PageEncodingStats> ::const_iterator _iter107;
+ for (_iter107 = this->encoding_stats.begin(); _iter107 != this->encoding_stats.end(); ++_iter107)
+ {
+ xfer += (*_iter107).write(oprot);
+ }
+ xfer += oprot->writeListEnd();
+ }
+ xfer += oprot->writeFieldEnd();
+ }
+ if (this->__isset.bloom_filter_offset) {
+ xfer += oprot->writeFieldBegin("bloom_filter_offset", ::apache::thrift::protocol::T_I64, 14);
+ xfer += oprot->writeI64(this->bloom_filter_offset);
+ xfer += oprot->writeFieldEnd();
+ }
+ xfer += oprot->writeFieldStop();
+ xfer += oprot->writeStructEnd();
+ return xfer;
+}
+
+void swap(ColumnMetaData &a, ColumnMetaData &b) {
+ using ::std::swap;
+ swap(a.type, b.type);
+ swap(a.encodings, b.encodings);
+ swap(a.path_in_schema, b.path_in_schema);
+ swap(a.codec, b.codec);
+ swap(a.num_values, b.num_values);
+ swap(a.total_uncompressed_size, b.total_uncompressed_size);
+ swap(a.total_compressed_size, b.total_compressed_size);
+ swap(a.key_value_metadata, b.key_value_metadata);
+ swap(a.data_page_offset, b.data_page_offset);
+ swap(a.index_page_offset, b.index_page_offset);
+ swap(a.dictionary_page_offset, b.dictionary_page_offset);
+ swap(a.statistics, b.statistics);
+ swap(a.encoding_stats, b.encoding_stats);
+ swap(a.bloom_filter_offset, b.bloom_filter_offset);
+ swap(a.__isset, b.__isset);
+}
+
+ColumnMetaData::ColumnMetaData(const ColumnMetaData& other108) {
+ type = other108.type;
+ encodings = other108.encodings;
+ path_in_schema = other108.path_in_schema;
+ codec = other108.codec;
+ num_values = other108.num_values;
+ total_uncompressed_size = other108.total_uncompressed_size;
+ total_compressed_size = other108.total_compressed_size;
+ key_value_metadata = other108.key_value_metadata;
+ data_page_offset = other108.data_page_offset;
+ index_page_offset = other108.index_page_offset;
+ dictionary_page_offset = other108.dictionary_page_offset;
+ statistics = other108.statistics;
+ encoding_stats = other108.encoding_stats;
+ bloom_filter_offset = other108.bloom_filter_offset;
+ __isset = other108.__isset;
+}
+ColumnMetaData& ColumnMetaData::operator=(const ColumnMetaData& other109) {
+ type = other109.type;
+ encodings = other109.encodings;
+ path_in_schema = other109.path_in_schema;
+ codec = other109.codec;
+ num_values = other109.num_values;
+ total_uncompressed_size = other109.total_uncompressed_size;
+ total_compressed_size = other109.total_compressed_size;
+ key_value_metadata = other109.key_value_metadata;
+ data_page_offset = other109.data_page_offset;
+ index_page_offset = other109.index_page_offset;
+ dictionary_page_offset = other109.dictionary_page_offset;
+ statistics = other109.statistics;
+ encoding_stats = other109.encoding_stats;
+ bloom_filter_offset = other109.bloom_filter_offset;
+ __isset = other109.__isset;
+ return *this;
+}
+void ColumnMetaData::printTo(std::ostream& out) const {
+ using ::apache::thrift::to_string;
+ out << "ColumnMetaData(";
+ out << "type=" << to_string(type);
+ out << ", " << "encodings=" << to_string(encodings);
+ out << ", " << "path_in_schema=" << to_string(path_in_schema);
+ out << ", " << "codec=" << to_string(codec);
+ out << ", " << "num_values=" << to_string(num_values);
+ out << ", " << "total_uncompressed_size=" << to_string(total_uncompressed_size);
+ out << ", " << "total_compressed_size=" << to_string(total_compressed_size);
+ out << ", " << "key_value_metadata="; (__isset.key_value_metadata ? (out << to_string(key_value_metadata)) : (out << "<null>"));
+ out << ", " << "data_page_offset=" << to_string(data_page_offset);
+ out << ", " << "index_page_offset="; (__isset.index_page_offset ? (out << to_string(index_page_offset)) : (out << "<null>"));
+ out << ", " << "dictionary_page_offset="; (__isset.dictionary_page_offset ? (out << to_string(dictionary_page_offset)) : (out << "<null>"));
+ out << ", " << "statistics="; (__isset.statistics ? (out << to_string(statistics)) : (out << "<null>"));
+ out << ", " << "encoding_stats="; (__isset.encoding_stats ? (out << to_string(encoding_stats)) : (out << "<null>"));
+ out << ", " << "bloom_filter_offset="; (__isset.bloom_filter_offset ? (out << to_string(bloom_filter_offset)) : (out << "<null>"));
+ out << ")";
+}
+
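// ---------------------------------------------------------------------------
// Editorial sketch (not part of the generated diff): a minimal round-trip for
// the ColumnMetaData struct above, driving the generated write()/read() pair
// through a Thrift compact protocol over an in-memory transport. Assumed
// names: the generated header "parquet_types.h" and the namespace
// parquet::format (as in Arrow's bundled copy); adjust both to your build.
#include "parquet_types.h"
#include <thrift/protocol/TCompactProtocol.h>
#include <thrift/transport/TBufferTransports.h>
#include <cassert>
#include <memory>

inline void column_meta_data_round_trip() {
  using apache::thrift::protocol::TCompactProtocol;
  using apache::thrift::transport::TMemoryBuffer;

  parquet::format::ColumnMetaData md;
  md.__set_type(parquet::format::Type::INT32);             // physical type
  md.__set_encodings({parquet::format::Encoding::PLAIN});
  md.__set_path_in_schema({"a", "b"});
  md.__set_codec(parquet::format::CompressionCodec::SNAPPY);
  md.__set_num_values(100);
  md.__set_total_uncompressed_size(4096);
  md.__set_total_compressed_size(1024);
  md.__set_data_page_offset(0);

  auto buffer = std::make_shared<TMemoryBuffer>();
  TCompactProtocol proto(buffer);
  md.write(&proto);        // optional fields are emitted only if __isset

  parquet::format::ColumnMetaData decoded;
  decoded.read(&proto);    // reads back from the same in-memory buffer
  assert(decoded.num_values == 100);
}
// ---------------------------------------------------------------------------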
+
+EncryptionWithFooterKey::~EncryptionWithFooterKey() noexcept {
+}
+
+std::ostream& operator<<(std::ostream& out, const EncryptionWithFooterKey& obj)
+{
+ obj.printTo(out);
+ return out;
+}
+
+
+uint32_t EncryptionWithFooterKey::read(::apache::thrift::protocol::TProtocol* iprot) {
+
+ ::apache::thrift::protocol::TInputRecursionTracker tracker(*iprot);
+ uint32_t xfer = 0;
+ std::string fname;
+ ::apache::thrift::protocol::TType ftype;
+ int16_t fid;
+
+ xfer += iprot->readStructBegin(fname);
+
+ using ::apache::thrift::protocol::TProtocolException;
+
+
+ while (true)
+ {
+ xfer += iprot->readFieldBegin(fname, ftype, fid);
+ if (ftype == ::apache::thrift::protocol::T_STOP) {
+ break;
+ }
+ xfer += iprot->skip(ftype);
+ xfer += iprot->readFieldEnd();
+ }
+
+ xfer += iprot->readStructEnd();
+
+ return xfer;
+}
+
+uint32_t EncryptionWithFooterKey::write(::apache::thrift::protocol::TProtocol* oprot) const {
+ uint32_t xfer = 0;
+ ::apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot);
+ xfer += oprot->writeStructBegin("EncryptionWithFooterKey");
+
+ xfer += oprot->writeFieldStop();
+ xfer += oprot->writeStructEnd();
+ return xfer;
+}
+
+void swap(EncryptionWithFooterKey &a, EncryptionWithFooterKey &b) {
+ using ::std::swap;
+ (void) a;
+ (void) b;
+}
+
+EncryptionWithFooterKey::EncryptionWithFooterKey(const EncryptionWithFooterKey& other110) {
+ (void) other110;
+}
+EncryptionWithFooterKey& EncryptionWithFooterKey::operator=(const EncryptionWithFooterKey& other111) {
+ (void) other111;
+ return *this;
+}
+void EncryptionWithFooterKey::printTo(std::ostream& out) const {
+ using ::apache::thrift::to_string;
+ out << "EncryptionWithFooterKey(";
+ out << ")";
+}
+
+
+EncryptionWithColumnKey::~EncryptionWithColumnKey() noexcept {
+}
+
+
+void EncryptionWithColumnKey::__set_path_in_schema(const std::vector<std::string> & val) {
+ this->path_in_schema = val;
+}
+
+void EncryptionWithColumnKey::__set_key_metadata(const std::string& val) {
+ this->key_metadata = val;
+  __isset.key_metadata = true;
+}
+std::ostream& operator<<(std::ostream& out, const EncryptionWithColumnKey& obj)
+{
+ obj.printTo(out);
+ return out;
+}
+
+
+uint32_t EncryptionWithColumnKey::read(::apache::thrift::protocol::TProtocol* iprot) {
+
+ ::apache::thrift::protocol::TInputRecursionTracker tracker(*iprot);
+ uint32_t xfer = 0;
+ std::string fname;
+ ::apache::thrift::protocol::TType ftype;
+ int16_t fid;
+
+ xfer += iprot->readStructBegin(fname);
+
+ using ::apache::thrift::protocol::TProtocolException;
+
+ bool isset_path_in_schema = false;
+
+ while (true)
+ {
+ xfer += iprot->readFieldBegin(fname, ftype, fid);
+ if (ftype == ::apache::thrift::protocol::T_STOP) {
+ break;
+ }
+ switch (fid)
+ {
+ case 1:
+ if (ftype == ::apache::thrift::protocol::T_LIST) {
+ {
+ this->path_in_schema.clear();
+ uint32_t _size112;
+ ::apache::thrift::protocol::TType _etype115;
+ xfer += iprot->readListBegin(_etype115, _size112);
+ this->path_in_schema.resize(_size112);
+ uint32_t _i116;
+ for (_i116 = 0; _i116 < _size112; ++_i116)
+ {
+ xfer += iprot->readString(this->path_in_schema[_i116]);
+ }
+ xfer += iprot->readListEnd();
+ }
+ isset_path_in_schema = true;
+ } else {
+ xfer += iprot->skip(ftype);
+ }
+ break;
+ case 2:
+ if (ftype == ::apache::thrift::protocol::T_STRING) {
+ xfer += iprot->readBinary(this->key_metadata);
+ this->__isset.key_metadata = true;
+ } else {
+ xfer += iprot->skip(ftype);
+ }
+ break;
+ default:
+ xfer += iprot->skip(ftype);
+ break;
+ }
+ xfer += iprot->readFieldEnd();
+ }
+
+ xfer += iprot->readStructEnd();
+
+ if (!isset_path_in_schema)
+ throw TProtocolException(TProtocolException::INVALID_DATA);
+ return xfer;
+}
+
+uint32_t EncryptionWithColumnKey::write(::apache::thrift::protocol::TProtocol* oprot) const {
+ uint32_t xfer = 0;
+ ::apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot);
+ xfer += oprot->writeStructBegin("EncryptionWithColumnKey");
+
+ xfer += oprot->writeFieldBegin("path_in_schema", ::apache::thrift::protocol::T_LIST, 1);
+ {
+ xfer += oprot->writeListBegin(::apache::thrift::protocol::T_STRING, static_cast<uint32_t>(this->path_in_schema.size()));
+ std::vector<std::string> ::const_iterator _iter117;
+ for (_iter117 = this->path_in_schema.begin(); _iter117 != this->path_in_schema.end(); ++_iter117)
+ {
+ xfer += oprot->writeString((*_iter117));
+ }
+ xfer += oprot->writeListEnd();
+ }
+ xfer += oprot->writeFieldEnd();
+
+ if (this->__isset.key_metadata) {
+ xfer += oprot->writeFieldBegin("key_metadata", ::apache::thrift::protocol::T_STRING, 2);
+ xfer += oprot->writeBinary(this->key_metadata);
+ xfer += oprot->writeFieldEnd();
+ }
+ xfer += oprot->writeFieldStop();
+ xfer += oprot->writeStructEnd();
+ return xfer;
+}
+
+void swap(EncryptionWithColumnKey &a, EncryptionWithColumnKey &b) {
+ using ::std::swap;
+ swap(a.path_in_schema, b.path_in_schema);
+ swap(a.key_metadata, b.key_metadata);
+ swap(a.__isset, b.__isset);
+}
+
+EncryptionWithColumnKey::EncryptionWithColumnKey(const EncryptionWithColumnKey& other118) {
+ path_in_schema = other118.path_in_schema;
+ key_metadata = other118.key_metadata;
+ __isset = other118.__isset;
+}
+EncryptionWithColumnKey& EncryptionWithColumnKey::operator=(const EncryptionWithColumnKey& other119) {
+ path_in_schema = other119.path_in_schema;
+ key_metadata = other119.key_metadata;
+ __isset = other119.__isset;
+ return *this;
+}
+void EncryptionWithColumnKey::printTo(std::ostream& out) const {
+ using ::apache::thrift::to_string;
+ out << "EncryptionWithColumnKey(";
+ out << "path_in_schema=" << to_string(path_in_schema);
+ out << ", " << "key_metadata="; (__isset.key_metadata ? (out << to_string(key_metadata)) : (out << "<null>"));
+ out << ")";
+}
+
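// ---------------------------------------------------------------------------
// Editorial sketch (not part of the generated diff): optional-field handling.
// key_metadata is serialized only while __isset.key_metadata is true, which
// the generated __set_key_metadata() raises implicitly; assigning the member
// directly would leave the flag false and write() would silently drop the
// field. Namespace parquet::format is assumed, as above.
inline parquet::format::EncryptionWithColumnKey make_column_key_metadata() {
  parquet::format::EncryptionWithColumnKey k;
  k.__set_path_in_schema({"nested", "secret_column"});  // required list field
  k.__set_key_metadata("key-ref-42");  // also sets __isset.key_metadata
  // k.key_metadata = "key-ref-42";    // would NOT mark the field as set
  return k;
}
// ---------------------------------------------------------------------------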
+
+ColumnCryptoMetaData::~ColumnCryptoMetaData() noexcept {
+}
+
+
+void ColumnCryptoMetaData::__set_ENCRYPTION_WITH_FOOTER_KEY(const EncryptionWithFooterKey& val) {
+ this->ENCRYPTION_WITH_FOOTER_KEY = val;
+  __isset.ENCRYPTION_WITH_FOOTER_KEY = true;
+}
+
+void ColumnCryptoMetaData::__set_ENCRYPTION_WITH_COLUMN_KEY(const EncryptionWithColumnKey& val) {
+ this->ENCRYPTION_WITH_COLUMN_KEY = val;
+  __isset.ENCRYPTION_WITH_COLUMN_KEY = true;
+}
+std::ostream& operator<<(std::ostream& out, const ColumnCryptoMetaData& obj)
+{
+ obj.printTo(out);
+ return out;
+}
+
+
+uint32_t ColumnCryptoMetaData::read(::apache::thrift::protocol::TProtocol* iprot) {
+
+ ::apache::thrift::protocol::TInputRecursionTracker tracker(*iprot);
+ uint32_t xfer = 0;
+ std::string fname;
+ ::apache::thrift::protocol::TType ftype;
+ int16_t fid;
+
+ xfer += iprot->readStructBegin(fname);
+
+ using ::apache::thrift::protocol::TProtocolException;
+
+
+ while (true)
+ {
+ xfer += iprot->readFieldBegin(fname, ftype, fid);
+ if (ftype == ::apache::thrift::protocol::T_STOP) {
+ break;
+ }
+ switch (fid)
+ {
+ case 1:
+ if (ftype == ::apache::thrift::protocol::T_STRUCT) {
+ xfer += this->ENCRYPTION_WITH_FOOTER_KEY.read(iprot);
+ this->__isset.ENCRYPTION_WITH_FOOTER_KEY = true;
+ } else {
+ xfer += iprot->skip(ftype);
+ }
+ break;
+ case 2:
+ if (ftype == ::apache::thrift::protocol::T_STRUCT) {
+ xfer += this->ENCRYPTION_WITH_COLUMN_KEY.read(iprot);
+ this->__isset.ENCRYPTION_WITH_COLUMN_KEY = true;
+ } else {
+ xfer += iprot->skip(ftype);
+ }
+ break;
+ default:
+ xfer += iprot->skip(ftype);
+ break;
+ }
+ xfer += iprot->readFieldEnd();
+ }
+
+ xfer += iprot->readStructEnd();
+
+ return xfer;
+}
+
+uint32_t ColumnCryptoMetaData::write(::apache::thrift::protocol::TProtocol* oprot) const {
+ uint32_t xfer = 0;
+ ::apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot);
+ xfer += oprot->writeStructBegin("ColumnCryptoMetaData");
+
+ if (this->__isset.ENCRYPTION_WITH_FOOTER_KEY) {
+ xfer += oprot->writeFieldBegin("ENCRYPTION_WITH_FOOTER_KEY", ::apache::thrift::protocol::T_STRUCT, 1);
+ xfer += this->ENCRYPTION_WITH_FOOTER_KEY.write(oprot);
+ xfer += oprot->writeFieldEnd();
+ }
+ if (this->__isset.ENCRYPTION_WITH_COLUMN_KEY) {
+ xfer += oprot->writeFieldBegin("ENCRYPTION_WITH_COLUMN_KEY", ::apache::thrift::protocol::T_STRUCT, 2);
+ xfer += this->ENCRYPTION_WITH_COLUMN_KEY.write(oprot);
+ xfer += oprot->writeFieldEnd();
+ }
+ xfer += oprot->writeFieldStop();
+ xfer += oprot->writeStructEnd();
+ return xfer;
+}
+
+void swap(ColumnCryptoMetaData &a, ColumnCryptoMetaData &b) {
+ using ::std::swap;
+ swap(a.ENCRYPTION_WITH_FOOTER_KEY, b.ENCRYPTION_WITH_FOOTER_KEY);
+ swap(a.ENCRYPTION_WITH_COLUMN_KEY, b.ENCRYPTION_WITH_COLUMN_KEY);
+ swap(a.__isset, b.__isset);
+}
+
+ColumnCryptoMetaData::ColumnCryptoMetaData(const ColumnCryptoMetaData& other120) {
+ ENCRYPTION_WITH_FOOTER_KEY = other120.ENCRYPTION_WITH_FOOTER_KEY;
+ ENCRYPTION_WITH_COLUMN_KEY = other120.ENCRYPTION_WITH_COLUMN_KEY;
+ __isset = other120.__isset;
+}
+ColumnCryptoMetaData& ColumnCryptoMetaData::operator=(const ColumnCryptoMetaData& other121) {
+ ENCRYPTION_WITH_FOOTER_KEY = other121.ENCRYPTION_WITH_FOOTER_KEY;
+ ENCRYPTION_WITH_COLUMN_KEY = other121.ENCRYPTION_WITH_COLUMN_KEY;
+ __isset = other121.__isset;
+ return *this;
+}
+void ColumnCryptoMetaData::printTo(std::ostream& out) const {
+ using ::apache::thrift::to_string;
+ out << "ColumnCryptoMetaData(";
+ out << "ENCRYPTION_WITH_FOOTER_KEY="; (__isset.ENCRYPTION_WITH_FOOTER_KEY ? (out << to_string(ENCRYPTION_WITH_FOOTER_KEY)) : (out << "<null>"));
+ out << ", " << "ENCRYPTION_WITH_COLUMN_KEY="; (__isset.ENCRYPTION_WITH_COLUMN_KEY ? (out << to_string(ENCRYPTION_WITH_COLUMN_KEY)) : (out << "<null>"));
+ out << ")";
+}
+
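// ---------------------------------------------------------------------------
// Editorial sketch (not part of the generated diff): ColumnCryptoMetaData is
// a Thrift union, so exactly one member is expected to be set, but the
// generated code does not enforce exclusivity. Callers conventionally probe
// the __isset flags, e.g.:
#include <stdexcept>

inline bool uses_footer_key(const parquet::format::ColumnCryptoMetaData& c) {
  if (c.__isset.ENCRYPTION_WITH_FOOTER_KEY) return true;
  if (c.__isset.ENCRYPTION_WITH_COLUMN_KEY) return false;
  throw std::runtime_error("ColumnCryptoMetaData: no variant set");
}
// ---------------------------------------------------------------------------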
+
+ColumnChunk::~ColumnChunk() noexcept {
+}
+
+
+void ColumnChunk::__set_file_path(const std::string& val) {
+ this->file_path = val;
+  __isset.file_path = true;
+}
+
+void ColumnChunk::__set_file_offset(const int64_t val) {
+ this->file_offset = val;
+}
+
+void ColumnChunk::__set_meta_data(const ColumnMetaData& val) {
+ this->meta_data = val;
+  __isset.meta_data = true;
+}
+
+void ColumnChunk::__set_offset_index_offset(const int64_t val) {
+ this->offset_index_offset = val;
+  __isset.offset_index_offset = true;
+}
+
+void ColumnChunk::__set_offset_index_length(const int32_t val) {
+ this->offset_index_length = val;
+  __isset.offset_index_length = true;
+}
+
+void ColumnChunk::__set_column_index_offset(const int64_t val) {
+ this->column_index_offset = val;
+  __isset.column_index_offset = true;
+}
+
+void ColumnChunk::__set_column_index_length(const int32_t val) {
+ this->column_index_length = val;
+  __isset.column_index_length = true;
+}
+
+void ColumnChunk::__set_crypto_metadata(const ColumnCryptoMetaData& val) {
+ this->crypto_metadata = val;
+  __isset.crypto_metadata = true;
+}
+
+void ColumnChunk::__set_encrypted_column_metadata(const std::string& val) {
+ this->encrypted_column_metadata = val;
+  __isset.encrypted_column_metadata = true;
+}
+std::ostream& operator<<(std::ostream& out, const ColumnChunk& obj)
+{
+ obj.printTo(out);
+ return out;
+}
+
+
+uint32_t ColumnChunk::read(::apache::thrift::protocol::TProtocol* iprot) {
+
+ ::apache::thrift::protocol::TInputRecursionTracker tracker(*iprot);
+ uint32_t xfer = 0;
+ std::string fname;
+ ::apache::thrift::protocol::TType ftype;
+ int16_t fid;
+
+ xfer += iprot->readStructBegin(fname);
+
+ using ::apache::thrift::protocol::TProtocolException;
+
+ bool isset_file_offset = false;
+
+ while (true)
+ {
+ xfer += iprot->readFieldBegin(fname, ftype, fid);
+ if (ftype == ::apache::thrift::protocol::T_STOP) {
+ break;
+ }
+ switch (fid)
+ {
+ case 1:
+ if (ftype == ::apache::thrift::protocol::T_STRING) {
+ xfer += iprot->readString(this->file_path);
+ this->__isset.file_path = true;
+ } else {
+ xfer += iprot->skip(ftype);
+ }
+ break;
+ case 2:
+ if (ftype == ::apache::thrift::protocol::T_I64) {
+ xfer += iprot->readI64(this->file_offset);
+ isset_file_offset = true;
+ } else {
+ xfer += iprot->skip(ftype);
+ }
+ break;
+ case 3:
+ if (ftype == ::apache::thrift::protocol::T_STRUCT) {
+ xfer += this->meta_data.read(iprot);
+ this->__isset.meta_data = true;
+ } else {
+ xfer += iprot->skip(ftype);
+ }
+ break;
+ case 4:
+ if (ftype == ::apache::thrift::protocol::T_I64) {
+ xfer += iprot->readI64(this->offset_index_offset);
+ this->__isset.offset_index_offset = true;
+ } else {
+ xfer += iprot->skip(ftype);
+ }
+ break;
+ case 5:
+ if (ftype == ::apache::thrift::protocol::T_I32) {
+ xfer += iprot->readI32(this->offset_index_length);
+ this->__isset.offset_index_length = true;
+ } else {
+ xfer += iprot->skip(ftype);
+ }
+ break;
+ case 6:
+ if (ftype == ::apache::thrift::protocol::T_I64) {
+ xfer += iprot->readI64(this->column_index_offset);
+ this->__isset.column_index_offset = true;
+ } else {
+ xfer += iprot->skip(ftype);
+ }
+ break;
+ case 7:
+ if (ftype == ::apache::thrift::protocol::T_I32) {
+ xfer += iprot->readI32(this->column_index_length);
+ this->__isset.column_index_length = true;
+ } else {
+ xfer += iprot->skip(ftype);
+ }
+ break;
+ case 8:
+ if (ftype == ::apache::thrift::protocol::T_STRUCT) {
+ xfer += this->crypto_metadata.read(iprot);
+ this->__isset.crypto_metadata = true;
+ } else {
+ xfer += iprot->skip(ftype);
+ }
+ break;
+ case 9:
+ if (ftype == ::apache::thrift::protocol::T_STRING) {
+ xfer += iprot->readBinary(this->encrypted_column_metadata);
+ this->__isset.encrypted_column_metadata = true;
+ } else {
+ xfer += iprot->skip(ftype);
+ }
+ break;
+ default:
+ xfer += iprot->skip(ftype);
+ break;
+ }
+ xfer += iprot->readFieldEnd();
+ }
+
+ xfer += iprot->readStructEnd();
+
+ if (!isset_file_offset)
+ throw TProtocolException(TProtocolException::INVALID_DATA);
+ return xfer;
+}
+
+uint32_t ColumnChunk::write(::apache::thrift::protocol::TProtocol* oprot) const {
+ uint32_t xfer = 0;
+ ::apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot);
+ xfer += oprot->writeStructBegin("ColumnChunk");
+
+ if (this->__isset.file_path) {
+ xfer += oprot->writeFieldBegin("file_path", ::apache::thrift::protocol::T_STRING, 1);
+ xfer += oprot->writeString(this->file_path);
+ xfer += oprot->writeFieldEnd();
+ }
+ xfer += oprot->writeFieldBegin("file_offset", ::apache::thrift::protocol::T_I64, 2);
+ xfer += oprot->writeI64(this->file_offset);
+ xfer += oprot->writeFieldEnd();
+
+ if (this->__isset.meta_data) {
+ xfer += oprot->writeFieldBegin("meta_data", ::apache::thrift::protocol::T_STRUCT, 3);
+ xfer += this->meta_data.write(oprot);
+ xfer += oprot->writeFieldEnd();
+ }
+ if (this->__isset.offset_index_offset) {
+ xfer += oprot->writeFieldBegin("offset_index_offset", ::apache::thrift::protocol::T_I64, 4);
+ xfer += oprot->writeI64(this->offset_index_offset);
+ xfer += oprot->writeFieldEnd();
+ }
+ if (this->__isset.offset_index_length) {
+ xfer += oprot->writeFieldBegin("offset_index_length", ::apache::thrift::protocol::T_I32, 5);
+ xfer += oprot->writeI32(this->offset_index_length);
+ xfer += oprot->writeFieldEnd();
+ }
+ if (this->__isset.column_index_offset) {
+ xfer += oprot->writeFieldBegin("column_index_offset", ::apache::thrift::protocol::T_I64, 6);
+ xfer += oprot->writeI64(this->column_index_offset);
+ xfer += oprot->writeFieldEnd();
+ }
+ if (this->__isset.column_index_length) {
+ xfer += oprot->writeFieldBegin("column_index_length", ::apache::thrift::protocol::T_I32, 7);
+ xfer += oprot->writeI32(this->column_index_length);
+ xfer += oprot->writeFieldEnd();
+ }
+ if (this->__isset.crypto_metadata) {
+ xfer += oprot->writeFieldBegin("crypto_metadata", ::apache::thrift::protocol::T_STRUCT, 8);
+ xfer += this->crypto_metadata.write(oprot);
+ xfer += oprot->writeFieldEnd();
+ }
+ if (this->__isset.encrypted_column_metadata) {
+ xfer += oprot->writeFieldBegin("encrypted_column_metadata", ::apache::thrift::protocol::T_STRING, 9);
+ xfer += oprot->writeBinary(this->encrypted_column_metadata);
+ xfer += oprot->writeFieldEnd();
+ }
+ xfer += oprot->writeFieldStop();
+ xfer += oprot->writeStructEnd();
+ return xfer;
+}
+
+void swap(ColumnChunk &a, ColumnChunk &b) {
+ using ::std::swap;
+ swap(a.file_path, b.file_path);
+ swap(a.file_offset, b.file_offset);
+ swap(a.meta_data, b.meta_data);
+ swap(a.offset_index_offset, b.offset_index_offset);
+ swap(a.offset_index_length, b.offset_index_length);
+ swap(a.column_index_offset, b.column_index_offset);
+ swap(a.column_index_length, b.column_index_length);
+ swap(a.crypto_metadata, b.crypto_metadata);
+ swap(a.encrypted_column_metadata, b.encrypted_column_metadata);
+ swap(a.__isset, b.__isset);
+}
+
+ColumnChunk::ColumnChunk(const ColumnChunk& other122) {
+ file_path = other122.file_path;
+ file_offset = other122.file_offset;
+ meta_data = other122.meta_data;
+ offset_index_offset = other122.offset_index_offset;
+ offset_index_length = other122.offset_index_length;
+ column_index_offset = other122.column_index_offset;
+ column_index_length = other122.column_index_length;
+ crypto_metadata = other122.crypto_metadata;
+ encrypted_column_metadata = other122.encrypted_column_metadata;
+ __isset = other122.__isset;
+}
+ColumnChunk& ColumnChunk::operator=(const ColumnChunk& other123) {
+ file_path = other123.file_path;
+ file_offset = other123.file_offset;
+ meta_data = other123.meta_data;
+ offset_index_offset = other123.offset_index_offset;
+ offset_index_length = other123.offset_index_length;
+ column_index_offset = other123.column_index_offset;
+ column_index_length = other123.column_index_length;
+ crypto_metadata = other123.crypto_metadata;
+ encrypted_column_metadata = other123.encrypted_column_metadata;
+ __isset = other123.__isset;
+ return *this;
+}
+void ColumnChunk::printTo(std::ostream& out) const {
+ using ::apache::thrift::to_string;
+ out << "ColumnChunk(";
+ out << "file_path="; (__isset.file_path ? (out << to_string(file_path)) : (out << "<null>"));
+ out << ", " << "file_offset=" << to_string(file_offset);
+ out << ", " << "meta_data="; (__isset.meta_data ? (out << to_string(meta_data)) : (out << "<null>"));
+ out << ", " << "offset_index_offset="; (__isset.offset_index_offset ? (out << to_string(offset_index_offset)) : (out << "<null>"));
+ out << ", " << "offset_index_length="; (__isset.offset_index_length ? (out << to_string(offset_index_length)) : (out << "<null>"));
+ out << ", " << "column_index_offset="; (__isset.column_index_offset ? (out << to_string(column_index_offset)) : (out << "<null>"));
+ out << ", " << "column_index_length="; (__isset.column_index_length ? (out << to_string(column_index_length)) : (out << "<null>"));
+ out << ", " << "crypto_metadata="; (__isset.crypto_metadata ? (out << to_string(crypto_metadata)) : (out << "<null>"));
+ out << ", " << "encrypted_column_metadata="; (__isset.encrypted_column_metadata ? (out << to_string(encrypted_column_metadata)) : (out << "<null>"));
+ out << ")";
+}
+
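// ---------------------------------------------------------------------------
// Editorial sketch (not part of the generated diff): file_offset is the only
// required field of ColumnChunk -- read() above throws INVALID_DATA without
// it -- while every other field is optional and gated on __isset. A minimal
// well-formed chunk therefore needs a single setter:
inline parquet::format::ColumnChunk make_minimal_chunk(int64_t file_offset) {
  parquet::format::ColumnChunk cc;
  cc.__set_file_offset(file_offset);  // required: no __isset flag involved
  return cc;
}
// ---------------------------------------------------------------------------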
+
+RowGroup::~RowGroup() noexcept {
+}
+
+
+void RowGroup::__set_columns(const std::vector<ColumnChunk> & val) {
+ this->columns = val;
+}
+
+void RowGroup::__set_total_byte_size(const int64_t val) {
+ this->total_byte_size = val;
+}
+
+void RowGroup::__set_num_rows(const int64_t val) {
+ this->num_rows = val;
+}
+
+void RowGroup::__set_sorting_columns(const std::vector<SortingColumn> & val) {
+ this->sorting_columns = val;
+  __isset.sorting_columns = true;
+}
+
+void RowGroup::__set_file_offset(const int64_t val) {
+ this->file_offset = val;
+  __isset.file_offset = true;
+}
+
+void RowGroup::__set_total_compressed_size(const int64_t val) {
+ this->total_compressed_size = val;
+  __isset.total_compressed_size = true;
+}
+
+void RowGroup::__set_ordinal(const int16_t val) {
+ this->ordinal = val;
+  __isset.ordinal = true;
+}
+std::ostream& operator<<(std::ostream& out, const RowGroup& obj)
+{
+ obj.printTo(out);
+ return out;
+}
+
+
+uint32_t RowGroup::read(::apache::thrift::protocol::TProtocol* iprot) {
+
+ ::apache::thrift::protocol::TInputRecursionTracker tracker(*iprot);
+ uint32_t xfer = 0;
+ std::string fname;
+ ::apache::thrift::protocol::TType ftype;
+ int16_t fid;
+
+ xfer += iprot->readStructBegin(fname);
+
+ using ::apache::thrift::protocol::TProtocolException;
+
+ bool isset_columns = false;
+ bool isset_total_byte_size = false;
+ bool isset_num_rows = false;
+
+ while (true)
+ {
+ xfer += iprot->readFieldBegin(fname, ftype, fid);
+ if (ftype == ::apache::thrift::protocol::T_STOP) {
+ break;
+ }
+ switch (fid)
+ {
+ case 1:
+ if (ftype == ::apache::thrift::protocol::T_LIST) {
+ {
+ this->columns.clear();
+ uint32_t _size124;
+ ::apache::thrift::protocol::TType _etype127;
+ xfer += iprot->readListBegin(_etype127, _size124);
+ this->columns.resize(_size124);
+ uint32_t _i128;
+ for (_i128 = 0; _i128 < _size124; ++_i128)
+ {
+ xfer += this->columns[_i128].read(iprot);
+ }
+ xfer += iprot->readListEnd();
+ }
+ isset_columns = true;
+ } else {
+ xfer += iprot->skip(ftype);
+ }
+ break;
+ case 2:
+ if (ftype == ::apache::thrift::protocol::T_I64) {
+ xfer += iprot->readI64(this->total_byte_size);
+ isset_total_byte_size = true;
+ } else {
+ xfer += iprot->skip(ftype);
+ }
+ break;
+ case 3:
+ if (ftype == ::apache::thrift::protocol::T_I64) {
+ xfer += iprot->readI64(this->num_rows);
+ isset_num_rows = true;
+ } else {
+ xfer += iprot->skip(ftype);
+ }
+ break;
+ case 4:
+ if (ftype == ::apache::thrift::protocol::T_LIST) {
+ {
+ this->sorting_columns.clear();
+ uint32_t _size129;
+ ::apache::thrift::protocol::TType _etype132;
+ xfer += iprot->readListBegin(_etype132, _size129);
+ this->sorting_columns.resize(_size129);
+ uint32_t _i133;
+ for (_i133 = 0; _i133 < _size129; ++_i133)
+ {
+ xfer += this->sorting_columns[_i133].read(iprot);
+ }
+ xfer += iprot->readListEnd();
+ }
+ this->__isset.sorting_columns = true;
+ } else {
+ xfer += iprot->skip(ftype);
+ }
+ break;
+ case 5:
+ if (ftype == ::apache::thrift::protocol::T_I64) {
+ xfer += iprot->readI64(this->file_offset);
+ this->__isset.file_offset = true;
+ } else {
+ xfer += iprot->skip(ftype);
+ }
+ break;
+ case 6:
+ if (ftype == ::apache::thrift::protocol::T_I64) {
+ xfer += iprot->readI64(this->total_compressed_size);
+ this->__isset.total_compressed_size = true;
+ } else {
+ xfer += iprot->skip(ftype);
+ }
+ break;
+ case 7:
+ if (ftype == ::apache::thrift::protocol::T_I16) {
+ xfer += iprot->readI16(this->ordinal);
+ this->__isset.ordinal = true;
+ } else {
+ xfer += iprot->skip(ftype);
+ }
+ break;
+ default:
+ xfer += iprot->skip(ftype);
+ break;
+ }
+ xfer += iprot->readFieldEnd();
+ }
+
+ xfer += iprot->readStructEnd();
+
+ if (!isset_columns)
+ throw TProtocolException(TProtocolException::INVALID_DATA);
+ if (!isset_total_byte_size)
+ throw TProtocolException(TProtocolException::INVALID_DATA);
+ if (!isset_num_rows)
+ throw TProtocolException(TProtocolException::INVALID_DATA);
+ return xfer;
+}
+
+uint32_t RowGroup::write(::apache::thrift::protocol::TProtocol* oprot) const {
+ uint32_t xfer = 0;
+ ::apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot);
+ xfer += oprot->writeStructBegin("RowGroup");
+
+ xfer += oprot->writeFieldBegin("columns", ::apache::thrift::protocol::T_LIST, 1);
+ {
+ xfer += oprot->writeListBegin(::apache::thrift::protocol::T_STRUCT, static_cast<uint32_t>(this->columns.size()));
+ std::vector<ColumnChunk> ::const_iterator _iter134;
+ for (_iter134 = this->columns.begin(); _iter134 != this->columns.end(); ++_iter134)
+ {
+ xfer += (*_iter134).write(oprot);
+ }
+ xfer += oprot->writeListEnd();
+ }
+ xfer += oprot->writeFieldEnd();
+
+ xfer += oprot->writeFieldBegin("total_byte_size", ::apache::thrift::protocol::T_I64, 2);
+ xfer += oprot->writeI64(this->total_byte_size);
+ xfer += oprot->writeFieldEnd();
+
+ xfer += oprot->writeFieldBegin("num_rows", ::apache::thrift::protocol::T_I64, 3);
+ xfer += oprot->writeI64(this->num_rows);
+ xfer += oprot->writeFieldEnd();
+
+ if (this->__isset.sorting_columns) {
+ xfer += oprot->writeFieldBegin("sorting_columns", ::apache::thrift::protocol::T_LIST, 4);
+ {
+ xfer += oprot->writeListBegin(::apache::thrift::protocol::T_STRUCT, static_cast<uint32_t>(this->sorting_columns.size()));
+ std::vector<SortingColumn> ::const_iterator _iter135;
+ for (_iter135 = this->sorting_columns.begin(); _iter135 != this->sorting_columns.end(); ++_iter135)
+ {
+ xfer += (*_iter135).write(oprot);
+ }
+ xfer += oprot->writeListEnd();
+ }
+ xfer += oprot->writeFieldEnd();
+ }
+ if (this->__isset.file_offset) {
+ xfer += oprot->writeFieldBegin("file_offset", ::apache::thrift::protocol::T_I64, 5);
+ xfer += oprot->writeI64(this->file_offset);
+ xfer += oprot->writeFieldEnd();
+ }
+ if (this->__isset.total_compressed_size) {
+ xfer += oprot->writeFieldBegin("total_compressed_size", ::apache::thrift::protocol::T_I64, 6);
+ xfer += oprot->writeI64(this->total_compressed_size);
+ xfer += oprot->writeFieldEnd();
+ }
+ if (this->__isset.ordinal) {
+ xfer += oprot->writeFieldBegin("ordinal", ::apache::thrift::protocol::T_I16, 7);
+ xfer += oprot->writeI16(this->ordinal);
+ xfer += oprot->writeFieldEnd();
+ }
+ xfer += oprot->writeFieldStop();
+ xfer += oprot->writeStructEnd();
+ return xfer;
+}
+
+void swap(RowGroup &a, RowGroup &b) {
+ using ::std::swap;
+ swap(a.columns, b.columns);
+ swap(a.total_byte_size, b.total_byte_size);
+ swap(a.num_rows, b.num_rows);
+ swap(a.sorting_columns, b.sorting_columns);
+ swap(a.file_offset, b.file_offset);
+ swap(a.total_compressed_size, b.total_compressed_size);
+ swap(a.ordinal, b.ordinal);
+ swap(a.__isset, b.__isset);
+}
+
+RowGroup::RowGroup(const RowGroup& other136) {
+ columns = other136.columns;
+ total_byte_size = other136.total_byte_size;
+ num_rows = other136.num_rows;
+ sorting_columns = other136.sorting_columns;
+ file_offset = other136.file_offset;
+ total_compressed_size = other136.total_compressed_size;
+ ordinal = other136.ordinal;
+ __isset = other136.__isset;
+}
+RowGroup& RowGroup::operator=(const RowGroup& other137) {
+ columns = other137.columns;
+ total_byte_size = other137.total_byte_size;
+ num_rows = other137.num_rows;
+ sorting_columns = other137.sorting_columns;
+ file_offset = other137.file_offset;
+ total_compressed_size = other137.total_compressed_size;
+ ordinal = other137.ordinal;
+ __isset = other137.__isset;
+ return *this;
+}
+void RowGroup::printTo(std::ostream& out) const {
+ using ::apache::thrift::to_string;
+ out << "RowGroup(";
+ out << "columns=" << to_string(columns);
+ out << ", " << "total_byte_size=" << to_string(total_byte_size);
+ out << ", " << "num_rows=" << to_string(num_rows);
+ out << ", " << "sorting_columns="; (__isset.sorting_columns ? (out << to_string(sorting_columns)) : (out << "<null>"));
+ out << ", " << "file_offset="; (__isset.file_offset ? (out << to_string(file_offset)) : (out << "<null>"));
+ out << ", " << "total_compressed_size="; (__isset.total_compressed_size ? (out << to_string(total_compressed_size)) : (out << "<null>"));
+ out << ", " << "ordinal="; (__isset.ordinal ? (out << to_string(ordinal)) : (out << "<null>"));
+ out << ")";
+}
+
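// ---------------------------------------------------------------------------
// Editorial sketch (not part of the generated diff): RowGroup::read() checks
// its three required fields (columns, total_byte_size, num_rows) only after
// the field loop, throwing TProtocolException::INVALID_DATA if any is
// missing. Decoding an empty struct demonstrates this:
#include <thrift/protocol/TCompactProtocol.h>
#include <thrift/transport/TBufferTransports.h>
#include <memory>

inline bool row_group_rejects_empty_struct() {
  using namespace apache::thrift;
  auto buffer = std::make_shared<transport::TMemoryBuffer>();
  protocol::TCompactProtocol proto(buffer);
  proto.writeStructBegin("RowGroup");
  proto.writeFieldStop();   // no fields at all
  proto.writeStructEnd();

  parquet::format::RowGroup rg;
  try {
    rg.read(&proto);
    return false;           // unreachable: required fields are missing
  } catch (const protocol::TProtocolException&) {
    return true;            // INVALID_DATA, as expected
  }
}
// ---------------------------------------------------------------------------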
+
+TypeDefinedOrder::~TypeDefinedOrder() noexcept {
+}
+
+std::ostream& operator<<(std::ostream& out, const TypeDefinedOrder& obj)
+{
+ obj.printTo(out);
+ return out;
+}
+
+
+uint32_t TypeDefinedOrder::read(::apache::thrift::protocol::TProtocol* iprot) {
+
+ ::apache::thrift::protocol::TInputRecursionTracker tracker(*iprot);
+ uint32_t xfer = 0;
+ std::string fname;
+ ::apache::thrift::protocol::TType ftype;
+ int16_t fid;
+
+ xfer += iprot->readStructBegin(fname);
+
+ using ::apache::thrift::protocol::TProtocolException;
+
+
+ while (true)
+ {
+ xfer += iprot->readFieldBegin(fname, ftype, fid);
+ if (ftype == ::apache::thrift::protocol::T_STOP) {
+ break;
+ }
+ xfer += iprot->skip(ftype);
+ xfer += iprot->readFieldEnd();
+ }
+
+ xfer += iprot->readStructEnd();
+
+ return xfer;
+}
+
+uint32_t TypeDefinedOrder::write(::apache::thrift::protocol::TProtocol* oprot) const {
+ uint32_t xfer = 0;
+ ::apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot);
+ xfer += oprot->writeStructBegin("TypeDefinedOrder");
+
+ xfer += oprot->writeFieldStop();
+ xfer += oprot->writeStructEnd();
+ return xfer;
+}
+
+void swap(TypeDefinedOrder &a, TypeDefinedOrder &b) {
+ using ::std::swap;
+ (void) a;
+ (void) b;
+}
+
+TypeDefinedOrder::TypeDefinedOrder(const TypeDefinedOrder& other138) {
+ (void) other138;
+}
+TypeDefinedOrder& TypeDefinedOrder::operator=(const TypeDefinedOrder& other139) {
+ (void) other139;
+ return *this;
+}
+void TypeDefinedOrder::printTo(std::ostream& out) const {
+ using ::apache::thrift::to_string;
+ out << "TypeDefinedOrder(";
+ out << ")";
+}
+
+
+ColumnOrder::~ColumnOrder() noexcept {
+}
+
+
+void ColumnOrder::__set_TYPE_ORDER(const TypeDefinedOrder& val) {
+ this->TYPE_ORDER = val;
+  __isset.TYPE_ORDER = true;
+}
+std::ostream& operator<<(std::ostream& out, const ColumnOrder& obj)
+{
+ obj.printTo(out);
+ return out;
+}
+
+
+uint32_t ColumnOrder::read(::apache::thrift::protocol::TProtocol* iprot) {
+
+ ::apache::thrift::protocol::TInputRecursionTracker tracker(*iprot);
+ uint32_t xfer = 0;
+ std::string fname;
+ ::apache::thrift::protocol::TType ftype;
+ int16_t fid;
+
+ xfer += iprot->readStructBegin(fname);
+
+ using ::apache::thrift::protocol::TProtocolException;
+
+
+ while (true)
+ {
+ xfer += iprot->readFieldBegin(fname, ftype, fid);
+ if (ftype == ::apache::thrift::protocol::T_STOP) {
+ break;
+ }
+ switch (fid)
+ {
+ case 1:
+ if (ftype == ::apache::thrift::protocol::T_STRUCT) {
+ xfer += this->TYPE_ORDER.read(iprot);
+ this->__isset.TYPE_ORDER = true;
+ } else {
+ xfer += iprot->skip(ftype);
+ }
+ break;
+ default:
+ xfer += iprot->skip(ftype);
+ break;
+ }
+ xfer += iprot->readFieldEnd();
+ }
+
+ xfer += iprot->readStructEnd();
+
+ return xfer;
+}
+
+uint32_t ColumnOrder::write(::apache::thrift::protocol::TProtocol* oprot) const {
+ uint32_t xfer = 0;
+ ::apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot);
+ xfer += oprot->writeStructBegin("ColumnOrder");
+
+ if (this->__isset.TYPE_ORDER) {
+ xfer += oprot->writeFieldBegin("TYPE_ORDER", ::apache::thrift::protocol::T_STRUCT, 1);
+ xfer += this->TYPE_ORDER.write(oprot);
+ xfer += oprot->writeFieldEnd();
+ }
+ xfer += oprot->writeFieldStop();
+ xfer += oprot->writeStructEnd();
+ return xfer;
+}
+
+void swap(ColumnOrder &a, ColumnOrder &b) {
+ using ::std::swap;
+ swap(a.TYPE_ORDER, b.TYPE_ORDER);
+ swap(a.__isset, b.__isset);
+}
+
+ColumnOrder::ColumnOrder(const ColumnOrder& other140) {
+ TYPE_ORDER = other140.TYPE_ORDER;
+ __isset = other140.__isset;
+}
+ColumnOrder& ColumnOrder::operator=(const ColumnOrder& other141) {
+ TYPE_ORDER = other141.TYPE_ORDER;
+ __isset = other141.__isset;
+ return *this;
+}
+void ColumnOrder::printTo(std::ostream& out) const {
+ using ::apache::thrift::to_string;
+ out << "ColumnOrder(";
+ out << "TYPE_ORDER="; (__isset.TYPE_ORDER ? (out << to_string(TYPE_ORDER)) : (out << "<null>"));
+ out << ")";
+}
+
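// ---------------------------------------------------------------------------
// Editorial sketch (not part of the generated diff): TypeDefinedOrder carries
// no fields, so inside ColumnOrder its mere presence is the signal. Readers
// test the __isset flag rather than any payload:
inline bool is_type_defined_order(const parquet::format::ColumnOrder& co) {
  return co.__isset.TYPE_ORDER;  // empty struct: presence == meaning
}
// ---------------------------------------------------------------------------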
+
+PageLocation::~PageLocation() noexcept {
+}
+
+
+void PageLocation::__set_offset(const int64_t val) {
+ this->offset = val;
+}
+
+void PageLocation::__set_compressed_page_size(const int32_t val) {
+ this->compressed_page_size = val;
+}
+
+void PageLocation::__set_first_row_index(const int64_t val) {
+ this->first_row_index = val;
+}
+std::ostream& operator<<(std::ostream& out, const PageLocation& obj)
+{
+ obj.printTo(out);
+ return out;
+}
+
+
+uint32_t PageLocation::read(::apache::thrift::protocol::TProtocol* iprot) {
+
+ ::apache::thrift::protocol::TInputRecursionTracker tracker(*iprot);
+ uint32_t xfer = 0;
+ std::string fname;
+ ::apache::thrift::protocol::TType ftype;
+ int16_t fid;
+
+ xfer += iprot->readStructBegin(fname);
+
+ using ::apache::thrift::protocol::TProtocolException;
+
+ bool isset_offset = false;
+ bool isset_compressed_page_size = false;
+ bool isset_first_row_index = false;
+
+ while (true)
+ {
+ xfer += iprot->readFieldBegin(fname, ftype, fid);
+ if (ftype == ::apache::thrift::protocol::T_STOP) {
+ break;
+ }
+ switch (fid)
+ {
+ case 1:
+ if (ftype == ::apache::thrift::protocol::T_I64) {
+ xfer += iprot->readI64(this->offset);
+ isset_offset = true;
+ } else {
+ xfer += iprot->skip(ftype);
+ }
+ break;
+ case 2:
+ if (ftype == ::apache::thrift::protocol::T_I32) {
+ xfer += iprot->readI32(this->compressed_page_size);
+ isset_compressed_page_size = true;
+ } else {
+ xfer += iprot->skip(ftype);
+ }
+ break;
+ case 3:
+ if (ftype == ::apache::thrift::protocol::T_I64) {
+ xfer += iprot->readI64(this->first_row_index);
+ isset_first_row_index = true;
+ } else {
+ xfer += iprot->skip(ftype);
+ }
+ break;
+ default:
+ xfer += iprot->skip(ftype);
+ break;
+ }
+ xfer += iprot->readFieldEnd();
+ }
+
+ xfer += iprot->readStructEnd();
+
+ if (!isset_offset)
+ throw TProtocolException(TProtocolException::INVALID_DATA);
+ if (!isset_compressed_page_size)
+ throw TProtocolException(TProtocolException::INVALID_DATA);
+ if (!isset_first_row_index)
+ throw TProtocolException(TProtocolException::INVALID_DATA);
+ return xfer;
+}
+
+uint32_t PageLocation::write(::apache::thrift::protocol::TProtocol* oprot) const {
+ uint32_t xfer = 0;
+ ::apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot);
+ xfer += oprot->writeStructBegin("PageLocation");
+
+ xfer += oprot->writeFieldBegin("offset", ::apache::thrift::protocol::T_I64, 1);
+ xfer += oprot->writeI64(this->offset);
+ xfer += oprot->writeFieldEnd();
+
+ xfer += oprot->writeFieldBegin("compressed_page_size", ::apache::thrift::protocol::T_I32, 2);
+ xfer += oprot->writeI32(this->compressed_page_size);
+ xfer += oprot->writeFieldEnd();
+
+ xfer += oprot->writeFieldBegin("first_row_index", ::apache::thrift::protocol::T_I64, 3);
+ xfer += oprot->writeI64(this->first_row_index);
+ xfer += oprot->writeFieldEnd();
+
+ xfer += oprot->writeFieldStop();
+ xfer += oprot->writeStructEnd();
+ return xfer;
+}
+
+void swap(PageLocation &a, PageLocation &b) {
+ using ::std::swap;
+ swap(a.offset, b.offset);
+ swap(a.compressed_page_size, b.compressed_page_size);
+ swap(a.first_row_index, b.first_row_index);
+}
+
+PageLocation::PageLocation(const PageLocation& other142) {
+ offset = other142.offset;
+ compressed_page_size = other142.compressed_page_size;
+ first_row_index = other142.first_row_index;
+}
+PageLocation& PageLocation::operator=(const PageLocation& other143) {
+ offset = other143.offset;
+ compressed_page_size = other143.compressed_page_size;
+ first_row_index = other143.first_row_index;
+ return *this;
+}
+void PageLocation::printTo(std::ostream& out) const {
+ using ::apache::thrift::to_string;
+ out << "PageLocation(";
+ out << "offset=" << to_string(offset);
+ out << ", " << "compressed_page_size=" << to_string(compressed_page_size);
+ out << ", " << "first_row_index=" << to_string(first_row_index);
+ out << ")";
+}
+
+
+OffsetIndex::~OffsetIndex() noexcept {
+}
+
+
+void OffsetIndex::__set_page_locations(const std::vector<PageLocation> & val) {
+ this->page_locations = val;
+}
+std::ostream& operator<<(std::ostream& out, const OffsetIndex& obj)
+{
+ obj.printTo(out);
+ return out;
+}
+
+
+uint32_t OffsetIndex::read(::apache::thrift::protocol::TProtocol* iprot) {
+
+ ::apache::thrift::protocol::TInputRecursionTracker tracker(*iprot);
+ uint32_t xfer = 0;
+ std::string fname;
+ ::apache::thrift::protocol::TType ftype;
+ int16_t fid;
+
+ xfer += iprot->readStructBegin(fname);
+
+ using ::apache::thrift::protocol::TProtocolException;
+
+ bool isset_page_locations = false;
+
+ while (true)
+ {
+ xfer += iprot->readFieldBegin(fname, ftype, fid);
+ if (ftype == ::apache::thrift::protocol::T_STOP) {
+ break;
+ }
+ switch (fid)
+ {
+ case 1:
+ if (ftype == ::apache::thrift::protocol::T_LIST) {
+ {
+ this->page_locations.clear();
+ uint32_t _size144;
+ ::apache::thrift::protocol::TType _etype147;
+ xfer += iprot->readListBegin(_etype147, _size144);
+ this->page_locations.resize(_size144);
+ uint32_t _i148;
+ for (_i148 = 0; _i148 < _size144; ++_i148)
+ {
+ xfer += this->page_locations[_i148].read(iprot);
+ }
+ xfer += iprot->readListEnd();
+ }
+ isset_page_locations = true;
+ } else {
+ xfer += iprot->skip(ftype);
+ }
+ break;
+ default:
+ xfer += iprot->skip(ftype);
+ break;
+ }
+ xfer += iprot->readFieldEnd();
+ }
+
+ xfer += iprot->readStructEnd();
+
+ if (!isset_page_locations)
+ throw TProtocolException(TProtocolException::INVALID_DATA);
+ return xfer;
+}
+
+uint32_t OffsetIndex::write(::apache::thrift::protocol::TProtocol* oprot) const {
+ uint32_t xfer = 0;
+ ::apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot);
+ xfer += oprot->writeStructBegin("OffsetIndex");
+
+ xfer += oprot->writeFieldBegin("page_locations", ::apache::thrift::protocol::T_LIST, 1);
+ {
+ xfer += oprot->writeListBegin(::apache::thrift::protocol::T_STRUCT, static_cast<uint32_t>(this->page_locations.size()));
+ std::vector<PageLocation> ::const_iterator _iter149;
+ for (_iter149 = this->page_locations.begin(); _iter149 != this->page_locations.end(); ++_iter149)
+ {
+ xfer += (*_iter149).write(oprot);
+ }
+ xfer += oprot->writeListEnd();
+ }
+ xfer += oprot->writeFieldEnd();
+
+ xfer += oprot->writeFieldStop();
+ xfer += oprot->writeStructEnd();
+ return xfer;
+}
+
+void swap(OffsetIndex &a, OffsetIndex &b) {
+ using ::std::swap;
+ swap(a.page_locations, b.page_locations);
+}
+
+OffsetIndex::OffsetIndex(const OffsetIndex& other150) {
+ page_locations = other150.page_locations;
+}
+OffsetIndex& OffsetIndex::operator=(const OffsetIndex& other151) {
+ page_locations = other151.page_locations;
+ return *this;
+}
+void OffsetIndex::printTo(std::ostream& out) const {
+ using ::apache::thrift::to_string;
+ out << "OffsetIndex(";
+ out << "page_locations=" << to_string(page_locations);
+ out << ")";
+}
+
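// ---------------------------------------------------------------------------
// Editorial sketch (not part of the generated diff): a typical consumer-side
// use of OffsetIndex -- binary-search page_locations (sorted by
// first_row_index) for the page that contains a given row.
#include <algorithm>
#include <cstdint>

inline const parquet::format::PageLocation* find_page_for_row(
    const parquet::format::OffsetIndex& index, int64_t row) {
  const auto& pages = index.page_locations;
  auto it = std::upper_bound(
      pages.begin(), pages.end(), row,
      [](int64_t r, const parquet::format::PageLocation& p) {
        return r < p.first_row_index;
      });
  if (it == pages.begin()) return nullptr;  // row precedes the first page
  return &*(it - 1);                        // last page starting at or before row
}
// ---------------------------------------------------------------------------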
+
+ColumnIndex::~ColumnIndex() noexcept {
+}
+
+
+void ColumnIndex::__set_null_pages(const std::vector<bool> & val) {
+ this->null_pages = val;
+}
+
+void ColumnIndex::__set_min_values(const std::vector<std::string> & val) {
+ this->min_values = val;
+}
+
+void ColumnIndex::__set_max_values(const std::vector<std::string> & val) {
+ this->max_values = val;
+}
+
+void ColumnIndex::__set_boundary_order(const BoundaryOrder::type val) {
+ this->boundary_order = val;
+}
+
+void ColumnIndex::__set_null_counts(const std::vector<int64_t> & val) {
+ this->null_counts = val;
+  __isset.null_counts = true;
+}
+std::ostream& operator<<(std::ostream& out, const ColumnIndex& obj)
+{
+ obj.printTo(out);
+ return out;
+}
+
+
+uint32_t ColumnIndex::read(::apache::thrift::protocol::TProtocol* iprot) {
+
+ ::apache::thrift::protocol::TInputRecursionTracker tracker(*iprot);
+ uint32_t xfer = 0;
+ std::string fname;
+ ::apache::thrift::protocol::TType ftype;
+ int16_t fid;
+
+ xfer += iprot->readStructBegin(fname);
+
+ using ::apache::thrift::protocol::TProtocolException;
+
+ bool isset_null_pages = false;
+ bool isset_min_values = false;
+ bool isset_max_values = false;
+ bool isset_boundary_order = false;
+
+ while (true)
+ {
+ xfer += iprot->readFieldBegin(fname, ftype, fid);
+ if (ftype == ::apache::thrift::protocol::T_STOP) {
+ break;
+ }
+ switch (fid)
+ {
+ case 1:
+ if (ftype == ::apache::thrift::protocol::T_LIST) {
+ {
+ this->null_pages.clear();
+ uint32_t _size152;
+ ::apache::thrift::protocol::TType _etype155;
+ xfer += iprot->readListBegin(_etype155, _size152);
+ this->null_pages.resize(_size152);
+ uint32_t _i156;
+ for (_i156 = 0; _i156 < _size152; ++_i156)
+ {
+ bool result;
+ xfer += iprot->readBool(result);
+ this->null_pages[_i156] = result;
+ }
+ xfer += iprot->readListEnd();
+ }
+ isset_null_pages = true;
+ } else {
+ xfer += iprot->skip(ftype);
+ }
+ break;
+ case 2:
+ if (ftype == ::apache::thrift::protocol::T_LIST) {
+ {
+ this->min_values.clear();
+ uint32_t _size157;
+ ::apache::thrift::protocol::TType _etype160;
+ xfer += iprot->readListBegin(_etype160, _size157);
+ this->min_values.resize(_size157);
+ uint32_t _i161;
+ for (_i161 = 0; _i161 < _size157; ++_i161)
+ {
+ xfer += iprot->readBinary(this->min_values[_i161]);
+ }
+ xfer += iprot->readListEnd();
+ }
+ isset_min_values = true;
+ } else {
+ xfer += iprot->skip(ftype);
+ }
+ break;
+ case 3:
+ if (ftype == ::apache::thrift::protocol::T_LIST) {
+ {
+ this->max_values.clear();
+ uint32_t _size162;
+ ::apache::thrift::protocol::TType _etype165;
+ xfer += iprot->readListBegin(_etype165, _size162);
+ this->max_values.resize(_size162);
+ uint32_t _i166;
+ for (_i166 = 0; _i166 < _size162; ++_i166)
+ {
+ xfer += iprot->readBinary(this->max_values[_i166]);
+ }
+ xfer += iprot->readListEnd();
+ }
+ isset_max_values = true;
+ } else {
+ xfer += iprot->skip(ftype);
+ }
+ break;
+ case 4:
+ if (ftype == ::apache::thrift::protocol::T_I32) {
+ int32_t ecast167;
+ xfer += iprot->readI32(ecast167);
+ this->boundary_order = (BoundaryOrder::type)ecast167;
+ isset_boundary_order = true;
+ } else {
+ xfer += iprot->skip(ftype);
+ }
+ break;
+ case 5:
+ if (ftype == ::apache::thrift::protocol::T_LIST) {
+ {
+ this->null_counts.clear();
+ uint32_t _size168;
+ ::apache::thrift::protocol::TType _etype171;
+ xfer += iprot->readListBegin(_etype171, _size168);
+ this->null_counts.resize(_size168);
+ uint32_t _i172;
+ for (_i172 = 0; _i172 < _size168; ++_i172)
+ {
+ xfer += iprot->readI64(this->null_counts[_i172]);
+ }
+ xfer += iprot->readListEnd();
+ }
+ this->__isset.null_counts = true;
+ } else {
+ xfer += iprot->skip(ftype);
+ }
+ break;
+ default:
+ xfer += iprot->skip(ftype);
+ break;
+ }
+ xfer += iprot->readFieldEnd();
+ }
+
+ xfer += iprot->readStructEnd();
+
+ if (!isset_null_pages)
+ throw TProtocolException(TProtocolException::INVALID_DATA);
+ if (!isset_min_values)
+ throw TProtocolException(TProtocolException::INVALID_DATA);
+ if (!isset_max_values)
+ throw TProtocolException(TProtocolException::INVALID_DATA);
+ if (!isset_boundary_order)
+ throw TProtocolException(TProtocolException::INVALID_DATA);
+ return xfer;
+}
+
+uint32_t ColumnIndex::write(::apache::thrift::protocol::TProtocol* oprot) const {
+ uint32_t xfer = 0;
+ ::apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot);
+ xfer += oprot->writeStructBegin("ColumnIndex");
+
+ xfer += oprot->writeFieldBegin("null_pages", ::apache::thrift::protocol::T_LIST, 1);
+ {
+ xfer += oprot->writeListBegin(::apache::thrift::protocol::T_BOOL, static_cast<uint32_t>(this->null_pages.size()));
+ std::vector<bool> ::const_iterator _iter173;
+ for (_iter173 = this->null_pages.begin(); _iter173 != this->null_pages.end(); ++_iter173)
+ {
+ xfer += oprot->writeBool((*_iter173));
+ }
+ xfer += oprot->writeListEnd();
+ }
+ xfer += oprot->writeFieldEnd();
+
+ xfer += oprot->writeFieldBegin("min_values", ::apache::thrift::protocol::T_LIST, 2);
+ {
+ xfer += oprot->writeListBegin(::apache::thrift::protocol::T_STRING, static_cast<uint32_t>(this->min_values.size()));
+ std::vector<std::string> ::const_iterator _iter174;
+ for (_iter174 = this->min_values.begin(); _iter174 != this->min_values.end(); ++_iter174)
+ {
+ xfer += oprot->writeBinary((*_iter174));
+ }
+ xfer += oprot->writeListEnd();
+ }
+ xfer += oprot->writeFieldEnd();
+
+ xfer += oprot->writeFieldBegin("max_values", ::apache::thrift::protocol::T_LIST, 3);
+ {
+ xfer += oprot->writeListBegin(::apache::thrift::protocol::T_STRING, static_cast<uint32_t>(this->max_values.size()));
+ std::vector<std::string> ::const_iterator _iter175;
+ for (_iter175 = this->max_values.begin(); _iter175 != this->max_values.end(); ++_iter175)
+ {
+ xfer += oprot->writeBinary((*_iter175));
+ }
+ xfer += oprot->writeListEnd();
+ }
+ xfer += oprot->writeFieldEnd();
+
+ xfer += oprot->writeFieldBegin("boundary_order", ::apache::thrift::protocol::T_I32, 4);
+ xfer += oprot->writeI32((int32_t)this->boundary_order);
+ xfer += oprot->writeFieldEnd();
+
+ if (this->__isset.null_counts) {
+ xfer += oprot->writeFieldBegin("null_counts", ::apache::thrift::protocol::T_LIST, 5);
+ {
+ xfer += oprot->writeListBegin(::apache::thrift::protocol::T_I64, static_cast<uint32_t>(this->null_counts.size()));
+ std::vector<int64_t> ::const_iterator _iter176;
+ for (_iter176 = this->null_counts.begin(); _iter176 != this->null_counts.end(); ++_iter176)
+ {
+ xfer += oprot->writeI64((*_iter176));
+ }
+ xfer += oprot->writeListEnd();
+ }
+ xfer += oprot->writeFieldEnd();
+ }
+ xfer += oprot->writeFieldStop();
+ xfer += oprot->writeStructEnd();
+ return xfer;
+}
+
+void swap(ColumnIndex &a, ColumnIndex &b) {
+ using ::std::swap;
+ swap(a.null_pages, b.null_pages);
+ swap(a.min_values, b.min_values);
+ swap(a.max_values, b.max_values);
+ swap(a.boundary_order, b.boundary_order);
+ swap(a.null_counts, b.null_counts);
+ swap(a.__isset, b.__isset);
+}
+
+ColumnIndex::ColumnIndex(const ColumnIndex& other177) {
+ null_pages = other177.null_pages;
+ min_values = other177.min_values;
+ max_values = other177.max_values;
+ boundary_order = other177.boundary_order;
+ null_counts = other177.null_counts;
+ __isset = other177.__isset;
+}
+ColumnIndex& ColumnIndex::operator=(const ColumnIndex& other178) {
+ null_pages = other178.null_pages;
+ min_values = other178.min_values;
+ max_values = other178.max_values;
+ boundary_order = other178.boundary_order;
+ null_counts = other178.null_counts;
+ __isset = other178.__isset;
+ return *this;
+}
+void ColumnIndex::printTo(std::ostream& out) const {
+ using ::apache::thrift::to_string;
+ out << "ColumnIndex(";
+ out << "null_pages=" << to_string(null_pages);
+ out << ", " << "min_values=" << to_string(min_values);
+ out << ", " << "max_values=" << to_string(max_values);
+ out << ", " << "boundary_order=" << to_string(boundary_order);
+ out << ", " << "null_counts="; (__isset.null_counts ? (out << to_string(null_counts)) : (out << "<null>"));
+ out << ")";
+}
+
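// ---------------------------------------------------------------------------
// Editorial sketch (not part of the generated diff): page pruning with the
// ColumnIndex above. A page can be skipped when it is all-null or when its
// [min, max] range excludes the probe value. The byte-wise std::string
// comparison below only matches the column's boundary_order for
// BYTE_ARRAY/UTF8-style orderings; other physical types need their
// type-defined comparator.
#include <string>

inline bool page_may_contain(const parquet::format::ColumnIndex& index,
                             size_t page, const std::string& value) {
  if (index.null_pages[page]) return false;  // page holds only nulls
  return index.min_values[page] <= value && value <= index.max_values[page];
}
// ---------------------------------------------------------------------------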
+
+AesGcmV1::~AesGcmV1() noexcept {
+}
+
+
+void AesGcmV1::__set_aad_prefix(const std::string& val) {
+ this->aad_prefix = val;
+  __isset.aad_prefix = true;
+}
+
+void AesGcmV1::__set_aad_file_unique(const std::string& val) {
+ this->aad_file_unique = val;
+  __isset.aad_file_unique = true;
+}
+
+void AesGcmV1::__set_supply_aad_prefix(const bool val) {
+ this->supply_aad_prefix = val;
+  __isset.supply_aad_prefix = true;
+}
+std::ostream& operator<<(std::ostream& out, const AesGcmV1& obj)
+{
+ obj.printTo(out);
+ return out;
+}
+
+
+uint32_t AesGcmV1::read(::apache::thrift::protocol::TProtocol* iprot) {
+
+ ::apache::thrift::protocol::TInputRecursionTracker tracker(*iprot);
+ uint32_t xfer = 0;
+ std::string fname;
+ ::apache::thrift::protocol::TType ftype;
+ int16_t fid;
+
+ xfer += iprot->readStructBegin(fname);
+
+ using ::apache::thrift::protocol::TProtocolException;
+
+
+ while (true)
+ {
+ xfer += iprot->readFieldBegin(fname, ftype, fid);
+ if (ftype == ::apache::thrift::protocol::T_STOP) {
+ break;
+ }
+ switch (fid)
+ {
+ case 1:
+ if (ftype == ::apache::thrift::protocol::T_STRING) {
+ xfer += iprot->readBinary(this->aad_prefix);
+ this->__isset.aad_prefix = true;
+ } else {
+ xfer += iprot->skip(ftype);
+ }
+ break;
+ case 2:
+ if (ftype == ::apache::thrift::protocol::T_STRING) {
+ xfer += iprot->readBinary(this->aad_file_unique);
+ this->__isset.aad_file_unique = true;
+ } else {
+ xfer += iprot->skip(ftype);
+ }
+ break;
+ case 3:
+ if (ftype == ::apache::thrift::protocol::T_BOOL) {
+ xfer += iprot->readBool(this->supply_aad_prefix);
+ this->__isset.supply_aad_prefix = true;
+ } else {
+ xfer += iprot->skip(ftype);
+ }
+ break;
+ default:
+ xfer += iprot->skip(ftype);
+ break;
+ }
+ xfer += iprot->readFieldEnd();
+ }
+
+ xfer += iprot->readStructEnd();
+
+ return xfer;
+}
+
+uint32_t AesGcmV1::write(::apache::thrift::protocol::TProtocol* oprot) const {
+ uint32_t xfer = 0;
+ ::apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot);
+ xfer += oprot->writeStructBegin("AesGcmV1");
+
+ if (this->__isset.aad_prefix) {
+ xfer += oprot->writeFieldBegin("aad_prefix", ::apache::thrift::protocol::T_STRING, 1);
+ xfer += oprot->writeBinary(this->aad_prefix);
+ xfer += oprot->writeFieldEnd();
+ }
+ if (this->__isset.aad_file_unique) {
+ xfer += oprot->writeFieldBegin("aad_file_unique", ::apache::thrift::protocol::T_STRING, 2);
+ xfer += oprot->writeBinary(this->aad_file_unique);
+ xfer += oprot->writeFieldEnd();
+ }
+ if (this->__isset.supply_aad_prefix) {
+ xfer += oprot->writeFieldBegin("supply_aad_prefix", ::apache::thrift::protocol::T_BOOL, 3);
+ xfer += oprot->writeBool(this->supply_aad_prefix);
+ xfer += oprot->writeFieldEnd();
+ }
+ xfer += oprot->writeFieldStop();
+ xfer += oprot->writeStructEnd();
+ return xfer;
+}
+
+void swap(AesGcmV1 &a, AesGcmV1 &b) {
+ using ::std::swap;
+ swap(a.aad_prefix, b.aad_prefix);
+ swap(a.aad_file_unique, b.aad_file_unique);
+ swap(a.supply_aad_prefix, b.supply_aad_prefix);
+ swap(a.__isset, b.__isset);
+}
+
+AesGcmV1::AesGcmV1(const AesGcmV1& other179) {
+ aad_prefix = other179.aad_prefix;
+ aad_file_unique = other179.aad_file_unique;
+ supply_aad_prefix = other179.supply_aad_prefix;
+ __isset = other179.__isset;
+}
+AesGcmV1& AesGcmV1::operator=(const AesGcmV1& other180) {
+ aad_prefix = other180.aad_prefix;
+ aad_file_unique = other180.aad_file_unique;
+ supply_aad_prefix = other180.supply_aad_prefix;
+ __isset = other180.__isset;
+ return *this;
+}
+void AesGcmV1::printTo(std::ostream& out) const {
+ using ::apache::thrift::to_string;
+ out << "AesGcmV1(";
+ out << "aad_prefix="; (__isset.aad_prefix ? (out << to_string(aad_prefix)) : (out << "<null>"));
+ out << ", " << "aad_file_unique="; (__isset.aad_file_unique ? (out << to_string(aad_file_unique)) : (out << "<null>"));
+ out << ", " << "supply_aad_prefix="; (__isset.supply_aad_prefix ? (out << to_string(supply_aad_prefix)) : (out << "<null>"));
+ out << ")";
+}
+
+
+AesGcmCtrV1::~AesGcmCtrV1() noexcept {
+}
+
+
+void AesGcmCtrV1::__set_aad_prefix(const std::string& val) {
+ this->aad_prefix = val;
+  __isset.aad_prefix = true;
+}
+
+void AesGcmCtrV1::__set_aad_file_unique(const std::string& val) {
+ this->aad_file_unique = val;
+  __isset.aad_file_unique = true;
+}
+
+void AesGcmCtrV1::__set_supply_aad_prefix(const bool val) {
+ this->supply_aad_prefix = val;
+  __isset.supply_aad_prefix = true;
+}
+std::ostream& operator<<(std::ostream& out, const AesGcmCtrV1& obj)
+{
+ obj.printTo(out);
+ return out;
+}
+
+
+uint32_t AesGcmCtrV1::read(::apache::thrift::protocol::TProtocol* iprot) {
+
+ ::apache::thrift::protocol::TInputRecursionTracker tracker(*iprot);
+ uint32_t xfer = 0;
+ std::string fname;
+ ::apache::thrift::protocol::TType ftype;
+ int16_t fid;
+
+ xfer += iprot->readStructBegin(fname);
+
+ using ::apache::thrift::protocol::TProtocolException;
+
+
+ while (true)
+ {
+ xfer += iprot->readFieldBegin(fname, ftype, fid);
+ if (ftype == ::apache::thrift::protocol::T_STOP) {
+ break;
+ }
+ switch (fid)
+ {
+ case 1:
+ if (ftype == ::apache::thrift::protocol::T_STRING) {
+ xfer += iprot->readBinary(this->aad_prefix);
+ this->__isset.aad_prefix = true;
+ } else {
+ xfer += iprot->skip(ftype);
+ }
+ break;
+ case 2:
+ if (ftype == ::apache::thrift::protocol::T_STRING) {
+ xfer += iprot->readBinary(this->aad_file_unique);
+ this->__isset.aad_file_unique = true;
+ } else {
+ xfer += iprot->skip(ftype);
+ }
+ break;
+ case 3:
+ if (ftype == ::apache::thrift::protocol::T_BOOL) {
+ xfer += iprot->readBool(this->supply_aad_prefix);
+ this->__isset.supply_aad_prefix = true;
+ } else {
+ xfer += iprot->skip(ftype);
+ }
+ break;
+ default:
+ xfer += iprot->skip(ftype);
+ break;
+ }
+ xfer += iprot->readFieldEnd();
+ }
+
+ xfer += iprot->readStructEnd();
+
+ return xfer;
+}
+
+uint32_t AesGcmCtrV1::write(::apache::thrift::protocol::TProtocol* oprot) const {
+ uint32_t xfer = 0;
+ ::apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot);
+ xfer += oprot->writeStructBegin("AesGcmCtrV1");
+
+ if (this->__isset.aad_prefix) {
+ xfer += oprot->writeFieldBegin("aad_prefix", ::apache::thrift::protocol::T_STRING, 1);
+ xfer += oprot->writeBinary(this->aad_prefix);
+ xfer += oprot->writeFieldEnd();
+ }
+ if (this->__isset.aad_file_unique) {
+ xfer += oprot->writeFieldBegin("aad_file_unique", ::apache::thrift::protocol::T_STRING, 2);
+ xfer += oprot->writeBinary(this->aad_file_unique);
+ xfer += oprot->writeFieldEnd();
+ }
+ if (this->__isset.supply_aad_prefix) {
+ xfer += oprot->writeFieldBegin("supply_aad_prefix", ::apache::thrift::protocol::T_BOOL, 3);
+ xfer += oprot->writeBool(this->supply_aad_prefix);
+ xfer += oprot->writeFieldEnd();
+ }
+ xfer += oprot->writeFieldStop();
+ xfer += oprot->writeStructEnd();
+ return xfer;
+}
+
+void swap(AesGcmCtrV1 &a, AesGcmCtrV1 &b) {
+ using ::std::swap;
+ swap(a.aad_prefix, b.aad_prefix);
+ swap(a.aad_file_unique, b.aad_file_unique);
+ swap(a.supply_aad_prefix, b.supply_aad_prefix);
+ swap(a.__isset, b.__isset);
+}
+
+AesGcmCtrV1::AesGcmCtrV1(const AesGcmCtrV1& other181) {
+ aad_prefix = other181.aad_prefix;
+ aad_file_unique = other181.aad_file_unique;
+ supply_aad_prefix = other181.supply_aad_prefix;
+ __isset = other181.__isset;
+}
+AesGcmCtrV1& AesGcmCtrV1::operator=(const AesGcmCtrV1& other182) {
+ aad_prefix = other182.aad_prefix;
+ aad_file_unique = other182.aad_file_unique;
+ supply_aad_prefix = other182.supply_aad_prefix;
+ __isset = other182.__isset;
+ return *this;
+}
+void AesGcmCtrV1::printTo(std::ostream& out) const {
+ using ::apache::thrift::to_string;
+ out << "AesGcmCtrV1(";
+ out << "aad_prefix="; (__isset.aad_prefix ? (out << to_string(aad_prefix)) : (out << "<null>"));
+ out << ", " << "aad_file_unique="; (__isset.aad_file_unique ? (out << to_string(aad_file_unique)) : (out << "<null>"));
+ out << ", " << "supply_aad_prefix="; (__isset.supply_aad_prefix ? (out << to_string(supply_aad_prefix)) : (out << "<null>"));
+ out << ")";
+}
+
+
+EncryptionAlgorithm::~EncryptionAlgorithm() noexcept {
+}
+
+
+void EncryptionAlgorithm::__set_AES_GCM_V1(const AesGcmV1& val) {
+ this->AES_GCM_V1 = val;
+  __isset.AES_GCM_V1 = true;
+}
+
+void EncryptionAlgorithm::__set_AES_GCM_CTR_V1(const AesGcmCtrV1& val) {
+ this->AES_GCM_CTR_V1 = val;
+  __isset.AES_GCM_CTR_V1 = true;
+}
+std::ostream& operator<<(std::ostream& out, const EncryptionAlgorithm& obj)
+{
+ obj.printTo(out);
+ return out;
+}
+
+
+uint32_t EncryptionAlgorithm::read(::apache::thrift::protocol::TProtocol* iprot) {
+
+ ::apache::thrift::protocol::TInputRecursionTracker tracker(*iprot);
+ uint32_t xfer = 0;
+ std::string fname;
+ ::apache::thrift::protocol::TType ftype;
+ int16_t fid;
+
+ xfer += iprot->readStructBegin(fname);
+
+ using ::apache::thrift::protocol::TProtocolException;
+
+
+ while (true)
+ {
+ xfer += iprot->readFieldBegin(fname, ftype, fid);
+ if (ftype == ::apache::thrift::protocol::T_STOP) {
+ break;
+ }
+ switch (fid)
+ {
+ case 1:
+ if (ftype == ::apache::thrift::protocol::T_STRUCT) {
+ xfer += this->AES_GCM_V1.read(iprot);
+ this->__isset.AES_GCM_V1 = true;
+ } else {
+ xfer += iprot->skip(ftype);
+ }
+ break;
+ case 2:
+ if (ftype == ::apache::thrift::protocol::T_STRUCT) {
+ xfer += this->AES_GCM_CTR_V1.read(iprot);
+ this->__isset.AES_GCM_CTR_V1 = true;
+ } else {
+ xfer += iprot->skip(ftype);
+ }
+ break;
+ default:
+ xfer += iprot->skip(ftype);
+ break;
+ }
+ xfer += iprot->readFieldEnd();
+ }
+
+ xfer += iprot->readStructEnd();
+
+ return xfer;
+}
+
+uint32_t EncryptionAlgorithm::write(::apache::thrift::protocol::TProtocol* oprot) const {
+ uint32_t xfer = 0;
+ ::apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot);
+ xfer += oprot->writeStructBegin("EncryptionAlgorithm");
+
+ if (this->__isset.AES_GCM_V1) {
+ xfer += oprot->writeFieldBegin("AES_GCM_V1", ::apache::thrift::protocol::T_STRUCT, 1);
+ xfer += this->AES_GCM_V1.write(oprot);
+ xfer += oprot->writeFieldEnd();
+ }
+ if (this->__isset.AES_GCM_CTR_V1) {
+ xfer += oprot->writeFieldBegin("AES_GCM_CTR_V1", ::apache::thrift::protocol::T_STRUCT, 2);
+ xfer += this->AES_GCM_CTR_V1.write(oprot);
+ xfer += oprot->writeFieldEnd();
+ }
+ xfer += oprot->writeFieldStop();
+ xfer += oprot->writeStructEnd();
+ return xfer;
+}
+
+void swap(EncryptionAlgorithm &a, EncryptionAlgorithm &b) {
+ using ::std::swap;
+ swap(a.AES_GCM_V1, b.AES_GCM_V1);
+ swap(a.AES_GCM_CTR_V1, b.AES_GCM_CTR_V1);
+ swap(a.__isset, b.__isset);
+}
+
+EncryptionAlgorithm::EncryptionAlgorithm(const EncryptionAlgorithm& other183) {
+ AES_GCM_V1 = other183.AES_GCM_V1;
+ AES_GCM_CTR_V1 = other183.AES_GCM_CTR_V1;
+ __isset = other183.__isset;
+}
+EncryptionAlgorithm& EncryptionAlgorithm::operator=(const EncryptionAlgorithm& other184) {
+ AES_GCM_V1 = other184.AES_GCM_V1;
+ AES_GCM_CTR_V1 = other184.AES_GCM_CTR_V1;
+ __isset = other184.__isset;
+ return *this;
+}
+void EncryptionAlgorithm::printTo(std::ostream& out) const {
+ using ::apache::thrift::to_string;
+ out << "EncryptionAlgorithm(";
+ out << "AES_GCM_V1="; (__isset.AES_GCM_V1 ? (out << to_string(AES_GCM_V1)) : (out << "<null>"));
+ out << ", " << "AES_GCM_CTR_V1="; (__isset.AES_GCM_CTR_V1 ? (out << to_string(AES_GCM_CTR_V1)) : (out << "<null>"));
+ out << ")";
+}
+
+
+FileMetaData::~FileMetaData() noexcept {
+}
+
+
+void FileMetaData::__set_version(const int32_t val) {
+ this->version = val;
+}
+
+void FileMetaData::__set_schema(const std::vector<SchemaElement> & val) {
+ this->schema = val;
+}
+
+void FileMetaData::__set_num_rows(const int64_t val) {
+ this->num_rows = val;
+}
+
+void FileMetaData::__set_row_groups(const std::vector<RowGroup> & val) {
+ this->row_groups = val;
+}
+
+void FileMetaData::__set_key_value_metadata(const std::vector<KeyValue> & val) {
+  this->key_value_metadata = val;
+  __isset.key_value_metadata = true;
+}
+
+void FileMetaData::__set_created_by(const std::string& val) {
+  this->created_by = val;
+  __isset.created_by = true;
+}
+
+void FileMetaData::__set_column_orders(const std::vector<ColumnOrder> & val) {
+  this->column_orders = val;
+  __isset.column_orders = true;
+}
+
+void FileMetaData::__set_encryption_algorithm(const EncryptionAlgorithm& val) {
+  this->encryption_algorithm = val;
+  __isset.encryption_algorithm = true;
+}
+
+void FileMetaData::__set_footer_signing_key_metadata(const std::string& val) {
+  this->footer_signing_key_metadata = val;
+  __isset.footer_signing_key_metadata = true;
+}
+std::ostream& operator<<(std::ostream& out, const FileMetaData& obj)
+{
+ obj.printTo(out);
+ return out;
+}
+
+
+uint32_t FileMetaData::read(::apache::thrift::protocol::TProtocol* iprot) {
+
+ ::apache::thrift::protocol::TInputRecursionTracker tracker(*iprot);
+ uint32_t xfer = 0;
+ std::string fname;
+ ::apache::thrift::protocol::TType ftype;
+ int16_t fid;
+
+ xfer += iprot->readStructBegin(fname);
+
+ using ::apache::thrift::protocol::TProtocolException;
+
+ bool isset_version = false;
+ bool isset_schema = false;
+ bool isset_num_rows = false;
+ bool isset_row_groups = false;
+
+ while (true)
+ {
+ xfer += iprot->readFieldBegin(fname, ftype, fid);
+ if (ftype == ::apache::thrift::protocol::T_STOP) {
+ break;
+ }
+ switch (fid)
+ {
+ case 1:
+ if (ftype == ::apache::thrift::protocol::T_I32) {
+ xfer += iprot->readI32(this->version);
+ isset_version = true;
+ } else {
+ xfer += iprot->skip(ftype);
+ }
+ break;
+ case 2:
+ if (ftype == ::apache::thrift::protocol::T_LIST) {
+ {
+ this->schema.clear();
+ uint32_t _size185;
+ ::apache::thrift::protocol::TType _etype188;
+ xfer += iprot->readListBegin(_etype188, _size185);
+ this->schema.resize(_size185);
+ uint32_t _i189;
+ for (_i189 = 0; _i189 < _size185; ++_i189)
+ {
+ xfer += this->schema[_i189].read(iprot);
+ }
+ xfer += iprot->readListEnd();
+ }
+ isset_schema = true;
+ } else {
+ xfer += iprot->skip(ftype);
+ }
+ break;
+ case 3:
+ if (ftype == ::apache::thrift::protocol::T_I64) {
+ xfer += iprot->readI64(this->num_rows);
+ isset_num_rows = true;
+ } else {
+ xfer += iprot->skip(ftype);
+ }
+ break;
+ case 4:
+ if (ftype == ::apache::thrift::protocol::T_LIST) {
+ {
+ this->row_groups.clear();
+ uint32_t _size190;
+ ::apache::thrift::protocol::TType _etype193;
+ xfer += iprot->readListBegin(_etype193, _size190);
+ this->row_groups.resize(_size190);
+ uint32_t _i194;
+ for (_i194 = 0; _i194 < _size190; ++_i194)
+ {
+ xfer += this->row_groups[_i194].read(iprot);
+ }
+ xfer += iprot->readListEnd();
+ }
+ isset_row_groups = true;
+ } else {
+ xfer += iprot->skip(ftype);
+ }
+ break;
+ case 5:
+ if (ftype == ::apache::thrift::protocol::T_LIST) {
+ {
+ this->key_value_metadata.clear();
+ uint32_t _size195;
+ ::apache::thrift::protocol::TType _etype198;
+ xfer += iprot->readListBegin(_etype198, _size195);
+ this->key_value_metadata.resize(_size195);
+ uint32_t _i199;
+ for (_i199 = 0; _i199 < _size195; ++_i199)
+ {
+ xfer += this->key_value_metadata[_i199].read(iprot);
+ }
+ xfer += iprot->readListEnd();
+ }
+ this->__isset.key_value_metadata = true;
+ } else {
+ xfer += iprot->skip(ftype);
+ }
+ break;
+ case 6:
+ if (ftype == ::apache::thrift::protocol::T_STRING) {
+ xfer += iprot->readString(this->created_by);
+ this->__isset.created_by = true;
+ } else {
+ xfer += iprot->skip(ftype);
+ }
+ break;
+ case 7:
+ if (ftype == ::apache::thrift::protocol::T_LIST) {
+ {
+ this->column_orders.clear();
+ uint32_t _size200;
+ ::apache::thrift::protocol::TType _etype203;
+ xfer += iprot->readListBegin(_etype203, _size200);
+ this->column_orders.resize(_size200);
+ uint32_t _i204;
+ for (_i204 = 0; _i204 < _size200; ++_i204)
+ {
+ xfer += this->column_orders[_i204].read(iprot);
+ }
+ xfer += iprot->readListEnd();
+ }
+ this->__isset.column_orders = true;
+ } else {
+ xfer += iprot->skip(ftype);
+ }
+ break;
+ case 8:
+ if (ftype == ::apache::thrift::protocol::T_STRUCT) {
+ xfer += this->encryption_algorithm.read(iprot);
+ this->__isset.encryption_algorithm = true;
+ } else {
+ xfer += iprot->skip(ftype);
+ }
+ break;
+ case 9:
+ if (ftype == ::apache::thrift::protocol::T_STRING) {
+ xfer += iprot->readBinary(this->footer_signing_key_metadata);
+ this->__isset.footer_signing_key_metadata = true;
+ } else {
+ xfer += iprot->skip(ftype);
+ }
+ break;
+ default:
+ xfer += iprot->skip(ftype);
+ break;
+ }
+ xfer += iprot->readFieldEnd();
+ }
+
+ xfer += iprot->readStructEnd();
+
+ if (!isset_version)
+ throw TProtocolException(TProtocolException::INVALID_DATA);
+ if (!isset_schema)
+ throw TProtocolException(TProtocolException::INVALID_DATA);
+ if (!isset_num_rows)
+ throw TProtocolException(TProtocolException::INVALID_DATA);
+ if (!isset_row_groups)
+ throw TProtocolException(TProtocolException::INVALID_DATA);
+ return xfer;
+}
+
+uint32_t FileMetaData::write(::apache::thrift::protocol::TProtocol* oprot) const {
+ uint32_t xfer = 0;
+ ::apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot);
+ xfer += oprot->writeStructBegin("FileMetaData");
+
+ xfer += oprot->writeFieldBegin("version", ::apache::thrift::protocol::T_I32, 1);
+ xfer += oprot->writeI32(this->version);
+ xfer += oprot->writeFieldEnd();
+
+ xfer += oprot->writeFieldBegin("schema", ::apache::thrift::protocol::T_LIST, 2);
+ {
+ xfer += oprot->writeListBegin(::apache::thrift::protocol::T_STRUCT, static_cast<uint32_t>(this->schema.size()));
+    std::vector<SchemaElement>::const_iterator _iter205;
+ for (_iter205 = this->schema.begin(); _iter205 != this->schema.end(); ++_iter205)
+ {
+ xfer += (*_iter205).write(oprot);
+ }
+ xfer += oprot->writeListEnd();
+ }
+ xfer += oprot->writeFieldEnd();
+
+ xfer += oprot->writeFieldBegin("num_rows", ::apache::thrift::protocol::T_I64, 3);
+ xfer += oprot->writeI64(this->num_rows);
+ xfer += oprot->writeFieldEnd();
+
+ xfer += oprot->writeFieldBegin("row_groups", ::apache::thrift::protocol::T_LIST, 4);
+ {
+ xfer += oprot->writeListBegin(::apache::thrift::protocol::T_STRUCT, static_cast<uint32_t>(this->row_groups.size()));
+    std::vector<RowGroup>::const_iterator _iter206;
+ for (_iter206 = this->row_groups.begin(); _iter206 != this->row_groups.end(); ++_iter206)
+ {
+ xfer += (*_iter206).write(oprot);
+ }
+ xfer += oprot->writeListEnd();
+ }
+ xfer += oprot->writeFieldEnd();
+
+ if (this->__isset.key_value_metadata) {
+ xfer += oprot->writeFieldBegin("key_value_metadata", ::apache::thrift::protocol::T_LIST, 5);
+ {
+ xfer += oprot->writeListBegin(::apache::thrift::protocol::T_STRUCT, static_cast<uint32_t>(this->key_value_metadata.size()));
+      std::vector<KeyValue>::const_iterator _iter207;
+ for (_iter207 = this->key_value_metadata.begin(); _iter207 != this->key_value_metadata.end(); ++_iter207)
+ {
+ xfer += (*_iter207).write(oprot);
+ }
+ xfer += oprot->writeListEnd();
+ }
+ xfer += oprot->writeFieldEnd();
+ }
+ if (this->__isset.created_by) {
+ xfer += oprot->writeFieldBegin("created_by", ::apache::thrift::protocol::T_STRING, 6);
+ xfer += oprot->writeString(this->created_by);
+ xfer += oprot->writeFieldEnd();
+ }
+ if (this->__isset.column_orders) {
+ xfer += oprot->writeFieldBegin("column_orders", ::apache::thrift::protocol::T_LIST, 7);
+ {
+ xfer += oprot->writeListBegin(::apache::thrift::protocol::T_STRUCT, static_cast<uint32_t>(this->column_orders.size()));
+      std::vector<ColumnOrder>::const_iterator _iter208;
+ for (_iter208 = this->column_orders.begin(); _iter208 != this->column_orders.end(); ++_iter208)
+ {
+ xfer += (*_iter208).write(oprot);
+ }
+ xfer += oprot->writeListEnd();
+ }
+ xfer += oprot->writeFieldEnd();
+ }
+ if (this->__isset.encryption_algorithm) {
+ xfer += oprot->writeFieldBegin("encryption_algorithm", ::apache::thrift::protocol::T_STRUCT, 8);
+ xfer += this->encryption_algorithm.write(oprot);
+ xfer += oprot->writeFieldEnd();
+ }
+ if (this->__isset.footer_signing_key_metadata) {
+ xfer += oprot->writeFieldBegin("footer_signing_key_metadata", ::apache::thrift::protocol::T_STRING, 9);
+ xfer += oprot->writeBinary(this->footer_signing_key_metadata);
+ xfer += oprot->writeFieldEnd();
+ }
+ xfer += oprot->writeFieldStop();
+ xfer += oprot->writeStructEnd();
+ return xfer;
+}
+
+void swap(FileMetaData &a, FileMetaData &b) {
+ using ::std::swap;
+ swap(a.version, b.version);
+ swap(a.schema, b.schema);
+ swap(a.num_rows, b.num_rows);
+ swap(a.row_groups, b.row_groups);
+ swap(a.key_value_metadata, b.key_value_metadata);
+ swap(a.created_by, b.created_by);
+ swap(a.column_orders, b.column_orders);
+ swap(a.encryption_algorithm, b.encryption_algorithm);
+ swap(a.footer_signing_key_metadata, b.footer_signing_key_metadata);
+ swap(a.__isset, b.__isset);
+}
+
+FileMetaData::FileMetaData(const FileMetaData& other209) {
+ version = other209.version;
+ schema = other209.schema;
+ num_rows = other209.num_rows;
+ row_groups = other209.row_groups;
+ key_value_metadata = other209.key_value_metadata;
+ created_by = other209.created_by;
+ column_orders = other209.column_orders;
+ encryption_algorithm = other209.encryption_algorithm;
+ footer_signing_key_metadata = other209.footer_signing_key_metadata;
+ __isset = other209.__isset;
+}
+FileMetaData& FileMetaData::operator=(const FileMetaData& other210) {
+ version = other210.version;
+ schema = other210.schema;
+ num_rows = other210.num_rows;
+ row_groups = other210.row_groups;
+ key_value_metadata = other210.key_value_metadata;
+ created_by = other210.created_by;
+ column_orders = other210.column_orders;
+ encryption_algorithm = other210.encryption_algorithm;
+ footer_signing_key_metadata = other210.footer_signing_key_metadata;
+ __isset = other210.__isset;
+ return *this;
+}
+void FileMetaData::printTo(std::ostream& out) const {
+ using ::apache::thrift::to_string;
+ out << "FileMetaData(";
+ out << "version=" << to_string(version);
+ out << ", " << "schema=" << to_string(schema);
+ out << ", " << "num_rows=" << to_string(num_rows);
+ out << ", " << "row_groups=" << to_string(row_groups);
+ out << ", " << "key_value_metadata="; (__isset.key_value_metadata ? (out << to_string(key_value_metadata)) : (out << "<null>"));
+ out << ", " << "created_by="; (__isset.created_by ? (out << to_string(created_by)) : (out << "<null>"));
+ out << ", " << "column_orders="; (__isset.column_orders ? (out << to_string(column_orders)) : (out << "<null>"));
+ out << ", " << "encryption_algorithm="; (__isset.encryption_algorithm ? (out << to_string(encryption_algorithm)) : (out << "<null>"));
+ out << ", " << "footer_signing_key_metadata="; (__isset.footer_signing_key_metadata ? (out << to_string(footer_signing_key_metadata)) : (out << "<null>"));
+ out << ")";
+}
+
+
+FileCryptoMetaData::~FileCryptoMetaData() noexcept {
+}
+
+
+void FileCryptoMetaData::__set_encryption_algorithm(const EncryptionAlgorithm& val) {
+ this->encryption_algorithm = val;
+}
+
+void FileCryptoMetaData::__set_key_metadata(const std::string& val) {
+  this->key_metadata = val;
+  __isset.key_metadata = true;
+}
+std::ostream& operator<<(std::ostream& out, const FileCryptoMetaData& obj)
+{
+ obj.printTo(out);
+ return out;
+}
+
+
+uint32_t FileCryptoMetaData::read(::apache::thrift::protocol::TProtocol* iprot) {
+
+ ::apache::thrift::protocol::TInputRecursionTracker tracker(*iprot);
+ uint32_t xfer = 0;
+ std::string fname;
+ ::apache::thrift::protocol::TType ftype;
+ int16_t fid;
+
+ xfer += iprot->readStructBegin(fname);
+
+ using ::apache::thrift::protocol::TProtocolException;
+
+ bool isset_encryption_algorithm = false;
+
+ while (true)
+ {
+ xfer += iprot->readFieldBegin(fname, ftype, fid);
+ if (ftype == ::apache::thrift::protocol::T_STOP) {
+ break;
+ }
+ switch (fid)
+ {
+ case 1:
+ if (ftype == ::apache::thrift::protocol::T_STRUCT) {
+ xfer += this->encryption_algorithm.read(iprot);
+ isset_encryption_algorithm = true;
+ } else {
+ xfer += iprot->skip(ftype);
+ }
+ break;
+ case 2:
+ if (ftype == ::apache::thrift::protocol::T_STRING) {
+ xfer += iprot->readBinary(this->key_metadata);
+ this->__isset.key_metadata = true;
+ } else {
+ xfer += iprot->skip(ftype);
+ }
+ break;
+ default:
+ xfer += iprot->skip(ftype);
+ break;
+ }
+ xfer += iprot->readFieldEnd();
+ }
+
+ xfer += iprot->readStructEnd();
+
+ if (!isset_encryption_algorithm)
+ throw TProtocolException(TProtocolException::INVALID_DATA);
+ return xfer;
+}
+
+uint32_t FileCryptoMetaData::write(::apache::thrift::protocol::TProtocol* oprot) const {
+ uint32_t xfer = 0;
+ ::apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot);
+ xfer += oprot->writeStructBegin("FileCryptoMetaData");
+
+ xfer += oprot->writeFieldBegin("encryption_algorithm", ::apache::thrift::protocol::T_STRUCT, 1);
+ xfer += this->encryption_algorithm.write(oprot);
+ xfer += oprot->writeFieldEnd();
+
+ if (this->__isset.key_metadata) {
+ xfer += oprot->writeFieldBegin("key_metadata", ::apache::thrift::protocol::T_STRING, 2);
+ xfer += oprot->writeBinary(this->key_metadata);
+ xfer += oprot->writeFieldEnd();
+ }
+ xfer += oprot->writeFieldStop();
+ xfer += oprot->writeStructEnd();
+ return xfer;
+}
+
+void swap(FileCryptoMetaData &a, FileCryptoMetaData &b) {
+ using ::std::swap;
+ swap(a.encryption_algorithm, b.encryption_algorithm);
+ swap(a.key_metadata, b.key_metadata);
+ swap(a.__isset, b.__isset);
+}
+
+FileCryptoMetaData::FileCryptoMetaData(const FileCryptoMetaData& other211) {
+ encryption_algorithm = other211.encryption_algorithm;
+ key_metadata = other211.key_metadata;
+ __isset = other211.__isset;
+}
+FileCryptoMetaData& FileCryptoMetaData::operator=(const FileCryptoMetaData& other212) {
+ encryption_algorithm = other212.encryption_algorithm;
+ key_metadata = other212.key_metadata;
+ __isset = other212.__isset;
+ return *this;
+}
+void FileCryptoMetaData::printTo(std::ostream& out) const {
+ using ::apache::thrift::to_string;
+ out << "FileCryptoMetaData(";
+ out << "encryption_algorithm=" << to_string(encryption_algorithm);
+ out << ", " << "key_metadata="; (__isset.key_metadata ? (out << to_string(key_metadata)) : (out << "<null>"));
+ out << ")";
+}
+
+}} // namespace
diff --git a/contrib/libs/apache/arrow/cpp/src/generated/parquet_types.h b/contrib/libs/apache/arrow/cpp/src/generated/parquet_types.h
index c48383fa4d5..3d7edd40983 100644
--- a/contrib/libs/apache/arrow/cpp/src/generated/parquet_types.h
+++ b/contrib/libs/apache/arrow/cpp/src/generated/parquet_types.h
@@ -1,2917 +1,2917 @@
-/**
- * Autogenerated by Thrift Compiler (0.13.0)
- *
- * DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING
- * @generated
- */
-#ifndef parquet_TYPES_H
-#define parquet_TYPES_H
-
-#include <iosfwd>
-
-#include <thrift/Thrift.h>
-#include <thrift/TApplicationException.h>
-#include <thrift/TBase.h>
-#include <thrift/protocol/TProtocol.h>
-#include <thrift/transport/TTransport.h>
-
-#include <functional>
-#include <memory>
-
-#include "parquet/windows_compatibility.h"
-
-namespace parquet { namespace format {
-
-struct Type {
- enum type {
- BOOLEAN = 0,
- INT32 = 1,
- INT64 = 2,
- INT96 = 3,
- FLOAT = 4,
- DOUBLE = 5,
- BYTE_ARRAY = 6,
- FIXED_LEN_BYTE_ARRAY = 7
- };
-};
-
-extern const std::map<int, const char*> _Type_VALUES_TO_NAMES;
-
-std::ostream& operator<<(std::ostream& out, const Type::type& val);
-
-std::string to_string(const Type::type& val);
-
-struct ConvertedType {
- enum type {
- UTF8 = 0,
- MAP = 1,
- MAP_KEY_VALUE = 2,
- LIST = 3,
- ENUM = 4,
- DECIMAL = 5,
- DATE = 6,
- TIME_MILLIS = 7,
- TIME_MICROS = 8,
- TIMESTAMP_MILLIS = 9,
- TIMESTAMP_MICROS = 10,
- UINT_8 = 11,
- UINT_16 = 12,
- UINT_32 = 13,
- UINT_64 = 14,
- INT_8 = 15,
- INT_16 = 16,
- INT_32 = 17,
- INT_64 = 18,
- JSON = 19,
- BSON = 20,
- INTERVAL = 21
- };
-};
-
-extern const std::map<int, const char*> _ConvertedType_VALUES_TO_NAMES;
-
-std::ostream& operator<<(std::ostream& out, const ConvertedType::type& val);
-
-std::string to_string(const ConvertedType::type& val);
-
-struct FieldRepetitionType {
- enum type {
- REQUIRED = 0,
- OPTIONAL = 1,
- REPEATED = 2
- };
-};
-
-extern const std::map<int, const char*> _FieldRepetitionType_VALUES_TO_NAMES;
-
-std::ostream& operator<<(std::ostream& out, const FieldRepetitionType::type& val);
-
-std::string to_string(const FieldRepetitionType::type& val);
-
-struct Encoding {
- enum type {
- PLAIN = 0,
- PLAIN_DICTIONARY = 2,
- RLE = 3,
- BIT_PACKED = 4,
- DELTA_BINARY_PACKED = 5,
- DELTA_LENGTH_BYTE_ARRAY = 6,
- DELTA_BYTE_ARRAY = 7,
- RLE_DICTIONARY = 8,
- BYTE_STREAM_SPLIT = 9
- };
-};
-
-extern const std::map<int, const char*> _Encoding_VALUES_TO_NAMES;
-
-std::ostream& operator<<(std::ostream& out, const Encoding::type& val);
-
-std::string to_string(const Encoding::type& val);
-
-struct CompressionCodec {
- enum type {
- UNCOMPRESSED = 0,
- SNAPPY = 1,
- GZIP = 2,
- LZO = 3,
- BROTLI = 4,
- LZ4 = 5,
- ZSTD = 6,
- LZ4_RAW = 7
- };
-};
-
-extern const std::map<int, const char*> _CompressionCodec_VALUES_TO_NAMES;
-
-std::ostream& operator<<(std::ostream& out, const CompressionCodec::type& val);
-
-std::string to_string(const CompressionCodec::type& val);
-
-struct PageType {
- enum type {
- DATA_PAGE = 0,
- INDEX_PAGE = 1,
- DICTIONARY_PAGE = 2,
- DATA_PAGE_V2 = 3
- };
-};
-
-extern const std::map<int, const char*> _PageType_VALUES_TO_NAMES;
-
-std::ostream& operator<<(std::ostream& out, const PageType::type& val);
-
-std::string to_string(const PageType::type& val);
-
-struct BoundaryOrder {
- enum type {
- UNORDERED = 0,
- ASCENDING = 1,
- DESCENDING = 2
- };
-};
-
-extern const std::map<int, const char*> _BoundaryOrder_VALUES_TO_NAMES;
-
-std::ostream& operator<<(std::ostream& out, const BoundaryOrder::type& val);
-
-std::string to_string(const BoundaryOrder::type& val);
-
-class Statistics;
-
-class StringType;
-
-class UUIDType;
-
-class MapType;
-
-class ListType;
-
-class EnumType;
-
-class DateType;
-
-class NullType;
-
-class DecimalType;
-
-class MilliSeconds;
-
-class MicroSeconds;
-
-class NanoSeconds;
-
-class TimeUnit;
-
-class TimestampType;
-
-class TimeType;
-
-class IntType;
-
-class JsonType;
-
-class BsonType;
-
-class LogicalType;
-
-class SchemaElement;
-
-class DataPageHeader;
-
-class IndexPageHeader;
-
-class DictionaryPageHeader;
-
-class DataPageHeaderV2;
-
-class SplitBlockAlgorithm;
-
-class BloomFilterAlgorithm;
-
-class XxHash;
-
-class BloomFilterHash;
-
-class Uncompressed;
-
-class BloomFilterCompression;
-
-class BloomFilterHeader;
-
-class PageHeader;
-
-class KeyValue;
-
-class SortingColumn;
-
-class PageEncodingStats;
-
-class ColumnMetaData;
-
-class EncryptionWithFooterKey;
-
-class EncryptionWithColumnKey;
-
-class ColumnCryptoMetaData;
-
-class ColumnChunk;
-
-class RowGroup;
-
-class TypeDefinedOrder;
-
-class ColumnOrder;
-
-class PageLocation;
-
-class OffsetIndex;
-
-class ColumnIndex;
-
-class AesGcmV1;
-
-class AesGcmCtrV1;
-
-class EncryptionAlgorithm;
-
-class FileMetaData;
-
-class FileCryptoMetaData;
-
-typedef struct _Statistics__isset {
- _Statistics__isset() : max(false), min(false), null_count(false), distinct_count(false), max_value(false), min_value(false) {}
- bool max :1;
- bool min :1;
- bool null_count :1;
- bool distinct_count :1;
- bool max_value :1;
- bool min_value :1;
-} _Statistics__isset;
-
-class Statistics : public virtual ::apache::thrift::TBase {
- public:
-
- Statistics(const Statistics&);
- Statistics& operator=(const Statistics&);
- Statistics() : max(), min(), null_count(0), distinct_count(0), max_value(), min_value() {
- }
-
- virtual ~Statistics() noexcept;
- std::string max;
- std::string min;
- int64_t null_count;
- int64_t distinct_count;
- std::string max_value;
- std::string min_value;
-
- _Statistics__isset __isset;
-
- void __set_max(const std::string& val);
-
- void __set_min(const std::string& val);
-
- void __set_null_count(const int64_t val);
-
- void __set_distinct_count(const int64_t val);
-
- void __set_max_value(const std::string& val);
-
- void __set_min_value(const std::string& val);
-
- bool operator == (const Statistics & rhs) const
- {
- if (__isset.max != rhs.__isset.max)
- return false;
- else if (__isset.max && !(max == rhs.max))
- return false;
- if (__isset.min != rhs.__isset.min)
- return false;
- else if (__isset.min && !(min == rhs.min))
- return false;
- if (__isset.null_count != rhs.__isset.null_count)
- return false;
- else if (__isset.null_count && !(null_count == rhs.null_count))
- return false;
- if (__isset.distinct_count != rhs.__isset.distinct_count)
- return false;
- else if (__isset.distinct_count && !(distinct_count == rhs.distinct_count))
- return false;
- if (__isset.max_value != rhs.__isset.max_value)
- return false;
- else if (__isset.max_value && !(max_value == rhs.max_value))
- return false;
- if (__isset.min_value != rhs.__isset.min_value)
- return false;
- else if (__isset.min_value && !(min_value == rhs.min_value))
- return false;
- return true;
- }
- bool operator != (const Statistics &rhs) const {
- return !(*this == rhs);
- }
-
- bool operator < (const Statistics & ) const;
-
- uint32_t read(::apache::thrift::protocol::TProtocol* iprot);
- uint32_t write(::apache::thrift::protocol::TProtocol* oprot) const;
-
- virtual void printTo(std::ostream& out) const;
-};
-
-void swap(Statistics &a, Statistics &b);
-
-std::ostream& operator<<(std::ostream& out, const Statistics& obj);
-
-
-class StringType : public virtual ::apache::thrift::TBase {
- public:
-
- StringType(const StringType&);
- StringType& operator=(const StringType&);
- StringType() {
- }
-
- virtual ~StringType() noexcept;
-
- bool operator == (const StringType & /* rhs */) const
- {
- return true;
- }
- bool operator != (const StringType &rhs) const {
- return !(*this == rhs);
- }
-
- bool operator < (const StringType & ) const;
-
- uint32_t read(::apache::thrift::protocol::TProtocol* iprot);
- uint32_t write(::apache::thrift::protocol::TProtocol* oprot) const;
-
- virtual void printTo(std::ostream& out) const;
-};
-
-void swap(StringType &a, StringType &b);
-
-std::ostream& operator<<(std::ostream& out, const StringType& obj);
-
-
-class UUIDType : public virtual ::apache::thrift::TBase {
- public:
-
- UUIDType(const UUIDType&);
- UUIDType& operator=(const UUIDType&);
- UUIDType() {
- }
-
- virtual ~UUIDType() noexcept;
-
- bool operator == (const UUIDType & /* rhs */) const
- {
- return true;
- }
- bool operator != (const UUIDType &rhs) const {
- return !(*this == rhs);
- }
-
- bool operator < (const UUIDType & ) const;
-
- uint32_t read(::apache::thrift::protocol::TProtocol* iprot);
- uint32_t write(::apache::thrift::protocol::TProtocol* oprot) const;
-
- virtual void printTo(std::ostream& out) const;
-};
-
-void swap(UUIDType &a, UUIDType &b);
-
-std::ostream& operator<<(std::ostream& out, const UUIDType& obj);
-
-
-class MapType : public virtual ::apache::thrift::TBase {
- public:
-
- MapType(const MapType&);
- MapType& operator=(const MapType&);
- MapType() {
- }
-
- virtual ~MapType() noexcept;
-
- bool operator == (const MapType & /* rhs */) const
- {
- return true;
- }
- bool operator != (const MapType &rhs) const {
- return !(*this == rhs);
- }
-
- bool operator < (const MapType & ) const;
-
- uint32_t read(::apache::thrift::protocol::TProtocol* iprot);
- uint32_t write(::apache::thrift::protocol::TProtocol* oprot) const;
-
- virtual void printTo(std::ostream& out) const;
-};
-
-void swap(MapType &a, MapType &b);
-
-std::ostream& operator<<(std::ostream& out, const MapType& obj);
-
-
-class ListType : public virtual ::apache::thrift::TBase {
- public:
-
- ListType(const ListType&);
- ListType& operator=(const ListType&);
- ListType() {
- }
-
- virtual ~ListType() noexcept;
-
- bool operator == (const ListType & /* rhs */) const
- {
- return true;
- }
- bool operator != (const ListType &rhs) const {
- return !(*this == rhs);
- }
-
- bool operator < (const ListType & ) const;
-
- uint32_t read(::apache::thrift::protocol::TProtocol* iprot);
- uint32_t write(::apache::thrift::protocol::TProtocol* oprot) const;
-
- virtual void printTo(std::ostream& out) const;
-};
-
-void swap(ListType &a, ListType &b);
-
-std::ostream& operator<<(std::ostream& out, const ListType& obj);
-
-
-class EnumType : public virtual ::apache::thrift::TBase {
- public:
-
- EnumType(const EnumType&);
- EnumType& operator=(const EnumType&);
- EnumType() {
- }
-
- virtual ~EnumType() noexcept;
-
- bool operator == (const EnumType & /* rhs */) const
- {
- return true;
- }
- bool operator != (const EnumType &rhs) const {
- return !(*this == rhs);
- }
-
- bool operator < (const EnumType & ) const;
-
- uint32_t read(::apache::thrift::protocol::TProtocol* iprot);
- uint32_t write(::apache::thrift::protocol::TProtocol* oprot) const;
-
- virtual void printTo(std::ostream& out) const;
-};
-
-void swap(EnumType &a, EnumType &b);
-
-std::ostream& operator<<(std::ostream& out, const EnumType& obj);
-
-
-class DateType : public virtual ::apache::thrift::TBase {
- public:
-
- DateType(const DateType&);
- DateType& operator=(const DateType&);
- DateType() {
- }
-
- virtual ~DateType() noexcept;
-
- bool operator == (const DateType & /* rhs */) const
- {
- return true;
- }
- bool operator != (const DateType &rhs) const {
- return !(*this == rhs);
- }
-
- bool operator < (const DateType & ) const;
-
- uint32_t read(::apache::thrift::protocol::TProtocol* iprot);
- uint32_t write(::apache::thrift::protocol::TProtocol* oprot) const;
-
- virtual void printTo(std::ostream& out) const;
-};
-
-void swap(DateType &a, DateType &b);
-
-std::ostream& operator<<(std::ostream& out, const DateType& obj);
-
-
-class NullType : public virtual ::apache::thrift::TBase {
- public:
-
- NullType(const NullType&);
- NullType& operator=(const NullType&);
- NullType() {
- }
-
- virtual ~NullType() noexcept;
-
- bool operator == (const NullType & /* rhs */) const
- {
- return true;
- }
- bool operator != (const NullType &rhs) const {
- return !(*this == rhs);
- }
-
- bool operator < (const NullType & ) const;
-
- uint32_t read(::apache::thrift::protocol::TProtocol* iprot);
- uint32_t write(::apache::thrift::protocol::TProtocol* oprot) const;
-
- virtual void printTo(std::ostream& out) const;
-};
-
-void swap(NullType &a, NullType &b);
-
-std::ostream& operator<<(std::ostream& out, const NullType& obj);
-
-
-class DecimalType : public virtual ::apache::thrift::TBase {
- public:
-
- DecimalType(const DecimalType&);
- DecimalType& operator=(const DecimalType&);
- DecimalType() : scale(0), precision(0) {
- }
-
- virtual ~DecimalType() noexcept;
- int32_t scale;
- int32_t precision;
-
- void __set_scale(const int32_t val);
-
- void __set_precision(const int32_t val);
-
- bool operator == (const DecimalType & rhs) const
- {
- if (!(scale == rhs.scale))
- return false;
- if (!(precision == rhs.precision))
- return false;
- return true;
- }
- bool operator != (const DecimalType &rhs) const {
- return !(*this == rhs);
- }
-
- bool operator < (const DecimalType & ) const;
-
- uint32_t read(::apache::thrift::protocol::TProtocol* iprot);
- uint32_t write(::apache::thrift::protocol::TProtocol* oprot) const;
-
- virtual void printTo(std::ostream& out) const;
-};
-
-void swap(DecimalType &a, DecimalType &b);
-
-std::ostream& operator<<(std::ostream& out, const DecimalType& obj);
-
-
-class MilliSeconds : public virtual ::apache::thrift::TBase {
- public:
-
- MilliSeconds(const MilliSeconds&);
- MilliSeconds& operator=(const MilliSeconds&);
- MilliSeconds() {
- }
-
- virtual ~MilliSeconds() noexcept;
-
- bool operator == (const MilliSeconds & /* rhs */) const
- {
- return true;
- }
- bool operator != (const MilliSeconds &rhs) const {
- return !(*this == rhs);
- }
-
- bool operator < (const MilliSeconds & ) const;
-
- uint32_t read(::apache::thrift::protocol::TProtocol* iprot);
- uint32_t write(::apache::thrift::protocol::TProtocol* oprot) const;
-
- virtual void printTo(std::ostream& out) const;
-};
-
-void swap(MilliSeconds &a, MilliSeconds &b);
-
-std::ostream& operator<<(std::ostream& out, const MilliSeconds& obj);
-
-
-class MicroSeconds : public virtual ::apache::thrift::TBase {
- public:
-
- MicroSeconds(const MicroSeconds&);
- MicroSeconds& operator=(const MicroSeconds&);
- MicroSeconds() {
- }
-
- virtual ~MicroSeconds() noexcept;
-
- bool operator == (const MicroSeconds & /* rhs */) const
- {
- return true;
- }
- bool operator != (const MicroSeconds &rhs) const {
- return !(*this == rhs);
- }
-
- bool operator < (const MicroSeconds & ) const;
-
- uint32_t read(::apache::thrift::protocol::TProtocol* iprot);
- uint32_t write(::apache::thrift::protocol::TProtocol* oprot) const;
-
- virtual void printTo(std::ostream& out) const;
-};
-
-void swap(MicroSeconds &a, MicroSeconds &b);
-
-std::ostream& operator<<(std::ostream& out, const MicroSeconds& obj);
-
-
-class NanoSeconds : public virtual ::apache::thrift::TBase {
- public:
-
- NanoSeconds(const NanoSeconds&);
- NanoSeconds& operator=(const NanoSeconds&);
- NanoSeconds() {
- }
-
- virtual ~NanoSeconds() noexcept;
-
- bool operator == (const NanoSeconds & /* rhs */) const
- {
- return true;
- }
- bool operator != (const NanoSeconds &rhs) const {
- return !(*this == rhs);
- }
-
- bool operator < (const NanoSeconds & ) const;
-
- uint32_t read(::apache::thrift::protocol::TProtocol* iprot);
- uint32_t write(::apache::thrift::protocol::TProtocol* oprot) const;
-
- virtual void printTo(std::ostream& out) const;
-};
-
-void swap(NanoSeconds &a, NanoSeconds &b);
-
-std::ostream& operator<<(std::ostream& out, const NanoSeconds& obj);
-
-typedef struct _TimeUnit__isset {
- _TimeUnit__isset() : MILLIS(false), MICROS(false), NANOS(false) {}
- bool MILLIS :1;
- bool MICROS :1;
- bool NANOS :1;
-} _TimeUnit__isset;
-
-class TimeUnit : public virtual ::apache::thrift::TBase {
- public:
-
- TimeUnit(const TimeUnit&);
- TimeUnit& operator=(const TimeUnit&);
- TimeUnit() {
- }
-
- virtual ~TimeUnit() noexcept;
- MilliSeconds MILLIS;
- MicroSeconds MICROS;
- NanoSeconds NANOS;
-
- _TimeUnit__isset __isset;
-
- void __set_MILLIS(const MilliSeconds& val);
-
- void __set_MICROS(const MicroSeconds& val);
-
- void __set_NANOS(const NanoSeconds& val);
-
- bool operator == (const TimeUnit & rhs) const
- {
- if (__isset.MILLIS != rhs.__isset.MILLIS)
- return false;
- else if (__isset.MILLIS && !(MILLIS == rhs.MILLIS))
- return false;
- if (__isset.MICROS != rhs.__isset.MICROS)
- return false;
- else if (__isset.MICROS && !(MICROS == rhs.MICROS))
- return false;
- if (__isset.NANOS != rhs.__isset.NANOS)
- return false;
- else if (__isset.NANOS && !(NANOS == rhs.NANOS))
- return false;
- return true;
- }
- bool operator != (const TimeUnit &rhs) const {
- return !(*this == rhs);
- }
-
- bool operator < (const TimeUnit & ) const;
-
- uint32_t read(::apache::thrift::protocol::TProtocol* iprot);
- uint32_t write(::apache::thrift::protocol::TProtocol* oprot) const;
-
- virtual void printTo(std::ostream& out) const;
-};
-
-void swap(TimeUnit &a, TimeUnit &b);
-
-std::ostream& operator<<(std::ostream& out, const TimeUnit& obj);
-
-
-class TimestampType : public virtual ::apache::thrift::TBase {
- public:
-
- TimestampType(const TimestampType&);
- TimestampType& operator=(const TimestampType&);
- TimestampType() : isAdjustedToUTC(0) {
- }
-
- virtual ~TimestampType() noexcept;
- bool isAdjustedToUTC;
- TimeUnit unit;
-
- void __set_isAdjustedToUTC(const bool val);
-
- void __set_unit(const TimeUnit& val);
-
- bool operator == (const TimestampType & rhs) const
- {
- if (!(isAdjustedToUTC == rhs.isAdjustedToUTC))
- return false;
- if (!(unit == rhs.unit))
- return false;
- return true;
- }
- bool operator != (const TimestampType &rhs) const {
- return !(*this == rhs);
- }
-
- bool operator < (const TimestampType & ) const;
-
- uint32_t read(::apache::thrift::protocol::TProtocol* iprot);
- uint32_t write(::apache::thrift::protocol::TProtocol* oprot) const;
-
- virtual void printTo(std::ostream& out) const;
-};
-
-void swap(TimestampType &a, TimestampType &b);
-
-std::ostream& operator<<(std::ostream& out, const TimestampType& obj);
-
-
-class TimeType : public virtual ::apache::thrift::TBase {
- public:
-
- TimeType(const TimeType&);
- TimeType& operator=(const TimeType&);
- TimeType() : isAdjustedToUTC(0) {
- }
-
- virtual ~TimeType() noexcept;
- bool isAdjustedToUTC;
- TimeUnit unit;
-
- void __set_isAdjustedToUTC(const bool val);
-
- void __set_unit(const TimeUnit& val);
-
- bool operator == (const TimeType & rhs) const
- {
- if (!(isAdjustedToUTC == rhs.isAdjustedToUTC))
- return false;
- if (!(unit == rhs.unit))
- return false;
- return true;
- }
- bool operator != (const TimeType &rhs) const {
- return !(*this == rhs);
- }
-
- bool operator < (const TimeType & ) const;
-
- uint32_t read(::apache::thrift::protocol::TProtocol* iprot);
- uint32_t write(::apache::thrift::protocol::TProtocol* oprot) const;
-
- virtual void printTo(std::ostream& out) const;
-};
-
-void swap(TimeType &a, TimeType &b);
-
-std::ostream& operator<<(std::ostream& out, const TimeType& obj);
-
-
-class IntType : public virtual ::apache::thrift::TBase {
- public:
-
- IntType(const IntType&);
- IntType& operator=(const IntType&);
- IntType() : bitWidth(0), isSigned(0) {
- }
-
- virtual ~IntType() noexcept;
- int8_t bitWidth;
- bool isSigned;
-
- void __set_bitWidth(const int8_t val);
-
- void __set_isSigned(const bool val);
-
- bool operator == (const IntType & rhs) const
- {
- if (!(bitWidth == rhs.bitWidth))
- return false;
- if (!(isSigned == rhs.isSigned))
- return false;
- return true;
- }
- bool operator != (const IntType &rhs) const {
- return !(*this == rhs);
- }
-
- bool operator < (const IntType & ) const;
-
- uint32_t read(::apache::thrift::protocol::TProtocol* iprot);
- uint32_t write(::apache::thrift::protocol::TProtocol* oprot) const;
-
- virtual void printTo(std::ostream& out) const;
-};
-
-void swap(IntType &a, IntType &b);
-
-std::ostream& operator<<(std::ostream& out, const IntType& obj);
-
-
-class JsonType : public virtual ::apache::thrift::TBase {
- public:
-
- JsonType(const JsonType&);
- JsonType& operator=(const JsonType&);
- JsonType() {
- }
-
- virtual ~JsonType() noexcept;
-
- bool operator == (const JsonType & /* rhs */) const
- {
- return true;
- }
- bool operator != (const JsonType &rhs) const {
- return !(*this == rhs);
- }
-
- bool operator < (const JsonType & ) const;
-
- uint32_t read(::apache::thrift::protocol::TProtocol* iprot);
- uint32_t write(::apache::thrift::protocol::TProtocol* oprot) const;
-
- virtual void printTo(std::ostream& out) const;
-};
-
-void swap(JsonType &a, JsonType &b);
-
-std::ostream& operator<<(std::ostream& out, const JsonType& obj);
-
-
-class BsonType : public virtual ::apache::thrift::TBase {
- public:
-
- BsonType(const BsonType&);
- BsonType& operator=(const BsonType&);
- BsonType() {
- }
-
- virtual ~BsonType() noexcept;
-
- bool operator == (const BsonType & /* rhs */) const
- {
- return true;
- }
- bool operator != (const BsonType &rhs) const {
- return !(*this == rhs);
- }
-
- bool operator < (const BsonType & ) const;
-
- uint32_t read(::apache::thrift::protocol::TProtocol* iprot);
- uint32_t write(::apache::thrift::protocol::TProtocol* oprot) const;
-
- virtual void printTo(std::ostream& out) const;
-};
-
-void swap(BsonType &a, BsonType &b);
-
-std::ostream& operator<<(std::ostream& out, const BsonType& obj);
-
-typedef struct _LogicalType__isset {
- _LogicalType__isset() : STRING(false), MAP(false), LIST(false), ENUM(false), DECIMAL(false), DATE(false), TIME(false), TIMESTAMP(false), INTEGER(false), UNKNOWN(false), JSON(false), BSON(false), UUID(false) {}
- bool STRING :1;
- bool MAP :1;
- bool LIST :1;
- bool ENUM :1;
- bool DECIMAL :1;
- bool DATE :1;
- bool TIME :1;
- bool TIMESTAMP :1;
- bool INTEGER :1;
- bool UNKNOWN :1;
- bool JSON :1;
- bool BSON :1;
- bool UUID :1;
-} _LogicalType__isset;
-
-class LogicalType : public virtual ::apache::thrift::TBase {
- public:
-
- LogicalType(const LogicalType&);
- LogicalType& operator=(const LogicalType&);
- LogicalType() {
- }
-
- virtual ~LogicalType() noexcept;
- StringType STRING;
- MapType MAP;
- ListType LIST;
- EnumType ENUM;
- DecimalType DECIMAL;
- DateType DATE;
- TimeType TIME;
- TimestampType TIMESTAMP;
- IntType INTEGER;
- NullType UNKNOWN;
- JsonType JSON;
- BsonType BSON;
- UUIDType UUID;
-
- _LogicalType__isset __isset;
-
- void __set_STRING(const StringType& val);
-
- void __set_MAP(const MapType& val);
-
- void __set_LIST(const ListType& val);
-
- void __set_ENUM(const EnumType& val);
-
- void __set_DECIMAL(const DecimalType& val);
-
- void __set_DATE(const DateType& val);
-
- void __set_TIME(const TimeType& val);
-
- void __set_TIMESTAMP(const TimestampType& val);
-
- void __set_INTEGER(const IntType& val);
-
- void __set_UNKNOWN(const NullType& val);
-
- void __set_JSON(const JsonType& val);
-
- void __set_BSON(const BsonType& val);
-
- void __set_UUID(const UUIDType& val);
-
- bool operator == (const LogicalType & rhs) const
- {
- if (__isset.STRING != rhs.__isset.STRING)
- return false;
- else if (__isset.STRING && !(STRING == rhs.STRING))
- return false;
- if (__isset.MAP != rhs.__isset.MAP)
- return false;
- else if (__isset.MAP && !(MAP == rhs.MAP))
- return false;
- if (__isset.LIST != rhs.__isset.LIST)
- return false;
- else if (__isset.LIST && !(LIST == rhs.LIST))
- return false;
- if (__isset.ENUM != rhs.__isset.ENUM)
- return false;
- else if (__isset.ENUM && !(ENUM == rhs.ENUM))
- return false;
- if (__isset.DECIMAL != rhs.__isset.DECIMAL)
- return false;
- else if (__isset.DECIMAL && !(DECIMAL == rhs.DECIMAL))
- return false;
- if (__isset.DATE != rhs.__isset.DATE)
- return false;
- else if (__isset.DATE && !(DATE == rhs.DATE))
- return false;
- if (__isset.TIME != rhs.__isset.TIME)
- return false;
- else if (__isset.TIME && !(TIME == rhs.TIME))
- return false;
- if (__isset.TIMESTAMP != rhs.__isset.TIMESTAMP)
- return false;
- else if (__isset.TIMESTAMP && !(TIMESTAMP == rhs.TIMESTAMP))
- return false;
- if (__isset.INTEGER != rhs.__isset.INTEGER)
- return false;
- else if (__isset.INTEGER && !(INTEGER == rhs.INTEGER))
- return false;
- if (__isset.UNKNOWN != rhs.__isset.UNKNOWN)
- return false;
- else if (__isset.UNKNOWN && !(UNKNOWN == rhs.UNKNOWN))
- return false;
- if (__isset.JSON != rhs.__isset.JSON)
- return false;
- else if (__isset.JSON && !(JSON == rhs.JSON))
- return false;
- if (__isset.BSON != rhs.__isset.BSON)
- return false;
- else if (__isset.BSON && !(BSON == rhs.BSON))
- return false;
- if (__isset.UUID != rhs.__isset.UUID)
- return false;
- else if (__isset.UUID && !(UUID == rhs.UUID))
- return false;
- return true;
- }
- bool operator != (const LogicalType &rhs) const {
- return !(*this == rhs);
- }
-
- bool operator < (const LogicalType & ) const;
-
- uint32_t read(::apache::thrift::protocol::TProtocol* iprot);
- uint32_t write(::apache::thrift::protocol::TProtocol* oprot) const;
-
- virtual void printTo(std::ostream& out) const;
-};
-
-void swap(LogicalType &a, LogicalType &b);
-
-std::ostream& operator<<(std::ostream& out, const LogicalType& obj);
-
-typedef struct _SchemaElement__isset {
- _SchemaElement__isset() : type(false), type_length(false), repetition_type(false), num_children(false), converted_type(false), scale(false), precision(false), field_id(false), logicalType(false) {}
- bool type :1;
- bool type_length :1;
- bool repetition_type :1;
- bool num_children :1;
- bool converted_type :1;
- bool scale :1;
- bool precision :1;
- bool field_id :1;
- bool logicalType :1;
-} _SchemaElement__isset;
-
-class SchemaElement : public virtual ::apache::thrift::TBase {
- public:
-
- SchemaElement(const SchemaElement&);
- SchemaElement& operator=(const SchemaElement&);
- SchemaElement() : type((Type::type)0), type_length(0), repetition_type((FieldRepetitionType::type)0), name(), num_children(0), converted_type((ConvertedType::type)0), scale(0), precision(0), field_id(0) {
- }
-
- virtual ~SchemaElement() noexcept;
- Type::type type;
- int32_t type_length;
- FieldRepetitionType::type repetition_type;
- std::string name;
- int32_t num_children;
- ConvertedType::type converted_type;
- int32_t scale;
- int32_t precision;
- int32_t field_id;
- LogicalType logicalType;
-
- _SchemaElement__isset __isset;
-
- void __set_type(const Type::type val);
-
- void __set_type_length(const int32_t val);
-
- void __set_repetition_type(const FieldRepetitionType::type val);
-
- void __set_name(const std::string& val);
-
- void __set_num_children(const int32_t val);
-
- void __set_converted_type(const ConvertedType::type val);
-
- void __set_scale(const int32_t val);
-
- void __set_precision(const int32_t val);
-
- void __set_field_id(const int32_t val);
-
- void __set_logicalType(const LogicalType& val);
-
- bool operator == (const SchemaElement & rhs) const
- {
- if (__isset.type != rhs.__isset.type)
- return false;
- else if (__isset.type && !(type == rhs.type))
- return false;
- if (__isset.type_length != rhs.__isset.type_length)
- return false;
- else if (__isset.type_length && !(type_length == rhs.type_length))
- return false;
- if (__isset.repetition_type != rhs.__isset.repetition_type)
- return false;
- else if (__isset.repetition_type && !(repetition_type == rhs.repetition_type))
- return false;
- if (!(name == rhs.name))
- return false;
- if (__isset.num_children != rhs.__isset.num_children)
- return false;
- else if (__isset.num_children && !(num_children == rhs.num_children))
- return false;
- if (__isset.converted_type != rhs.__isset.converted_type)
- return false;
- else if (__isset.converted_type && !(converted_type == rhs.converted_type))
- return false;
- if (__isset.scale != rhs.__isset.scale)
- return false;
- else if (__isset.scale && !(scale == rhs.scale))
- return false;
- if (__isset.precision != rhs.__isset.precision)
- return false;
- else if (__isset.precision && !(precision == rhs.precision))
- return false;
- if (__isset.field_id != rhs.__isset.field_id)
- return false;
- else if (__isset.field_id && !(field_id == rhs.field_id))
- return false;
- if (__isset.logicalType != rhs.__isset.logicalType)
- return false;
- else if (__isset.logicalType && !(logicalType == rhs.logicalType))
- return false;
- return true;
- }
- bool operator != (const SchemaElement &rhs) const {
- return !(*this == rhs);
- }
-
- bool operator < (const SchemaElement & ) const;
-
- uint32_t read(::apache::thrift::protocol::TProtocol* iprot);
- uint32_t write(::apache::thrift::protocol::TProtocol* oprot) const;
-
- virtual void printTo(std::ostream& out) const;
-};
-
-void swap(SchemaElement &a, SchemaElement &b);
-
-std::ostream& operator<<(std::ostream& out, const SchemaElement& obj);
-
-typedef struct _DataPageHeader__isset {
- _DataPageHeader__isset() : statistics(false) {}
- bool statistics :1;
-} _DataPageHeader__isset;
-
-class DataPageHeader : public virtual ::apache::thrift::TBase {
- public:
-
- DataPageHeader(const DataPageHeader&);
- DataPageHeader& operator=(const DataPageHeader&);
- DataPageHeader() : num_values(0), encoding((Encoding::type)0), definition_level_encoding((Encoding::type)0), repetition_level_encoding((Encoding::type)0) {
- }
-
- virtual ~DataPageHeader() noexcept;
- int32_t num_values;
- Encoding::type encoding;
- Encoding::type definition_level_encoding;
- Encoding::type repetition_level_encoding;
- Statistics statistics;
-
- _DataPageHeader__isset __isset;
-
- void __set_num_values(const int32_t val);
-
- void __set_encoding(const Encoding::type val);
-
- void __set_definition_level_encoding(const Encoding::type val);
-
- void __set_repetition_level_encoding(const Encoding::type val);
-
- void __set_statistics(const Statistics& val);
-
- bool operator == (const DataPageHeader & rhs) const
- {
- if (!(num_values == rhs.num_values))
- return false;
- if (!(encoding == rhs.encoding))
- return false;
- if (!(definition_level_encoding == rhs.definition_level_encoding))
- return false;
- if (!(repetition_level_encoding == rhs.repetition_level_encoding))
- return false;
- if (__isset.statistics != rhs.__isset.statistics)
- return false;
- else if (__isset.statistics && !(statistics == rhs.statistics))
- return false;
- return true;
- }
- bool operator != (const DataPageHeader &rhs) const {
- return !(*this == rhs);
- }
-
- bool operator < (const DataPageHeader & ) const;
-
- uint32_t read(::apache::thrift::protocol::TProtocol* iprot);
- uint32_t write(::apache::thrift::protocol::TProtocol* oprot) const;
-
- virtual void printTo(std::ostream& out) const;
-};
-
-void swap(DataPageHeader &a, DataPageHeader &b);
-
-std::ostream& operator<<(std::ostream& out, const DataPageHeader& obj);
-
-
-class IndexPageHeader : public virtual ::apache::thrift::TBase {
- public:
-
- IndexPageHeader(const IndexPageHeader&);
- IndexPageHeader& operator=(const IndexPageHeader&);
- IndexPageHeader() {
- }
-
- virtual ~IndexPageHeader() noexcept;
-
- bool operator == (const IndexPageHeader & /* rhs */) const
- {
- return true;
- }
- bool operator != (const IndexPageHeader &rhs) const {
- return !(*this == rhs);
- }
-
- bool operator < (const IndexPageHeader & ) const;
-
- uint32_t read(::apache::thrift::protocol::TProtocol* iprot);
- uint32_t write(::apache::thrift::protocol::TProtocol* oprot) const;
-
- virtual void printTo(std::ostream& out) const;
-};
-
-void swap(IndexPageHeader &a, IndexPageHeader &b);
-
-std::ostream& operator<<(std::ostream& out, const IndexPageHeader& obj);
-
-typedef struct _DictionaryPageHeader__isset {
- _DictionaryPageHeader__isset() : is_sorted(false) {}
- bool is_sorted :1;
-} _DictionaryPageHeader__isset;
-
-class DictionaryPageHeader : public virtual ::apache::thrift::TBase {
- public:
-
- DictionaryPageHeader(const DictionaryPageHeader&);
- DictionaryPageHeader& operator=(const DictionaryPageHeader&);
- DictionaryPageHeader() : num_values(0), encoding((Encoding::type)0), is_sorted(0) {
- }
-
- virtual ~DictionaryPageHeader() noexcept;
- int32_t num_values;
- Encoding::type encoding;
- bool is_sorted;
-
- _DictionaryPageHeader__isset __isset;
-
- void __set_num_values(const int32_t val);
-
- void __set_encoding(const Encoding::type val);
-
- void __set_is_sorted(const bool val);
-
- bool operator == (const DictionaryPageHeader & rhs) const
- {
- if (!(num_values == rhs.num_values))
- return false;
- if (!(encoding == rhs.encoding))
- return false;
- if (__isset.is_sorted != rhs.__isset.is_sorted)
- return false;
- else if (__isset.is_sorted && !(is_sorted == rhs.is_sorted))
- return false;
- return true;
- }
- bool operator != (const DictionaryPageHeader &rhs) const {
- return !(*this == rhs);
- }
-
- bool operator < (const DictionaryPageHeader & ) const;
-
- uint32_t read(::apache::thrift::protocol::TProtocol* iprot);
- uint32_t write(::apache::thrift::protocol::TProtocol* oprot) const;
-
- virtual void printTo(std::ostream& out) const;
-};
-
-void swap(DictionaryPageHeader &a, DictionaryPageHeader &b);
-
-std::ostream& operator<<(std::ostream& out, const DictionaryPageHeader& obj);
-
-typedef struct _DataPageHeaderV2__isset {
- _DataPageHeaderV2__isset() : is_compressed(true), statistics(false) {}
- bool is_compressed :1;
- bool statistics :1;
-} _DataPageHeaderV2__isset;
-
-class DataPageHeaderV2 : public virtual ::apache::thrift::TBase {
- public:
-
- DataPageHeaderV2(const DataPageHeaderV2&);
- DataPageHeaderV2& operator=(const DataPageHeaderV2&);
- DataPageHeaderV2() : num_values(0), num_nulls(0), num_rows(0), encoding((Encoding::type)0), definition_levels_byte_length(0), repetition_levels_byte_length(0), is_compressed(true) {
- }
-
- virtual ~DataPageHeaderV2() noexcept;
- int32_t num_values;
- int32_t num_nulls;
- int32_t num_rows;
- Encoding::type encoding;
- int32_t definition_levels_byte_length;
- int32_t repetition_levels_byte_length;
- bool is_compressed;
- Statistics statistics;
-
- _DataPageHeaderV2__isset __isset;
-
- void __set_num_values(const int32_t val);
-
- void __set_num_nulls(const int32_t val);
-
- void __set_num_rows(const int32_t val);
-
- void __set_encoding(const Encoding::type val);
-
- void __set_definition_levels_byte_length(const int32_t val);
-
- void __set_repetition_levels_byte_length(const int32_t val);
-
- void __set_is_compressed(const bool val);
-
- void __set_statistics(const Statistics& val);
-
- bool operator == (const DataPageHeaderV2 & rhs) const
- {
- if (!(num_values == rhs.num_values))
- return false;
- if (!(num_nulls == rhs.num_nulls))
- return false;
- if (!(num_rows == rhs.num_rows))
- return false;
- if (!(encoding == rhs.encoding))
- return false;
- if (!(definition_levels_byte_length == rhs.definition_levels_byte_length))
- return false;
- if (!(repetition_levels_byte_length == rhs.repetition_levels_byte_length))
- return false;
- if (__isset.is_compressed != rhs.__isset.is_compressed)
- return false;
- else if (__isset.is_compressed && !(is_compressed == rhs.is_compressed))
- return false;
- if (__isset.statistics != rhs.__isset.statistics)
- return false;
- else if (__isset.statistics && !(statistics == rhs.statistics))
- return false;
- return true;
- }
- bool operator != (const DataPageHeaderV2 &rhs) const {
- return !(*this == rhs);
- }
-
- bool operator < (const DataPageHeaderV2 & ) const;
-
- uint32_t read(::apache::thrift::protocol::TProtocol* iprot);
- uint32_t write(::apache::thrift::protocol::TProtocol* oprot) const;
-
- virtual void printTo(std::ostream& out) const;
-};
-
-void swap(DataPageHeaderV2 &a, DataPageHeaderV2 &b);
-
-std::ostream& operator<<(std::ostream& out, const DataPageHeaderV2& obj);
-
-
-class SplitBlockAlgorithm : public virtual ::apache::thrift::TBase {
- public:
-
- SplitBlockAlgorithm(const SplitBlockAlgorithm&);
- SplitBlockAlgorithm& operator=(const SplitBlockAlgorithm&);
- SplitBlockAlgorithm() {
- }
-
- virtual ~SplitBlockAlgorithm() noexcept;
-
- bool operator == (const SplitBlockAlgorithm & /* rhs */) const
- {
- return true;
- }
- bool operator != (const SplitBlockAlgorithm &rhs) const {
- return !(*this == rhs);
- }
-
- bool operator < (const SplitBlockAlgorithm & ) const;
-
- uint32_t read(::apache::thrift::protocol::TProtocol* iprot);
- uint32_t write(::apache::thrift::protocol::TProtocol* oprot) const;
-
- virtual void printTo(std::ostream& out) const;
-};
-
-void swap(SplitBlockAlgorithm &a, SplitBlockAlgorithm &b);
-
-std::ostream& operator<<(std::ostream& out, const SplitBlockAlgorithm& obj);
-
-typedef struct _BloomFilterAlgorithm__isset {
- _BloomFilterAlgorithm__isset() : BLOCK(false) {}
- bool BLOCK :1;
-} _BloomFilterAlgorithm__isset;
-
-class BloomFilterAlgorithm : public virtual ::apache::thrift::TBase {
- public:
-
- BloomFilterAlgorithm(const BloomFilterAlgorithm&);
- BloomFilterAlgorithm& operator=(const BloomFilterAlgorithm&);
- BloomFilterAlgorithm() {
- }
-
- virtual ~BloomFilterAlgorithm() noexcept;
- SplitBlockAlgorithm BLOCK;
-
- _BloomFilterAlgorithm__isset __isset;
-
- void __set_BLOCK(const SplitBlockAlgorithm& val);
-
- bool operator == (const BloomFilterAlgorithm & rhs) const
- {
- if (__isset.BLOCK != rhs.__isset.BLOCK)
- return false;
- else if (__isset.BLOCK && !(BLOCK == rhs.BLOCK))
- return false;
- return true;
- }
- bool operator != (const BloomFilterAlgorithm &rhs) const {
- return !(*this == rhs);
- }
-
- bool operator < (const BloomFilterAlgorithm & ) const;
-
- uint32_t read(::apache::thrift::protocol::TProtocol* iprot);
- uint32_t write(::apache::thrift::protocol::TProtocol* oprot) const;
-
- virtual void printTo(std::ostream& out) const;
-};
-
-void swap(BloomFilterAlgorithm &a, BloomFilterAlgorithm &b);
-
-std::ostream& operator<<(std::ostream& out, const BloomFilterAlgorithm& obj);
-
-
-class XxHash : public virtual ::apache::thrift::TBase {
- public:
-
- XxHash(const XxHash&);
- XxHash& operator=(const XxHash&);
- XxHash() {
- }
-
- virtual ~XxHash() noexcept;
-
- bool operator == (const XxHash & /* rhs */) const
- {
- return true;
- }
- bool operator != (const XxHash &rhs) const {
- return !(*this == rhs);
- }
-
- bool operator < (const XxHash & ) const;
-
- uint32_t read(::apache::thrift::protocol::TProtocol* iprot);
- uint32_t write(::apache::thrift::protocol::TProtocol* oprot) const;
-
- virtual void printTo(std::ostream& out) const;
-};
-
-void swap(XxHash &a, XxHash &b);
-
-std::ostream& operator<<(std::ostream& out, const XxHash& obj);
-
-typedef struct _BloomFilterHash__isset {
- _BloomFilterHash__isset() : XXHASH(false) {}
- bool XXHASH :1;
-} _BloomFilterHash__isset;
-
-class BloomFilterHash : public virtual ::apache::thrift::TBase {
- public:
-
- BloomFilterHash(const BloomFilterHash&);
- BloomFilterHash& operator=(const BloomFilterHash&);
- BloomFilterHash() {
- }
-
- virtual ~BloomFilterHash() noexcept;
- XxHash XXHASH;
-
- _BloomFilterHash__isset __isset;
-
- void __set_XXHASH(const XxHash& val);
-
- bool operator == (const BloomFilterHash & rhs) const
- {
- if (__isset.XXHASH != rhs.__isset.XXHASH)
- return false;
- else if (__isset.XXHASH && !(XXHASH == rhs.XXHASH))
- return false;
- return true;
- }
- bool operator != (const BloomFilterHash &rhs) const {
- return !(*this == rhs);
- }
-
- bool operator < (const BloomFilterHash & ) const;
-
- uint32_t read(::apache::thrift::protocol::TProtocol* iprot);
- uint32_t write(::apache::thrift::protocol::TProtocol* oprot) const;
-
- virtual void printTo(std::ostream& out) const;
-};
-
-void swap(BloomFilterHash &a, BloomFilterHash &b);
-
-std::ostream& operator<<(std::ostream& out, const BloomFilterHash& obj);
-
-
-class Uncompressed : public virtual ::apache::thrift::TBase {
- public:
-
- Uncompressed(const Uncompressed&);
- Uncompressed& operator=(const Uncompressed&);
- Uncompressed() {
- }
-
- virtual ~Uncompressed() noexcept;
-
- bool operator == (const Uncompressed & /* rhs */) const
- {
- return true;
- }
- bool operator != (const Uncompressed &rhs) const {
- return !(*this == rhs);
- }
-
- bool operator < (const Uncompressed & ) const;
-
- uint32_t read(::apache::thrift::protocol::TProtocol* iprot);
- uint32_t write(::apache::thrift::protocol::TProtocol* oprot) const;
-
- virtual void printTo(std::ostream& out) const;
-};
-
-void swap(Uncompressed &a, Uncompressed &b);
-
-std::ostream& operator<<(std::ostream& out, const Uncompressed& obj);
-
-typedef struct _BloomFilterCompression__isset {
- _BloomFilterCompression__isset() : UNCOMPRESSED(false) {}
- bool UNCOMPRESSED :1;
-} _BloomFilterCompression__isset;
-
-class BloomFilterCompression : public virtual ::apache::thrift::TBase {
- public:
-
- BloomFilterCompression(const BloomFilterCompression&);
- BloomFilterCompression& operator=(const BloomFilterCompression&);
- BloomFilterCompression() {
- }
-
- virtual ~BloomFilterCompression() noexcept;
- Uncompressed UNCOMPRESSED;
-
- _BloomFilterCompression__isset __isset;
-
- void __set_UNCOMPRESSED(const Uncompressed& val);
-
- bool operator == (const BloomFilterCompression & rhs) const
- {
- if (__isset.UNCOMPRESSED != rhs.__isset.UNCOMPRESSED)
- return false;
- else if (__isset.UNCOMPRESSED && !(UNCOMPRESSED == rhs.UNCOMPRESSED))
- return false;
- return true;
- }
- bool operator != (const BloomFilterCompression &rhs) const {
- return !(*this == rhs);
- }
-
- bool operator < (const BloomFilterCompression & ) const;
-
- uint32_t read(::apache::thrift::protocol::TProtocol* iprot);
- uint32_t write(::apache::thrift::protocol::TProtocol* oprot) const;
-
- virtual void printTo(std::ostream& out) const;
-};
-
-void swap(BloomFilterCompression &a, BloomFilterCompression &b);
-
-std::ostream& operator<<(std::ostream& out, const BloomFilterCompression& obj);
-
-
-class BloomFilterHeader : public virtual ::apache::thrift::TBase {
- public:
-
- BloomFilterHeader(const BloomFilterHeader&);
- BloomFilterHeader& operator=(const BloomFilterHeader&);
- BloomFilterHeader() : numBytes(0) {
- }
-
- virtual ~BloomFilterHeader() noexcept;
- int32_t numBytes;
- BloomFilterAlgorithm algorithm;
- BloomFilterHash hash;
- BloomFilterCompression compression;
-
- void __set_numBytes(const int32_t val);
-
- void __set_algorithm(const BloomFilterAlgorithm& val);
-
- void __set_hash(const BloomFilterHash& val);
-
- void __set_compression(const BloomFilterCompression& val);
-
- bool operator == (const BloomFilterHeader & rhs) const
- {
- if (!(numBytes == rhs.numBytes))
- return false;
- if (!(algorithm == rhs.algorithm))
- return false;
- if (!(hash == rhs.hash))
- return false;
- if (!(compression == rhs.compression))
- return false;
- return true;
- }
- bool operator != (const BloomFilterHeader &rhs) const {
- return !(*this == rhs);
- }
-
- bool operator < (const BloomFilterHeader & ) const;
-
- uint32_t read(::apache::thrift::protocol::TProtocol* iprot);
- uint32_t write(::apache::thrift::protocol::TProtocol* oprot) const;
-
- virtual void printTo(std::ostream& out) const;
-};
-
-void swap(BloomFilterHeader &a, BloomFilterHeader &b);
-
-std::ostream& operator<<(std::ostream& out, const BloomFilterHeader& obj);
-
-typedef struct _PageHeader__isset {
- _PageHeader__isset() : crc(false), data_page_header(false), index_page_header(false), dictionary_page_header(false), data_page_header_v2(false) {}
- bool crc :1;
- bool data_page_header :1;
- bool index_page_header :1;
- bool dictionary_page_header :1;
- bool data_page_header_v2 :1;
-} _PageHeader__isset;
-
-class PageHeader : public virtual ::apache::thrift::TBase {
- public:
-
- PageHeader(const PageHeader&);
- PageHeader& operator=(const PageHeader&);
- PageHeader() : type((PageType::type)0), uncompressed_page_size(0), compressed_page_size(0), crc(0) {
- }
-
- virtual ~PageHeader() noexcept;
- PageType::type type;
- int32_t uncompressed_page_size;
- int32_t compressed_page_size;
- int32_t crc;
- DataPageHeader data_page_header;
- IndexPageHeader index_page_header;
- DictionaryPageHeader dictionary_page_header;
- DataPageHeaderV2 data_page_header_v2;
-
- _PageHeader__isset __isset;
-
- void __set_type(const PageType::type val);
-
- void __set_uncompressed_page_size(const int32_t val);
-
- void __set_compressed_page_size(const int32_t val);
-
- void __set_crc(const int32_t val);
-
- void __set_data_page_header(const DataPageHeader& val);
-
- void __set_index_page_header(const IndexPageHeader& val);
-
- void __set_dictionary_page_header(const DictionaryPageHeader& val);
-
- void __set_data_page_header_v2(const DataPageHeaderV2& val);
-
- bool operator == (const PageHeader & rhs) const
- {
- if (!(type == rhs.type))
- return false;
- if (!(uncompressed_page_size == rhs.uncompressed_page_size))
- return false;
- if (!(compressed_page_size == rhs.compressed_page_size))
- return false;
- if (__isset.crc != rhs.__isset.crc)
- return false;
- else if (__isset.crc && !(crc == rhs.crc))
- return false;
- if (__isset.data_page_header != rhs.__isset.data_page_header)
- return false;
- else if (__isset.data_page_header && !(data_page_header == rhs.data_page_header))
- return false;
- if (__isset.index_page_header != rhs.__isset.index_page_header)
- return false;
- else if (__isset.index_page_header && !(index_page_header == rhs.index_page_header))
- return false;
- if (__isset.dictionary_page_header != rhs.__isset.dictionary_page_header)
- return false;
- else if (__isset.dictionary_page_header && !(dictionary_page_header == rhs.dictionary_page_header))
- return false;
- if (__isset.data_page_header_v2 != rhs.__isset.data_page_header_v2)
- return false;
- else if (__isset.data_page_header_v2 && !(data_page_header_v2 == rhs.data_page_header_v2))
- return false;
- return true;
- }
- bool operator != (const PageHeader &rhs) const {
- return !(*this == rhs);
- }
-
- bool operator < (const PageHeader & ) const;
-
- uint32_t read(::apache::thrift::protocol::TProtocol* iprot);
- uint32_t write(::apache::thrift::protocol::TProtocol* oprot) const;
-
- virtual void printTo(std::ostream& out) const;
-};
-
-void swap(PageHeader &a, PageHeader &b);
-
-std::ostream& operator<<(std::ostream& out, const PageHeader& obj);
-
-typedef struct _KeyValue__isset {
- _KeyValue__isset() : value(false) {}
- bool value :1;
-} _KeyValue__isset;
-
-class KeyValue : public virtual ::apache::thrift::TBase {
- public:
-
- KeyValue(const KeyValue&);
- KeyValue& operator=(const KeyValue&);
- KeyValue() : key(), value() {
- }
-
- virtual ~KeyValue() noexcept;
- std::string key;
- std::string value;
-
- _KeyValue__isset __isset;
-
- void __set_key(const std::string& val);
-
- void __set_value(const std::string& val);
-
- bool operator == (const KeyValue & rhs) const
- {
- if (!(key == rhs.key))
- return false;
- if (__isset.value != rhs.__isset.value)
- return false;
- else if (__isset.value && !(value == rhs.value))
- return false;
- return true;
- }
- bool operator != (const KeyValue &rhs) const {
- return !(*this == rhs);
- }
-
- bool operator < (const KeyValue & ) const;
-
- uint32_t read(::apache::thrift::protocol::TProtocol* iprot);
- uint32_t write(::apache::thrift::protocol::TProtocol* oprot) const;
-
- virtual void printTo(std::ostream& out) const;
-};
-
-void swap(KeyValue &a, KeyValue &b);
-
-std::ostream& operator<<(std::ostream& out, const KeyValue& obj);
-
-
-class SortingColumn : public virtual ::apache::thrift::TBase {
- public:
-
- SortingColumn(const SortingColumn&);
- SortingColumn& operator=(const SortingColumn&);
- SortingColumn() : column_idx(0), descending(0), nulls_first(0) {
- }
-
- virtual ~SortingColumn() noexcept;
- int32_t column_idx;
- bool descending;
- bool nulls_first;
-
- void __set_column_idx(const int32_t val);
-
- void __set_descending(const bool val);
-
- void __set_nulls_first(const bool val);
-
- bool operator == (const SortingColumn & rhs) const
- {
- if (!(column_idx == rhs.column_idx))
- return false;
- if (!(descending == rhs.descending))
- return false;
- if (!(nulls_first == rhs.nulls_first))
- return false;
- return true;
- }
- bool operator != (const SortingColumn &rhs) const {
- return !(*this == rhs);
- }
-
- bool operator < (const SortingColumn & ) const;
-
- uint32_t read(::apache::thrift::protocol::TProtocol* iprot);
- uint32_t write(::apache::thrift::protocol::TProtocol* oprot) const;
-
- virtual void printTo(std::ostream& out) const;
-};
-
-void swap(SortingColumn &a, SortingColumn &b);
-
-std::ostream& operator<<(std::ostream& out, const SortingColumn& obj);
-
-
-class PageEncodingStats : public virtual ::apache::thrift::TBase {
- public:
-
- PageEncodingStats(const PageEncodingStats&);
- PageEncodingStats& operator=(const PageEncodingStats&);
- PageEncodingStats() : page_type((PageType::type)0), encoding((Encoding::type)0), count(0) {
- }
-
- virtual ~PageEncodingStats() noexcept;
- PageType::type page_type;
- Encoding::type encoding;
- int32_t count;
-
- void __set_page_type(const PageType::type val);
-
- void __set_encoding(const Encoding::type val);
-
- void __set_count(const int32_t val);
-
- bool operator == (const PageEncodingStats & rhs) const
- {
- if (!(page_type == rhs.page_type))
- return false;
- if (!(encoding == rhs.encoding))
- return false;
- if (!(count == rhs.count))
- return false;
- return true;
- }
- bool operator != (const PageEncodingStats &rhs) const {
- return !(*this == rhs);
- }
-
- bool operator < (const PageEncodingStats & ) const;
-
- uint32_t read(::apache::thrift::protocol::TProtocol* iprot);
- uint32_t write(::apache::thrift::protocol::TProtocol* oprot) const;
-
- virtual void printTo(std::ostream& out) const;
-};
-
-void swap(PageEncodingStats &a, PageEncodingStats &b);
-
-std::ostream& operator<<(std::ostream& out, const PageEncodingStats& obj);
-
-typedef struct _ColumnMetaData__isset {
- _ColumnMetaData__isset() : key_value_metadata(false), index_page_offset(false), dictionary_page_offset(false), statistics(false), encoding_stats(false), bloom_filter_offset(false) {}
- bool key_value_metadata :1;
- bool index_page_offset :1;
- bool dictionary_page_offset :1;
- bool statistics :1;
- bool encoding_stats :1;
- bool bloom_filter_offset :1;
-} _ColumnMetaData__isset;
-
-class ColumnMetaData : public virtual ::apache::thrift::TBase {
- public:
-
- ColumnMetaData(const ColumnMetaData&);
- ColumnMetaData& operator=(const ColumnMetaData&);
- ColumnMetaData() : type((Type::type)0), codec((CompressionCodec::type)0), num_values(0), total_uncompressed_size(0), total_compressed_size(0), data_page_offset(0), index_page_offset(0), dictionary_page_offset(0), bloom_filter_offset(0) {
- }
-
- virtual ~ColumnMetaData() noexcept;
- Type::type type;
- std::vector<Encoding::type> encodings;
- std::vector<std::string> path_in_schema;
- CompressionCodec::type codec;
- int64_t num_values;
- int64_t total_uncompressed_size;
- int64_t total_compressed_size;
- std::vector<KeyValue> key_value_metadata;
- int64_t data_page_offset;
- int64_t index_page_offset;
- int64_t dictionary_page_offset;
- Statistics statistics;
- std::vector<PageEncodingStats> encoding_stats;
- int64_t bloom_filter_offset;
-
- _ColumnMetaData__isset __isset;
-
- void __set_type(const Type::type val);
-
- void __set_encodings(const std::vector<Encoding::type> & val);
-
- void __set_path_in_schema(const std::vector<std::string> & val);
-
- void __set_codec(const CompressionCodec::type val);
-
- void __set_num_values(const int64_t val);
-
- void __set_total_uncompressed_size(const int64_t val);
-
- void __set_total_compressed_size(const int64_t val);
-
- void __set_key_value_metadata(const std::vector<KeyValue> & val);
-
- void __set_data_page_offset(const int64_t val);
-
- void __set_index_page_offset(const int64_t val);
-
- void __set_dictionary_page_offset(const int64_t val);
-
- void __set_statistics(const Statistics& val);
-
- void __set_encoding_stats(const std::vector<PageEncodingStats> & val);
-
- void __set_bloom_filter_offset(const int64_t val);
-
- bool operator == (const ColumnMetaData & rhs) const
- {
- if (!(type == rhs.type))
- return false;
- if (!(encodings == rhs.encodings))
- return false;
- if (!(path_in_schema == rhs.path_in_schema))
- return false;
- if (!(codec == rhs.codec))
- return false;
- if (!(num_values == rhs.num_values))
- return false;
- if (!(total_uncompressed_size == rhs.total_uncompressed_size))
- return false;
- if (!(total_compressed_size == rhs.total_compressed_size))
- return false;
- if (__isset.key_value_metadata != rhs.__isset.key_value_metadata)
- return false;
- else if (__isset.key_value_metadata && !(key_value_metadata == rhs.key_value_metadata))
- return false;
- if (!(data_page_offset == rhs.data_page_offset))
- return false;
- if (__isset.index_page_offset != rhs.__isset.index_page_offset)
- return false;
- else if (__isset.index_page_offset && !(index_page_offset == rhs.index_page_offset))
- return false;
- if (__isset.dictionary_page_offset != rhs.__isset.dictionary_page_offset)
- return false;
- else if (__isset.dictionary_page_offset && !(dictionary_page_offset == rhs.dictionary_page_offset))
- return false;
- if (__isset.statistics != rhs.__isset.statistics)
- return false;
- else if (__isset.statistics && !(statistics == rhs.statistics))
- return false;
- if (__isset.encoding_stats != rhs.__isset.encoding_stats)
- return false;
- else if (__isset.encoding_stats && !(encoding_stats == rhs.encoding_stats))
- return false;
- if (__isset.bloom_filter_offset != rhs.__isset.bloom_filter_offset)
- return false;
- else if (__isset.bloom_filter_offset && !(bloom_filter_offset == rhs.bloom_filter_offset))
- return false;
- return true;
- }
- bool operator != (const ColumnMetaData &rhs) const {
- return !(*this == rhs);
- }
-
- bool operator < (const ColumnMetaData & ) const;
-
- uint32_t read(::apache::thrift::protocol::TProtocol* iprot);
- uint32_t write(::apache::thrift::protocol::TProtocol* oprot) const;
-
- virtual void printTo(std::ostream& out) const;
-};
-
-void swap(ColumnMetaData &a, ColumnMetaData &b);
-
-std::ostream& operator<<(std::ostream& out, const ColumnMetaData& obj);
-
-
-class EncryptionWithFooterKey : public virtual ::apache::thrift::TBase {
- public:
-
- EncryptionWithFooterKey(const EncryptionWithFooterKey&);
- EncryptionWithFooterKey& operator=(const EncryptionWithFooterKey&);
- EncryptionWithFooterKey() {
- }
-
- virtual ~EncryptionWithFooterKey() noexcept;
-
- bool operator == (const EncryptionWithFooterKey & /* rhs */) const
- {
- return true;
- }
- bool operator != (const EncryptionWithFooterKey &rhs) const {
- return !(*this == rhs);
- }
-
- bool operator < (const EncryptionWithFooterKey & ) const;
-
- uint32_t read(::apache::thrift::protocol::TProtocol* iprot);
- uint32_t write(::apache::thrift::protocol::TProtocol* oprot) const;
-
- virtual void printTo(std::ostream& out) const;
-};
-
-void swap(EncryptionWithFooterKey &a, EncryptionWithFooterKey &b);
-
-std::ostream& operator<<(std::ostream& out, const EncryptionWithFooterKey& obj);
-
-typedef struct _EncryptionWithColumnKey__isset {
- _EncryptionWithColumnKey__isset() : key_metadata(false) {}
- bool key_metadata :1;
-} _EncryptionWithColumnKey__isset;
-
-class EncryptionWithColumnKey : public virtual ::apache::thrift::TBase {
- public:
-
- EncryptionWithColumnKey(const EncryptionWithColumnKey&);
- EncryptionWithColumnKey& operator=(const EncryptionWithColumnKey&);
- EncryptionWithColumnKey() : key_metadata() {
- }
-
- virtual ~EncryptionWithColumnKey() noexcept;
- std::vector<std::string> path_in_schema;
- std::string key_metadata;
-
- _EncryptionWithColumnKey__isset __isset;
-
- void __set_path_in_schema(const std::vector<std::string> & val);
-
- void __set_key_metadata(const std::string& val);
-
- bool operator == (const EncryptionWithColumnKey & rhs) const
- {
- if (!(path_in_schema == rhs.path_in_schema))
- return false;
- if (__isset.key_metadata != rhs.__isset.key_metadata)
- return false;
- else if (__isset.key_metadata && !(key_metadata == rhs.key_metadata))
- return false;
- return true;
- }
- bool operator != (const EncryptionWithColumnKey &rhs) const {
- return !(*this == rhs);
- }
-
- bool operator < (const EncryptionWithColumnKey & ) const;
-
- uint32_t read(::apache::thrift::protocol::TProtocol* iprot);
- uint32_t write(::apache::thrift::protocol::TProtocol* oprot) const;
-
- virtual void printTo(std::ostream& out) const;
-};
-
-void swap(EncryptionWithColumnKey &a, EncryptionWithColumnKey &b);
-
-std::ostream& operator<<(std::ostream& out, const EncryptionWithColumnKey& obj);
-
-typedef struct _ColumnCryptoMetaData__isset {
- _ColumnCryptoMetaData__isset() : ENCRYPTION_WITH_FOOTER_KEY(false), ENCRYPTION_WITH_COLUMN_KEY(false) {}
- bool ENCRYPTION_WITH_FOOTER_KEY :1;
- bool ENCRYPTION_WITH_COLUMN_KEY :1;
-} _ColumnCryptoMetaData__isset;
-
-class ColumnCryptoMetaData : public virtual ::apache::thrift::TBase {
- public:
-
- ColumnCryptoMetaData(const ColumnCryptoMetaData&);
- ColumnCryptoMetaData& operator=(const ColumnCryptoMetaData&);
- ColumnCryptoMetaData() {
- }
-
- virtual ~ColumnCryptoMetaData() noexcept;
- EncryptionWithFooterKey ENCRYPTION_WITH_FOOTER_KEY;
- EncryptionWithColumnKey ENCRYPTION_WITH_COLUMN_KEY;
-
- _ColumnCryptoMetaData__isset __isset;
-
- void __set_ENCRYPTION_WITH_FOOTER_KEY(const EncryptionWithFooterKey& val);
-
- void __set_ENCRYPTION_WITH_COLUMN_KEY(const EncryptionWithColumnKey& val);
-
- bool operator == (const ColumnCryptoMetaData & rhs) const
- {
- if (__isset.ENCRYPTION_WITH_FOOTER_KEY != rhs.__isset.ENCRYPTION_WITH_FOOTER_KEY)
- return false;
- else if (__isset.ENCRYPTION_WITH_FOOTER_KEY && !(ENCRYPTION_WITH_FOOTER_KEY == rhs.ENCRYPTION_WITH_FOOTER_KEY))
- return false;
- if (__isset.ENCRYPTION_WITH_COLUMN_KEY != rhs.__isset.ENCRYPTION_WITH_COLUMN_KEY)
- return false;
- else if (__isset.ENCRYPTION_WITH_COLUMN_KEY && !(ENCRYPTION_WITH_COLUMN_KEY == rhs.ENCRYPTION_WITH_COLUMN_KEY))
- return false;
- return true;
- }
- bool operator != (const ColumnCryptoMetaData &rhs) const {
- return !(*this == rhs);
- }
-
- bool operator < (const ColumnCryptoMetaData & ) const;
-
- uint32_t read(::apache::thrift::protocol::TProtocol* iprot);
- uint32_t write(::apache::thrift::protocol::TProtocol* oprot) const;
-
- virtual void printTo(std::ostream& out) const;
-};
-
-void swap(ColumnCryptoMetaData &a, ColumnCryptoMetaData &b);
-
-std::ostream& operator<<(std::ostream& out, const ColumnCryptoMetaData& obj);
-
-typedef struct _ColumnChunk__isset {
- _ColumnChunk__isset() : file_path(false), meta_data(false), offset_index_offset(false), offset_index_length(false), column_index_offset(false), column_index_length(false), crypto_metadata(false), encrypted_column_metadata(false) {}
- bool file_path :1;
- bool meta_data :1;
- bool offset_index_offset :1;
- bool offset_index_length :1;
- bool column_index_offset :1;
- bool column_index_length :1;
- bool crypto_metadata :1;
- bool encrypted_column_metadata :1;
-} _ColumnChunk__isset;
-
-class ColumnChunk : public virtual ::apache::thrift::TBase {
- public:
-
- ColumnChunk(const ColumnChunk&);
- ColumnChunk& operator=(const ColumnChunk&);
- ColumnChunk() : file_path(), file_offset(0), offset_index_offset(0), offset_index_length(0), column_index_offset(0), column_index_length(0), encrypted_column_metadata() {
- }
-
- virtual ~ColumnChunk() noexcept;
- std::string file_path;
- int64_t file_offset;
- ColumnMetaData meta_data;
- int64_t offset_index_offset;
- int32_t offset_index_length;
- int64_t column_index_offset;
- int32_t column_index_length;
- ColumnCryptoMetaData crypto_metadata;
- std::string encrypted_column_metadata;
-
- _ColumnChunk__isset __isset;
-
- void __set_file_path(const std::string& val);
-
- void __set_file_offset(const int64_t val);
-
- void __set_meta_data(const ColumnMetaData& val);
-
- void __set_offset_index_offset(const int64_t val);
-
- void __set_offset_index_length(const int32_t val);
-
- void __set_column_index_offset(const int64_t val);
-
- void __set_column_index_length(const int32_t val);
-
- void __set_crypto_metadata(const ColumnCryptoMetaData& val);
-
- void __set_encrypted_column_metadata(const std::string& val);
-
- bool operator == (const ColumnChunk & rhs) const
- {
- if (__isset.file_path != rhs.__isset.file_path)
- return false;
- else if (__isset.file_path && !(file_path == rhs.file_path))
- return false;
- if (!(file_offset == rhs.file_offset))
- return false;
- if (__isset.meta_data != rhs.__isset.meta_data)
- return false;
- else if (__isset.meta_data && !(meta_data == rhs.meta_data))
- return false;
- if (__isset.offset_index_offset != rhs.__isset.offset_index_offset)
- return false;
- else if (__isset.offset_index_offset && !(offset_index_offset == rhs.offset_index_offset))
- return false;
- if (__isset.offset_index_length != rhs.__isset.offset_index_length)
- return false;
- else if (__isset.offset_index_length && !(offset_index_length == rhs.offset_index_length))
- return false;
- if (__isset.column_index_offset != rhs.__isset.column_index_offset)
- return false;
- else if (__isset.column_index_offset && !(column_index_offset == rhs.column_index_offset))
- return false;
- if (__isset.column_index_length != rhs.__isset.column_index_length)
- return false;
- else if (__isset.column_index_length && !(column_index_length == rhs.column_index_length))
- return false;
- if (__isset.crypto_metadata != rhs.__isset.crypto_metadata)
- return false;
- else if (__isset.crypto_metadata && !(crypto_metadata == rhs.crypto_metadata))
- return false;
- if (__isset.encrypted_column_metadata != rhs.__isset.encrypted_column_metadata)
- return false;
- else if (__isset.encrypted_column_metadata && !(encrypted_column_metadata == rhs.encrypted_column_metadata))
- return false;
- return true;
- }
- bool operator != (const ColumnChunk &rhs) const {
- return !(*this == rhs);
- }
-
- bool operator < (const ColumnChunk & ) const;
-
- uint32_t read(::apache::thrift::protocol::TProtocol* iprot);
- uint32_t write(::apache::thrift::protocol::TProtocol* oprot) const;
-
- virtual void printTo(std::ostream& out) const;
-};
-
-void swap(ColumnChunk &a, ColumnChunk &b);
-
-std::ostream& operator<<(std::ostream& out, const ColumnChunk& obj);
-
-typedef struct _RowGroup__isset {
- _RowGroup__isset() : sorting_columns(false), file_offset(false), total_compressed_size(false), ordinal(false) {}
- bool sorting_columns :1;
- bool file_offset :1;
- bool total_compressed_size :1;
- bool ordinal :1;
-} _RowGroup__isset;
-
-class RowGroup : public virtual ::apache::thrift::TBase {
- public:
-
- RowGroup(const RowGroup&);
- RowGroup& operator=(const RowGroup&);
- RowGroup() : total_byte_size(0), num_rows(0), file_offset(0), total_compressed_size(0), ordinal(0) {
- }
-
- virtual ~RowGroup() noexcept;
- std::vector<ColumnChunk> columns;
- int64_t total_byte_size;
- int64_t num_rows;
- std::vector<SortingColumn> sorting_columns;
- int64_t file_offset;
- int64_t total_compressed_size;
- int16_t ordinal;
-
- _RowGroup__isset __isset;
-
- void __set_columns(const std::vector<ColumnChunk> & val);
-
- void __set_total_byte_size(const int64_t val);
-
- void __set_num_rows(const int64_t val);
-
- void __set_sorting_columns(const std::vector<SortingColumn> & val);
-
- void __set_file_offset(const int64_t val);
-
- void __set_total_compressed_size(const int64_t val);
-
- void __set_ordinal(const int16_t val);
-
- bool operator == (const RowGroup & rhs) const
- {
- if (!(columns == rhs.columns))
- return false;
- if (!(total_byte_size == rhs.total_byte_size))
- return false;
- if (!(num_rows == rhs.num_rows))
- return false;
- if (__isset.sorting_columns != rhs.__isset.sorting_columns)
- return false;
- else if (__isset.sorting_columns && !(sorting_columns == rhs.sorting_columns))
- return false;
- if (__isset.file_offset != rhs.__isset.file_offset)
- return false;
- else if (__isset.file_offset && !(file_offset == rhs.file_offset))
- return false;
- if (__isset.total_compressed_size != rhs.__isset.total_compressed_size)
- return false;
- else if (__isset.total_compressed_size && !(total_compressed_size == rhs.total_compressed_size))
- return false;
- if (__isset.ordinal != rhs.__isset.ordinal)
- return false;
- else if (__isset.ordinal && !(ordinal == rhs.ordinal))
- return false;
- return true;
- }
- bool operator != (const RowGroup &rhs) const {
- return !(*this == rhs);
- }
-
- bool operator < (const RowGroup & ) const;
-
- uint32_t read(::apache::thrift::protocol::TProtocol* iprot);
- uint32_t write(::apache::thrift::protocol::TProtocol* oprot) const;
-
- virtual void printTo(std::ostream& out) const;
-};
-
-void swap(RowGroup &a, RowGroup &b);
-
-std::ostream& operator<<(std::ostream& out, const RowGroup& obj);
-
-
-class TypeDefinedOrder : public virtual ::apache::thrift::TBase {
- public:
-
- TypeDefinedOrder(const TypeDefinedOrder&);
- TypeDefinedOrder& operator=(const TypeDefinedOrder&);
- TypeDefinedOrder() {
- }
-
- virtual ~TypeDefinedOrder() noexcept;
-
- bool operator == (const TypeDefinedOrder & /* rhs */) const
- {
- return true;
- }
- bool operator != (const TypeDefinedOrder &rhs) const {
- return !(*this == rhs);
- }
-
- bool operator < (const TypeDefinedOrder & ) const;
-
- uint32_t read(::apache::thrift::protocol::TProtocol* iprot);
- uint32_t write(::apache::thrift::protocol::TProtocol* oprot) const;
-
- virtual void printTo(std::ostream& out) const;
-};
-
-void swap(TypeDefinedOrder &a, TypeDefinedOrder &b);
-
-std::ostream& operator<<(std::ostream& out, const TypeDefinedOrder& obj);
-
-typedef struct _ColumnOrder__isset {
- _ColumnOrder__isset() : TYPE_ORDER(false) {}
- bool TYPE_ORDER :1;
-} _ColumnOrder__isset;
-
-class ColumnOrder : public virtual ::apache::thrift::TBase {
- public:
-
- ColumnOrder(const ColumnOrder&);
- ColumnOrder& operator=(const ColumnOrder&);
- ColumnOrder() {
- }
-
- virtual ~ColumnOrder() noexcept;
- TypeDefinedOrder TYPE_ORDER;
-
- _ColumnOrder__isset __isset;
-
- void __set_TYPE_ORDER(const TypeDefinedOrder& val);
-
- bool operator == (const ColumnOrder & rhs) const
- {
- if (__isset.TYPE_ORDER != rhs.__isset.TYPE_ORDER)
- return false;
- else if (__isset.TYPE_ORDER && !(TYPE_ORDER == rhs.TYPE_ORDER))
- return false;
- return true;
- }
- bool operator != (const ColumnOrder &rhs) const {
- return !(*this == rhs);
- }
-
- bool operator < (const ColumnOrder & ) const;
-
- uint32_t read(::apache::thrift::protocol::TProtocol* iprot);
- uint32_t write(::apache::thrift::protocol::TProtocol* oprot) const;
-
- virtual void printTo(std::ostream& out) const;
-};
-
-void swap(ColumnOrder &a, ColumnOrder &b);
-
-std::ostream& operator<<(std::ostream& out, const ColumnOrder& obj);
-
-
-class PageLocation : public virtual ::apache::thrift::TBase {
- public:
-
- PageLocation(const PageLocation&);
- PageLocation& operator=(const PageLocation&);
- PageLocation() : offset(0), compressed_page_size(0), first_row_index(0) {
- }
-
- virtual ~PageLocation() noexcept;
- int64_t offset;
- int32_t compressed_page_size;
- int64_t first_row_index;
-
- void __set_offset(const int64_t val);
-
- void __set_compressed_page_size(const int32_t val);
-
- void __set_first_row_index(const int64_t val);
-
- bool operator == (const PageLocation & rhs) const
- {
- if (!(offset == rhs.offset))
- return false;
- if (!(compressed_page_size == rhs.compressed_page_size))
- return false;
- if (!(first_row_index == rhs.first_row_index))
- return false;
- return true;
- }
- bool operator != (const PageLocation &rhs) const {
- return !(*this == rhs);
- }
-
- bool operator < (const PageLocation & ) const;
-
- uint32_t read(::apache::thrift::protocol::TProtocol* iprot);
- uint32_t write(::apache::thrift::protocol::TProtocol* oprot) const;
-
- virtual void printTo(std::ostream& out) const;
-};
-
-void swap(PageLocation &a, PageLocation &b);
-
-std::ostream& operator<<(std::ostream& out, const PageLocation& obj);
-
-
-class OffsetIndex : public virtual ::apache::thrift::TBase {
- public:
-
- OffsetIndex(const OffsetIndex&);
- OffsetIndex& operator=(const OffsetIndex&);
- OffsetIndex() {
- }
-
- virtual ~OffsetIndex() noexcept;
- std::vector<PageLocation> page_locations;
-
- void __set_page_locations(const std::vector<PageLocation> & val);
-
- bool operator == (const OffsetIndex & rhs) const
- {
- if (!(page_locations == rhs.page_locations))
- return false;
- return true;
- }
- bool operator != (const OffsetIndex &rhs) const {
- return !(*this == rhs);
- }
-
- bool operator < (const OffsetIndex & ) const;
-
- uint32_t read(::apache::thrift::protocol::TProtocol* iprot);
- uint32_t write(::apache::thrift::protocol::TProtocol* oprot) const;
-
- virtual void printTo(std::ostream& out) const;
-};
-
-void swap(OffsetIndex &a, OffsetIndex &b);
-
-std::ostream& operator<<(std::ostream& out, const OffsetIndex& obj);
-
-typedef struct _ColumnIndex__isset {
- _ColumnIndex__isset() : null_counts(false) {}
- bool null_counts :1;
-} _ColumnIndex__isset;
-
-class ColumnIndex : public virtual ::apache::thrift::TBase {
- public:
-
- ColumnIndex(const ColumnIndex&);
- ColumnIndex& operator=(const ColumnIndex&);
- ColumnIndex() : boundary_order((BoundaryOrder::type)0) {
- }
-
- virtual ~ColumnIndex() noexcept;
- std::vector<bool> null_pages;
- std::vector<std::string> min_values;
- std::vector<std::string> max_values;
- BoundaryOrder::type boundary_order;
- std::vector<int64_t> null_counts;
-
- _ColumnIndex__isset __isset;
-
- void __set_null_pages(const std::vector<bool> & val);
-
- void __set_min_values(const std::vector<std::string> & val);
-
- void __set_max_values(const std::vector<std::string> & val);
-
- void __set_boundary_order(const BoundaryOrder::type val);
-
- void __set_null_counts(const std::vector<int64_t> & val);
-
- bool operator == (const ColumnIndex & rhs) const
- {
- if (!(null_pages == rhs.null_pages))
- return false;
- if (!(min_values == rhs.min_values))
- return false;
- if (!(max_values == rhs.max_values))
- return false;
- if (!(boundary_order == rhs.boundary_order))
- return false;
- if (__isset.null_counts != rhs.__isset.null_counts)
- return false;
- else if (__isset.null_counts && !(null_counts == rhs.null_counts))
- return false;
- return true;
- }
- bool operator != (const ColumnIndex &rhs) const {
- return !(*this == rhs);
- }
-
- bool operator < (const ColumnIndex & ) const;
-
- uint32_t read(::apache::thrift::protocol::TProtocol* iprot);
- uint32_t write(::apache::thrift::protocol::TProtocol* oprot) const;
-
- virtual void printTo(std::ostream& out) const;
-};
-
-void swap(ColumnIndex &a, ColumnIndex &b);
-
-std::ostream& operator<<(std::ostream& out, const ColumnIndex& obj);
-
-typedef struct _AesGcmV1__isset {
- _AesGcmV1__isset() : aad_prefix(false), aad_file_unique(false), supply_aad_prefix(false) {}
- bool aad_prefix :1;
- bool aad_file_unique :1;
- bool supply_aad_prefix :1;
-} _AesGcmV1__isset;
-
-class AesGcmV1 : public virtual ::apache::thrift::TBase {
- public:
-
- AesGcmV1(const AesGcmV1&);
- AesGcmV1& operator=(const AesGcmV1&);
- AesGcmV1() : aad_prefix(), aad_file_unique(), supply_aad_prefix(0) {
- }
-
- virtual ~AesGcmV1() noexcept;
- std::string aad_prefix;
- std::string aad_file_unique;
- bool supply_aad_prefix;
-
- _AesGcmV1__isset __isset;
-
- void __set_aad_prefix(const std::string& val);
-
- void __set_aad_file_unique(const std::string& val);
-
- void __set_supply_aad_prefix(const bool val);
-
- bool operator == (const AesGcmV1 & rhs) const
- {
- if (__isset.aad_prefix != rhs.__isset.aad_prefix)
- return false;
- else if (__isset.aad_prefix && !(aad_prefix == rhs.aad_prefix))
- return false;
- if (__isset.aad_file_unique != rhs.__isset.aad_file_unique)
- return false;
- else if (__isset.aad_file_unique && !(aad_file_unique == rhs.aad_file_unique))
- return false;
- if (__isset.supply_aad_prefix != rhs.__isset.supply_aad_prefix)
- return false;
- else if (__isset.supply_aad_prefix && !(supply_aad_prefix == rhs.supply_aad_prefix))
- return false;
- return true;
- }
- bool operator != (const AesGcmV1 &rhs) const {
- return !(*this == rhs);
- }
-
- bool operator < (const AesGcmV1 & ) const;
-
- uint32_t read(::apache::thrift::protocol::TProtocol* iprot);
- uint32_t write(::apache::thrift::protocol::TProtocol* oprot) const;
-
- virtual void printTo(std::ostream& out) const;
-};
-
-void swap(AesGcmV1 &a, AesGcmV1 &b);
-
-std::ostream& operator<<(std::ostream& out, const AesGcmV1& obj);
-
-typedef struct _AesGcmCtrV1__isset {
- _AesGcmCtrV1__isset() : aad_prefix(false), aad_file_unique(false), supply_aad_prefix(false) {}
- bool aad_prefix :1;
- bool aad_file_unique :1;
- bool supply_aad_prefix :1;
-} _AesGcmCtrV1__isset;
-
-class AesGcmCtrV1 : public virtual ::apache::thrift::TBase {
- public:
-
- AesGcmCtrV1(const AesGcmCtrV1&);
- AesGcmCtrV1& operator=(const AesGcmCtrV1&);
- AesGcmCtrV1() : aad_prefix(), aad_file_unique(), supply_aad_prefix(0) {
- }
-
- virtual ~AesGcmCtrV1() noexcept;
- std::string aad_prefix;
- std::string aad_file_unique;
- bool supply_aad_prefix;
-
- _AesGcmCtrV1__isset __isset;
-
- void __set_aad_prefix(const std::string& val);
-
- void __set_aad_file_unique(const std::string& val);
-
- void __set_supply_aad_prefix(const bool val);
-
- bool operator == (const AesGcmCtrV1 & rhs) const
- {
- if (__isset.aad_prefix != rhs.__isset.aad_prefix)
- return false;
- else if (__isset.aad_prefix && !(aad_prefix == rhs.aad_prefix))
- return false;
- if (__isset.aad_file_unique != rhs.__isset.aad_file_unique)
- return false;
- else if (__isset.aad_file_unique && !(aad_file_unique == rhs.aad_file_unique))
- return false;
- if (__isset.supply_aad_prefix != rhs.__isset.supply_aad_prefix)
- return false;
- else if (__isset.supply_aad_prefix && !(supply_aad_prefix == rhs.supply_aad_prefix))
- return false;
- return true;
- }
- bool operator != (const AesGcmCtrV1 &rhs) const {
- return !(*this == rhs);
- }
-
- bool operator < (const AesGcmCtrV1 & ) const;
-
- uint32_t read(::apache::thrift::protocol::TProtocol* iprot);
- uint32_t write(::apache::thrift::protocol::TProtocol* oprot) const;
-
- virtual void printTo(std::ostream& out) const;
-};
-
-void swap(AesGcmCtrV1 &a, AesGcmCtrV1 &b);
-
-std::ostream& operator<<(std::ostream& out, const AesGcmCtrV1& obj);
-
-typedef struct _EncryptionAlgorithm__isset {
- _EncryptionAlgorithm__isset() : AES_GCM_V1(false), AES_GCM_CTR_V1(false) {}
- bool AES_GCM_V1 :1;
- bool AES_GCM_CTR_V1 :1;
-} _EncryptionAlgorithm__isset;
-
-class EncryptionAlgorithm : public virtual ::apache::thrift::TBase {
- public:
-
- EncryptionAlgorithm(const EncryptionAlgorithm&);
- EncryptionAlgorithm& operator=(const EncryptionAlgorithm&);
- EncryptionAlgorithm() {
- }
-
- virtual ~EncryptionAlgorithm() noexcept;
- AesGcmV1 AES_GCM_V1;
- AesGcmCtrV1 AES_GCM_CTR_V1;
-
- _EncryptionAlgorithm__isset __isset;
-
- void __set_AES_GCM_V1(const AesGcmV1& val);
-
- void __set_AES_GCM_CTR_V1(const AesGcmCtrV1& val);
-
- bool operator == (const EncryptionAlgorithm & rhs) const
- {
- if (__isset.AES_GCM_V1 != rhs.__isset.AES_GCM_V1)
- return false;
- else if (__isset.AES_GCM_V1 && !(AES_GCM_V1 == rhs.AES_GCM_V1))
- return false;
- if (__isset.AES_GCM_CTR_V1 != rhs.__isset.AES_GCM_CTR_V1)
- return false;
- else if (__isset.AES_GCM_CTR_V1 && !(AES_GCM_CTR_V1 == rhs.AES_GCM_CTR_V1))
- return false;
- return true;
- }
- bool operator != (const EncryptionAlgorithm &rhs) const {
- return !(*this == rhs);
- }
-
- bool operator < (const EncryptionAlgorithm & ) const;
-
- uint32_t read(::apache::thrift::protocol::TProtocol* iprot);
- uint32_t write(::apache::thrift::protocol::TProtocol* oprot) const;
-
- virtual void printTo(std::ostream& out) const;
-};
-
-void swap(EncryptionAlgorithm &a, EncryptionAlgorithm &b);
-
-std::ostream& operator<<(std::ostream& out, const EncryptionAlgorithm& obj);
-
-typedef struct _FileMetaData__isset {
- _FileMetaData__isset() : key_value_metadata(false), created_by(false), column_orders(false), encryption_algorithm(false), footer_signing_key_metadata(false) {}
- bool key_value_metadata :1;
- bool created_by :1;
- bool column_orders :1;
- bool encryption_algorithm :1;
- bool footer_signing_key_metadata :1;
-} _FileMetaData__isset;
-
-class FileMetaData : public virtual ::apache::thrift::TBase {
- public:
-
- FileMetaData(const FileMetaData&);
- FileMetaData& operator=(const FileMetaData&);
- FileMetaData() : version(0), num_rows(0), created_by(), footer_signing_key_metadata() {
- }
-
- virtual ~FileMetaData() noexcept;
- int32_t version;
- std::vector<SchemaElement> schema;
- int64_t num_rows;
- std::vector<RowGroup> row_groups;
- std::vector<KeyValue> key_value_metadata;
- std::string created_by;
- std::vector<ColumnOrder> column_orders;
- EncryptionAlgorithm encryption_algorithm;
- std::string footer_signing_key_metadata;
-
- _FileMetaData__isset __isset;
-
- void __set_version(const int32_t val);
-
- void __set_schema(const std::vector<SchemaElement> & val);
-
- void __set_num_rows(const int64_t val);
-
- void __set_row_groups(const std::vector<RowGroup> & val);
-
- void __set_key_value_metadata(const std::vector<KeyValue> & val);
-
- void __set_created_by(const std::string& val);
-
- void __set_column_orders(const std::vector<ColumnOrder> & val);
-
- void __set_encryption_algorithm(const EncryptionAlgorithm& val);
-
- void __set_footer_signing_key_metadata(const std::string& val);
-
- bool operator == (const FileMetaData & rhs) const
- {
- if (!(version == rhs.version))
- return false;
- if (!(schema == rhs.schema))
- return false;
- if (!(num_rows == rhs.num_rows))
- return false;
- if (!(row_groups == rhs.row_groups))
- return false;
- if (__isset.key_value_metadata != rhs.__isset.key_value_metadata)
- return false;
- else if (__isset.key_value_metadata && !(key_value_metadata == rhs.key_value_metadata))
- return false;
- if (__isset.created_by != rhs.__isset.created_by)
- return false;
- else if (__isset.created_by && !(created_by == rhs.created_by))
- return false;
- if (__isset.column_orders != rhs.__isset.column_orders)
- return false;
- else if (__isset.column_orders && !(column_orders == rhs.column_orders))
- return false;
- if (__isset.encryption_algorithm != rhs.__isset.encryption_algorithm)
- return false;
- else if (__isset.encryption_algorithm && !(encryption_algorithm == rhs.encryption_algorithm))
- return false;
- if (__isset.footer_signing_key_metadata != rhs.__isset.footer_signing_key_metadata)
- return false;
- else if (__isset.footer_signing_key_metadata && !(footer_signing_key_metadata == rhs.footer_signing_key_metadata))
- return false;
- return true;
- }
- bool operator != (const FileMetaData &rhs) const {
- return !(*this == rhs);
- }
-
- bool operator < (const FileMetaData & ) const;
-
- uint32_t read(::apache::thrift::protocol::TProtocol* iprot);
- uint32_t write(::apache::thrift::protocol::TProtocol* oprot) const;
-
- virtual void printTo(std::ostream& out) const;
-};
-
-void swap(FileMetaData &a, FileMetaData &b);
-
-std::ostream& operator<<(std::ostream& out, const FileMetaData& obj);
-
-typedef struct _FileCryptoMetaData__isset {
- _FileCryptoMetaData__isset() : key_metadata(false) {}
- bool key_metadata :1;
-} _FileCryptoMetaData__isset;
-
-class FileCryptoMetaData : public virtual ::apache::thrift::TBase {
- public:
-
- FileCryptoMetaData(const FileCryptoMetaData&);
- FileCryptoMetaData& operator=(const FileCryptoMetaData&);
- FileCryptoMetaData() : key_metadata() {
- }
-
- virtual ~FileCryptoMetaData() noexcept;
- EncryptionAlgorithm encryption_algorithm;
- std::string key_metadata;
-
- _FileCryptoMetaData__isset __isset;
-
- void __set_encryption_algorithm(const EncryptionAlgorithm& val);
-
- void __set_key_metadata(const std::string& val);
-
- bool operator == (const FileCryptoMetaData & rhs) const
- {
- if (!(encryption_algorithm == rhs.encryption_algorithm))
- return false;
- if (__isset.key_metadata != rhs.__isset.key_metadata)
- return false;
- else if (__isset.key_metadata && !(key_metadata == rhs.key_metadata))
- return false;
- return true;
- }
- bool operator != (const FileCryptoMetaData &rhs) const {
- return !(*this == rhs);
- }
-
- bool operator < (const FileCryptoMetaData & ) const;
-
- uint32_t read(::apache::thrift::protocol::TProtocol* iprot);
- uint32_t write(::apache::thrift::protocol::TProtocol* oprot) const;
-
- virtual void printTo(std::ostream& out) const;
-};
-
-void swap(FileCryptoMetaData &a, FileCryptoMetaData &b);
-
-std::ostream& operator<<(std::ostream& out, const FileCryptoMetaData& obj);
-
-}} // namespace
-
-#endif
+/**
+ * Autogenerated by Thrift Compiler (0.13.0)
+ *
+ * DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING
+ * @generated
+ */
+#ifndef parquet_TYPES_H
+#define parquet_TYPES_H
+
+#include <iosfwd>
+
+#include <thrift/Thrift.h>
+#include <thrift/TApplicationException.h>
+#include <thrift/TBase.h>
+#include <thrift/protocol/TProtocol.h>
+#include <thrift/transport/TTransport.h>
+
+#include <functional>
+#include <memory>
+
+#include "parquet/windows_compatibility.h"
+
+namespace parquet { namespace format {
+
+struct Type {
+ enum type {
+ BOOLEAN = 0,
+ INT32 = 1,
+ INT64 = 2,
+ INT96 = 3,
+ FLOAT = 4,
+ DOUBLE = 5,
+ BYTE_ARRAY = 6,
+ FIXED_LEN_BYTE_ARRAY = 7
+ };
+};
+
+extern const std::map<int, const char*> _Type_VALUES_TO_NAMES;
+
+std::ostream& operator<<(std::ostream& out, const Type::type& val);
+
+std::string to_string(const Type::type& val);
+
+struct ConvertedType {
+ enum type {
+ UTF8 = 0,
+ MAP = 1,
+ MAP_KEY_VALUE = 2,
+ LIST = 3,
+ ENUM = 4,
+ DECIMAL = 5,
+ DATE = 6,
+ TIME_MILLIS = 7,
+ TIME_MICROS = 8,
+ TIMESTAMP_MILLIS = 9,
+ TIMESTAMP_MICROS = 10,
+ UINT_8 = 11,
+ UINT_16 = 12,
+ UINT_32 = 13,
+ UINT_64 = 14,
+ INT_8 = 15,
+ INT_16 = 16,
+ INT_32 = 17,
+ INT_64 = 18,
+ JSON = 19,
+ BSON = 20,
+ INTERVAL = 21
+ };
+};
+
+extern const std::map<int, const char*> _ConvertedType_VALUES_TO_NAMES;
+
+std::ostream& operator<<(std::ostream& out, const ConvertedType::type& val);
+
+std::string to_string(const ConvertedType::type& val);
+
+struct FieldRepetitionType {
+ enum type {
+ REQUIRED = 0,
+ OPTIONAL = 1,
+ REPEATED = 2
+ };
+};
+
+extern const std::map<int, const char*> _FieldRepetitionType_VALUES_TO_NAMES;
+
+std::ostream& operator<<(std::ostream& out, const FieldRepetitionType::type& val);
+
+std::string to_string(const FieldRepetitionType::type& val);
+
+struct Encoding {
+ enum type {
+ PLAIN = 0,
+ PLAIN_DICTIONARY = 2,
+ RLE = 3,
+ BIT_PACKED = 4,
+ DELTA_BINARY_PACKED = 5,
+ DELTA_LENGTH_BYTE_ARRAY = 6,
+ DELTA_BYTE_ARRAY = 7,
+ RLE_DICTIONARY = 8,
+ BYTE_STREAM_SPLIT = 9
+ };
+};
+
+extern const std::map<int, const char*> _Encoding_VALUES_TO_NAMES;
+
+std::ostream& operator<<(std::ostream& out, const Encoding::type& val);
+
+std::string to_string(const Encoding::type& val);
+
+struct CompressionCodec {
+ enum type {
+ UNCOMPRESSED = 0,
+ SNAPPY = 1,
+ GZIP = 2,
+ LZO = 3,
+ BROTLI = 4,
+ LZ4 = 5,
+ ZSTD = 6,
+ LZ4_RAW = 7
+ };
+};
+
+extern const std::map<int, const char*> _CompressionCodec_VALUES_TO_NAMES;
+
+std::ostream& operator<<(std::ostream& out, const CompressionCodec::type& val);
+
+std::string to_string(const CompressionCodec::type& val);
+
+struct PageType {
+ enum type {
+ DATA_PAGE = 0,
+ INDEX_PAGE = 1,
+ DICTIONARY_PAGE = 2,
+ DATA_PAGE_V2 = 3
+ };
+};
+
+extern const std::map<int, const char*> _PageType_VALUES_TO_NAMES;
+
+std::ostream& operator<<(std::ostream& out, const PageType::type& val);
+
+std::string to_string(const PageType::type& val);
+
+struct BoundaryOrder {
+ enum type {
+ UNORDERED = 0,
+ ASCENDING = 1,
+ DESCENDING = 2
+ };
+};
+
+extern const std::map<int, const char*> _BoundaryOrder_VALUES_TO_NAMES;
+
+std::ostream& operator<<(std::ostream& out, const BoundaryOrder::type& val);
+
+std::string to_string(const BoundaryOrder::type& val);
+
+class Statistics;
+
+class StringType;
+
+class UUIDType;
+
+class MapType;
+
+class ListType;
+
+class EnumType;
+
+class DateType;
+
+class NullType;
+
+class DecimalType;
+
+class MilliSeconds;
+
+class MicroSeconds;
+
+class NanoSeconds;
+
+class TimeUnit;
+
+class TimestampType;
+
+class TimeType;
+
+class IntType;
+
+class JsonType;
+
+class BsonType;
+
+class LogicalType;
+
+class SchemaElement;
+
+class DataPageHeader;
+
+class IndexPageHeader;
+
+class DictionaryPageHeader;
+
+class DataPageHeaderV2;
+
+class SplitBlockAlgorithm;
+
+class BloomFilterAlgorithm;
+
+class XxHash;
+
+class BloomFilterHash;
+
+class Uncompressed;
+
+class BloomFilterCompression;
+
+class BloomFilterHeader;
+
+class PageHeader;
+
+class KeyValue;
+
+class SortingColumn;
+
+class PageEncodingStats;
+
+class ColumnMetaData;
+
+class EncryptionWithFooterKey;
+
+class EncryptionWithColumnKey;
+
+class ColumnCryptoMetaData;
+
+class ColumnChunk;
+
+class RowGroup;
+
+class TypeDefinedOrder;
+
+class ColumnOrder;
+
+class PageLocation;
+
+class OffsetIndex;
+
+class ColumnIndex;
+
+class AesGcmV1;
+
+class AesGcmCtrV1;
+
+class EncryptionAlgorithm;
+
+class FileMetaData;
+
+class FileCryptoMetaData;
+
+typedef struct _Statistics__isset {
+ _Statistics__isset() : max(false), min(false), null_count(false), distinct_count(false), max_value(false), min_value(false) {}
+ bool max :1;
+ bool min :1;
+ bool null_count :1;
+ bool distinct_count :1;
+ bool max_value :1;
+ bool min_value :1;
+} _Statistics__isset;
+
+class Statistics : public virtual ::apache::thrift::TBase {
+ public:
+
+ Statistics(const Statistics&);
+ Statistics& operator=(const Statistics&);
+ Statistics() : max(), min(), null_count(0), distinct_count(0), max_value(), min_value() {
+ }
+
+ virtual ~Statistics() noexcept;
+ std::string max;
+ std::string min;
+ int64_t null_count;
+ int64_t distinct_count;
+ std::string max_value;
+ std::string min_value;
+
+ _Statistics__isset __isset;
+
+ void __set_max(const std::string& val);
+
+ void __set_min(const std::string& val);
+
+ void __set_null_count(const int64_t val);
+
+ void __set_distinct_count(const int64_t val);
+
+ void __set_max_value(const std::string& val);
+
+ void __set_min_value(const std::string& val);
+
+ bool operator == (const Statistics & rhs) const
+ {
+ if (__isset.max != rhs.__isset.max)
+ return false;
+ else if (__isset.max && !(max == rhs.max))
+ return false;
+ if (__isset.min != rhs.__isset.min)
+ return false;
+ else if (__isset.min && !(min == rhs.min))
+ return false;
+ if (__isset.null_count != rhs.__isset.null_count)
+ return false;
+ else if (__isset.null_count && !(null_count == rhs.null_count))
+ return false;
+ if (__isset.distinct_count != rhs.__isset.distinct_count)
+ return false;
+ else if (__isset.distinct_count && !(distinct_count == rhs.distinct_count))
+ return false;
+ if (__isset.max_value != rhs.__isset.max_value)
+ return false;
+ else if (__isset.max_value && !(max_value == rhs.max_value))
+ return false;
+ if (__isset.min_value != rhs.__isset.min_value)
+ return false;
+ else if (__isset.min_value && !(min_value == rhs.min_value))
+ return false;
+ return true;
+ }
+ bool operator != (const Statistics &rhs) const {
+ return !(*this == rhs);
+ }
+
+ bool operator < (const Statistics & ) const;
+
+ uint32_t read(::apache::thrift::protocol::TProtocol* iprot);
+ uint32_t write(::apache::thrift::protocol::TProtocol* oprot) const;
+
+ virtual void printTo(std::ostream& out) const;
+};
+
+void swap(Statistics &a, Statistics &b);
+
+std::ostream& operator<<(std::ostream& out, const Statistics& obj);
+
+
+class StringType : public virtual ::apache::thrift::TBase {
+ public:
+
+ StringType(const StringType&);
+ StringType& operator=(const StringType&);
+ StringType() {
+ }
+
+ virtual ~StringType() noexcept;
+
+ bool operator == (const StringType & /* rhs */) const
+ {
+ return true;
+ }
+ bool operator != (const StringType &rhs) const {
+ return !(*this == rhs);
+ }
+
+ bool operator < (const StringType & ) const;
+
+ uint32_t read(::apache::thrift::protocol::TProtocol* iprot);
+ uint32_t write(::apache::thrift::protocol::TProtocol* oprot) const;
+
+ virtual void printTo(std::ostream& out) const;
+};
+
+void swap(StringType &a, StringType &b);
+
+std::ostream& operator<<(std::ostream& out, const StringType& obj);
+
+
+class UUIDType : public virtual ::apache::thrift::TBase {
+ public:
+
+ UUIDType(const UUIDType&);
+ UUIDType& operator=(const UUIDType&);
+ UUIDType() {
+ }
+
+ virtual ~UUIDType() noexcept;
+
+ bool operator == (const UUIDType & /* rhs */) const
+ {
+ return true;
+ }
+ bool operator != (const UUIDType &rhs) const {
+ return !(*this == rhs);
+ }
+
+ bool operator < (const UUIDType & ) const;
+
+ uint32_t read(::apache::thrift::protocol::TProtocol* iprot);
+ uint32_t write(::apache::thrift::protocol::TProtocol* oprot) const;
+
+ virtual void printTo(std::ostream& out) const;
+};
+
+void swap(UUIDType &a, UUIDType &b);
+
+std::ostream& operator<<(std::ostream& out, const UUIDType& obj);
+
+
+class MapType : public virtual ::apache::thrift::TBase {
+ public:
+
+ MapType(const MapType&);
+ MapType& operator=(const MapType&);
+ MapType() {
+ }
+
+ virtual ~MapType() noexcept;
+
+ bool operator == (const MapType & /* rhs */) const
+ {
+ return true;
+ }
+ bool operator != (const MapType &rhs) const {
+ return !(*this == rhs);
+ }
+
+ bool operator < (const MapType & ) const;
+
+ uint32_t read(::apache::thrift::protocol::TProtocol* iprot);
+ uint32_t write(::apache::thrift::protocol::TProtocol* oprot) const;
+
+ virtual void printTo(std::ostream& out) const;
+};
+
+void swap(MapType &a, MapType &b);
+
+std::ostream& operator<<(std::ostream& out, const MapType& obj);
+
+
+class ListType : public virtual ::apache::thrift::TBase {
+ public:
+
+ ListType(const ListType&);
+ ListType& operator=(const ListType&);
+ ListType() {
+ }
+
+ virtual ~ListType() noexcept;
+
+ bool operator == (const ListType & /* rhs */) const
+ {
+ return true;
+ }
+ bool operator != (const ListType &rhs) const {
+ return !(*this == rhs);
+ }
+
+ bool operator < (const ListType & ) const;
+
+ uint32_t read(::apache::thrift::protocol::TProtocol* iprot);
+ uint32_t write(::apache::thrift::protocol::TProtocol* oprot) const;
+
+ virtual void printTo(std::ostream& out) const;
+};
+
+void swap(ListType &a, ListType &b);
+
+std::ostream& operator<<(std::ostream& out, const ListType& obj);
+
+
+class EnumType : public virtual ::apache::thrift::TBase {
+ public:
+
+ EnumType(const EnumType&);
+ EnumType& operator=(const EnumType&);
+ EnumType() {
+ }
+
+ virtual ~EnumType() noexcept;
+
+ bool operator == (const EnumType & /* rhs */) const
+ {
+ return true;
+ }
+ bool operator != (const EnumType &rhs) const {
+ return !(*this == rhs);
+ }
+
+ bool operator < (const EnumType & ) const;
+
+ uint32_t read(::apache::thrift::protocol::TProtocol* iprot);
+ uint32_t write(::apache::thrift::protocol::TProtocol* oprot) const;
+
+ virtual void printTo(std::ostream& out) const;
+};
+
+void swap(EnumType &a, EnumType &b);
+
+std::ostream& operator<<(std::ostream& out, const EnumType& obj);
+
+
+class DateType : public virtual ::apache::thrift::TBase {
+ public:
+
+ DateType(const DateType&);
+ DateType& operator=(const DateType&);
+ DateType() {
+ }
+
+ virtual ~DateType() noexcept;
+
+ bool operator == (const DateType & /* rhs */) const
+ {
+ return true;
+ }
+ bool operator != (const DateType &rhs) const {
+ return !(*this == rhs);
+ }
+
+ bool operator < (const DateType & ) const;
+
+ uint32_t read(::apache::thrift::protocol::TProtocol* iprot);
+ uint32_t write(::apache::thrift::protocol::TProtocol* oprot) const;
+
+ virtual void printTo(std::ostream& out) const;
+};
+
+void swap(DateType &a, DateType &b);
+
+std::ostream& operator<<(std::ostream& out, const DateType& obj);
+
+
+class NullType : public virtual ::apache::thrift::TBase {
+ public:
+
+ NullType(const NullType&);
+ NullType& operator=(const NullType&);
+ NullType() {
+ }
+
+ virtual ~NullType() noexcept;
+
+ bool operator == (const NullType & /* rhs */) const
+ {
+ return true;
+ }
+ bool operator != (const NullType &rhs) const {
+ return !(*this == rhs);
+ }
+
+ bool operator < (const NullType & ) const;
+
+ uint32_t read(::apache::thrift::protocol::TProtocol* iprot);
+ uint32_t write(::apache::thrift::protocol::TProtocol* oprot) const;
+
+ virtual void printTo(std::ostream& out) const;
+};
+
+void swap(NullType &a, NullType &b);
+
+std::ostream& operator<<(std::ostream& out, const NullType& obj);
+
+
+class DecimalType : public virtual ::apache::thrift::TBase {
+ public:
+
+ DecimalType(const DecimalType&);
+ DecimalType& operator=(const DecimalType&);
+ DecimalType() : scale(0), precision(0) {
+ }
+
+ virtual ~DecimalType() noexcept;
+ int32_t scale;
+ int32_t precision;
+
+ void __set_scale(const int32_t val);
+
+ void __set_precision(const int32_t val);
+
+ bool operator == (const DecimalType & rhs) const
+ {
+ if (!(scale == rhs.scale))
+ return false;
+ if (!(precision == rhs.precision))
+ return false;
+ return true;
+ }
+ bool operator != (const DecimalType &rhs) const {
+ return !(*this == rhs);
+ }
+
+ bool operator < (const DecimalType & ) const;
+
+ uint32_t read(::apache::thrift::protocol::TProtocol* iprot);
+ uint32_t write(::apache::thrift::protocol::TProtocol* oprot) const;
+
+ virtual void printTo(std::ostream& out) const;
+};
+
+void swap(DecimalType &a, DecimalType &b);
+
+std::ostream& operator<<(std::ostream& out, const DecimalType& obj);
+
+
+class MilliSeconds : public virtual ::apache::thrift::TBase {
+ public:
+
+ MilliSeconds(const MilliSeconds&);
+ MilliSeconds& operator=(const MilliSeconds&);
+ MilliSeconds() {
+ }
+
+ virtual ~MilliSeconds() noexcept;
+
+ bool operator == (const MilliSeconds & /* rhs */) const
+ {
+ return true;
+ }
+ bool operator != (const MilliSeconds &rhs) const {
+ return !(*this == rhs);
+ }
+
+ bool operator < (const MilliSeconds & ) const;
+
+ uint32_t read(::apache::thrift::protocol::TProtocol* iprot);
+ uint32_t write(::apache::thrift::protocol::TProtocol* oprot) const;
+
+ virtual void printTo(std::ostream& out) const;
+};
+
+void swap(MilliSeconds &a, MilliSeconds &b);
+
+std::ostream& operator<<(std::ostream& out, const MilliSeconds& obj);
+
+
+class MicroSeconds : public virtual ::apache::thrift::TBase {
+ public:
+
+ MicroSeconds(const MicroSeconds&);
+ MicroSeconds& operator=(const MicroSeconds&);
+ MicroSeconds() {
+ }
+
+ virtual ~MicroSeconds() noexcept;
+
+ bool operator == (const MicroSeconds & /* rhs */) const
+ {
+ return true;
+ }
+ bool operator != (const MicroSeconds &rhs) const {
+ return !(*this == rhs);
+ }
+
+ bool operator < (const MicroSeconds & ) const;
+
+ uint32_t read(::apache::thrift::protocol::TProtocol* iprot);
+ uint32_t write(::apache::thrift::protocol::TProtocol* oprot) const;
+
+ virtual void printTo(std::ostream& out) const;
+};
+
+void swap(MicroSeconds &a, MicroSeconds &b);
+
+std::ostream& operator<<(std::ostream& out, const MicroSeconds& obj);
+
+
+class NanoSeconds : public virtual ::apache::thrift::TBase {
+ public:
+
+ NanoSeconds(const NanoSeconds&);
+ NanoSeconds& operator=(const NanoSeconds&);
+ NanoSeconds() {
+ }
+
+ virtual ~NanoSeconds() noexcept;
+
+ bool operator == (const NanoSeconds & /* rhs */) const
+ {
+ return true;
+ }
+ bool operator != (const NanoSeconds &rhs) const {
+ return !(*this == rhs);
+ }
+
+ bool operator < (const NanoSeconds & ) const;
+
+ uint32_t read(::apache::thrift::protocol::TProtocol* iprot);
+ uint32_t write(::apache::thrift::protocol::TProtocol* oprot) const;
+
+ virtual void printTo(std::ostream& out) const;
+};
+
+void swap(NanoSeconds &a, NanoSeconds &b);
+
+std::ostream& operator<<(std::ostream& out, const NanoSeconds& obj);
+
+typedef struct _TimeUnit__isset {
+ _TimeUnit__isset() : MILLIS(false), MICROS(false), NANOS(false) {}
+ bool MILLIS :1;
+ bool MICROS :1;
+ bool NANOS :1;
+} _TimeUnit__isset;
+
+class TimeUnit : public virtual ::apache::thrift::TBase {
+ public:
+
+ TimeUnit(const TimeUnit&);
+ TimeUnit& operator=(const TimeUnit&);
+ TimeUnit() {
+ }
+
+ virtual ~TimeUnit() noexcept;
+ MilliSeconds MILLIS;
+ MicroSeconds MICROS;
+ NanoSeconds NANOS;
+
+ _TimeUnit__isset __isset;
+
+ void __set_MILLIS(const MilliSeconds& val);
+
+ void __set_MICROS(const MicroSeconds& val);
+
+ void __set_NANOS(const NanoSeconds& val);
+
+ bool operator == (const TimeUnit & rhs) const
+ {
+ if (__isset.MILLIS != rhs.__isset.MILLIS)
+ return false;
+ else if (__isset.MILLIS && !(MILLIS == rhs.MILLIS))
+ return false;
+ if (__isset.MICROS != rhs.__isset.MICROS)
+ return false;
+ else if (__isset.MICROS && !(MICROS == rhs.MICROS))
+ return false;
+ if (__isset.NANOS != rhs.__isset.NANOS)
+ return false;
+ else if (__isset.NANOS && !(NANOS == rhs.NANOS))
+ return false;
+ return true;
+ }
+ bool operator != (const TimeUnit &rhs) const {
+ return !(*this == rhs);
+ }
+
+ bool operator < (const TimeUnit & ) const;
+
+ uint32_t read(::apache::thrift::protocol::TProtocol* iprot);
+ uint32_t write(::apache::thrift::protocol::TProtocol* oprot) const;
+
+ virtual void printTo(std::ostream& out) const;
+};
+
+void swap(TimeUnit &a, TimeUnit &b);
+
+std::ostream& operator<<(std::ostream& out, const TimeUnit& obj);
+
+
+class TimestampType : public virtual ::apache::thrift::TBase {
+ public:
+
+ TimestampType(const TimestampType&);
+ TimestampType& operator=(const TimestampType&);
+ TimestampType() : isAdjustedToUTC(0) {
+ }
+
+ virtual ~TimestampType() noexcept;
+ bool isAdjustedToUTC;
+ TimeUnit unit;
+
+ void __set_isAdjustedToUTC(const bool val);
+
+ void __set_unit(const TimeUnit& val);
+
+ bool operator == (const TimestampType & rhs) const
+ {
+ if (!(isAdjustedToUTC == rhs.isAdjustedToUTC))
+ return false;
+ if (!(unit == rhs.unit))
+ return false;
+ return true;
+ }
+ bool operator != (const TimestampType &rhs) const {
+ return !(*this == rhs);
+ }
+
+ bool operator < (const TimestampType & ) const;
+
+ uint32_t read(::apache::thrift::protocol::TProtocol* iprot);
+ uint32_t write(::apache::thrift::protocol::TProtocol* oprot) const;
+
+ virtual void printTo(std::ostream& out) const;
+};
+
+void swap(TimestampType &a, TimestampType &b);
+
+std::ostream& operator<<(std::ostream& out, const TimestampType& obj);
+
+
+class TimeType : public virtual ::apache::thrift::TBase {
+ public:
+
+ TimeType(const TimeType&);
+ TimeType& operator=(const TimeType&);
+ TimeType() : isAdjustedToUTC(0) {
+ }
+
+ virtual ~TimeType() noexcept;
+ bool isAdjustedToUTC;
+ TimeUnit unit;
+
+ void __set_isAdjustedToUTC(const bool val);
+
+ void __set_unit(const TimeUnit& val);
+
+ bool operator == (const TimeType & rhs) const
+ {
+ if (!(isAdjustedToUTC == rhs.isAdjustedToUTC))
+ return false;
+ if (!(unit == rhs.unit))
+ return false;
+ return true;
+ }
+ bool operator != (const TimeType &rhs) const {
+ return !(*this == rhs);
+ }
+
+ bool operator < (const TimeType & ) const;
+
+ uint32_t read(::apache::thrift::protocol::TProtocol* iprot);
+ uint32_t write(::apache::thrift::protocol::TProtocol* oprot) const;
+
+ virtual void printTo(std::ostream& out) const;
+};
+
+void swap(TimeType &a, TimeType &b);
+
+std::ostream& operator<<(std::ostream& out, const TimeType& obj);
+
+
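+// Integer logical type annotation: bit width (8, 16, 32 or 64) plus signedness.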
+class IntType : public virtual ::apache::thrift::TBase {
+ public:
+
+ IntType(const IntType&);
+ IntType& operator=(const IntType&);
+ IntType() : bitWidth(0), isSigned(0) {
+ }
+
+ virtual ~IntType() noexcept;
+ int8_t bitWidth;
+ bool isSigned;
+
+ void __set_bitWidth(const int8_t val);
+
+ void __set_isSigned(const bool val);
+
+ bool operator == (const IntType & rhs) const
+ {
+ if (!(bitWidth == rhs.bitWidth))
+ return false;
+ if (!(isSigned == rhs.isSigned))
+ return false;
+ return true;
+ }
+ bool operator != (const IntType &rhs) const {
+ return !(*this == rhs);
+ }
+
+ bool operator < (const IntType & ) const;
+
+ uint32_t read(::apache::thrift::protocol::TProtocol* iprot);
+ uint32_t write(::apache::thrift::protocol::TProtocol* oprot) const;
+
+ virtual void printTo(std::ostream& out) const;
+};
+
+void swap(IntType &a, IntType &b);
+
+std::ostream& operator<<(std::ostream& out, const IntType& obj);
+
+
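+// JsonType and BsonType (below) are empty marker structs annotating
+// BYTE_ARRAY columns that hold embedded JSON or BSON documents.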
+class JsonType : public virtual ::apache::thrift::TBase {
+ public:
+
+ JsonType(const JsonType&);
+ JsonType& operator=(const JsonType&);
+ JsonType() {
+ }
+
+ virtual ~JsonType() noexcept;
+
+ bool operator == (const JsonType & /* rhs */) const
+ {
+ return true;
+ }
+ bool operator != (const JsonType &rhs) const {
+ return !(*this == rhs);
+ }
+
+ bool operator < (const JsonType & ) const;
+
+ uint32_t read(::apache::thrift::protocol::TProtocol* iprot);
+ uint32_t write(::apache::thrift::protocol::TProtocol* oprot) const;
+
+ virtual void printTo(std::ostream& out) const;
+};
+
+void swap(JsonType &a, JsonType &b);
+
+std::ostream& operator<<(std::ostream& out, const JsonType& obj);
+
+
+class BsonType : public virtual ::apache::thrift::TBase {
+ public:
+
+ BsonType(const BsonType&);
+ BsonType& operator=(const BsonType&);
+ BsonType() {
+ }
+
+ virtual ~BsonType() noexcept;
+
+ bool operator == (const BsonType & /* rhs */) const
+ {
+ return true;
+ }
+ bool operator != (const BsonType &rhs) const {
+ return !(*this == rhs);
+ }
+
+ bool operator < (const BsonType & ) const;
+
+ uint32_t read(::apache::thrift::protocol::TProtocol* iprot);
+ uint32_t write(::apache::thrift::protocol::TProtocol* oprot) const;
+
+ virtual void printTo(std::ostream& out) const;
+};
+
+void swap(BsonType &a, BsonType &b);
+
+std::ostream& operator<<(std::ostream& out, const BsonType& obj);
+
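+// LogicalType is a Thrift union over all logical type annotations; it
+// supersedes the older ConvertedType enum, which writers still populate
+// where possible for backward compatibility.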
+typedef struct _LogicalType__isset {
+ _LogicalType__isset() : STRING(false), MAP(false), LIST(false), ENUM(false), DECIMAL(false), DATE(false), TIME(false), TIMESTAMP(false), INTEGER(false), UNKNOWN(false), JSON(false), BSON(false), UUID(false) {}
+ bool STRING :1;
+ bool MAP :1;
+ bool LIST :1;
+ bool ENUM :1;
+ bool DECIMAL :1;
+ bool DATE :1;
+ bool TIME :1;
+ bool TIMESTAMP :1;
+ bool INTEGER :1;
+ bool UNKNOWN :1;
+ bool JSON :1;
+ bool BSON :1;
+ bool UUID :1;
+} _LogicalType__isset;
+
+class LogicalType : public virtual ::apache::thrift::TBase {
+ public:
+
+ LogicalType(const LogicalType&);
+ LogicalType& operator=(const LogicalType&);
+ LogicalType() {
+ }
+
+ virtual ~LogicalType() noexcept;
+ StringType STRING;
+ MapType MAP;
+ ListType LIST;
+ EnumType ENUM;
+ DecimalType DECIMAL;
+ DateType DATE;
+ TimeType TIME;
+ TimestampType TIMESTAMP;
+ IntType INTEGER;
+ NullType UNKNOWN;
+ JsonType JSON;
+ BsonType BSON;
+ UUIDType UUID;
+
+ _LogicalType__isset __isset;
+
+ void __set_STRING(const StringType& val);
+
+ void __set_MAP(const MapType& val);
+
+ void __set_LIST(const ListType& val);
+
+ void __set_ENUM(const EnumType& val);
+
+ void __set_DECIMAL(const DecimalType& val);
+
+ void __set_DATE(const DateType& val);
+
+ void __set_TIME(const TimeType& val);
+
+ void __set_TIMESTAMP(const TimestampType& val);
+
+ void __set_INTEGER(const IntType& val);
+
+ void __set_UNKNOWN(const NullType& val);
+
+ void __set_JSON(const JsonType& val);
+
+ void __set_BSON(const BsonType& val);
+
+ void __set_UUID(const UUIDType& val);
+
+ bool operator == (const LogicalType & rhs) const
+ {
+ if (__isset.STRING != rhs.__isset.STRING)
+ return false;
+ else if (__isset.STRING && !(STRING == rhs.STRING))
+ return false;
+ if (__isset.MAP != rhs.__isset.MAP)
+ return false;
+ else if (__isset.MAP && !(MAP == rhs.MAP))
+ return false;
+ if (__isset.LIST != rhs.__isset.LIST)
+ return false;
+ else if (__isset.LIST && !(LIST == rhs.LIST))
+ return false;
+ if (__isset.ENUM != rhs.__isset.ENUM)
+ return false;
+ else if (__isset.ENUM && !(ENUM == rhs.ENUM))
+ return false;
+ if (__isset.DECIMAL != rhs.__isset.DECIMAL)
+ return false;
+ else if (__isset.DECIMAL && !(DECIMAL == rhs.DECIMAL))
+ return false;
+ if (__isset.DATE != rhs.__isset.DATE)
+ return false;
+ else if (__isset.DATE && !(DATE == rhs.DATE))
+ return false;
+ if (__isset.TIME != rhs.__isset.TIME)
+ return false;
+ else if (__isset.TIME && !(TIME == rhs.TIME))
+ return false;
+ if (__isset.TIMESTAMP != rhs.__isset.TIMESTAMP)
+ return false;
+ else if (__isset.TIMESTAMP && !(TIMESTAMP == rhs.TIMESTAMP))
+ return false;
+ if (__isset.INTEGER != rhs.__isset.INTEGER)
+ return false;
+ else if (__isset.INTEGER && !(INTEGER == rhs.INTEGER))
+ return false;
+ if (__isset.UNKNOWN != rhs.__isset.UNKNOWN)
+ return false;
+ else if (__isset.UNKNOWN && !(UNKNOWN == rhs.UNKNOWN))
+ return false;
+ if (__isset.JSON != rhs.__isset.JSON)
+ return false;
+ else if (__isset.JSON && !(JSON == rhs.JSON))
+ return false;
+ if (__isset.BSON != rhs.__isset.BSON)
+ return false;
+ else if (__isset.BSON && !(BSON == rhs.BSON))
+ return false;
+ if (__isset.UUID != rhs.__isset.UUID)
+ return false;
+ else if (__isset.UUID && !(UUID == rhs.UUID))
+ return false;
+ return true;
+ }
+ bool operator != (const LogicalType &rhs) const {
+ return !(*this == rhs);
+ }
+
+ bool operator < (const LogicalType & ) const;
+
+ uint32_t read(::apache::thrift::protocol::TProtocol* iprot);
+ uint32_t write(::apache::thrift::protocol::TProtocol* oprot) const;
+
+ virtual void printTo(std::ostream& out) const;
+};
+
+void swap(LogicalType &a, LogicalType &b);
+
+std::ostream& operator<<(std::ostream& out, const LogicalType& obj);
+
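+// SchemaElement is one node of the schema tree, stored flattened in
+// depth-first order in FileMetaData::schema. Group nodes set num_children;
+// leaf nodes set the physical type instead.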
+typedef struct _SchemaElement__isset {
+ _SchemaElement__isset() : type(false), type_length(false), repetition_type(false), num_children(false), converted_type(false), scale(false), precision(false), field_id(false), logicalType(false) {}
+ bool type :1;
+ bool type_length :1;
+ bool repetition_type :1;
+ bool num_children :1;
+ bool converted_type :1;
+ bool scale :1;
+ bool precision :1;
+ bool field_id :1;
+ bool logicalType :1;
+} _SchemaElement__isset;
+
+class SchemaElement : public virtual ::apache::thrift::TBase {
+ public:
+
+ SchemaElement(const SchemaElement&);
+ SchemaElement& operator=(const SchemaElement&);
+ SchemaElement() : type((Type::type)0), type_length(0), repetition_type((FieldRepetitionType::type)0), name(), num_children(0), converted_type((ConvertedType::type)0), scale(0), precision(0), field_id(0) {
+ }
+
+ virtual ~SchemaElement() noexcept;
+ Type::type type;
+ int32_t type_length;
+ FieldRepetitionType::type repetition_type;
+ std::string name;
+ int32_t num_children;
+ ConvertedType::type converted_type;
+ int32_t scale;
+ int32_t precision;
+ int32_t field_id;
+ LogicalType logicalType;
+
+ _SchemaElement__isset __isset;
+
+ void __set_type(const Type::type val);
+
+ void __set_type_length(const int32_t val);
+
+ void __set_repetition_type(const FieldRepetitionType::type val);
+
+ void __set_name(const std::string& val);
+
+ void __set_num_children(const int32_t val);
+
+ void __set_converted_type(const ConvertedType::type val);
+
+ void __set_scale(const int32_t val);
+
+ void __set_precision(const int32_t val);
+
+ void __set_field_id(const int32_t val);
+
+ void __set_logicalType(const LogicalType& val);
+
+ bool operator == (const SchemaElement & rhs) const
+ {
+ if (__isset.type != rhs.__isset.type)
+ return false;
+ else if (__isset.type && !(type == rhs.type))
+ return false;
+ if (__isset.type_length != rhs.__isset.type_length)
+ return false;
+ else if (__isset.type_length && !(type_length == rhs.type_length))
+ return false;
+ if (__isset.repetition_type != rhs.__isset.repetition_type)
+ return false;
+ else if (__isset.repetition_type && !(repetition_type == rhs.repetition_type))
+ return false;
+ if (!(name == rhs.name))
+ return false;
+ if (__isset.num_children != rhs.__isset.num_children)
+ return false;
+ else if (__isset.num_children && !(num_children == rhs.num_children))
+ return false;
+ if (__isset.converted_type != rhs.__isset.converted_type)
+ return false;
+ else if (__isset.converted_type && !(converted_type == rhs.converted_type))
+ return false;
+ if (__isset.scale != rhs.__isset.scale)
+ return false;
+ else if (__isset.scale && !(scale == rhs.scale))
+ return false;
+ if (__isset.precision != rhs.__isset.precision)
+ return false;
+ else if (__isset.precision && !(precision == rhs.precision))
+ return false;
+ if (__isset.field_id != rhs.__isset.field_id)
+ return false;
+ else if (__isset.field_id && !(field_id == rhs.field_id))
+ return false;
+ if (__isset.logicalType != rhs.__isset.logicalType)
+ return false;
+ else if (__isset.logicalType && !(logicalType == rhs.logicalType))
+ return false;
+ return true;
+ }
+ bool operator != (const SchemaElement &rhs) const {
+ return !(*this == rhs);
+ }
+
+ bool operator < (const SchemaElement & ) const;
+
+ uint32_t read(::apache::thrift::protocol::TProtocol* iprot);
+ uint32_t write(::apache::thrift::protocol::TProtocol* oprot) const;
+
+ virtual void printTo(std::ostream& out) const;
+};
+
+void swap(SchemaElement &a, SchemaElement &b);
+
+std::ostream& operator<<(std::ostream& out, const SchemaElement& obj);
+
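+// Header for a V1 data page: the value count and the encodings used for
+// values and for definition/repetition levels, plus optional statistics.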
+typedef struct _DataPageHeader__isset {
+ _DataPageHeader__isset() : statistics(false) {}
+ bool statistics :1;
+} _DataPageHeader__isset;
+
+class DataPageHeader : public virtual ::apache::thrift::TBase {
+ public:
+
+ DataPageHeader(const DataPageHeader&);
+ DataPageHeader& operator=(const DataPageHeader&);
+ DataPageHeader() : num_values(0), encoding((Encoding::type)0), definition_level_encoding((Encoding::type)0), repetition_level_encoding((Encoding::type)0) {
+ }
+
+ virtual ~DataPageHeader() noexcept;
+ int32_t num_values;
+ Encoding::type encoding;
+ Encoding::type definition_level_encoding;
+ Encoding::type repetition_level_encoding;
+ Statistics statistics;
+
+ _DataPageHeader__isset __isset;
+
+ void __set_num_values(const int32_t val);
+
+ void __set_encoding(const Encoding::type val);
+
+ void __set_definition_level_encoding(const Encoding::type val);
+
+ void __set_repetition_level_encoding(const Encoding::type val);
+
+ void __set_statistics(const Statistics& val);
+
+ bool operator == (const DataPageHeader & rhs) const
+ {
+ if (!(num_values == rhs.num_values))
+ return false;
+ if (!(encoding == rhs.encoding))
+ return false;
+ if (!(definition_level_encoding == rhs.definition_level_encoding))
+ return false;
+ if (!(repetition_level_encoding == rhs.repetition_level_encoding))
+ return false;
+ if (__isset.statistics != rhs.__isset.statistics)
+ return false;
+ else if (__isset.statistics && !(statistics == rhs.statistics))
+ return false;
+ return true;
+ }
+ bool operator != (const DataPageHeader &rhs) const {
+ return !(*this == rhs);
+ }
+
+ bool operator < (const DataPageHeader & ) const;
+
+ uint32_t read(::apache::thrift::protocol::TProtocol* iprot);
+ uint32_t write(::apache::thrift::protocol::TProtocol* oprot) const;
+
+ virtual void printTo(std::ostream& out) const;
+};
+
+void swap(DataPageHeader &a, DataPageHeader &b);
+
+std::ostream& operator<<(std::ostream& out, const DataPageHeader& obj);
+
+
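+// Index pages are left unspecified by the format, so this header is an
+// empty placeholder.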
+class IndexPageHeader : public virtual ::apache::thrift::TBase {
+ public:
+
+ IndexPageHeader(const IndexPageHeader&);
+ IndexPageHeader& operator=(const IndexPageHeader&);
+ IndexPageHeader() {
+ }
+
+ virtual ~IndexPageHeader() noexcept;
+
+ bool operator == (const IndexPageHeader & /* rhs */) const
+ {
+ return true;
+ }
+ bool operator != (const IndexPageHeader &rhs) const {
+ return !(*this == rhs);
+ }
+
+ bool operator < (const IndexPageHeader & ) const;
+
+ uint32_t read(::apache::thrift::protocol::TProtocol* iprot);
+ uint32_t write(::apache::thrift::protocol::TProtocol* oprot) const;
+
+ virtual void printTo(std::ostream& out) const;
+};
+
+void swap(IndexPageHeader &a, IndexPageHeader &b);
+
+std::ostream& operator<<(std::ostream& out, const IndexPageHeader& obj);
+
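+// Header for a dictionary page: number of dictionary entries, their
+// encoding, and whether the entries are sorted in ascending order.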
+typedef struct _DictionaryPageHeader__isset {
+ _DictionaryPageHeader__isset() : is_sorted(false) {}
+ bool is_sorted :1;
+} _DictionaryPageHeader__isset;
+
+class DictionaryPageHeader : public virtual ::apache::thrift::TBase {
+ public:
+
+ DictionaryPageHeader(const DictionaryPageHeader&);
+ DictionaryPageHeader& operator=(const DictionaryPageHeader&);
+ DictionaryPageHeader() : num_values(0), encoding((Encoding::type)0), is_sorted(0) {
+ }
+
+ virtual ~DictionaryPageHeader() noexcept;
+ int32_t num_values;
+ Encoding::type encoding;
+ bool is_sorted;
+
+ _DictionaryPageHeader__isset __isset;
+
+ void __set_num_values(const int32_t val);
+
+ void __set_encoding(const Encoding::type val);
+
+ void __set_is_sorted(const bool val);
+
+ bool operator == (const DictionaryPageHeader & rhs) const
+ {
+ if (!(num_values == rhs.num_values))
+ return false;
+ if (!(encoding == rhs.encoding))
+ return false;
+ if (__isset.is_sorted != rhs.__isset.is_sorted)
+ return false;
+ else if (__isset.is_sorted && !(is_sorted == rhs.is_sorted))
+ return false;
+ return true;
+ }
+ bool operator != (const DictionaryPageHeader &rhs) const {
+ return !(*this == rhs);
+ }
+
+ bool operator < (const DictionaryPageHeader & ) const;
+
+ uint32_t read(::apache::thrift::protocol::TProtocol* iprot);
+ uint32_t write(::apache::thrift::protocol::TProtocol* oprot) const;
+
+ virtual void printTo(std::ostream& out) const;
+};
+
+void swap(DictionaryPageHeader &a, DictionaryPageHeader &b);
+
+std::ostream& operator<<(std::ostream& out, const DictionaryPageHeader& obj);
+
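+// Header for a V2 data page. Unlike V1 it records null and row counts,
+// stores the byte lengths of the (always RLE-encoded) level data, and lets
+// compression be toggled per page; is_compressed defaults to true, which is
+// why its __isset flag below starts out set.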
+typedef struct _DataPageHeaderV2__isset {
+ _DataPageHeaderV2__isset() : is_compressed(true), statistics(false) {}
+ bool is_compressed :1;
+ bool statistics :1;
+} _DataPageHeaderV2__isset;
+
+class DataPageHeaderV2 : public virtual ::apache::thrift::TBase {
+ public:
+
+ DataPageHeaderV2(const DataPageHeaderV2&);
+ DataPageHeaderV2& operator=(const DataPageHeaderV2&);
+ DataPageHeaderV2() : num_values(0), num_nulls(0), num_rows(0), encoding((Encoding::type)0), definition_levels_byte_length(0), repetition_levels_byte_length(0), is_compressed(true) {
+ }
+
+ virtual ~DataPageHeaderV2() noexcept;
+ int32_t num_values;
+ int32_t num_nulls;
+ int32_t num_rows;
+ Encoding::type encoding;
+ int32_t definition_levels_byte_length;
+ int32_t repetition_levels_byte_length;
+ bool is_compressed;
+ Statistics statistics;
+
+ _DataPageHeaderV2__isset __isset;
+
+ void __set_num_values(const int32_t val);
+
+ void __set_num_nulls(const int32_t val);
+
+ void __set_num_rows(const int32_t val);
+
+ void __set_encoding(const Encoding::type val);
+
+ void __set_definition_levels_byte_length(const int32_t val);
+
+ void __set_repetition_levels_byte_length(const int32_t val);
+
+ void __set_is_compressed(const bool val);
+
+ void __set_statistics(const Statistics& val);
+
+ bool operator == (const DataPageHeaderV2 & rhs) const
+ {
+ if (!(num_values == rhs.num_values))
+ return false;
+ if (!(num_nulls == rhs.num_nulls))
+ return false;
+ if (!(num_rows == rhs.num_rows))
+ return false;
+ if (!(encoding == rhs.encoding))
+ return false;
+ if (!(definition_levels_byte_length == rhs.definition_levels_byte_length))
+ return false;
+ if (!(repetition_levels_byte_length == rhs.repetition_levels_byte_length))
+ return false;
+ if (__isset.is_compressed != rhs.__isset.is_compressed)
+ return false;
+ else if (__isset.is_compressed && !(is_compressed == rhs.is_compressed))
+ return false;
+ if (__isset.statistics != rhs.__isset.statistics)
+ return false;
+ else if (__isset.statistics && !(statistics == rhs.statistics))
+ return false;
+ return true;
+ }
+ bool operator != (const DataPageHeaderV2 &rhs) const {
+ return !(*this == rhs);
+ }
+
+ bool operator < (const DataPageHeaderV2 & ) const;
+
+ uint32_t read(::apache::thrift::protocol::TProtocol* iprot);
+ uint32_t write(::apache::thrift::protocol::TProtocol* oprot) const;
+
+ virtual void printTo(std::ostream& out) const;
+};
+
+void swap(DataPageHeaderV2 &a, DataPageHeaderV2 &b);
+
+std::ostream& operator<<(std::ostream& out, const DataPageHeaderV2& obj);
+
+
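+// Bloom filter metadata. The format currently defines one algorithm
+// (split-block Bloom filter), one hash (xxHash) and one compression scheme
+// (uncompressed); each is wrapped in a single-alternative union, presumably
+// so new variants can be added without breaking compatibility.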
+class SplitBlockAlgorithm : public virtual ::apache::thrift::TBase {
+ public:
+
+ SplitBlockAlgorithm(const SplitBlockAlgorithm&);
+ SplitBlockAlgorithm& operator=(const SplitBlockAlgorithm&);
+ SplitBlockAlgorithm() {
+ }
+
+ virtual ~SplitBlockAlgorithm() noexcept;
+
+ bool operator == (const SplitBlockAlgorithm & /* rhs */) const
+ {
+ return true;
+ }
+ bool operator != (const SplitBlockAlgorithm &rhs) const {
+ return !(*this == rhs);
+ }
+
+ bool operator < (const SplitBlockAlgorithm & ) const;
+
+ uint32_t read(::apache::thrift::protocol::TProtocol* iprot);
+ uint32_t write(::apache::thrift::protocol::TProtocol* oprot) const;
+
+ virtual void printTo(std::ostream& out) const;
+};
+
+void swap(SplitBlockAlgorithm &a, SplitBlockAlgorithm &b);
+
+std::ostream& operator<<(std::ostream& out, const SplitBlockAlgorithm& obj);
+
+typedef struct _BloomFilterAlgorithm__isset {
+ _BloomFilterAlgorithm__isset() : BLOCK(false) {}
+ bool BLOCK :1;
+} _BloomFilterAlgorithm__isset;
+
+class BloomFilterAlgorithm : public virtual ::apache::thrift::TBase {
+ public:
+
+ BloomFilterAlgorithm(const BloomFilterAlgorithm&);
+ BloomFilterAlgorithm& operator=(const BloomFilterAlgorithm&);
+ BloomFilterAlgorithm() {
+ }
+
+ virtual ~BloomFilterAlgorithm() noexcept;
+ SplitBlockAlgorithm BLOCK;
+
+ _BloomFilterAlgorithm__isset __isset;
+
+ void __set_BLOCK(const SplitBlockAlgorithm& val);
+
+ bool operator == (const BloomFilterAlgorithm & rhs) const
+ {
+ if (__isset.BLOCK != rhs.__isset.BLOCK)
+ return false;
+ else if (__isset.BLOCK && !(BLOCK == rhs.BLOCK))
+ return false;
+ return true;
+ }
+ bool operator != (const BloomFilterAlgorithm &rhs) const {
+ return !(*this == rhs);
+ }
+
+ bool operator < (const BloomFilterAlgorithm & ) const;
+
+ uint32_t read(::apache::thrift::protocol::TProtocol* iprot);
+ uint32_t write(::apache::thrift::protocol::TProtocol* oprot) const;
+
+ virtual void printTo(std::ostream& out) const;
+};
+
+void swap(BloomFilterAlgorithm &a, BloomFilterAlgorithm &b);
+
+std::ostream& operator<<(std::ostream& out, const BloomFilterAlgorithm& obj);
+
+
+class XxHash : public virtual ::apache::thrift::TBase {
+ public:
+
+ XxHash(const XxHash&);
+ XxHash& operator=(const XxHash&);
+ XxHash() {
+ }
+
+ virtual ~XxHash() noexcept;
+
+ bool operator == (const XxHash & /* rhs */) const
+ {
+ return true;
+ }
+ bool operator != (const XxHash &rhs) const {
+ return !(*this == rhs);
+ }
+
+ bool operator < (const XxHash & ) const;
+
+ uint32_t read(::apache::thrift::protocol::TProtocol* iprot);
+ uint32_t write(::apache::thrift::protocol::TProtocol* oprot) const;
+
+ virtual void printTo(std::ostream& out) const;
+};
+
+void swap(XxHash &a, XxHash &b);
+
+std::ostream& operator<<(std::ostream& out, const XxHash& obj);
+
+typedef struct _BloomFilterHash__isset {
+ _BloomFilterHash__isset() : XXHASH(false) {}
+ bool XXHASH :1;
+} _BloomFilterHash__isset;
+
+class BloomFilterHash : public virtual ::apache::thrift::TBase {
+ public:
+
+ BloomFilterHash(const BloomFilterHash&);
+ BloomFilterHash& operator=(const BloomFilterHash&);
+ BloomFilterHash() {
+ }
+
+ virtual ~BloomFilterHash() noexcept;
+ XxHash XXHASH;
+
+ _BloomFilterHash__isset __isset;
+
+ void __set_XXHASH(const XxHash& val);
+
+ bool operator == (const BloomFilterHash & rhs) const
+ {
+ if (__isset.XXHASH != rhs.__isset.XXHASH)
+ return false;
+ else if (__isset.XXHASH && !(XXHASH == rhs.XXHASH))
+ return false;
+ return true;
+ }
+ bool operator != (const BloomFilterHash &rhs) const {
+ return !(*this == rhs);
+ }
+
+ bool operator < (const BloomFilterHash & ) const;
+
+ uint32_t read(::apache::thrift::protocol::TProtocol* iprot);
+ uint32_t write(::apache::thrift::protocol::TProtocol* oprot) const;
+
+ virtual void printTo(std::ostream& out) const;
+};
+
+void swap(BloomFilterHash &a, BloomFilterHash &b);
+
+std::ostream& operator<<(std::ostream& out, const BloomFilterHash& obj);
+
+
+class Uncompressed : public virtual ::apache::thrift::TBase {
+ public:
+
+ Uncompressed(const Uncompressed&);
+ Uncompressed& operator=(const Uncompressed&);
+ Uncompressed() {
+ }
+
+ virtual ~Uncompressed() noexcept;
+
+ bool operator == (const Uncompressed & /* rhs */) const
+ {
+ return true;
+ }
+ bool operator != (const Uncompressed &rhs) const {
+ return !(*this == rhs);
+ }
+
+ bool operator < (const Uncompressed & ) const;
+
+ uint32_t read(::apache::thrift::protocol::TProtocol* iprot);
+ uint32_t write(::apache::thrift::protocol::TProtocol* oprot) const;
+
+ virtual void printTo(std::ostream& out) const;
+};
+
+void swap(Uncompressed &a, Uncompressed &b);
+
+std::ostream& operator<<(std::ostream& out, const Uncompressed& obj);
+
+typedef struct _BloomFilterCompression__isset {
+ _BloomFilterCompression__isset() : UNCOMPRESSED(false) {}
+ bool UNCOMPRESSED :1;
+} _BloomFilterCompression__isset;
+
+class BloomFilterCompression : public virtual ::apache::thrift::TBase {
+ public:
+
+ BloomFilterCompression(const BloomFilterCompression&);
+ BloomFilterCompression& operator=(const BloomFilterCompression&);
+ BloomFilterCompression() {
+ }
+
+ virtual ~BloomFilterCompression() noexcept;
+ Uncompressed UNCOMPRESSED;
+
+ _BloomFilterCompression__isset __isset;
+
+ void __set_UNCOMPRESSED(const Uncompressed& val);
+
+ bool operator == (const BloomFilterCompression & rhs) const
+ {
+ if (__isset.UNCOMPRESSED != rhs.__isset.UNCOMPRESSED)
+ return false;
+ else if (__isset.UNCOMPRESSED && !(UNCOMPRESSED == rhs.UNCOMPRESSED))
+ return false;
+ return true;
+ }
+ bool operator != (const BloomFilterCompression &rhs) const {
+ return !(*this == rhs);
+ }
+
+ bool operator < (const BloomFilterCompression & ) const;
+
+ uint32_t read(::apache::thrift::protocol::TProtocol* iprot);
+ uint32_t write(::apache::thrift::protocol::TProtocol* oprot) const;
+
+ virtual void printTo(std::ostream& out) const;
+};
+
+void swap(BloomFilterCompression &a, BloomFilterCompression &b);
+
+std::ostream& operator<<(std::ostream& out, const BloomFilterCompression& obj);
+
+
+class BloomFilterHeader : public virtual ::apache::thrift::TBase {
+ public:
+
+ BloomFilterHeader(const BloomFilterHeader&);
+ BloomFilterHeader& operator=(const BloomFilterHeader&);
+ BloomFilterHeader() : numBytes(0) {
+ }
+
+ virtual ~BloomFilterHeader() noexcept;
+ int32_t numBytes;
+ BloomFilterAlgorithm algorithm;
+ BloomFilterHash hash;
+ BloomFilterCompression compression;
+
+ void __set_numBytes(const int32_t val);
+
+ void __set_algorithm(const BloomFilterAlgorithm& val);
+
+ void __set_hash(const BloomFilterHash& val);
+
+ void __set_compression(const BloomFilterCompression& val);
+
+ bool operator == (const BloomFilterHeader & rhs) const
+ {
+ if (!(numBytes == rhs.numBytes))
+ return false;
+ if (!(algorithm == rhs.algorithm))
+ return false;
+ if (!(hash == rhs.hash))
+ return false;
+ if (!(compression == rhs.compression))
+ return false;
+ return true;
+ }
+ bool operator != (const BloomFilterHeader &rhs) const {
+ return !(*this == rhs);
+ }
+
+ bool operator < (const BloomFilterHeader & ) const;
+
+ uint32_t read(::apache::thrift::protocol::TProtocol* iprot);
+ uint32_t write(::apache::thrift::protocol::TProtocol* oprot) const;
+
+ virtual void printTo(std::ostream& out) const;
+};
+
+void swap(BloomFilterHeader &a, BloomFilterHeader &b);
+
+std::ostream& operator<<(std::ostream& out, const BloomFilterHeader& obj);
+
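+// PageHeader precedes every page in a column chunk: page type, the
+// uncompressed/compressed sizes, an optional CRC of the page data, and
+// exactly one type-specific header matching `type`.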
+typedef struct _PageHeader__isset {
+ _PageHeader__isset() : crc(false), data_page_header(false), index_page_header(false), dictionary_page_header(false), data_page_header_v2(false) {}
+ bool crc :1;
+ bool data_page_header :1;
+ bool index_page_header :1;
+ bool dictionary_page_header :1;
+ bool data_page_header_v2 :1;
+} _PageHeader__isset;
+
+class PageHeader : public virtual ::apache::thrift::TBase {
+ public:
+
+ PageHeader(const PageHeader&);
+ PageHeader& operator=(const PageHeader&);
+ PageHeader() : type((PageType::type)0), uncompressed_page_size(0), compressed_page_size(0), crc(0) {
+ }
+
+ virtual ~PageHeader() noexcept;
+ PageType::type type;
+ int32_t uncompressed_page_size;
+ int32_t compressed_page_size;
+ int32_t crc;
+ DataPageHeader data_page_header;
+ IndexPageHeader index_page_header;
+ DictionaryPageHeader dictionary_page_header;
+ DataPageHeaderV2 data_page_header_v2;
+
+ _PageHeader__isset __isset;
+
+ void __set_type(const PageType::type val);
+
+ void __set_uncompressed_page_size(const int32_t val);
+
+ void __set_compressed_page_size(const int32_t val);
+
+ void __set_crc(const int32_t val);
+
+ void __set_data_page_header(const DataPageHeader& val);
+
+ void __set_index_page_header(const IndexPageHeader& val);
+
+ void __set_dictionary_page_header(const DictionaryPageHeader& val);
+
+ void __set_data_page_header_v2(const DataPageHeaderV2& val);
+
+ bool operator == (const PageHeader & rhs) const
+ {
+ if (!(type == rhs.type))
+ return false;
+ if (!(uncompressed_page_size == rhs.uncompressed_page_size))
+ return false;
+ if (!(compressed_page_size == rhs.compressed_page_size))
+ return false;
+ if (__isset.crc != rhs.__isset.crc)
+ return false;
+ else if (__isset.crc && !(crc == rhs.crc))
+ return false;
+ if (__isset.data_page_header != rhs.__isset.data_page_header)
+ return false;
+ else if (__isset.data_page_header && !(data_page_header == rhs.data_page_header))
+ return false;
+ if (__isset.index_page_header != rhs.__isset.index_page_header)
+ return false;
+ else if (__isset.index_page_header && !(index_page_header == rhs.index_page_header))
+ return false;
+ if (__isset.dictionary_page_header != rhs.__isset.dictionary_page_header)
+ return false;
+ else if (__isset.dictionary_page_header && !(dictionary_page_header == rhs.dictionary_page_header))
+ return false;
+ if (__isset.data_page_header_v2 != rhs.__isset.data_page_header_v2)
+ return false;
+ else if (__isset.data_page_header_v2 && !(data_page_header_v2 == rhs.data_page_header_v2))
+ return false;
+ return true;
+ }
+ bool operator != (const PageHeader &rhs) const {
+ return !(*this == rhs);
+ }
+
+ bool operator < (const PageHeader & ) const;
+
+ uint32_t read(::apache::thrift::protocol::TProtocol* iprot);
+ uint32_t write(::apache::thrift::protocol::TProtocol* oprot) const;
+
+ virtual void printTo(std::ostream& out) const;
+};
+
+void swap(PageHeader &a, PageHeader &b);
+
+std::ostream& operator<<(std::ostream& out, const PageHeader& obj);
+
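+// Arbitrary application-defined key/value metadata; the value is optional.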
+typedef struct _KeyValue__isset {
+ _KeyValue__isset() : value(false) {}
+ bool value :1;
+} _KeyValue__isset;
+
+class KeyValue : public virtual ::apache::thrift::TBase {
+ public:
+
+ KeyValue(const KeyValue&);
+ KeyValue& operator=(const KeyValue&);
+ KeyValue() : key(), value() {
+ }
+
+ virtual ~KeyValue() noexcept;
+ std::string key;
+ std::string value;
+
+ _KeyValue__isset __isset;
+
+ void __set_key(const std::string& val);
+
+ void __set_value(const std::string& val);
+
+ bool operator == (const KeyValue & rhs) const
+ {
+ if (!(key == rhs.key))
+ return false;
+ if (__isset.value != rhs.__isset.value)
+ return false;
+ else if (__isset.value && !(value == rhs.value))
+ return false;
+ return true;
+ }
+ bool operator != (const KeyValue &rhs) const {
+ return !(*this == rhs);
+ }
+
+ bool operator < (const KeyValue & ) const;
+
+ uint32_t read(::apache::thrift::protocol::TProtocol* iprot);
+ uint32_t write(::apache::thrift::protocol::TProtocol* oprot) const;
+
+ virtual void printTo(std::ostream& out) const;
+};
+
+void swap(KeyValue &a, KeyValue &b);
+
+std::ostream& operator<<(std::ostream& out, const KeyValue& obj);
+
+
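+// Describes one sort key of a row group: the column's index within the row
+// group, the sort direction, and where nulls are placed.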
+class SortingColumn : public virtual ::apache::thrift::TBase {
+ public:
+
+ SortingColumn(const SortingColumn&);
+ SortingColumn& operator=(const SortingColumn&);
+ SortingColumn() : column_idx(0), descending(0), nulls_first(0) {
+ }
+
+ virtual ~SortingColumn() noexcept;
+ int32_t column_idx;
+ bool descending;
+ bool nulls_first;
+
+ void __set_column_idx(const int32_t val);
+
+ void __set_descending(const bool val);
+
+ void __set_nulls_first(const bool val);
+
+ bool operator == (const SortingColumn & rhs) const
+ {
+ if (!(column_idx == rhs.column_idx))
+ return false;
+ if (!(descending == rhs.descending))
+ return false;
+ if (!(nulls_first == rhs.nulls_first))
+ return false;
+ return true;
+ }
+ bool operator != (const SortingColumn &rhs) const {
+ return !(*this == rhs);
+ }
+
+ bool operator < (const SortingColumn & ) const;
+
+ uint32_t read(::apache::thrift::protocol::TProtocol* iprot);
+ uint32_t write(::apache::thrift::protocol::TProtocol* oprot) const;
+
+ virtual void printTo(std::ostream& out) const;
+};
+
+void swap(SortingColumn &a, SortingColumn &b);
+
+std::ostream& operator<<(std::ostream& out, const SortingColumn& obj);
+
+
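+// Counts the pages of a given (page type, encoding) pair in a column chunk,
+// e.g. to tell whether a chunk is entirely dictionary-encoded.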
+class PageEncodingStats : public virtual ::apache::thrift::TBase {
+ public:
+
+ PageEncodingStats(const PageEncodingStats&);
+ PageEncodingStats& operator=(const PageEncodingStats&);
+ PageEncodingStats() : page_type((PageType::type)0), encoding((Encoding::type)0), count(0) {
+ }
+
+ virtual ~PageEncodingStats() noexcept;
+ PageType::type page_type;
+ Encoding::type encoding;
+ int32_t count;
+
+ void __set_page_type(const PageType::type val);
+
+ void __set_encoding(const Encoding::type val);
+
+ void __set_count(const int32_t val);
+
+ bool operator == (const PageEncodingStats & rhs) const
+ {
+ if (!(page_type == rhs.page_type))
+ return false;
+ if (!(encoding == rhs.encoding))
+ return false;
+ if (!(count == rhs.count))
+ return false;
+ return true;
+ }
+ bool operator != (const PageEncodingStats &rhs) const {
+ return !(*this == rhs);
+ }
+
+ bool operator < (const PageEncodingStats & ) const;
+
+ uint32_t read(::apache::thrift::protocol::TProtocol* iprot);
+ uint32_t write(::apache::thrift::protocol::TProtocol* oprot) const;
+
+ virtual void printTo(std::ostream& out) const;
+};
+
+void swap(PageEncodingStats &a, PageEncodingStats &b);
+
+std::ostream& operator<<(std::ostream& out, const PageEncodingStats& obj);
+
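+// Per column-chunk metadata: physical type, encodings and codec used, value
+// count and (un)compressed sizes, page offsets within the file, and optional
+// statistics, per-page encoding stats and a Bloom filter offset.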
+typedef struct _ColumnMetaData__isset {
+ _ColumnMetaData__isset() : key_value_metadata(false), index_page_offset(false), dictionary_page_offset(false), statistics(false), encoding_stats(false), bloom_filter_offset(false) {}
+ bool key_value_metadata :1;
+ bool index_page_offset :1;
+ bool dictionary_page_offset :1;
+ bool statistics :1;
+ bool encoding_stats :1;
+ bool bloom_filter_offset :1;
+} _ColumnMetaData__isset;
+
+class ColumnMetaData : public virtual ::apache::thrift::TBase {
+ public:
+
+ ColumnMetaData(const ColumnMetaData&);
+ ColumnMetaData& operator=(const ColumnMetaData&);
+ ColumnMetaData() : type((Type::type)0), codec((CompressionCodec::type)0), num_values(0), total_uncompressed_size(0), total_compressed_size(0), data_page_offset(0), index_page_offset(0), dictionary_page_offset(0), bloom_filter_offset(0) {
+ }
+
+ virtual ~ColumnMetaData() noexcept;
+ Type::type type;
+ std::vector<Encoding::type> encodings;
+ std::vector<std::string> path_in_schema;
+ CompressionCodec::type codec;
+ int64_t num_values;
+ int64_t total_uncompressed_size;
+ int64_t total_compressed_size;
+ std::vector<KeyValue> key_value_metadata;
+ int64_t data_page_offset;
+ int64_t index_page_offset;
+ int64_t dictionary_page_offset;
+ Statistics statistics;
+ std::vector<PageEncodingStats> encoding_stats;
+ int64_t bloom_filter_offset;
+
+ _ColumnMetaData__isset __isset;
+
+ void __set_type(const Type::type val);
+
+ void __set_encodings(const std::vector<Encoding::type> & val);
+
+ void __set_path_in_schema(const std::vector<std::string> & val);
+
+ void __set_codec(const CompressionCodec::type val);
+
+ void __set_num_values(const int64_t val);
+
+ void __set_total_uncompressed_size(const int64_t val);
+
+ void __set_total_compressed_size(const int64_t val);
+
+ void __set_key_value_metadata(const std::vector<KeyValue> & val);
+
+ void __set_data_page_offset(const int64_t val);
+
+ void __set_index_page_offset(const int64_t val);
+
+ void __set_dictionary_page_offset(const int64_t val);
+
+ void __set_statistics(const Statistics& val);
+
+ void __set_encoding_stats(const std::vector<PageEncodingStats> & val);
+
+ void __set_bloom_filter_offset(const int64_t val);
+
+ bool operator == (const ColumnMetaData & rhs) const
+ {
+ if (!(type == rhs.type))
+ return false;
+ if (!(encodings == rhs.encodings))
+ return false;
+ if (!(path_in_schema == rhs.path_in_schema))
+ return false;
+ if (!(codec == rhs.codec))
+ return false;
+ if (!(num_values == rhs.num_values))
+ return false;
+ if (!(total_uncompressed_size == rhs.total_uncompressed_size))
+ return false;
+ if (!(total_compressed_size == rhs.total_compressed_size))
+ return false;
+ if (__isset.key_value_metadata != rhs.__isset.key_value_metadata)
+ return false;
+ else if (__isset.key_value_metadata && !(key_value_metadata == rhs.key_value_metadata))
+ return false;
+ if (!(data_page_offset == rhs.data_page_offset))
+ return false;
+ if (__isset.index_page_offset != rhs.__isset.index_page_offset)
+ return false;
+ else if (__isset.index_page_offset && !(index_page_offset == rhs.index_page_offset))
+ return false;
+ if (__isset.dictionary_page_offset != rhs.__isset.dictionary_page_offset)
+ return false;
+ else if (__isset.dictionary_page_offset && !(dictionary_page_offset == rhs.dictionary_page_offset))
+ return false;
+ if (__isset.statistics != rhs.__isset.statistics)
+ return false;
+ else if (__isset.statistics && !(statistics == rhs.statistics))
+ return false;
+ if (__isset.encoding_stats != rhs.__isset.encoding_stats)
+ return false;
+ else if (__isset.encoding_stats && !(encoding_stats == rhs.encoding_stats))
+ return false;
+ if (__isset.bloom_filter_offset != rhs.__isset.bloom_filter_offset)
+ return false;
+ else if (__isset.bloom_filter_offset && !(bloom_filter_offset == rhs.bloom_filter_offset))
+ return false;
+ return true;
+ }
+ bool operator != (const ColumnMetaData &rhs) const {
+ return !(*this == rhs);
+ }
+
+ bool operator < (const ColumnMetaData & ) const;
+
+ uint32_t read(::apache::thrift::protocol::TProtocol* iprot);
+ uint32_t write(::apache::thrift::protocol::TProtocol* oprot) const;
+
+ virtual void printTo(std::ostream& out) const;
+};
+
+void swap(ColumnMetaData &a, ColumnMetaData &b);
+
+std::ostream& operator<<(std::ostream& out, const ColumnMetaData& obj);
+
+
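+// Modular-encryption metadata: a column is protected either with the file's
+// footer key (EncryptionWithFooterKey) or with its own key
+// (EncryptionWithColumnKey); ColumnCryptoMetaData is the union of the two.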
+class EncryptionWithFooterKey : public virtual ::apache::thrift::TBase {
+ public:
+
+ EncryptionWithFooterKey(const EncryptionWithFooterKey&);
+ EncryptionWithFooterKey& operator=(const EncryptionWithFooterKey&);
+ EncryptionWithFooterKey() {
+ }
+
+ virtual ~EncryptionWithFooterKey() noexcept;
+
+ bool operator == (const EncryptionWithFooterKey & /* rhs */) const
+ {
+ return true;
+ }
+ bool operator != (const EncryptionWithFooterKey &rhs) const {
+ return !(*this == rhs);
+ }
+
+ bool operator < (const EncryptionWithFooterKey & ) const;
+
+ uint32_t read(::apache::thrift::protocol::TProtocol* iprot);
+ uint32_t write(::apache::thrift::protocol::TProtocol* oprot) const;
+
+ virtual void printTo(std::ostream& out) const;
+};
+
+void swap(EncryptionWithFooterKey &a, EncryptionWithFooterKey &b);
+
+std::ostream& operator<<(std::ostream& out, const EncryptionWithFooterKey& obj);
+
+typedef struct _EncryptionWithColumnKey__isset {
+ _EncryptionWithColumnKey__isset() : key_metadata(false) {}
+ bool key_metadata :1;
+} _EncryptionWithColumnKey__isset;
+
+class EncryptionWithColumnKey : public virtual ::apache::thrift::TBase {
+ public:
+
+ EncryptionWithColumnKey(const EncryptionWithColumnKey&);
+ EncryptionWithColumnKey& operator=(const EncryptionWithColumnKey&);
+ EncryptionWithColumnKey() : key_metadata() {
+ }
+
+ virtual ~EncryptionWithColumnKey() noexcept;
+ std::vector<std::string> path_in_schema;
+ std::string key_metadata;
+
+ _EncryptionWithColumnKey__isset __isset;
+
+ void __set_path_in_schema(const std::vector<std::string> & val);
+
+ void __set_key_metadata(const std::string& val);
+
+ bool operator == (const EncryptionWithColumnKey & rhs) const
+ {
+ if (!(path_in_schema == rhs.path_in_schema))
+ return false;
+ if (__isset.key_metadata != rhs.__isset.key_metadata)
+ return false;
+ else if (__isset.key_metadata && !(key_metadata == rhs.key_metadata))
+ return false;
+ return true;
+ }
+ bool operator != (const EncryptionWithColumnKey &rhs) const {
+ return !(*this == rhs);
+ }
+
+ bool operator < (const EncryptionWithColumnKey & ) const;
+
+ uint32_t read(::apache::thrift::protocol::TProtocol* iprot);
+ uint32_t write(::apache::thrift::protocol::TProtocol* oprot) const;
+
+ virtual void printTo(std::ostream& out) const;
+};
+
+void swap(EncryptionWithColumnKey &a, EncryptionWithColumnKey &b);
+
+std::ostream& operator<<(std::ostream& out, const EncryptionWithColumnKey& obj);
+
+typedef struct _ColumnCryptoMetaData__isset {
+ _ColumnCryptoMetaData__isset() : ENCRYPTION_WITH_FOOTER_KEY(false), ENCRYPTION_WITH_COLUMN_KEY(false) {}
+ bool ENCRYPTION_WITH_FOOTER_KEY :1;
+ bool ENCRYPTION_WITH_COLUMN_KEY :1;
+} _ColumnCryptoMetaData__isset;
+
+class ColumnCryptoMetaData : public virtual ::apache::thrift::TBase {
+ public:
+
+ ColumnCryptoMetaData(const ColumnCryptoMetaData&);
+ ColumnCryptoMetaData& operator=(const ColumnCryptoMetaData&);
+ ColumnCryptoMetaData() {
+ }
+
+ virtual ~ColumnCryptoMetaData() noexcept;
+ EncryptionWithFooterKey ENCRYPTION_WITH_FOOTER_KEY;
+ EncryptionWithColumnKey ENCRYPTION_WITH_COLUMN_KEY;
+
+ _ColumnCryptoMetaData__isset __isset;
+
+ void __set_ENCRYPTION_WITH_FOOTER_KEY(const EncryptionWithFooterKey& val);
+
+ void __set_ENCRYPTION_WITH_COLUMN_KEY(const EncryptionWithColumnKey& val);
+
+ bool operator == (const ColumnCryptoMetaData & rhs) const
+ {
+ if (__isset.ENCRYPTION_WITH_FOOTER_KEY != rhs.__isset.ENCRYPTION_WITH_FOOTER_KEY)
+ return false;
+ else if (__isset.ENCRYPTION_WITH_FOOTER_KEY && !(ENCRYPTION_WITH_FOOTER_KEY == rhs.ENCRYPTION_WITH_FOOTER_KEY))
+ return false;
+ if (__isset.ENCRYPTION_WITH_COLUMN_KEY != rhs.__isset.ENCRYPTION_WITH_COLUMN_KEY)
+ return false;
+ else if (__isset.ENCRYPTION_WITH_COLUMN_KEY && !(ENCRYPTION_WITH_COLUMN_KEY == rhs.ENCRYPTION_WITH_COLUMN_KEY))
+ return false;
+ return true;
+ }
+ bool operator != (const ColumnCryptoMetaData &rhs) const {
+ return !(*this == rhs);
+ }
+
+ bool operator < (const ColumnCryptoMetaData & ) const;
+
+ uint32_t read(::apache::thrift::protocol::TProtocol* iprot);
+ uint32_t write(::apache::thrift::protocol::TProtocol* oprot) const;
+
+ virtual void printTo(std::ostream& out) const;
+};
+
+void swap(ColumnCryptoMetaData &a, ColumnCryptoMetaData &b);
+
+std::ostream& operator<<(std::ostream& out, const ColumnCryptoMetaData& obj);
+
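+// A column chunk within a row group: its file location (file_path is set
+// only when the chunk lives in a separate file), the ColumnMetaData, page
+// index offsets/lengths, and optional encryption metadata.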
+typedef struct _ColumnChunk__isset {
+ _ColumnChunk__isset() : file_path(false), meta_data(false), offset_index_offset(false), offset_index_length(false), column_index_offset(false), column_index_length(false), crypto_metadata(false), encrypted_column_metadata(false) {}
+ bool file_path :1;
+ bool meta_data :1;
+ bool offset_index_offset :1;
+ bool offset_index_length :1;
+ bool column_index_offset :1;
+ bool column_index_length :1;
+ bool crypto_metadata :1;
+ bool encrypted_column_metadata :1;
+} _ColumnChunk__isset;
+
+class ColumnChunk : public virtual ::apache::thrift::TBase {
+ public:
+
+ ColumnChunk(const ColumnChunk&);
+ ColumnChunk& operator=(const ColumnChunk&);
+ ColumnChunk() : file_path(), file_offset(0), offset_index_offset(0), offset_index_length(0), column_index_offset(0), column_index_length(0), encrypted_column_metadata() {
+ }
+
+ virtual ~ColumnChunk() noexcept;
+ std::string file_path;
+ int64_t file_offset;
+ ColumnMetaData meta_data;
+ int64_t offset_index_offset;
+ int32_t offset_index_length;
+ int64_t column_index_offset;
+ int32_t column_index_length;
+ ColumnCryptoMetaData crypto_metadata;
+ std::string encrypted_column_metadata;
+
+ _ColumnChunk__isset __isset;
+
+ void __set_file_path(const std::string& val);
+
+ void __set_file_offset(const int64_t val);
+
+ void __set_meta_data(const ColumnMetaData& val);
+
+ void __set_offset_index_offset(const int64_t val);
+
+ void __set_offset_index_length(const int32_t val);
+
+ void __set_column_index_offset(const int64_t val);
+
+ void __set_column_index_length(const int32_t val);
+
+ void __set_crypto_metadata(const ColumnCryptoMetaData& val);
+
+ void __set_encrypted_column_metadata(const std::string& val);
+
+ bool operator == (const ColumnChunk & rhs) const
+ {
+ if (__isset.file_path != rhs.__isset.file_path)
+ return false;
+ else if (__isset.file_path && !(file_path == rhs.file_path))
+ return false;
+ if (!(file_offset == rhs.file_offset))
+ return false;
+ if (__isset.meta_data != rhs.__isset.meta_data)
+ return false;
+ else if (__isset.meta_data && !(meta_data == rhs.meta_data))
+ return false;
+ if (__isset.offset_index_offset != rhs.__isset.offset_index_offset)
+ return false;
+ else if (__isset.offset_index_offset && !(offset_index_offset == rhs.offset_index_offset))
+ return false;
+ if (__isset.offset_index_length != rhs.__isset.offset_index_length)
+ return false;
+ else if (__isset.offset_index_length && !(offset_index_length == rhs.offset_index_length))
+ return false;
+ if (__isset.column_index_offset != rhs.__isset.column_index_offset)
+ return false;
+ else if (__isset.column_index_offset && !(column_index_offset == rhs.column_index_offset))
+ return false;
+ if (__isset.column_index_length != rhs.__isset.column_index_length)
+ return false;
+ else if (__isset.column_index_length && !(column_index_length == rhs.column_index_length))
+ return false;
+ if (__isset.crypto_metadata != rhs.__isset.crypto_metadata)
+ return false;
+ else if (__isset.crypto_metadata && !(crypto_metadata == rhs.crypto_metadata))
+ return false;
+ if (__isset.encrypted_column_metadata != rhs.__isset.encrypted_column_metadata)
+ return false;
+ else if (__isset.encrypted_column_metadata && !(encrypted_column_metadata == rhs.encrypted_column_metadata))
+ return false;
+ return true;
+ }
+ bool operator != (const ColumnChunk &rhs) const {
+ return !(*this == rhs);
+ }
+
+ bool operator < (const ColumnChunk & ) const;
+
+ uint32_t read(::apache::thrift::protocol::TProtocol* iprot);
+ uint32_t write(::apache::thrift::protocol::TProtocol* oprot) const;
+
+ virtual void printTo(std::ostream& out) const;
+};
+
+void swap(ColumnChunk &a, ColumnChunk &b);
+
+std::ostream& operator<<(std::ostream& out, const ColumnChunk& obj);
+
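+// A row group: one ColumnChunk per leaf column, aggregate byte and row
+// counts, an optional sort description, and the group's ordinal position
+// in the file.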
+typedef struct _RowGroup__isset {
+ _RowGroup__isset() : sorting_columns(false), file_offset(false), total_compressed_size(false), ordinal(false) {}
+ bool sorting_columns :1;
+ bool file_offset :1;
+ bool total_compressed_size :1;
+ bool ordinal :1;
+} _RowGroup__isset;
+
+class RowGroup : public virtual ::apache::thrift::TBase {
+ public:
+
+ RowGroup(const RowGroup&);
+ RowGroup& operator=(const RowGroup&);
+ RowGroup() : total_byte_size(0), num_rows(0), file_offset(0), total_compressed_size(0), ordinal(0) {
+ }
+
+ virtual ~RowGroup() noexcept;
+ std::vector<ColumnChunk> columns;
+ int64_t total_byte_size;
+ int64_t num_rows;
+ std::vector<SortingColumn> sorting_columns;
+ int64_t file_offset;
+ int64_t total_compressed_size;
+ int16_t ordinal;
+
+ _RowGroup__isset __isset;
+
+ void __set_columns(const std::vector<ColumnChunk> & val);
+
+ void __set_total_byte_size(const int64_t val);
+
+ void __set_num_rows(const int64_t val);
+
+ void __set_sorting_columns(const std::vector<SortingColumn> & val);
+
+ void __set_file_offset(const int64_t val);
+
+ void __set_total_compressed_size(const int64_t val);
+
+ void __set_ordinal(const int16_t val);
+
+ bool operator == (const RowGroup & rhs) const
+ {
+ if (!(columns == rhs.columns))
+ return false;
+ if (!(total_byte_size == rhs.total_byte_size))
+ return false;
+ if (!(num_rows == rhs.num_rows))
+ return false;
+ if (__isset.sorting_columns != rhs.__isset.sorting_columns)
+ return false;
+ else if (__isset.sorting_columns && !(sorting_columns == rhs.sorting_columns))
+ return false;
+ if (__isset.file_offset != rhs.__isset.file_offset)
+ return false;
+ else if (__isset.file_offset && !(file_offset == rhs.file_offset))
+ return false;
+ if (__isset.total_compressed_size != rhs.__isset.total_compressed_size)
+ return false;
+ else if (__isset.total_compressed_size && !(total_compressed_size == rhs.total_compressed_size))
+ return false;
+ if (__isset.ordinal != rhs.__isset.ordinal)
+ return false;
+ else if (__isset.ordinal && !(ordinal == rhs.ordinal))
+ return false;
+ return true;
+ }
+ bool operator != (const RowGroup &rhs) const {
+ return !(*this == rhs);
+ }
+
+ bool operator < (const RowGroup & ) const;
+
+ uint32_t read(::apache::thrift::protocol::TProtocol* iprot);
+ uint32_t write(::apache::thrift::protocol::TProtocol* oprot) const;
+
+ virtual void printTo(std::ostream& out) const;
+};
+
+void swap(RowGroup &a, RowGroup &b);
+
+std::ostream& operator<<(std::ostream& out, const RowGroup& obj);
+
+
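+// ColumnOrder is a union describing the ordering used for min/max
+// statistics; its only defined alternative, TypeDefinedOrder, means values
+// are compared according to the column's type.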
+class TypeDefinedOrder : public virtual ::apache::thrift::TBase {
+ public:
+
+ TypeDefinedOrder(const TypeDefinedOrder&);
+ TypeDefinedOrder& operator=(const TypeDefinedOrder&);
+ TypeDefinedOrder() {
+ }
+
+ virtual ~TypeDefinedOrder() noexcept;
+
+ bool operator == (const TypeDefinedOrder & /* rhs */) const
+ {
+ return true;
+ }
+ bool operator != (const TypeDefinedOrder &rhs) const {
+ return !(*this == rhs);
+ }
+
+ bool operator < (const TypeDefinedOrder & ) const;
+
+ uint32_t read(::apache::thrift::protocol::TProtocol* iprot);
+ uint32_t write(::apache::thrift::protocol::TProtocol* oprot) const;
+
+ virtual void printTo(std::ostream& out) const;
+};
+
+void swap(TypeDefinedOrder &a, TypeDefinedOrder &b);
+
+std::ostream& operator<<(std::ostream& out, const TypeDefinedOrder& obj);
+
+typedef struct _ColumnOrder__isset {
+ _ColumnOrder__isset() : TYPE_ORDER(false) {}
+ bool TYPE_ORDER :1;
+} _ColumnOrder__isset;
+
+class ColumnOrder : public virtual ::apache::thrift::TBase {
+ public:
+
+ ColumnOrder(const ColumnOrder&);
+ ColumnOrder& operator=(const ColumnOrder&);
+ ColumnOrder() {
+ }
+
+ virtual ~ColumnOrder() noexcept;
+ TypeDefinedOrder TYPE_ORDER;
+
+ _ColumnOrder__isset __isset;
+
+ void __set_TYPE_ORDER(const TypeDefinedOrder& val);
+
+ bool operator == (const ColumnOrder & rhs) const
+ {
+ if (__isset.TYPE_ORDER != rhs.__isset.TYPE_ORDER)
+ return false;
+ else if (__isset.TYPE_ORDER && !(TYPE_ORDER == rhs.TYPE_ORDER))
+ return false;
+ return true;
+ }
+ bool operator != (const ColumnOrder &rhs) const {
+ return !(*this == rhs);
+ }
+
+ bool operator < (const ColumnOrder & ) const;
+
+ uint32_t read(::apache::thrift::protocol::TProtocol* iprot);
+ uint32_t write(::apache::thrift::protocol::TProtocol* oprot) const;
+
+ virtual void printTo(std::ostream& out) const;
+};
+
+void swap(ColumnOrder &a, ColumnOrder &b);
+
+std::ostream& operator<<(std::ostream& out, const ColumnOrder& obj);
+
+
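+// Page-index structures: OffsetIndex lists each page's location in the file
+// (offset, compressed size, index of its first row), while ColumnIndex keeps
+// per-page null flags, min/max bounds and optional null counts so readers
+// can skip pages without decoding page headers.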
+class PageLocation : public virtual ::apache::thrift::TBase {
+ public:
+
+ PageLocation(const PageLocation&);
+ PageLocation& operator=(const PageLocation&);
+ PageLocation() : offset(0), compressed_page_size(0), first_row_index(0) {
+ }
+
+ virtual ~PageLocation() noexcept;
+ int64_t offset;
+ int32_t compressed_page_size;
+ int64_t first_row_index;
+
+ void __set_offset(const int64_t val);
+
+ void __set_compressed_page_size(const int32_t val);
+
+ void __set_first_row_index(const int64_t val);
+
+ bool operator == (const PageLocation & rhs) const
+ {
+ if (!(offset == rhs.offset))
+ return false;
+ if (!(compressed_page_size == rhs.compressed_page_size))
+ return false;
+ if (!(first_row_index == rhs.first_row_index))
+ return false;
+ return true;
+ }
+ bool operator != (const PageLocation &rhs) const {
+ return !(*this == rhs);
+ }
+
+ bool operator < (const PageLocation & ) const;
+
+ uint32_t read(::apache::thrift::protocol::TProtocol* iprot);
+ uint32_t write(::apache::thrift::protocol::TProtocol* oprot) const;
+
+ virtual void printTo(std::ostream& out) const;
+};
+
+void swap(PageLocation &a, PageLocation &b);
+
+std::ostream& operator<<(std::ostream& out, const PageLocation& obj);
+
+
+class OffsetIndex : public virtual ::apache::thrift::TBase {
+ public:
+
+ OffsetIndex(const OffsetIndex&);
+ OffsetIndex& operator=(const OffsetIndex&);
+ OffsetIndex() {
+ }
+
+ virtual ~OffsetIndex() noexcept;
+ std::vector<PageLocation> page_locations;
+
+ void __set_page_locations(const std::vector<PageLocation> & val);
+
+ bool operator == (const OffsetIndex & rhs) const
+ {
+ if (!(page_locations == rhs.page_locations))
+ return false;
+ return true;
+ }
+ bool operator != (const OffsetIndex &rhs) const {
+ return !(*this == rhs);
+ }
+
+ bool operator < (const OffsetIndex & ) const;
+
+ uint32_t read(::apache::thrift::protocol::TProtocol* iprot);
+ uint32_t write(::apache::thrift::protocol::TProtocol* oprot) const;
+
+ virtual void printTo(std::ostream& out) const;
+};
+
+void swap(OffsetIndex &a, OffsetIndex &b);
+
+std::ostream& operator<<(std::ostream& out, const OffsetIndex& obj);
+
+typedef struct _ColumnIndex__isset {
+ _ColumnIndex__isset() : null_counts(false) {}
+ bool null_counts :1;
+} _ColumnIndex__isset;
+
+class ColumnIndex : public virtual ::apache::thrift::TBase {
+ public:
+
+ ColumnIndex(const ColumnIndex&);
+ ColumnIndex& operator=(const ColumnIndex&);
+ ColumnIndex() : boundary_order((BoundaryOrder::type)0) {
+ }
+
+ virtual ~ColumnIndex() noexcept;
+ std::vector<bool> null_pages;
+ std::vector<std::string> min_values;
+ std::vector<std::string> max_values;
+ BoundaryOrder::type boundary_order;
+ std::vector<int64_t> null_counts;
+
+ _ColumnIndex__isset __isset;
+
+ void __set_null_pages(const std::vector<bool> & val);
+
+ void __set_min_values(const std::vector<std::string> & val);
+
+ void __set_max_values(const std::vector<std::string> & val);
+
+ void __set_boundary_order(const BoundaryOrder::type val);
+
+ void __set_null_counts(const std::vector<int64_t> & val);
+
+ bool operator == (const ColumnIndex & rhs) const
+ {
+ if (!(null_pages == rhs.null_pages))
+ return false;
+ if (!(min_values == rhs.min_values))
+ return false;
+ if (!(max_values == rhs.max_values))
+ return false;
+ if (!(boundary_order == rhs.boundary_order))
+ return false;
+ if (__isset.null_counts != rhs.__isset.null_counts)
+ return false;
+ else if (__isset.null_counts && !(null_counts == rhs.null_counts))
+ return false;
+ return true;
+ }
+ bool operator != (const ColumnIndex &rhs) const {
+ return !(*this == rhs);
+ }
+
+ bool operator < (const ColumnIndex & ) const;
+
+ uint32_t read(::apache::thrift::protocol::TProtocol* iprot);
+ uint32_t write(::apache::thrift::protocol::TProtocol* oprot) const;
+
+ virtual void printTo(std::ostream& out) const;
+};
+
+void swap(ColumnIndex &a, ColumnIndex &b);
+
+std::ostream& operator<<(std::ostream& out, const ColumnIndex& obj);
+
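+// The two encryption algorithms defined by the format: AES_GCM_V1 encrypts
+// all modules with AES-GCM, while AES_GCM_CTR_V1 uses GCM for metadata and
+// the cheaper CTR mode for page data; both carry optional AAD-prefix settings.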
+typedef struct _AesGcmV1__isset {
+ _AesGcmV1__isset() : aad_prefix(false), aad_file_unique(false), supply_aad_prefix(false) {}
+ bool aad_prefix :1;
+ bool aad_file_unique :1;
+ bool supply_aad_prefix :1;
+} _AesGcmV1__isset;
+
+class AesGcmV1 : public virtual ::apache::thrift::TBase {
+ public:
+
+ AesGcmV1(const AesGcmV1&);
+ AesGcmV1& operator=(const AesGcmV1&);
+ AesGcmV1() : aad_prefix(), aad_file_unique(), supply_aad_prefix(0) {
+ }
+
+ virtual ~AesGcmV1() noexcept;
+ std::string aad_prefix;
+ std::string aad_file_unique;
+ bool supply_aad_prefix;
+
+ _AesGcmV1__isset __isset;
+
+ void __set_aad_prefix(const std::string& val);
+
+ void __set_aad_file_unique(const std::string& val);
+
+ void __set_supply_aad_prefix(const bool val);
+
+ bool operator == (const AesGcmV1 & rhs) const
+ {
+ if (__isset.aad_prefix != rhs.__isset.aad_prefix)
+ return false;
+ else if (__isset.aad_prefix && !(aad_prefix == rhs.aad_prefix))
+ return false;
+ if (__isset.aad_file_unique != rhs.__isset.aad_file_unique)
+ return false;
+ else if (__isset.aad_file_unique && !(aad_file_unique == rhs.aad_file_unique))
+ return false;
+ if (__isset.supply_aad_prefix != rhs.__isset.supply_aad_prefix)
+ return false;
+ else if (__isset.supply_aad_prefix && !(supply_aad_prefix == rhs.supply_aad_prefix))
+ return false;
+ return true;
+ }
+ bool operator != (const AesGcmV1 &rhs) const {
+ return !(*this == rhs);
+ }
+
+ bool operator < (const AesGcmV1 & ) const;
+
+ uint32_t read(::apache::thrift::protocol::TProtocol* iprot);
+ uint32_t write(::apache::thrift::protocol::TProtocol* oprot) const;
+
+ virtual void printTo(std::ostream& out) const;
+};
+
+void swap(AesGcmV1 &a, AesGcmV1 &b);
+
+std::ostream& operator<<(std::ostream& out, const AesGcmV1& obj);
+
+typedef struct _AesGcmCtrV1__isset {
+ _AesGcmCtrV1__isset() : aad_prefix(false), aad_file_unique(false), supply_aad_prefix(false) {}
+ bool aad_prefix :1;
+ bool aad_file_unique :1;
+ bool supply_aad_prefix :1;
+} _AesGcmCtrV1__isset;
+
+class AesGcmCtrV1 : public virtual ::apache::thrift::TBase {
+ public:
+
+ AesGcmCtrV1(const AesGcmCtrV1&);
+ AesGcmCtrV1& operator=(const AesGcmCtrV1&);
+ AesGcmCtrV1() : aad_prefix(), aad_file_unique(), supply_aad_prefix(0) {
+ }
+
+ virtual ~AesGcmCtrV1() noexcept;
+ std::string aad_prefix;
+ std::string aad_file_unique;
+ bool supply_aad_prefix;
+
+ _AesGcmCtrV1__isset __isset;
+
+ void __set_aad_prefix(const std::string& val);
+
+ void __set_aad_file_unique(const std::string& val);
+
+ void __set_supply_aad_prefix(const bool val);
+
+ bool operator == (const AesGcmCtrV1 & rhs) const
+ {
+ if (__isset.aad_prefix != rhs.__isset.aad_prefix)
+ return false;
+ else if (__isset.aad_prefix && !(aad_prefix == rhs.aad_prefix))
+ return false;
+ if (__isset.aad_file_unique != rhs.__isset.aad_file_unique)
+ return false;
+ else if (__isset.aad_file_unique && !(aad_file_unique == rhs.aad_file_unique))
+ return false;
+ if (__isset.supply_aad_prefix != rhs.__isset.supply_aad_prefix)
+ return false;
+ else if (__isset.supply_aad_prefix && !(supply_aad_prefix == rhs.supply_aad_prefix))
+ return false;
+ return true;
+ }
+ bool operator != (const AesGcmCtrV1 &rhs) const {
+ return !(*this == rhs);
+ }
+
+ bool operator < (const AesGcmCtrV1 & ) const;
+
+ uint32_t read(::apache::thrift::protocol::TProtocol* iprot);
+ uint32_t write(::apache::thrift::protocol::TProtocol* oprot) const;
+
+ virtual void printTo(std::ostream& out) const;
+};
+
+void swap(AesGcmCtrV1 &a, AesGcmCtrV1 &b);
+
+std::ostream& operator<<(std::ostream& out, const AesGcmCtrV1& obj);
+
+typedef struct _EncryptionAlgorithm__isset {
+ _EncryptionAlgorithm__isset() : AES_GCM_V1(false), AES_GCM_CTR_V1(false) {}
+ bool AES_GCM_V1 :1;
+ bool AES_GCM_CTR_V1 :1;
+} _EncryptionAlgorithm__isset;
+
+class EncryptionAlgorithm : public virtual ::apache::thrift::TBase {
+ public:
+
+ EncryptionAlgorithm(const EncryptionAlgorithm&);
+ EncryptionAlgorithm& operator=(const EncryptionAlgorithm&);
+ EncryptionAlgorithm() {
+ }
+
+ virtual ~EncryptionAlgorithm() noexcept;
+ AesGcmV1 AES_GCM_V1;
+ AesGcmCtrV1 AES_GCM_CTR_V1;
+
+ _EncryptionAlgorithm__isset __isset;
+
+ void __set_AES_GCM_V1(const AesGcmV1& val);
+
+ void __set_AES_GCM_CTR_V1(const AesGcmCtrV1& val);
+
+ bool operator == (const EncryptionAlgorithm & rhs) const
+ {
+ if (__isset.AES_GCM_V1 != rhs.__isset.AES_GCM_V1)
+ return false;
+ else if (__isset.AES_GCM_V1 && !(AES_GCM_V1 == rhs.AES_GCM_V1))
+ return false;
+ if (__isset.AES_GCM_CTR_V1 != rhs.__isset.AES_GCM_CTR_V1)
+ return false;
+ else if (__isset.AES_GCM_CTR_V1 && !(AES_GCM_CTR_V1 == rhs.AES_GCM_CTR_V1))
+ return false;
+ return true;
+ }
+ bool operator != (const EncryptionAlgorithm &rhs) const {
+ return !(*this == rhs);
+ }
+
+ bool operator < (const EncryptionAlgorithm & ) const;
+
+ uint32_t read(::apache::thrift::protocol::TProtocol* iprot);
+ uint32_t write(::apache::thrift::protocol::TProtocol* oprot) const;
+
+ virtual void printTo(std::ostream& out) const;
+};
+
+void swap(EncryptionAlgorithm &a, EncryptionAlgorithm &b);
+
+std::ostream& operator<<(std::ostream& out, const EncryptionAlgorithm& obj);
+
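+// FileMetaData is the Parquet footer: format version, the flattened schema,
+// total row count, all row groups, optional key/value metadata, the writer
+// identification string, per-column statistics orders and, for encrypted
+// files with plaintext footers, the algorithm and footer-signing key
+// metadata. Readers locate it via the trailing "PAR1" magic and the 4-byte
+// little-endian footer length at the end of the file.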
+typedef struct _FileMetaData__isset {
+ _FileMetaData__isset() : key_value_metadata(false), created_by(false), column_orders(false), encryption_algorithm(false), footer_signing_key_metadata(false) {}
+ bool key_value_metadata :1;
+ bool created_by :1;
+ bool column_orders :1;
+ bool encryption_algorithm :1;
+ bool footer_signing_key_metadata :1;
+} _FileMetaData__isset;
+
+class FileMetaData : public virtual ::apache::thrift::TBase {
+ public:
+
+ FileMetaData(const FileMetaData&);
+ FileMetaData& operator=(const FileMetaData&);
+ FileMetaData() : version(0), num_rows(0), created_by(), footer_signing_key_metadata() {
+ }
+
+ virtual ~FileMetaData() noexcept;
+ int32_t version;
+ std::vector<SchemaElement> schema;
+ int64_t num_rows;
+ std::vector<RowGroup> row_groups;
+ std::vector<KeyValue> key_value_metadata;
+ std::string created_by;
+ std::vector<ColumnOrder> column_orders;
+ EncryptionAlgorithm encryption_algorithm;
+ std::string footer_signing_key_metadata;
+
+ _FileMetaData__isset __isset;
+
+ void __set_version(const int32_t val);
+
+ void __set_schema(const std::vector<SchemaElement> & val);
+
+ void __set_num_rows(const int64_t val);
+
+ void __set_row_groups(const std::vector<RowGroup> & val);
+
+ void __set_key_value_metadata(const std::vector<KeyValue> & val);
+
+ void __set_created_by(const std::string& val);
+
+ void __set_column_orders(const std::vector<ColumnOrder> & val);
+
+ void __set_encryption_algorithm(const EncryptionAlgorithm& val);
+
+ void __set_footer_signing_key_metadata(const std::string& val);
+
+ bool operator == (const FileMetaData & rhs) const
+ {
+ if (!(version == rhs.version))
+ return false;
+ if (!(schema == rhs.schema))
+ return false;
+ if (!(num_rows == rhs.num_rows))
+ return false;
+ if (!(row_groups == rhs.row_groups))
+ return false;
+ if (__isset.key_value_metadata != rhs.__isset.key_value_metadata)
+ return false;
+ else if (__isset.key_value_metadata && !(key_value_metadata == rhs.key_value_metadata))
+ return false;
+ if (__isset.created_by != rhs.__isset.created_by)
+ return false;
+ else if (__isset.created_by && !(created_by == rhs.created_by))
+ return false;
+ if (__isset.column_orders != rhs.__isset.column_orders)
+ return false;
+ else if (__isset.column_orders && !(column_orders == rhs.column_orders))
+ return false;
+ if (__isset.encryption_algorithm != rhs.__isset.encryption_algorithm)
+ return false;
+ else if (__isset.encryption_algorithm && !(encryption_algorithm == rhs.encryption_algorithm))
+ return false;
+ if (__isset.footer_signing_key_metadata != rhs.__isset.footer_signing_key_metadata)
+ return false;
+ else if (__isset.footer_signing_key_metadata && !(footer_signing_key_metadata == rhs.footer_signing_key_metadata))
+ return false;
+ return true;
+ }
+ bool operator != (const FileMetaData &rhs) const {
+ return !(*this == rhs);
+ }
+
+ bool operator < (const FileMetaData & ) const;
+
+ uint32_t read(::apache::thrift::protocol::TProtocol* iprot);
+ uint32_t write(::apache::thrift::protocol::TProtocol* oprot) const;
+
+ virtual void printTo(std::ostream& out) const;
+};
+
+void swap(FileMetaData &a, FileMetaData &b);
+
+std::ostream& operator<<(std::ostream& out, const FileMetaData& obj);
+
+typedef struct _FileCryptoMetaData__isset {
+ _FileCryptoMetaData__isset() : key_metadata(false) {}
+ bool key_metadata :1;
+} _FileCryptoMetaData__isset;
+
+class FileCryptoMetaData : public virtual ::apache::thrift::TBase {
+ public:
+
+ FileCryptoMetaData(const FileCryptoMetaData&);
+ FileCryptoMetaData& operator=(const FileCryptoMetaData&);
+ FileCryptoMetaData() : key_metadata() {
+ }
+
+ virtual ~FileCryptoMetaData() noexcept;
+ EncryptionAlgorithm encryption_algorithm;
+ std::string key_metadata;
+
+ _FileCryptoMetaData__isset __isset;
+
+ void __set_encryption_algorithm(const EncryptionAlgorithm& val);
+
+ void __set_key_metadata(const std::string& val);
+
+ bool operator == (const FileCryptoMetaData & rhs) const
+ {
+ if (!(encryption_algorithm == rhs.encryption_algorithm))
+ return false;
+ if (__isset.key_metadata != rhs.__isset.key_metadata)
+ return false;
+ else if (__isset.key_metadata && !(key_metadata == rhs.key_metadata))
+ return false;
+ return true;
+ }
+ bool operator != (const FileCryptoMetaData &rhs) const {
+ return !(*this == rhs);
+ }
+
+ bool operator < (const FileCryptoMetaData & ) const;
+
+ uint32_t read(::apache::thrift::protocol::TProtocol* iprot);
+ uint32_t write(::apache::thrift::protocol::TProtocol* oprot) const;
+
+ virtual void printTo(std::ostream& out) const;
+};
+
+void swap(FileCryptoMetaData &a, FileCryptoMetaData &b);
+
+std::ostream& operator<<(std::ostream& out, const FileCryptoMetaData& obj);
+
+}} // namespace
+
+#endif
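A minimal usage sketch for the generated types above: it assumes the parquet::format namespace and include path used by Arrow's generated Parquet Thrift code, plus the standard Apache Thrift TMemoryBuffer transport and compact protocol (with std::shared_ptr, as in Thrift >= 0.11); the field values are illustrative only.

#include <cassert>
#include <memory>

#include <thrift/protocol/TCompactProtocol.h>
#include <thrift/transport/TBufferTransports.h>

#include "generated/parquet_types.h"  // include path is an assumption

int main() {
  using apache::thrift::protocol::TCompactProtocol;
  using apache::thrift::transport::TMemoryBuffer;

  // __set_* assigns the field and flips the matching __isset bit; operator==
  // compares the __isset bits first, so unset optional fields compare equal
  // regardless of their default-constructed contents.
  parquet::format::AesGcmCtrV1 aes;
  aes.__set_aad_prefix("example-prefix");  // illustrative value
  aes.__set_supply_aad_prefix(true);

  parquet::format::EncryptionAlgorithm algo;
  algo.__set_AES_GCM_CTR_V1(aes);

  // Round-trip through the compact protocol (the encoding Parquet uses for
  // its footer): write() serializes into the transport, read() parses back.
  auto buffer = std::make_shared<TMemoryBuffer>();
  TCompactProtocol proto(buffer);
  algo.write(&proto);

  parquet::format::EncryptionAlgorithm decoded;
  decoded.read(&proto);
  assert(decoded == algo);
  return 0;
}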
diff --git a/contrib/libs/apache/arrow/cpp/src/parquet/README b/contrib/libs/apache/arrow/cpp/src/parquet/README
index 326bd7253f4..fc16a46ca08 100644
--- a/contrib/libs/apache/arrow/cpp/src/parquet/README
+++ b/contrib/libs/apache/arrow/cpp/src/parquet/README
@@ -1,10 +1,10 @@
-The CompatibilityTest of bloom_filter-test.cc is used to test the cross-compatibility
-of Bloom filters between parquet-mr and parquet-cpp. It reads the Bloom filter binary
-generated by the Bloom filter class in the parquet-mr project and tests whether the
-values inserted earlier are reported as present by the filter.
-
-The Bloom filter binary is generated in three steps by parquet-mr:
-Step 1: Construct a Bloom filter with 1024 bytes of bitset.
-Step 2: Insert hashes of the "hello", "parquet", "bloom", "filter" strings into the
-Bloom filter by calling the hash and insert APIs.
-Step 3: Call the writeTo API to write the filter to a file.
+The CompatibilityTest of bloom_filter-test.cc is used to test the cross-compatibility
+of Bloom filters between parquet-mr and parquet-cpp. It reads the Bloom filter binary
+generated by the Bloom filter class in the parquet-mr project and tests whether the
+values inserted earlier are reported as present by the filter.
+
+The Bloom filter binary is generated in three steps by parquet-mr:
+Step 1: Construct a Bloom filter with 1024 bytes of bitset.
+Step 2: Insert hashes of the "hello", "parquet", "bloom", "filter" strings into the
+Bloom filter by calling the hash and insert APIs.
+Step 3: Call the writeTo API to write the filter to a file.
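The parquet-cpp side of this round-trip can be sketched as follows. This is a sketch only, assuming parquet-cpp's BlockSplitBloomFilter API (Init/Hash/InsertHash/WriteTo) and Arrow's FileOutputStream; it is not the actual code of bloom_filter-test.cc.

#include <string>

#include "arrow/io/file.h"
#include "parquet/bloom_filter.h"
#include "parquet/types.h"

// Mirrors the three parquet-mr steps described above.
void WriteCompatibilityFilter(const std::string& path) {
  parquet::BlockSplitBloomFilter filter;
  filter.Init(1024);  // Step 1: 1024 bytes of bitset.

  // Step 2: hash and insert the four test strings.
  for (const std::string& word : {"hello", "parquet", "bloom", "filter"}) {
    parquet::ByteArray value(static_cast<uint32_t>(word.size()),
                             reinterpret_cast<const uint8_t*>(word.data()));
    filter.InsertHash(filter.Hash(&value));
  }

  // Step 3: write the serialized filter out to a file.
  auto sink = arrow::io::FileOutputStream::Open(path).ValueOrDie();
  filter.WriteTo(sink.get());
}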
diff --git a/contrib/libs/apache/arrow/cpp/src/parquet/arrow/path_internal.cc b/contrib/libs/apache/arrow/cpp/src/parquet/arrow/path_internal.cc
index 62cbee22a18..a51773c44d3 100644
--- a/contrib/libs/apache/arrow/cpp/src/parquet/arrow/path_internal.cc
+++ b/contrib/libs/apache/arrow/cpp/src/parquet/arrow/path_internal.cc
@@ -1,900 +1,900 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-// Overview.
-//
-// The strategy used for this code for repetition/definition
-// is to dissect the top level array into a list of paths
-// from the top level array to the final primitive (possibly
-// dictionary encoded array). It then evaluates each one of
-// those paths to produce results for the callback iteratively.
-//
-// This approach was taken to reduce the aggregate memory required if we were
-// to build all def/rep levels in parallel as part of a tree traversal. It
-// also allows for straightforward parallelization at the path level if that is
-// desired in the future.
-//
-// The main downside to this approach is it duplicates effort for nodes
-// that share common ancestors. This can be mitigated to some degree
-// by adding in optimizations that detect leaf arrays that share
-// the same common list ancestor and reuse the repetition levels
-// from the first leaf encountered (only definition levels greater than
-// the list ancestor need to be re-evaluated). This is left for future
-// work.
-//
-// Algorithm.
-//
-// As mentioned above this code dissects arrays into constituent parts:
-// nullability data, and list offset data. It tries to optimize for
-// some special cases, where it is known ahead of time that a step
-// can be skipped (e.g. a nullable array happens to have all of its
-// values present) or batch filled (a nullable array has all null values).
-// One further optimization that is not implemented but could be done
-// in the future is special handling for nested list arrays that
-// have some intermediate data which indicates the final array contains only
-// nulls.
-//
-// In general, the algorithm attempts to batch work at each node as much
-// as possible. For nullability nodes this means finding runs of null
-// values and batch filling those interspersed with finding runs of non-null values
-// to process in batch at the next column.
-//
-// Similarly, list runs of empty lists are all processed in one batch
-// followed by either:
-// - A single list entry for non-terminal lists (i.e. the upper part of a nested list)
-// - Runs of non-empty lists for the terminal list (i.e. the lowest part of a nested
-// list).
-//
-// This makes use of the following observations.
-// 1. Null values at any node on the path are terminal (repetition and definition
-// level can be set directly when a Null value is encountered).
-// 2. Empty lists share this eager termination property with Null values.
-// 3. In order to keep repetition/definition level populated the algorithm is lazy
-// in assigning repetition levels. The algorithm tracks whether it is currently
-// in the middle of a list by comparing the lengths of repetition/definition levels.
-//    If it is currently in the middle of a list the number of repetition levels
-// populated will be greater than definition levels (the start of a List requires
-// adding the first element). If there are equal numbers of definition and repetition
-// levels populated this indicates a list is waiting to be started and the next list
-// encountered will have its repetition level signify the beginning of the list.
-//
-// Other implementation notes.
-//
-// This code hasn't been benchmarked (or assembly analyzed) but applies the
-// following optimizations (yes, premature optimization is the root of all evil).
-// - This code does not use recursion, instead it constructs its own stack and manages
-// updating elements accordingly.
-// - It tries to avoid using Status for common return states.
-// - Avoids virtual dispatch in favor of if/else statements on a set of well known
-// classes.
-
-#include "parquet/arrow/path_internal.h"
-
-#include <atomic>
-#include <cstddef>
-#include <memory>
-#include <type_traits>
-#include <utility>
-#include <vector>
-
-#include "arrow/array.h"
-#include "arrow/buffer.h"
-#include "arrow/buffer_builder.h"
-#include "arrow/extension_type.h"
-#include "arrow/memory_pool.h"
-#include "arrow/type.h"
-#include "arrow/type_traits.h"
-#include "arrow/util/bit_run_reader.h"
-#include "arrow/util/bit_util.h"
-#include "arrow/util/bitmap_visit.h"
-#include "arrow/util/logging.h"
-#include "arrow/util/macros.h"
-#include "arrow/util/make_unique.h"
-#include "arrow/util/variant.h"
-#include "arrow/visitor_inline.h"
-#include "parquet/properties.h"
-
-namespace parquet {
-namespace arrow {
-
-namespace {
-
-using ::arrow::Array;
-using ::arrow::Status;
-using ::arrow::TypedBufferBuilder;
-
-constexpr static int16_t kLevelNotSet = -1;
-
-/// \brief Simple result of iterating over a column to determine values.
-enum IterationResult {
- /// Processing is done at this node. Move back up the path
- /// to continue processing.
- kDone = -1,
- /// Move down towards the leaf for processing.
- kNext = 1,
- /// An error occurred while processing.
- kError = 2
-};
-
-#define RETURN_IF_ERROR(iteration_result) \
- do { \
- if (ARROW_PREDICT_FALSE(iteration_result == kError)) { \
- return iteration_result; \
- } \
- } while (false)
-
-int64_t LazyNullCount(const Array& array) { return array.data()->null_count.load(); }
-
-bool LazyNoNulls(const Array& array) {
- int64_t null_count = LazyNullCount(array);
- return null_count == 0 ||
-//           The kUnknownNullCount comparison is needed to account
- // for null arrays.
- (null_count == ::arrow::kUnknownNullCount &&
- array.null_bitmap_data() == nullptr);
-}
-
-struct PathWriteContext {
- PathWriteContext(::arrow::MemoryPool* pool,
- std::shared_ptr<::arrow::ResizableBuffer> def_levels_buffer)
- : rep_levels(pool), def_levels(std::move(def_levels_buffer), pool) {}
- IterationResult ReserveDefLevels(int64_t elements) {
- last_status = def_levels.Reserve(elements);
- if (ARROW_PREDICT_TRUE(last_status.ok())) {
- return kDone;
- }
- return kError;
- }
-
- IterationResult AppendDefLevel(int16_t def_level) {
- last_status = def_levels.Append(def_level);
- if (ARROW_PREDICT_TRUE(last_status.ok())) {
- return kDone;
- }
- return kError;
- }
-
- IterationResult AppendDefLevels(int64_t count, int16_t def_level) {
- last_status = def_levels.Append(count, def_level);
- if (ARROW_PREDICT_TRUE(last_status.ok())) {
- return kDone;
- }
- return kError;
- }
-
- void UnsafeAppendDefLevel(int16_t def_level) { def_levels.UnsafeAppend(def_level); }
-
- IterationResult AppendRepLevel(int16_t rep_level) {
- last_status = rep_levels.Append(rep_level);
-
- if (ARROW_PREDICT_TRUE(last_status.ok())) {
- return kDone;
- }
- return kError;
- }
-
- IterationResult AppendRepLevels(int64_t count, int16_t rep_level) {
- last_status = rep_levels.Append(count, rep_level);
- if (ARROW_PREDICT_TRUE(last_status.ok())) {
- return kDone;
- }
- return kError;
- }
-
- bool EqualRepDefLevelsLengths() const {
- return rep_levels.length() == def_levels.length();
- }
-
- // Incorporates |range| into visited elements. If the |range| is contiguous
- // with the last range, extend the last range, otherwise add |range| separately
-  // to the list.
- void RecordPostListVisit(const ElementRange& range) {
- if (!visited_elements.empty() && range.start == visited_elements.back().end) {
- visited_elements.back().end = range.end;
- return;
- }
- visited_elements.push_back(range);
- }
-
- Status last_status;
- TypedBufferBuilder<int16_t> rep_levels;
- TypedBufferBuilder<int16_t> def_levels;
- std::vector<ElementRange> visited_elements;
-};
-
-IterationResult FillRepLevels(int64_t count, int16_t rep_level,
- PathWriteContext* context) {
- if (rep_level == kLevelNotSet) {
- return kDone;
- }
- int64_t fill_count = count;
-  // This condition (equal rep and def level lengths) occurs in one of
-  // a few cases:
- // 1. Before any list is encountered.
- // 2. After rep-level has been filled in due to null/empty
- // values above it.
- // 3. After finishing a list.
- if (!context->EqualRepDefLevelsLengths()) {
- fill_count--;
- }
- return context->AppendRepLevels(fill_count, rep_level);
-}
-
-// A node for handling an array that is discovered to have all
-// null elements. It is referred to as a TerminalNode because
-// traversal of nodes will not continue past it when generating
-// rep/def levels. However, there could be many nested children
-// elements beyond it in the Array that is being processed.
-class AllNullsTerminalNode {
- public:
- explicit AllNullsTerminalNode(int16_t def_level, int16_t rep_level = kLevelNotSet)
- : def_level_(def_level), rep_level_(rep_level) {}
- void SetRepLevelIfNull(int16_t rep_level) { rep_level_ = rep_level; }
- IterationResult Run(const ElementRange& range, PathWriteContext* context) {
- int64_t size = range.Size();
- RETURN_IF_ERROR(FillRepLevels(size, rep_level_, context));
- return context->AppendDefLevels(size, def_level_);
- }
-
- private:
- int16_t def_level_;
- int16_t rep_level_;
-};
-
-// Handles the case where all remaining arrays until the leaf have no nulls
-// (and are not interrupted by lists). Unlike AllNullsTerminalNode this is
-// always the last node in a path. We don't need an analogue to the AllNullsTerminalNode
-// because if all values are present at an intermediate array no node is added for it
-// (the def-level for the next nullable node is incremented).
-struct AllPresentTerminalNode {
- IterationResult Run(const ElementRange& range, PathWriteContext* context) {
- return context->AppendDefLevels(range.end - range.start, def_level);
- // No need to worry about rep levels, because this state should
-    // only be applicable after all list/repeated values
- // have been evaluated in the path.
- }
- int16_t def_level;
-};
-
-/// Node for handling the case when the leaf-array is nullable
-/// and contains null elements.
-struct NullableTerminalNode {
- NullableTerminalNode() = default;
-
- NullableTerminalNode(const uint8_t* bitmap, int64_t element_offset,
- int16_t def_level_if_present)
- : bitmap_(bitmap),
- element_offset_(element_offset),
- def_level_if_present_(def_level_if_present),
- def_level_if_null_(def_level_if_present - 1) {}
-
- IterationResult Run(const ElementRange& range, PathWriteContext* context) {
- int64_t elements = range.Size();
- RETURN_IF_ERROR(context->ReserveDefLevels(elements));
-
- DCHECK_GT(elements, 0);
-
- auto bit_visitor = [&](bool is_set) {
- context->UnsafeAppendDefLevel(is_set ? def_level_if_present_ : def_level_if_null_);
- };
-
- if (elements > 16) { // 16 guarantees at least one unrolled loop.
- ::arrow::internal::VisitBitsUnrolled(bitmap_, range.start + element_offset_,
- elements, bit_visitor);
- } else {
- ::arrow::internal::VisitBits(bitmap_, range.start + element_offset_, elements,
- bit_visitor);
- }
- return kDone;
- }
- const uint8_t* bitmap_;
- int64_t element_offset_;
- int16_t def_level_if_present_;
- int16_t def_level_if_null_;
-};
-
-// List nodes handle populating rep_level for Arrow Lists and def-level for empty lists.
-// Nullability (both list and children) is handled by other Nodes. By
-// construction all list nodes will be intermediate nodes (they will always be followed by
-// at least one other node).
-//
-// Type parameters:
-//   |RangeSelector| - A strategy for determining the range of the child node to
-//   process. This varies depending on the type of list (int32_t* offsets,
-//   int64_t* offsets, or fixed size).
-template <typename RangeSelector>
-class ListPathNode {
- public:
- ListPathNode(RangeSelector selector, int16_t rep_lev, int16_t def_level_if_empty)
- : selector_(std::move(selector)),
- prev_rep_level_(rep_lev - 1),
- rep_level_(rep_lev),
- def_level_if_empty_(def_level_if_empty) {}
-
- int16_t rep_level() const { return rep_level_; }
-
- IterationResult Run(ElementRange* range, ElementRange* child_range,
- PathWriteContext* context) {
- if (range->Empty()) {
- return kDone;
- }
-
- // Find the first non-empty list (skipping a run of empties).
- int64_t start = range->start;
- // Retrieves the range of elements that this list contains.
- // Uses the strategy pattern to distinguish between the different
- // lists that are supported in Arrow (fixed size, normal and "large").
- *child_range = selector_.GetRange(range->start);
- while (child_range->Empty() && !range->Empty()) {
- ++range->start;
- *child_range = selector_.GetRange(range->start);
- }
-    // Loop post-conditions:
-    // * range is either empty (we are done processing at this node)
-    //   or start corresponds to a non-empty list.
-    // * If range is non-empty, child_range contains
-    //   the bounds of a non-empty list.
-
- // Handle any skipped over empty lists.
- int64_t empty_elements = range->start - start;
- if (empty_elements > 0) {
- RETURN_IF_ERROR(FillRepLevels(empty_elements, prev_rep_level_, context));
- RETURN_IF_ERROR(context->AppendDefLevels(empty_elements, def_level_if_empty_));
- }
- // Start of a new list. Note that for nested lists adding the element
- // here effectively suppresses this code until we either encounter null
- // elements or empty lists between here and the innermost list (since
-    // we make the repetition and definition level counts unequal).
- // Similarly when we are backtracking up the stack the repetition and
- // definition levels are again equal so if we encounter an intermediate list
- // with more elements this will detect it as a new list.
- if (context->EqualRepDefLevelsLengths() && !range->Empty()) {
- RETURN_IF_ERROR(context->AppendRepLevel(prev_rep_level_));
- }
-
- if (range->Empty()) {
- return kDone;
- }
-
- ++range->start;
- if (is_last_) {
-      // If this is the last repeated node, we can try to extend
-      // the child range as wide as possible before
- // continuing to the next node.
- return FillForLast(range, child_range, context);
- }
- return kNext;
- }
-
- void SetLast() { is_last_ = true; }
-
- private:
- IterationResult FillForLast(ElementRange* range, ElementRange* child_range,
- PathWriteContext* context) {
-    // First fill in the remainder of the list.
- RETURN_IF_ERROR(FillRepLevels(child_range->Size(), rep_level_, context));
- // Once we've reached this point the following preconditions should hold:
- // 1. There are no more repeated path nodes to deal with.
- // 2. All elements in |range| represent contiguous elements in the
- // child array (Null values would have shortened the range to ensure
- // all remaining list elements are present (though they may be empty lists)).
- // 3. No element of range spans a parent list (intermediate
- // list nodes only handle one list entry at a time).
- //
- // Given these preconditions it should be safe to fill runs on non-empty
- // lists here and expand the range in the child node accordingly.
-
- while (!range->Empty()) {
- ElementRange size_check = selector_.GetRange(range->start);
- if (size_check.Empty()) {
- // The empty range will need to be handled after we pass down the accumulated
- // range because it affects def_level placement and we need to get the children
- // def_levels entered first.
- break;
- }
- // This is the start of a new list. We can be sure it only applies
- // to the previous list (and doesn't jump to the start of any list
- // further up in nesting due to the constraints mentioned at the start
- // of the function).
- RETURN_IF_ERROR(context->AppendRepLevel(prev_rep_level_));
- RETURN_IF_ERROR(context->AppendRepLevels(size_check.Size() - 1, rep_level_));
- DCHECK_EQ(size_check.start, child_range->end);
- child_range->end = size_check.end;
- ++range->start;
- }
-
- // Do book-keeping to track the elements of the arrays that are actually visited
- // beyond this point. This is necessary to identify "gaps" in values that should
- // not be processed (written out to parquet).
- context->RecordPostListVisit(*child_range);
- return kNext;
- }
-
- RangeSelector selector_;
- int16_t prev_rep_level_;
- int16_t rep_level_;
- int16_t def_level_if_empty_;
- bool is_last_ = false;
-};
-
-template <typename OffsetType>
-struct VarRangeSelector {
- ElementRange GetRange(int64_t index) const {
- return ElementRange{offsets[index], offsets[index + 1]};
- }
-
- // Either int32_t* or int64_t*.
- const OffsetType* offsets;
-};
-
-struct FixedSizedRangeSelector {
- ElementRange GetRange(int64_t index) const {
- int64_t start = index * list_size;
- return ElementRange{start, start + list_size};
- }
- int list_size;
-};
-
-// An intermediate node that handles null values.
-class NullableNode {
- public:
- NullableNode(const uint8_t* null_bitmap, int64_t entry_offset,
- int16_t def_level_if_null, int16_t rep_level_if_null = kLevelNotSet)
- : null_bitmap_(null_bitmap),
- entry_offset_(entry_offset),
- valid_bits_reader_(MakeReader(ElementRange{0, 0})),
- def_level_if_null_(def_level_if_null),
- rep_level_if_null_(rep_level_if_null),
- new_range_(true) {}
-
- void SetRepLevelIfNull(int16_t rep_level) { rep_level_if_null_ = rep_level; }
-
- ::arrow::internal::BitRunReader MakeReader(const ElementRange& range) {
- return ::arrow::internal::BitRunReader(null_bitmap_, entry_offset_ + range.start,
- range.Size());
- }
-
- IterationResult Run(ElementRange* range, ElementRange* child_range,
- PathWriteContext* context) {
- if (new_range_) {
- // Reset the reader each time we are starting fresh on a range.
- // We can't rely on continuity because nulls above can
- // cause discontinuities.
- valid_bits_reader_ = MakeReader(*range);
- }
- child_range->start = range->start;
- ::arrow::internal::BitRun run = valid_bits_reader_.NextRun();
- if (!run.set) {
- range->start += run.length;
- RETURN_IF_ERROR(FillRepLevels(run.length, rep_level_if_null_, context));
- RETURN_IF_ERROR(context->AppendDefLevels(run.length, def_level_if_null_));
- run = valid_bits_reader_.NextRun();
- }
- if (range->Empty()) {
- new_range_ = true;
- return kDone;
- }
- child_range->end = child_range->start = range->start;
- child_range->end += run.length;
-
- DCHECK(!child_range->Empty());
- range->start += child_range->Size();
- new_range_ = false;
- return kNext;
- }
-
- const uint8_t* null_bitmap_;
- int64_t entry_offset_;
- ::arrow::internal::BitRunReader valid_bits_reader_;
- int16_t def_level_if_null_;
- int16_t rep_level_if_null_;
-
- // Whether the next invocation will be a new range.
- bool new_range_ = true;
-};
-
-using ListNode = ListPathNode<VarRangeSelector<int32_t>>;
-using LargeListNode = ListPathNode<VarRangeSelector<int64_t>>;
-using FixedSizeListNode = ListPathNode<FixedSizedRangeSelector>;
-
-// Contains static information derived from traversing the schema.
-struct PathInfo {
-  // The vectors are expected to be the same length.
-
- // Note index order matters here.
- using Node = ::arrow::util::Variant<NullableTerminalNode, ListNode, LargeListNode,
- FixedSizeListNode, NullableNode,
- AllPresentTerminalNode, AllNullsTerminalNode>;
-
- std::vector<Node> path;
- std::shared_ptr<Array> primitive_array;
- int16_t max_def_level = 0;
- int16_t max_rep_level = 0;
- bool has_dictionary = false;
- bool leaf_is_nullable = false;
-};
-
-/// Contains logic for writing a single leaf node to parquet.
-/// This tracks the path from root to leaf.
-///
-/// |writer| will be called after all of the definition/repetition
-/// values have been calculated for root_range with the calculated
-/// values. It is intended to abstract the complexity of writing
-/// the levels and values to parquet.
-Status WritePath(ElementRange root_range, PathInfo* path_info,
- ArrowWriteContext* arrow_context,
- MultipathLevelBuilder::CallbackFunction writer) {
- std::vector<ElementRange> stack(path_info->path.size());
- MultipathLevelBuilderResult builder_result;
- builder_result.leaf_array = path_info->primitive_array;
- builder_result.leaf_is_nullable = path_info->leaf_is_nullable;
-
- if (path_info->max_def_level == 0) {
- // This case only occurs when there are no nullable or repeated
- // columns in the path from the root to leaf.
- int64_t leaf_length = builder_result.leaf_array->length();
- builder_result.def_rep_level_count = leaf_length;
- builder_result.post_list_visited_elements.push_back({0, leaf_length});
- return writer(builder_result);
- }
- stack[0] = root_range;
- RETURN_NOT_OK(
- arrow_context->def_levels_buffer->Resize(/*new_size=*/0, /*shrink_to_fit*/ false));
- PathWriteContext context(arrow_context->memory_pool, arrow_context->def_levels_buffer);
- // We should need at least this many entries so reserve the space ahead of time.
- RETURN_NOT_OK(context.def_levels.Reserve(root_range.Size()));
- if (path_info->max_rep_level > 0) {
- RETURN_NOT_OK(context.rep_levels.Reserve(root_range.Size()));
- }
-
- auto stack_base = &stack[0];
- auto stack_position = stack_base;
-  // This is the main loop for calculating rep/def levels. The nodes
- // in the path implement a chain-of-responsibility like pattern
- // where each node can add some number of repetition/definition
- // levels to PathWriteContext and also delegate to the next node
- // in the path to add values. The values are added through each Run(...)
- // call and the choice to delegate to the next node (or return to the
- // previous node) is communicated by the return value of Run(...).
- // The loop terminates after the first node indicates all values in
- // |root_range| are processed.
- while (stack_position >= stack_base) {
- PathInfo::Node& node = path_info->path[stack_position - stack_base];
- struct {
- IterationResult operator()(NullableNode* node) {
- return node->Run(stack_position, stack_position + 1, context);
- }
- IterationResult operator()(ListNode* node) {
- return node->Run(stack_position, stack_position + 1, context);
- }
- IterationResult operator()(NullableTerminalNode* node) {
- return node->Run(*stack_position, context);
- }
- IterationResult operator()(FixedSizeListNode* node) {
- return node->Run(stack_position, stack_position + 1, context);
- }
- IterationResult operator()(AllPresentTerminalNode* node) {
- return node->Run(*stack_position, context);
- }
- IterationResult operator()(AllNullsTerminalNode* node) {
- return node->Run(*stack_position, context);
- }
- IterationResult operator()(LargeListNode* node) {
- return node->Run(stack_position, stack_position + 1, context);
- }
- ElementRange* stack_position;
- PathWriteContext* context;
- } visitor = {stack_position, &context};
-
- IterationResult result = ::arrow::util::visit(visitor, &node);
-
- if (ARROW_PREDICT_FALSE(result == kError)) {
- DCHECK(!context.last_status.ok());
- return context.last_status;
- }
- stack_position += static_cast<int>(result);
- }
- RETURN_NOT_OK(context.last_status);
- builder_result.def_rep_level_count = context.def_levels.length();
-
- if (context.rep_levels.length() > 0) {
- // This case only occurs when there was a repeated element that needs to be
- // processed.
- builder_result.rep_levels = context.rep_levels.data();
- std::swap(builder_result.post_list_visited_elements, context.visited_elements);
-    // It is possible when processing lists that all lists were empty. In this
-    // case no elements would have been added to post_list_visited_elements. By
-    // adding an empty element we avoid special casing in downstream consumers.
- if (builder_result.post_list_visited_elements.empty()) {
- builder_result.post_list_visited_elements.push_back({0, 0});
- }
- } else {
- builder_result.post_list_visited_elements.push_back(
- {0, builder_result.leaf_array->length()});
- builder_result.rep_levels = nullptr;
- }
-
- builder_result.def_levels = context.def_levels.data();
- return writer(builder_result);
-}
-
-struct FixupVisitor {
- int max_rep_level = -1;
- int16_t rep_level_if_null = kLevelNotSet;
-
- template <typename T>
- void HandleListNode(T* arg) {
- if (arg->rep_level() == max_rep_level) {
- arg->SetLast();
- // after the last list node we don't need to fill
- // rep levels on null.
- rep_level_if_null = kLevelNotSet;
- } else {
- rep_level_if_null = arg->rep_level();
- }
- }
- void operator()(ListNode* node) { HandleListNode(node); }
- void operator()(LargeListNode* node) { HandleListNode(node); }
- void operator()(FixedSizeListNode* node) { HandleListNode(node); }
-
- // For non-list intermediate nodes.
- template <typename T>
- void HandleIntermediateNode(T* arg) {
- if (rep_level_if_null != kLevelNotSet) {
- arg->SetRepLevelIfNull(rep_level_if_null);
- }
- }
-
- void operator()(NullableNode* arg) { HandleIntermediateNode(arg); }
-
- void operator()(AllNullsTerminalNode* arg) {
- // Even though no processing happens past this point we
- // still need to adjust it if a list occurred after an
- // all null array.
- HandleIntermediateNode(arg);
- }
-
- void operator()(NullableTerminalNode*) {}
- void operator()(AllPresentTerminalNode*) {}
-};
-
-PathInfo Fixup(PathInfo info) {
- // We only need to fixup the path if there were repeated
- // elements on it.
- if (info.max_rep_level == 0) {
- return info;
- }
- FixupVisitor visitor;
- visitor.max_rep_level = info.max_rep_level;
- if (visitor.max_rep_level > 0) {
- visitor.rep_level_if_null = 0;
- }
- for (size_t x = 0; x < info.path.size(); x++) {
- ::arrow::util::visit(visitor, &info.path[x]);
- }
- return info;
-}
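-
-// For example, with two nested list nodes on a path, only the inner one
-// (rep_level() == max_rep_level) is marked with SetLast(); nullable nodes
-// between the two lists are told, via SetRepLevelIfNull, to fill repetition
-// levels with the outer list's rep level when they hit null runs.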
-
-class PathBuilder {
- public:
- explicit PathBuilder(bool start_nullable) : nullable_in_parent_(start_nullable) {}
- template <typename T>
- void AddTerminalInfo(const T& array) {
- info_.leaf_is_nullable = nullable_in_parent_;
- if (nullable_in_parent_) {
- info_.max_def_level++;
- }
- // We don't use null_count() because if the null_count isn't known
- // and the array does in fact contain nulls, we will end up
- // traversing the null bitmap twice (once here and once when calculating
- // rep/def levels).
- if (LazyNoNulls(array)) {
- info_.path.emplace_back(AllPresentTerminalNode{info_.max_def_level});
- } else if (LazyNullCount(array) == array.length()) {
- info_.path.emplace_back(AllNullsTerminalNode(info_.max_def_level - 1));
- } else {
- info_.path.emplace_back(NullableTerminalNode(array.null_bitmap_data(),
- array.offset(), info_.max_def_level));
- }
- info_.primitive_array = std::make_shared<T>(array.data());
- paths_.push_back(Fixup(info_));
- }
-
- template <typename T>
- ::arrow::enable_if_t<std::is_base_of<::arrow::FlatArray, T>::value, Status> Visit(
- const T& array) {
- AddTerminalInfo(array);
- return Status::OK();
- }
-
- template <typename T>
- ::arrow::enable_if_t<std::is_same<::arrow::ListArray, T>::value ||
- std::is_same<::arrow::LargeListArray, T>::value,
- Status>
- Visit(const T& array) {
- MaybeAddNullable(array);
- // Increment necessary due to empty lists.
- info_.max_def_level++;
- info_.max_rep_level++;
- // raw_value_offsets() accounts for any slice offset.
- ListPathNode<VarRangeSelector<typename T::offset_type>> node(
- VarRangeSelector<typename T::offset_type>{array.raw_value_offsets()},
- info_.max_rep_level, info_.max_def_level - 1);
- info_.path.emplace_back(std::move(node));
- nullable_in_parent_ = array.list_type()->value_field()->nullable();
- return VisitInline(*array.values());
- }
-
- Status Visit(const ::arrow::DictionaryArray& array) {
- // Only currently handle DictionaryArray where the dictionary is a
- // primitive type
- if (array.dict_type()->value_type()->num_fields() > 0) {
- return Status::NotImplemented(
- "Writing DictionaryArray with nested dictionary "
- "type not yet supported");
- }
- if (array.dictionary()->null_count() > 0) {
- return Status::NotImplemented(
- "Writing DictionaryArray with null encoded in dictionary "
- "type not yet supported");
- }
- AddTerminalInfo(array);
- return Status::OK();
- }
-
- void MaybeAddNullable(const Array& array) {
- if (!nullable_in_parent_) {
- return;
- }
- info_.max_def_level++;
- // We don't use null_count() because if the null_count isn't known
- // and the array does in fact contain nulls, we will end up
- // traversing the null bitmap twice (once here and once when calculating
- // rep/def levels). Because this isn't terminal this might not be
- // the right decision for structs that share the same nullable
- // parents.
- if (LazyNoNulls(array)) {
- // Don't add anything because there won't be any point checking
- // null values for the array. There will always be at least
- // one more array to handle nullability.
- return;
- }
- if (LazyNullCount(array) == array.length()) {
- info_.path.emplace_back(AllNullsTerminalNode(info_.max_def_level - 1));
- return;
- }
- info_.path.emplace_back(
- NullableNode(array.null_bitmap_data(), array.offset(),
- /* def_level_if_null = */ info_.max_def_level - 1));
- }
-
- Status VisitInline(const Array& array);
-
- Status Visit(const ::arrow::MapArray& array) {
- return Visit(static_cast<const ::arrow::ListArray&>(array));
- }
-
- Status Visit(const ::arrow::StructArray& array) {
- MaybeAddNullable(array);
- PathInfo info_backup = info_;
- for (int x = 0; x < array.num_fields(); x++) {
- nullable_in_parent_ = array.type()->field(x)->nullable();
- RETURN_NOT_OK(VisitInline(*array.field(x)));
- info_ = info_backup;
- }
- return Status::OK();
- }
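-
-  // For example, a struct<a: int32, b: list<int32>> column produces two
-  // entries in paths_: both share the struct's prefix nodes (info_ is
-  // restored from info_backup between children), one path ending at a's
-  // terminal node and the other passing through b's list node.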
-
- Status Visit(const ::arrow::FixedSizeListArray& array) {
- MaybeAddNullable(array);
- int32_t list_size = array.list_type()->list_size();
- // Technically we could encode fixed size lists with two level encodings
- // but since we always use 3 level encoding we increment def levels as
- // well.
- info_.max_def_level++;
- info_.max_rep_level++;
- info_.path.emplace_back(FixedSizeListNode(FixedSizedRangeSelector{list_size},
- info_.max_rep_level, info_.max_def_level));
- nullable_in_parent_ = array.list_type()->value_field()->nullable();
- if (array.offset() > 0) {
- return VisitInline(*array.values()->Slice(array.value_offset(0)));
- }
- return VisitInline(*array.values());
- }
-
- Status Visit(const ::arrow::ExtensionArray& array) {
- return VisitInline(*array.storage());
- }
-
-#define NOT_IMPLEMENTED_VISIT(ArrowTypePrefix) \
- Status Visit(const ::arrow::ArrowTypePrefix##Array& array) { \
- return Status::NotImplemented("Level generation for " #ArrowTypePrefix \
- " not supported yet"); \
- }
-
- // Union types aren't supported in Parquet.
- NOT_IMPLEMENTED_VISIT(Union)
-
-#undef NOT_IMPLEMENTED_VISIT
- std::vector<PathInfo>& paths() { return paths_; }
-
- private:
- PathInfo info_;
- std::vector<PathInfo> paths_;
- bool nullable_in_parent_;
-};
-
-Status PathBuilder::VisitInline(const Array& array) {
- return ::arrow::VisitArrayInline(array, this);
-}
-
-#undef RETURN_IF_ERROR
-} // namespace
-
-class MultipathLevelBuilderImpl : public MultipathLevelBuilder {
- public:
- MultipathLevelBuilderImpl(std::shared_ptr<::arrow::ArrayData> data,
- std::unique_ptr<PathBuilder> path_builder)
- : root_range_{0, data->length},
- data_(std::move(data)),
- path_builder_(std::move(path_builder)) {}
-
- int GetLeafCount() const override {
- return static_cast<int>(path_builder_->paths().size());
- }
-
- ::arrow::Status Write(int leaf_index, ArrowWriteContext* context,
- CallbackFunction write_leaf_callback) override {
- DCHECK_GE(leaf_index, 0);
- DCHECK_LT(leaf_index, GetLeafCount());
- return WritePath(root_range_, &path_builder_->paths()[leaf_index], context,
- std::move(write_leaf_callback));
- }
-
- private:
- ElementRange root_range_;
- // Reference holder to ensure the data stays valid.
- std::shared_ptr<::arrow::ArrayData> data_;
- std::unique_ptr<PathBuilder> path_builder_;
-};
-
-// static
-::arrow::Result<std::unique_ptr<MultipathLevelBuilder>> MultipathLevelBuilder::Make(
- const ::arrow::Array& array, bool array_field_nullable) {
- auto constructor = ::arrow::internal::make_unique<PathBuilder>(array_field_nullable);
- RETURN_NOT_OK(VisitArrayInline(array, constructor.get()));
- return ::arrow::internal::make_unique<MultipathLevelBuilderImpl>(
- array.data(), std::move(constructor));
-}
-
-// static
-Status MultipathLevelBuilder::Write(const Array& array, bool array_field_nullable,
- ArrowWriteContext* context,
- MultipathLevelBuilder::CallbackFunction callback) {
- ARROW_ASSIGN_OR_RAISE(std::unique_ptr<MultipathLevelBuilder> builder,
- MultipathLevelBuilder::Make(array, array_field_nullable));
- PathBuilder constructor(array_field_nullable);
- RETURN_NOT_OK(VisitArrayInline(array, &constructor));
- for (int leaf_idx = 0; leaf_idx < builder->GetLeafCount(); leaf_idx++) {
- RETURN_NOT_OK(builder->Write(leaf_idx, context, callback));
- }
- return Status::OK();
-}
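-
-// A minimal caller sketch (assuming CallbackFunction is
-// std::function<Status(const MultipathLevelBuilderResult&)>, as declared in
-// path_internal.h):
-//
-//   RETURN_NOT_OK(MultipathLevelBuilder::Write(
-//       *array, /*array_field_nullable=*/true, &arrow_write_context,
-//       [](const MultipathLevelBuilderResult& result) {
-//         // result.def_levels / result.rep_levels hold the computed levels
-//         // for result.leaf_array; a writer would emit them here.
-//         return Status::OK();
-//       }));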
-
-} // namespace arrow
-} // namespace parquet
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+// Overview.
+//
+// The strategy used for this code for repetition/definition
+// is to dissect the top level array into a list of paths
+// from the top level array to the final primitive (possibly
+// dictionary encoded array). It then evaluates each one of
+// those paths to produce results for the callback iteratively.
+//
+// This approach was taken to reduce the aggregate memory required if we were
+// to build all def/rep levels in parallel as part of a tree traversal. It
+// also allows for straightforward parallelization at the path level if that is
+// desired in the future.
+//
+// The main downside to this approach is it duplicates effort for nodes
+// that share common ancestors. This can be mitigated to some degree
+// by adding in optimizations that detect leaf arrays that share
+// the same common list ancestor and reuse the repetition levels
+// from the first leaf encountered (only definition levels greater than
+// the list ancestor need to be re-evaluated). This is left for future
+// work.
+//
+// Algorithm.
+//
+// As mentioned above this code dissects arrays into constituent parts:
+// nullability data, and list offset data. It tries to optimize for
+// some special cases, where it is known ahead of time that a step
+// can be skipped (e.g. a nullable array happens to have all of its
+// values present) or batch filled (a nullable array has all null values).
+// One further optimization that is not implemented but could be done
+// in the future is special handling for nested list arrays that
+// have some intermediate data which indicates the final array contains only
+// nulls.
+//
+// In general, the algorithm attempts to batch work at each node as much
+// as possible. For nullability nodes this means finding runs of null
+// values and batch filling those interspersed with finding runs of non-null values
+// to process in batch at the next column.
+//
+// Similarly, list runs of empty lists are all processed in one batch
+// followed by either:
+// - A single list entry for non-terminal lists (i.e. the upper part of a nested list)
+// - Runs of non-empty lists for the terminal list (i.e. the lowest part of a nested
+// list).
+//
+// This makes use of the following observations.
+// 1. Null values at any node on the path are terminal (repetition and definition
+// level can be set directly when a Null value is encountered).
+// 2. Empty lists share this eager termination property with Null values.
+// 3. In order to keep repetition/definition level populated the algorithm is lazy
+// in assigning repetition levels. The algorithm tracks whether it is currently
+// in the middle of a list by comparing the lengths of repetition/definition levels.
+//    If it is currently in the middle of a list the number of repetition levels
+// populated will be greater than definition levels (the start of a List requires
+// adding the first element). If there are equal numbers of definition and repetition
+// levels populated this indicates a list is waiting to be started and the next list
+// encountered will have its repetition level signify the beginning of the list.
+//
+// Other implementation notes.
+//
+// This code hasn't been benchmarked (or assembly analyzed) but applies the
+// following optimizations (yes, premature optimization is the root of all evil).
+// - This code does not use recursion, instead it constructs its own stack and manages
+// updating elements accordingly.
+// - It tries to avoid using Status for common return states.
+// - Avoids virtual dispatch in favor of if/else statements on a set of well known
+// classes.
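+//
+// As a concrete illustration of observation 3: when a new list is started,
+// AppendRepLevel() leaves rep_levels one entry longer than def_levels; once
+// the def level for the list's first element is appended the lengths match
+// again, so the next non-empty list encountered is treated as a new list
+// rather than a continuation of the current one.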
+
+#include "parquet/arrow/path_internal.h"
+
+#include <atomic>
+#include <cstddef>
+#include <memory>
+#include <type_traits>
+#include <utility>
+#include <vector>
+
+#include "arrow/array.h"
+#include "arrow/buffer.h"
+#include "arrow/buffer_builder.h"
+#include "arrow/extension_type.h"
+#include "arrow/memory_pool.h"
+#include "arrow/type.h"
+#include "arrow/type_traits.h"
+#include "arrow/util/bit_run_reader.h"
+#include "arrow/util/bit_util.h"
+#include "arrow/util/bitmap_visit.h"
+#include "arrow/util/logging.h"
+#include "arrow/util/macros.h"
+#include "arrow/util/make_unique.h"
+#include "arrow/util/variant.h"
+#include "arrow/visitor_inline.h"
+#include "parquet/properties.h"
+
+namespace parquet {
+namespace arrow {
+
+namespace {
+
+using ::arrow::Array;
+using ::arrow::Status;
+using ::arrow::TypedBufferBuilder;
+
+constexpr static int16_t kLevelNotSet = -1;
+
+/// \brief Simple result of iterating over a column to determine values.
+enum IterationResult {
+ /// Processing is done at this node. Move back up the path
+ /// to continue processing.
+ kDone = -1,
+ /// Move down towards the leaf for processing.
+ kNext = 1,
+ /// An error occurred while processing.
+ kError = 2
+};
+
+#define RETURN_IF_ERROR(iteration_result) \
+ do { \
+ if (ARROW_PREDICT_FALSE(iteration_result == kError)) { \
+ return iteration_result; \
+ } \
+ } while (false)
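+
+// Note: kDone (-1) and kNext (+1) double as stack deltas in WritePath's main
+// loop (stack_position += static_cast<int>(result)), moving processing one
+// node up or down the path; kError is checked separately before the move.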
+
+int64_t LazyNullCount(const Array& array) { return array.data()->null_count.load(); }
+
+bool LazyNoNulls(const Array& array) {
+ int64_t null_count = LazyNullCount(array);
+ return null_count == 0 ||
+//           The kUnknownNullCount comparison is needed to account
+ // for null arrays.
+ (null_count == ::arrow::kUnknownNullCount &&
+ array.null_bitmap_data() == nullptr);
+}
+
+struct PathWriteContext {
+ PathWriteContext(::arrow::MemoryPool* pool,
+ std::shared_ptr<::arrow::ResizableBuffer> def_levels_buffer)
+ : rep_levels(pool), def_levels(std::move(def_levels_buffer), pool) {}
+ IterationResult ReserveDefLevels(int64_t elements) {
+ last_status = def_levels.Reserve(elements);
+ if (ARROW_PREDICT_TRUE(last_status.ok())) {
+ return kDone;
+ }
+ return kError;
+ }
+
+ IterationResult AppendDefLevel(int16_t def_level) {
+ last_status = def_levels.Append(def_level);
+ if (ARROW_PREDICT_TRUE(last_status.ok())) {
+ return kDone;
+ }
+ return kError;
+ }
+
+ IterationResult AppendDefLevels(int64_t count, int16_t def_level) {
+ last_status = def_levels.Append(count, def_level);
+ if (ARROW_PREDICT_TRUE(last_status.ok())) {
+ return kDone;
+ }
+ return kError;
+ }
+
+ void UnsafeAppendDefLevel(int16_t def_level) { def_levels.UnsafeAppend(def_level); }
+
+ IterationResult AppendRepLevel(int16_t rep_level) {
+ last_status = rep_levels.Append(rep_level);
+
+ if (ARROW_PREDICT_TRUE(last_status.ok())) {
+ return kDone;
+ }
+ return kError;
+ }
+
+ IterationResult AppendRepLevels(int64_t count, int16_t rep_level) {
+ last_status = rep_levels.Append(count, rep_level);
+ if (ARROW_PREDICT_TRUE(last_status.ok())) {
+ return kDone;
+ }
+ return kError;
+ }
+
+ bool EqualRepDefLevelsLengths() const {
+ return rep_levels.length() == def_levels.length();
+ }
+
+ // Incorporates |range| into visited elements. If the |range| is contiguous
+ // with the last range, extend the last range, otherwise add |range| separately
+  // to the list.
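+  // For example, {0,3} followed by {3,5} collapses into a single {0,5}
+  // entry, while {0,3} followed by {4,5} is recorded as two separate ranges.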
+ void RecordPostListVisit(const ElementRange& range) {
+ if (!visited_elements.empty() && range.start == visited_elements.back().end) {
+ visited_elements.back().end = range.end;
+ return;
+ }
+ visited_elements.push_back(range);
+ }
+
+ Status last_status;
+ TypedBufferBuilder<int16_t> rep_levels;
+ TypedBufferBuilder<int16_t> def_levels;
+ std::vector<ElementRange> visited_elements;
+};
+
+IterationResult FillRepLevels(int64_t count, int16_t rep_level,
+ PathWriteContext* context) {
+ if (rep_level == kLevelNotSet) {
+ return kDone;
+ }
+ int64_t fill_count = count;
+  // This condition (equal rep and def level lengths) occurs in one of
+  // a few cases:
+ // 1. Before any list is encountered.
+ // 2. After rep-level has been filled in due to null/empty
+ // values above it.
+ // 3. After finishing a list.
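+  // For example, if a list has just been started (rep_levels is one entry
+  // longer than def_levels), a request to fill 4 rep levels appends only 3,
+  // because the list's opening element already received its rep level.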
+ if (!context->EqualRepDefLevelsLengths()) {
+ fill_count--;
+ }
+ return context->AppendRepLevels(fill_count, rep_level);
+}
+
+// A node for handling an array that is discovered to have all
+// null elements. It is referred to as a TerminalNode because
+// traversal of nodes will not continue past it when generating
+// rep/def levels. However, there could be many nested children
+// elements beyond it in the Array that is being processed.
+class AllNullsTerminalNode {
+ public:
+ explicit AllNullsTerminalNode(int16_t def_level, int16_t rep_level = kLevelNotSet)
+ : def_level_(def_level), rep_level_(rep_level) {}
+ void SetRepLevelIfNull(int16_t rep_level) { rep_level_ = rep_level; }
+ IterationResult Run(const ElementRange& range, PathWriteContext* context) {
+ int64_t size = range.Size();
+ RETURN_IF_ERROR(FillRepLevels(size, rep_level_, context));
+ return context->AppendDefLevels(size, def_level_);
+ }
+
+ private:
+ int16_t def_level_;
+ int16_t rep_level_;
+};
+
+// Handles the case where all remaining arrays until the leaf have no nulls
+// (and are not interrupted by lists). Unlike AllNullsTerminalNode this is
+// always the last node in a path. We don't need an analogue to the AllNullsTerminalNode
+// because if all values are present at an intermediate array no node is added for it
+// (the def-level for the next nullable node is incremented).
+struct AllPresentTerminalNode {
+ IterationResult Run(const ElementRange& range, PathWriteContext* context) {
+ return context->AppendDefLevels(range.end - range.start, def_level);
+ // No need to worry about rep levels, because this state should
+    // only be applicable after all list/repeated values
+ // have been evaluated in the path.
+ }
+ int16_t def_level;
+};
+
+/// Node for handling the case when the leaf-array is nullable
+/// and contains null elements.
+struct NullableTerminalNode {
+ NullableTerminalNode() = default;
+
+ NullableTerminalNode(const uint8_t* bitmap, int64_t element_offset,
+ int16_t def_level_if_present)
+ : bitmap_(bitmap),
+ element_offset_(element_offset),
+ def_level_if_present_(def_level_if_present),
+ def_level_if_null_(def_level_if_present - 1) {}
+
+ IterationResult Run(const ElementRange& range, PathWriteContext* context) {
+ int64_t elements = range.Size();
+ RETURN_IF_ERROR(context->ReserveDefLevels(elements));
+
+ DCHECK_GT(elements, 0);
+
+ auto bit_visitor = [&](bool is_set) {
+ context->UnsafeAppendDefLevel(is_set ? def_level_if_present_ : def_level_if_null_);
+ };
+
+ if (elements > 16) { // 16 guarantees at least one unrolled loop.
+ ::arrow::internal::VisitBitsUnrolled(bitmap_, range.start + element_offset_,
+ elements, bit_visitor);
+ } else {
+ ::arrow::internal::VisitBits(bitmap_, range.start + element_offset_, elements,
+ bit_visitor);
+ }
+ return kDone;
+ }
+ const uint8_t* bitmap_;
+ int64_t element_offset_;
+ int16_t def_level_if_present_;
+ int16_t def_level_if_null_;
+};
+
+// List nodes handle populating rep_level for Arrow Lists and def-level for empty lists.
+// Nullability (both list and children) is handled by other Nodes. By
+// construction all list nodes will be intermediate nodes (they will always be followed by
+// at least one other node).
+//
+// Type parameters:
+//   |RangeSelector| - A strategy for determining the range of the child node to
+//   process. This varies depending on the type of list (int32_t* offsets,
+//   int64_t* offsets, or fixed size).
+template <typename RangeSelector>
+class ListPathNode {
+ public:
+ ListPathNode(RangeSelector selector, int16_t rep_lev, int16_t def_level_if_empty)
+ : selector_(std::move(selector)),
+ prev_rep_level_(rep_lev - 1),
+ rep_level_(rep_lev),
+ def_level_if_empty_(def_level_if_empty) {}
+
+ int16_t rep_level() const { return rep_level_; }
+
+ IterationResult Run(ElementRange* range, ElementRange* child_range,
+ PathWriteContext* context) {
+ if (range->Empty()) {
+ return kDone;
+ }
+
+ // Find the first non-empty list (skipping a run of empties).
+ int64_t start = range->start;
+ // Retrieves the range of elements that this list contains.
+ // Uses the strategy pattern to distinguish between the different
+ // lists that are supported in Arrow (fixed size, normal and "large").
+ *child_range = selector_.GetRange(range->start);
+ while (child_range->Empty() && !range->Empty()) {
+ ++range->start;
+ *child_range = selector_.GetRange(range->start);
+ }
+    // Loop post-conditions:
+    // * range is either empty (we are done processing at this node)
+    //   or start corresponds to a non-empty list.
+    // * If range is non-empty, child_range contains
+    //   the bounds of a non-empty list.
+
+ // Handle any skipped over empty lists.
+ int64_t empty_elements = range->start - start;
+ if (empty_elements > 0) {
+ RETURN_IF_ERROR(FillRepLevels(empty_elements, prev_rep_level_, context));
+ RETURN_IF_ERROR(context->AppendDefLevels(empty_elements, def_level_if_empty_));
+ }
+ // Start of a new list. Note that for nested lists adding the element
+ // here effectively suppresses this code until we either encounter null
+ // elements or empty lists between here and the innermost list (since
+    // we make the repetition and definition level counts unequal).
+ // Similarly when we are backtracking up the stack the repetition and
+ // definition levels are again equal so if we encounter an intermediate list
+ // with more elements this will detect it as a new list.
+ if (context->EqualRepDefLevelsLengths() && !range->Empty()) {
+ RETURN_IF_ERROR(context->AppendRepLevel(prev_rep_level_));
+ }
+
+ if (range->Empty()) {
+ return kDone;
+ }
+
+ ++range->start;
+ if (is_last_) {
+      // If this is the last repeated node, we can try to extend
+      // the child range as wide as possible before
+ // continuing to the next node.
+ return FillForLast(range, child_range, context);
+ }
+ return kNext;
+ }
+
+ void SetLast() { is_last_ = true; }
+
+ private:
+ IterationResult FillForLast(ElementRange* range, ElementRange* child_range,
+ PathWriteContext* context) {
+    // First fill in the remainder of the list.
+ RETURN_IF_ERROR(FillRepLevels(child_range->Size(), rep_level_, context));
+ // Once we've reached this point the following preconditions should hold:
+ // 1. There are no more repeated path nodes to deal with.
+ // 2. All elements in |range| represent contiguous elements in the
+ // child array (Null values would have shortened the range to ensure
+ // all remaining list elements are present (though they may be empty lists)).
+ // 3. No element of range spans a parent list (intermediate
+ // list nodes only handle one list entry at a time).
+ //
+ // Given these preconditions it should be safe to fill runs on non-empty
+ // lists here and expand the range in the child node accordingly.
+
+ while (!range->Empty()) {
+ ElementRange size_check = selector_.GetRange(range->start);
+ if (size_check.Empty()) {
+ // The empty range will need to be handled after we pass down the accumulated
+ // range because it affects def_level placement and we need to get the children
+ // def_levels entered first.
+ break;
+ }
+ // This is the start of a new list. We can be sure it only applies
+ // to the previous list (and doesn't jump to the start of any list
+ // further up in nesting due to the constraints mentioned at the start
+ // of the function).
+ RETURN_IF_ERROR(context->AppendRepLevel(prev_rep_level_));
+ RETURN_IF_ERROR(context->AppendRepLevels(size_check.Size() - 1, rep_level_));
+ DCHECK_EQ(size_check.start, child_range->end);
+ child_range->end = size_check.end;
+ ++range->start;
+ }
+
+ // Do book-keeping to track the elements of the arrays that are actually visited
+ // beyond this point. This is necessary to identify "gaps" in values that should
+ // not be processed (written out to parquet).
+ context->RecordPostListVisit(*child_range);
+ return kNext;
+ }
+
+ RangeSelector selector_;
+ int16_t prev_rep_level_;
+ int16_t rep_level_;
+ int16_t def_level_if_empty_;
+ bool is_last_ = false;
+};
+
+template <typename OffsetType>
+struct VarRangeSelector {
+ ElementRange GetRange(int64_t index) const {
+ return ElementRange{offsets[index], offsets[index + 1]};
+ }
+
+ // Either int32_t* or int64_t*.
+ const OffsetType* offsets;
+};
+
+struct FixedSizedRangeSelector {
+ ElementRange GetRange(int64_t index) const {
+ int64_t start = index * list_size;
+ return ElementRange{start, start + list_size};
+ }
+ int list_size;
+};
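+
+// For example, with offsets = {0, 2, 2, 5} a VarRangeSelector yields
+// GetRange(0) == {0, 2}, GetRange(1) == {2, 2} (an empty list), and
+// GetRange(2) == {2, 5}; a FixedSizedRangeSelector with list_size == 3
+// always yields ranges of size 3, e.g. GetRange(1) == {3, 6}.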
+
+// An intermediate node that handles null values.
+class NullableNode {
+ public:
+ NullableNode(const uint8_t* null_bitmap, int64_t entry_offset,
+ int16_t def_level_if_null, int16_t rep_level_if_null = kLevelNotSet)
+ : null_bitmap_(null_bitmap),
+ entry_offset_(entry_offset),
+ valid_bits_reader_(MakeReader(ElementRange{0, 0})),
+ def_level_if_null_(def_level_if_null),
+ rep_level_if_null_(rep_level_if_null),
+ new_range_(true) {}
+
+ void SetRepLevelIfNull(int16_t rep_level) { rep_level_if_null_ = rep_level; }
+
+ ::arrow::internal::BitRunReader MakeReader(const ElementRange& range) {
+ return ::arrow::internal::BitRunReader(null_bitmap_, entry_offset_ + range.start,
+ range.Size());
+ }
+
+ IterationResult Run(ElementRange* range, ElementRange* child_range,
+ PathWriteContext* context) {
+ if (new_range_) {
+ // Reset the reader each time we are starting fresh on a range.
+ // We can't rely on continuity because nulls above can
+ // cause discontinuities.
+ valid_bits_reader_ = MakeReader(*range);
+ }
+ child_range->start = range->start;
+ ::arrow::internal::BitRun run = valid_bits_reader_.NextRun();
+ if (!run.set) {
+ range->start += run.length;
+ RETURN_IF_ERROR(FillRepLevels(run.length, rep_level_if_null_, context));
+ RETURN_IF_ERROR(context->AppendDefLevels(run.length, def_level_if_null_));
+ run = valid_bits_reader_.NextRun();
+ }
+ if (range->Empty()) {
+ new_range_ = true;
+ return kDone;
+ }
+ child_range->end = child_range->start = range->start;
+ child_range->end += run.length;
+
+ DCHECK(!child_range->Empty());
+ range->start += child_range->Size();
+ new_range_ = false;
+ return kNext;
+ }
+
+ const uint8_t* null_bitmap_;
+ int64_t entry_offset_;
+ ::arrow::internal::BitRunReader valid_bits_reader_;
+ int16_t def_level_if_null_;
+ int16_t rep_level_if_null_;
+
+ // Whether the next invocation will be a new range.
+ bool new_range_ = true;
+};
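+
+// Illustrative walk-through of NullableNode::Run: given validity bits
+// {0, 0, 1, 1, 1}, the first call appends two null def levels (plus rep
+// levels when rep_level_if_null_ is set), hands the three valid entries to
+// the child via |child_range| and returns kNext; the following call finds
+// the range exhausted and returns kDone.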
+
+using ListNode = ListPathNode<VarRangeSelector<int32_t>>;
+using LargeListNode = ListPathNode<VarRangeSelector<int64_t>>;
+using FixedSizeListNode = ListPathNode<FixedSizedRangeSelector>;
+
+// Contains static information derived from traversing the schema.
+struct PathInfo {
+ // The vectors are expected to be the same length.
+
+ // Note index order matters here.
+ using Node = ::arrow::util::Variant<NullableTerminalNode, ListNode, LargeListNode,
+ FixedSizeListNode, NullableNode,
+ AllPresentTerminalNode, AllNullsTerminalNode>;
+
+ std::vector<Node> path;
+ std::shared_ptr<Array> primitive_array;
+ int16_t max_def_level = 0;
+ int16_t max_rep_level = 0;
+ bool has_dictionary = false;
+ bool leaf_is_nullable = false;
+};
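+
+// Illustrative example: for a nullable list<nullable int32> column whose
+// arrays contain a mix of nulls and values, |path| holds
+// {NullableNode, ListNode, NullableTerminalNode} with max_rep_level == 1 and
+// max_def_level == 3 (0 = null list, 1 = empty list, 2 = null element,
+// 3 = present element).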
+
+/// Contains logic for writing a single leaf node to parquet.
+/// This tracks the path from root to leaf.
+///
+/// |writer| will be called with the calculated definition/repetition
+/// levels once they have all been computed for root_range. It is
+/// intended to abstract away the complexity of writing the levels
+/// and values to parquet.
+Status WritePath(ElementRange root_range, PathInfo* path_info,
+ ArrowWriteContext* arrow_context,
+ MultipathLevelBuilder::CallbackFunction writer) {
+ std::vector<ElementRange> stack(path_info->path.size());
+ MultipathLevelBuilderResult builder_result;
+ builder_result.leaf_array = path_info->primitive_array;
+ builder_result.leaf_is_nullable = path_info->leaf_is_nullable;
+
+ if (path_info->max_def_level == 0) {
+ // This case only occurs when there are no nullable or repeated
+ // columns in the path from the root to leaf.
+ int64_t leaf_length = builder_result.leaf_array->length();
+ builder_result.def_rep_level_count = leaf_length;
+ builder_result.post_list_visited_elements.push_back({0, leaf_length});
+ return writer(builder_result);
+ }
+ stack[0] = root_range;
+ RETURN_NOT_OK(
+ arrow_context->def_levels_buffer->Resize(/*new_size=*/0, /*shrink_to_fit*/ false));
+ PathWriteContext context(arrow_context->memory_pool, arrow_context->def_levels_buffer);
+ // We will need at least this many entries, so reserve the space ahead of time.
+ RETURN_NOT_OK(context.def_levels.Reserve(root_range.Size()));
+ if (path_info->max_rep_level > 0) {
+ RETURN_NOT_OK(context.rep_levels.Reserve(root_range.Size()));
+ }
+
+ auto stack_base = &stack[0];
+ auto stack_position = stack_base;
+ // This is the main loop for calculating rep/def levels. The nodes
+ // in the path implement a chain-of-responsibility like pattern
+ // where each node can add some number of repetition/definition
+ // levels to PathWriteContext and also delegate to the next node
+ // in the path to add values. The values are added through each Run(...)
+ // call and the choice to delegate to the next node (or return to the
+ // previous node) is communicated by the return value of Run(...).
+ // The loop terminates after the first node indicates all values in
+ // |root_range| are processed.
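+ // Since IterationResult doubles as a signed stack offset below, kNext
+ // descends one node toward the leaf and kDone pops back to the parent;
+ // kError aborts the loop before the offset is applied.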
+ while (stack_position >= stack_base) {
+ PathInfo::Node& node = path_info->path[stack_position - stack_base];
+ struct {
+ IterationResult operator()(NullableNode* node) {
+ return node->Run(stack_position, stack_position + 1, context);
+ }
+ IterationResult operator()(ListNode* node) {
+ return node->Run(stack_position, stack_position + 1, context);
+ }
+ IterationResult operator()(NullableTerminalNode* node) {
+ return node->Run(*stack_position, context);
+ }
+ IterationResult operator()(FixedSizeListNode* node) {
+ return node->Run(stack_position, stack_position + 1, context);
+ }
+ IterationResult operator()(AllPresentTerminalNode* node) {
+ return node->Run(*stack_position, context);
+ }
+ IterationResult operator()(AllNullsTerminalNode* node) {
+ return node->Run(*stack_position, context);
+ }
+ IterationResult operator()(LargeListNode* node) {
+ return node->Run(stack_position, stack_position + 1, context);
+ }
+ ElementRange* stack_position;
+ PathWriteContext* context;
+ } visitor = {stack_position, &context};
+
+ IterationResult result = ::arrow::util::visit(visitor, &node);
+
+ if (ARROW_PREDICT_FALSE(result == kError)) {
+ DCHECK(!context.last_status.ok());
+ return context.last_status;
+ }
+ stack_position += static_cast<int>(result);
+ }
+ RETURN_NOT_OK(context.last_status);
+ builder_result.def_rep_level_count = context.def_levels.length();
+
+ if (context.rep_levels.length() > 0) {
+ // This case only occurs when there was a repeated element that needs to be
+ // processed.
+ builder_result.rep_levels = context.rep_levels.data();
+ std::swap(builder_result.post_list_visited_elements, context.visited_elements);
+ // It is possible when processing lists that all lists were empty. In that
+ // case no elements would have been added to post_list_visited_elements. By
+ // adding an empty element we avoid special-casing in downstream consumers.
+ if (builder_result.post_list_visited_elements.empty()) {
+ builder_result.post_list_visited_elements.push_back({0, 0});
+ }
+ } else {
+ builder_result.post_list_visited_elements.push_back(
+ {0, builder_result.leaf_array->length()});
+ builder_result.rep_levels = nullptr;
+ }
+
+ builder_result.def_levels = context.def_levels.data();
+ return writer(builder_result);
+}
+
+struct FixupVisitor {
+ int max_rep_level = -1;
+ int16_t rep_level_if_null = kLevelNotSet;
+
+ template <typename T>
+ void HandleListNode(T* arg) {
+ if (arg->rep_level() == max_rep_level) {
+ arg->SetLast();
+ // after the last list node we don't need to fill
+ // rep levels on null.
+ rep_level_if_null = kLevelNotSet;
+ } else {
+ rep_level_if_null = arg->rep_level();
+ }
+ }
+ void operator()(ListNode* node) { HandleListNode(node); }
+ void operator()(LargeListNode* node) { HandleListNode(node); }
+ void operator()(FixedSizeListNode* node) { HandleListNode(node); }
+
+ // For non-list intermediate nodes.
+ template <typename T>
+ void HandleIntermediateNode(T* arg) {
+ if (rep_level_if_null != kLevelNotSet) {
+ arg->SetRepLevelIfNull(rep_level_if_null);
+ }
+ }
+
+ void operator()(NullableNode* arg) { HandleIntermediateNode(arg); }
+
+ void operator()(AllNullsTerminalNode* arg) {
+ // Even though no processing happens past this point we
+ // still need to adjust it if a list occurred earlier in
+ // the path (i.e. the all-null array is nested inside a list).
+ HandleIntermediateNode(arg);
+ }
+
+ void operator()(NullableTerminalNode*) {}
+ void operator()(AllPresentTerminalNode*) {}
+};
+
+PathInfo Fixup(PathInfo info) {
+ // We only need to fixup the path if there were repeated
+ // elements on it.
+ if (info.max_rep_level == 0) {
+ return info;
+ }
+ FixupVisitor visitor;
+ visitor.max_rep_level = info.max_rep_level;
+ if (visitor.max_rep_level > 0) {
+ visitor.rep_level_if_null = 0;
+ }
+ for (size_t x = 0; x < info.path.size(); x++) {
+ ::arrow::util::visit(visitor, &info.path[x]);
+ }
+ return info;
+}
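+
+// Illustrative example: for a path {NullableNode, ListNode(rep=1),
+// ListNode(rep=2), NullableTerminalNode} with max_rep_level == 2, Fixup gives
+// the NullableNode rep_level_if_null == 0, marks the rep=2 ListNode as the
+// last list and clears rep_level_if_null for everything after it.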
+
+class PathBuilder {
+ public:
+ explicit PathBuilder(bool start_nullable) : nullable_in_parent_(start_nullable) {}
+ template <typename T>
+ void AddTerminalInfo(const T& array) {
+ info_.leaf_is_nullable = nullable_in_parent_;
+ if (nullable_in_parent_) {
+ info_.max_def_level++;
+ }
+ // We don't use null_count() because if the null_count isn't known
+ // and the array does in fact contain nulls, we will end up
+ // traversing the null bitmap twice (once here and once when calculating
+ // rep/def levels).
+ if (LazyNoNulls(array)) {
+ info_.path.emplace_back(AllPresentTerminalNode{info_.max_def_level});
+ } else if (LazyNullCount(array) == array.length()) {
+ info_.path.emplace_back(AllNullsTerminalNode(info_.max_def_level - 1));
+ } else {
+ info_.path.emplace_back(NullableTerminalNode(array.null_bitmap_data(),
+ array.offset(), info_.max_def_level));
+ }
+ info_.primitive_array = std::make_shared<T>(array.data());
+ paths_.push_back(Fixup(info_));
+ }
+
+ template <typename T>
+ ::arrow::enable_if_t<std::is_base_of<::arrow::FlatArray, T>::value, Status> Visit(
+ const T& array) {
+ AddTerminalInfo(array);
+ return Status::OK();
+ }
+
+ template <typename T>
+ ::arrow::enable_if_t<std::is_same<::arrow::ListArray, T>::value ||
+ std::is_same<::arrow::LargeListArray, T>::value,
+ Status>
+ Visit(const T& array) {
+ MaybeAddNullable(array);
+ // The extra def level is needed to represent empty lists.
+ info_.max_def_level++;
+ info_.max_rep_level++;
+ // raw_value_offsets() accounts for any slice offset.
+ ListPathNode<VarRangeSelector<typename T::offset_type>> node(
+ VarRangeSelector<typename T::offset_type>{array.raw_value_offsets()},
+ info_.max_rep_level, info_.max_def_level - 1);
+ info_.path.emplace_back(std::move(node));
+ nullable_in_parent_ = array.list_type()->value_field()->nullable();
+ return VisitInline(*array.values());
+ }
+
+ Status Visit(const ::arrow::DictionaryArray& array) {
+ // We currently only handle DictionaryArray where the dictionary is a
+ // primitive type.
+ if (array.dict_type()->value_type()->num_fields() > 0) {
+ return Status::NotImplemented(
+ "Writing DictionaryArray with nested dictionary "
+ "type not yet supported");
+ }
+ if (array.dictionary()->null_count() > 0) {
+ return Status::NotImplemented(
+ "Writing DictionaryArray with null encoded in dictionary "
+ "type not yet supported");
+ }
+ AddTerminalInfo(array);
+ return Status::OK();
+ }
+
+ void MaybeAddNullable(const Array& array) {
+ if (!nullable_in_parent_) {
+ return;
+ }
+ info_.max_def_level++;
+ // We don't use null_count() because if the null_count isn't known
+ // and the array does in fact contain nulls, we will end up
+ // traversing the null bitmap twice (once here and once when calculating
+ // rep/def levels). Because this isn't terminal this might not be
+ // the right decision for structs that share the same nullable
+ // parents.
+ if (LazyNoNulls(array)) {
+ // Don't add anything because there won't be any point checking
+ // null values for the array. There will always be at least
+ // one more array to handle nullability.
+ return;
+ }
+ if (LazyNullCount(array) == array.length()) {
+ info_.path.emplace_back(AllNullsTerminalNode(info_.max_def_level - 1));
+ return;
+ }
+ info_.path.emplace_back(
+ NullableNode(array.null_bitmap_data(), array.offset(),
+ /* def_level_if_null = */ info_.max_def_level - 1));
+ }
+
+ Status VisitInline(const Array& array);
+
+ Status Visit(const ::arrow::MapArray& array) {
+ return Visit(static_cast<const ::arrow::ListArray&>(array));
+ }
+
+ Status Visit(const ::arrow::StructArray& array) {
+ MaybeAddNullable(array);
+ PathInfo info_backup = info_;
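+ // Every child leaf shares the path prefix accumulated so far, so the
+ // backup is restored after visiting each child before descending into
+ // the next one.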
+ for (int x = 0; x < array.num_fields(); x++) {
+ nullable_in_parent_ = array.type()->field(x)->nullable();
+ RETURN_NOT_OK(VisitInline(*array.field(x)));
+ info_ = info_backup;
+ }
+ return Status::OK();
+ }
+
+ Status Visit(const ::arrow::FixedSizeListArray& array) {
+ MaybeAddNullable(array);
+ int32_t list_size = array.list_type()->list_size();
+ // Technically we could encode fixed-size lists with a two-level encoding,
+ // but since we always use three-level encoding we increment def levels as
+ // well.
+ info_.max_def_level++;
+ info_.max_rep_level++;
+ info_.path.emplace_back(FixedSizeListNode(FixedSizedRangeSelector{list_size},
+ info_.max_rep_level, info_.max_def_level));
+ nullable_in_parent_ = array.list_type()->value_field()->nullable();
+ if (array.offset() > 0) {
+ return VisitInline(*array.values()->Slice(array.value_offset(0)));
+ }
+ return VisitInline(*array.values());
+ }
+
+ Status Visit(const ::arrow::ExtensionArray& array) {
+ return VisitInline(*array.storage());
+ }
+
+#define NOT_IMPLEMENTED_VISIT(ArrowTypePrefix) \
+ Status Visit(const ::arrow::ArrowTypePrefix##Array& array) { \
+ return Status::NotImplemented("Level generation for " #ArrowTypePrefix \
+ " not supported yet"); \
+ }
+
+ // Union types aren't supported in Parquet.
+ NOT_IMPLEMENTED_VISIT(Union)
+
+#undef NOT_IMPLEMENTED_VISIT
+ std::vector<PathInfo>& paths() { return paths_; }
+
+ private:
+ PathInfo info_;
+ std::vector<PathInfo> paths_;
+ bool nullable_in_parent_;
+};
+
+Status PathBuilder::VisitInline(const Array& array) {
+ return ::arrow::VisitArrayInline(array, this);
+}
+
+#undef RETURN_IF_ERROR
+} // namespace
+
+class MultipathLevelBuilderImpl : public MultipathLevelBuilder {
+ public:
+ MultipathLevelBuilderImpl(std::shared_ptr<::arrow::ArrayData> data,
+ std::unique_ptr<PathBuilder> path_builder)
+ : root_range_{0, data->length},
+ data_(std::move(data)),
+ path_builder_(std::move(path_builder)) {}
+
+ int GetLeafCount() const override {
+ return static_cast<int>(path_builder_->paths().size());
+ }
+
+ ::arrow::Status Write(int leaf_index, ArrowWriteContext* context,
+ CallbackFunction write_leaf_callback) override {
+ DCHECK_GE(leaf_index, 0);
+ DCHECK_LT(leaf_index, GetLeafCount());
+ return WritePath(root_range_, &path_builder_->paths()[leaf_index], context,
+ std::move(write_leaf_callback));
+ }
+
+ private:
+ ElementRange root_range_;
+ // Reference holder to ensure the data stays valid.
+ std::shared_ptr<::arrow::ArrayData> data_;
+ std::unique_ptr<PathBuilder> path_builder_;
+};
+
+// static
+::arrow::Result<std::unique_ptr<MultipathLevelBuilder>> MultipathLevelBuilder::Make(
+ const ::arrow::Array& array, bool array_field_nullable) {
+ auto constructor = ::arrow::internal::make_unique<PathBuilder>(array_field_nullable);
+ RETURN_NOT_OK(VisitArrayInline(array, constructor.get()));
+ return ::arrow::internal::make_unique<MultipathLevelBuilderImpl>(
+ array.data(), std::move(constructor));
+}
+
+// static
+Status MultipathLevelBuilder::Write(const Array& array, bool array_field_nullable,
+ ArrowWriteContext* context,
+ MultipathLevelBuilder::CallbackFunction callback) {
+ ARROW_ASSIGN_OR_RAISE(std::unique_ptr<MultipathLevelBuilder> builder,
+ MultipathLevelBuilder::Make(array, array_field_nullable));
+ for (int leaf_idx = 0; leaf_idx < builder->GetLeafCount(); leaf_idx++) {
+ RETURN_NOT_OK(builder->Write(leaf_idx, context, callback));
+ }
+ return Status::OK();
+}
+
+} // namespace arrow
+} // namespace parquet
diff --git a/contrib/libs/apache/arrow/cpp/src/parquet/arrow/path_internal.h b/contrib/libs/apache/arrow/cpp/src/parquet/arrow/path_internal.h
index e5af186dc4f..c5b7fdfdac3 100644
--- a/contrib/libs/apache/arrow/cpp/src/parquet/arrow/path_internal.h
+++ b/contrib/libs/apache/arrow/cpp/src/parquet/arrow/path_internal.h
@@ -1,155 +1,155 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-#pragma once
-
-#include <cstdint>
-#include <functional>
-#include <memory>
-#include <vector>
-
-#include "arrow/result.h"
-#include "arrow/status.h"
-
-#include "parquet/platform.h"
-
-namespace arrow {
-
-class Array;
-
-} // namespace arrow
-
-namespace parquet {
-
-struct ArrowWriteContext;
-
-namespace arrow {
-
-// This file contains internal implementation details and should not be considered
-// part of the public API.
-
-// The MultipathLevelBuilder is intended to fully support all Arrow nested types that
-// map to parquet types (i.e. everything but unions).
-
-/// \brief Half open range of elements in an array.
-struct ElementRange {
- /// Lower bound of range (inclusive)
- int64_t start;
- /// Upper bound of range (exclusive)
- int64_t end;
-
- bool Empty() const { return start == end; }
-
- int64_t Size() const { return end - start; }
-};
-
-/// \brief Result for a single leaf array when running the builder on
-/// its root.
-struct MultipathLevelBuilderResult {
- /// \brief The Array containing only the values to write (after all nesting has
- /// been processed).
- ///
- /// No additional processing is done on this array (it is copied as is when
- /// visited via a DFS).
- std::shared_ptr<::arrow::Array> leaf_array;
-
- /// \brief Might be null.
- const int16_t* def_levels = nullptr;
-
- /// \brief Might be null.
- const int16_t* rep_levels = nullptr;
-
- /// \brief Number of items (int16_t) contained in def/rep_levels when present.
- int64_t def_rep_level_count = 0;
-
- /// \brief Contains the element ranges that must be visited on the
- /// descendants of the final list ancestor for any leaf node.
- ///
- /// The algorithm will attempt to consolidate visited ranges into
- /// the smallest number possible.
- ///
- /// This data is necessary to pass along because after producing
- /// def-rep levels for each leaf array it is impossible to determine
- /// which values have to be sent to parquet when a null list value
- /// in a nullable ListArray is non-empty.
- ///
- /// This allows the parquet writer to determine which values ultimately
- /// need to be written.
- std::vector<ElementRange> post_list_visited_elements;
-
- /// Whether the leaf array is nullable.
- bool leaf_is_nullable;
-};
-
-/// \brief Logic for being able to write out nesting (rep/def level) data that is
-/// needed for writing to parquet.
-class PARQUET_EXPORT MultipathLevelBuilder {
- public:
- /// \brief A callback function that will receive results from the call to
- /// Write(...) below. The MultipathLevelBuilderResult passed in will
- /// only remain valid for the duration of the function call (i.e. storing
- /// it and relying on its data to be consistent afterwards will result in
- /// undefined behavior).
- using CallbackFunction =
- std::function<::arrow::Status(const MultipathLevelBuilderResult&)>;
-
- /// \brief Determine rep/def level information for the array.
- ///
- /// The callback will be invoked for each leaf Array that is a
- /// descendant of array. Each leaf array is processed in
- /// depth-first traversal order.
- ///
- /// \param[in] array The array to process.
- /// \param[in] array_field_nullable Whether the algorithm should consider
- /// the array column as nullable (as determined by its type's parent
- /// field).
- /// \param[in, out] context for use when allocating memory, etc.
- /// \param[out] write_leaf_callback Callback to receive results.
- /// There will be one call to the write_leaf_callback for each leaf node.
- static ::arrow::Status Write(const ::arrow::Array& array, bool array_field_nullable,
- ArrowWriteContext* context,
- CallbackFunction write_leaf_callback);
-
- /// \brief Construct a new instance of the builder.
- ///
- /// \param[in] array The array to process.
- /// \param[in] array_field_nullable Whether the algorithm should consider
- /// the array column as nullable (as determined by its type's parent
- /// field).
- static ::arrow::Result<std::unique_ptr<MultipathLevelBuilder>> Make(
- const ::arrow::Array& array, bool array_field_nullable);
-
- virtual ~MultipathLevelBuilder() = default;
-
- /// \brief Returns the number of leaf columns that need to be written
- /// to Parquet.
- virtual int GetLeafCount() const = 0;
-
- /// \brief Calls write_leaf_callback with the MultipathLevelBuilderResult corresponding
- /// to |leaf_index|.
- ///
- /// \param[in] leaf_index The index of the leaf column to write. Must be in the range
- /// [0, GetLeafCount()).
- /// \param[in, out] context for use when allocating memory, etc.
- /// \param[out] write_leaf_callback Callback to receive the result.
- virtual ::arrow::Status Write(int leaf_index, ArrowWriteContext* context,
- CallbackFunction write_leaf_callback) = 0;
-};
-
-} // namespace arrow
-} // namespace parquet
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+
+#include <cstdint>
+#include <functional>
+#include <memory>
+#include <vector>
+
+#include "arrow/result.h"
+#include "arrow/status.h"
+
+#include "parquet/platform.h"
+
+namespace arrow {
+
+class Array;
+
+} // namespace arrow
+
+namespace parquet {
+
+struct ArrowWriteContext;
+
+namespace arrow {
+
+// This file contains internal implementation details and should not be considered
+// part of the public API.
+
+// The MultipathLevelBuilder is intended to fully support all Arrow nested types that
+// map to parquet types (i.e. everything but unions).
+
+/// \brief Half open range of elements in an array.
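+///
+/// For example, ElementRange{3, 7} covers elements 3, 4, 5 and 6
+/// (Size() == 4), while ElementRange{3, 3} is empty.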
+struct ElementRange {
+ /// Lower bound of range (inclusive)
+ int64_t start;
+ /// Upper bound of range (exclusive)
+ int64_t end;
+
+ bool Empty() const { return start == end; }
+
+ int64_t Size() const { return end - start; }
+};
+
+/// \brief Result for a single leaf array when running the builder on
+/// its root.
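+///
+/// As a qualitative example, for a nullable list column the rep_levels mark
+/// where each list starts, the def_levels distinguish null lists, empty lists
+/// and present values, and leaf_array holds only the values themselves.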
+struct MultipathLevelBuilderResult {
+ /// \brief The Array containing only the values to write (after all nesting has
+ /// been processed).
+ ///
+ /// No additional processing is done on this array (it is copied as is when
+ /// visited via a DFS).
+ std::shared_ptr<::arrow::Array> leaf_array;
+
+ /// \brief Might be null.
+ const int16_t* def_levels = nullptr;
+
+ /// \brief Might be null.
+ const int16_t* rep_levels = nullptr;
+
+ /// \brief Number of items (int16_t) contained in def/rep_levels when present.
+ int64_t def_rep_level_count = 0;
+
+ /// \brief Contains the element ranges that must be visited on the
+ /// descendants of the final list ancestor for any leaf node.
+ ///
+ /// The algorithm will attempt to consolidate visited ranges into
+ /// the smallest number possible.
+ ///
+ /// This data is necessary to pass along because after producing
+ /// def-rep levels for each leaf array it is impossible to determine
+ /// which values have to be sent to parquet when a null list value
+ /// in a nullable ListArray is non-empty.
+ ///
+ /// This allows the parquet writer to determine which values ultimately
+ /// need to be written.
+ std::vector<ElementRange> post_list_visited_elements;
+
+ /// Whether the leaf array is nullable.
+ bool leaf_is_nullable;
+};
+
+/// \brief Logic for being able to write out nesting (rep/def level) data that is
+/// needed for writing to parquet.
+class PARQUET_EXPORT MultipathLevelBuilder {
+ public:
+ /// \brief A callback function that will receive results from the call to
+ /// Write(...) below. The MultipathLevelBuilderResult passed in will
+ /// only remain valid for the duration of the function call (i.e. storing
+ /// it and relying on its data to be consistent afterwards will result in
+ /// undefined behavior).
+ using CallbackFunction =
+ std::function<::arrow::Status(const MultipathLevelBuilderResult&)>;
+
+ /// \brief Determine rep/def level information for the array.
+ ///
+ /// The callback will be invoked for each leaf Array that is a
+ /// descendant of array. Each leaf array is processed in
+ /// depth-first traversal order.
+ ///
+ /// \param[in] array The array to process.
+ /// \param[in] array_field_nullable Whether the algorithm should consider
+ /// the array column as nullable (as determined by its type's parent
+ /// field).
+ /// \param[in, out] context for use when allocating memory, etc.
+ /// \param[out] write_leaf_callback Callback to receive results.
+ /// There will be one call to the write_leaf_callback for each leaf node.
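+ ///
+ /// A minimal usage sketch (WriteLeaf is a hypothetical caller-supplied
+ /// function):
+ ///
+ /// RETURN_NOT_OK(MultipathLevelBuilder::Write(
+ /// array, /*array_field_nullable=*/true, context,
+ /// [&](const MultipathLevelBuilderResult& result) {
+ /// // |result| is only valid for the duration of this call.
+ /// return WriteLeaf(result);
+ /// }));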
+ static ::arrow::Status Write(const ::arrow::Array& array, bool array_field_nullable,
+ ArrowWriteContext* context,
+ CallbackFunction write_leaf_callback);
+
+ /// \brief Construct a new instance of the builder.
+ ///
+ /// \param[in] array The array to process.
+ /// \param[in] array_field_nullable Whether the algorithm should consider
+ /// the array column as nullable (as determined by its type's parent
+ /// field).
+ static ::arrow::Result<std::unique_ptr<MultipathLevelBuilder>> Make(
+ const ::arrow::Array& array, bool array_field_nullable);
+
+ virtual ~MultipathLevelBuilder() = default;
+
+ /// \brief Returns the number of leaf columns that need to be written
+ /// to Parquet.
+ virtual int GetLeafCount() const = 0;
+
+ /// \brief Calls write_leaf_callback with the MultipathLevelBuilderResult corresponding
+ /// to |leaf_index|.
+ ///
+ /// \param[in] leaf_index The index of the leaf column to write. Must be in the range
+ /// [0, GetLeafCount()).
+ /// \param[in, out] context for use when allocating memory, etc.
+ /// \param[out] write_leaf_callback Callback to receive the result.
+ virtual ::arrow::Status Write(int leaf_index, ArrowWriteContext* context,
+ CallbackFunction write_leaf_callback) = 0;
+};
+
+} // namespace arrow
+} // namespace parquet
diff --git a/contrib/libs/apache/arrow/cpp/src/parquet/arrow/reader.cc b/contrib/libs/apache/arrow/cpp/src/parquet/arrow/reader.cc
index 7f284abdee0..4f5f79c964a 100644
--- a/contrib/libs/apache/arrow/cpp/src/parquet/arrow/reader.cc
+++ b/contrib/libs/apache/arrow/cpp/src/parquet/arrow/reader.cc
@@ -1,1248 +1,1248 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-#include "parquet/arrow/reader.h"
-
-#include <algorithm>
-#include <cstring>
-#include <unordered_set>
-#include <utility>
-#include <vector>
-
-#include "arrow/array.h"
-#include "arrow/buffer.h"
-#include "arrow/extension_type.h"
-#include "arrow/io/memory.h"
-#include "arrow/record_batch.h"
-#include "arrow/table.h"
-#include "arrow/type.h"
-#include "arrow/util/async_generator.h"
-#include "arrow/util/bit_util.h"
-#include "arrow/util/future.h"
-#include "arrow/util/iterator.h"
-#include "arrow/util/logging.h"
-#include "arrow/util/make_unique.h"
-#include "arrow/util/parallel.h"
-#include "arrow/util/range.h"
-#include "parquet/arrow/reader_internal.h"
-#include "parquet/column_reader.h"
-#include "parquet/exception.h"
-#include "parquet/file_reader.h"
-#include "parquet/metadata.h"
-#include "parquet/properties.h"
-#include "parquet/schema.h"
-
-using arrow::Array;
-using arrow::ArrayData;
-using arrow::BooleanArray;
-using arrow::ChunkedArray;
-using arrow::DataType;
-using arrow::ExtensionType;
-using arrow::Field;
-using arrow::Future;
-using arrow::Int32Array;
-using arrow::ListArray;
-using arrow::MemoryPool;
-using arrow::RecordBatchReader;
-using arrow::ResizableBuffer;
-using arrow::Status;
-using arrow::StructArray;
-using arrow::Table;
-using arrow::TimestampArray;
-
-using arrow::internal::checked_cast;
-using arrow::internal::Iota;
-
-// Help reduce verbosity
-using ParquetReader = parquet::ParquetFileReader;
-
-using parquet::internal::RecordReader;
-
-namespace BitUtil = arrow::BitUtil;
-
-namespace parquet {
-namespace arrow {
-namespace {
-
-::arrow::Result<std::shared_ptr<ArrayData>> ChunksToSingle(const ChunkedArray& chunked) {
- switch (chunked.num_chunks()) {
- case 0: {
- ARROW_ASSIGN_OR_RAISE(std::shared_ptr<Array> array,
- ::arrow::MakeArrayOfNull(chunked.type(), 0));
- return array->data();
- }
- case 1:
- return chunked.chunk(0)->data();
- default:
- // ARROW-3762(wesm): If item reader yields a chunked array, we reject as
- // this is not yet implemented
- return Status::NotImplemented(
- "Nested data conversions not implemented for chunked array outputs");
- }
-}
-
-} // namespace
-
-class ColumnReaderImpl : public ColumnReader {
- public:
- virtual Status GetDefLevels(const int16_t** data, int64_t* length) = 0;
- virtual Status GetRepLevels(const int16_t** data, int64_t* length) = 0;
- virtual const std::shared_ptr<Field> field() = 0;
-
- ::arrow::Status NextBatch(int64_t batch_size,
- std::shared_ptr<::arrow::ChunkedArray>* out) final {
- RETURN_NOT_OK(LoadBatch(batch_size));
- RETURN_NOT_OK(BuildArray(batch_size, out));
- for (int x = 0; x < (*out)->num_chunks(); x++) {
- RETURN_NOT_OK((*out)->chunk(x)->Validate());
- }
- return Status::OK();
- }
-
- virtual ::arrow::Status LoadBatch(int64_t num_records) = 0;
-
- virtual ::arrow::Status BuildArray(int64_t length_upper_bound,
- std::shared_ptr<::arrow::ChunkedArray>* out) = 0;
- virtual bool IsOrHasRepeatedChild() const = 0;
-};
-
-namespace {
-
-std::shared_ptr<std::unordered_set<int>> VectorToSharedSet(
- const std::vector<int>& values) {
- std::shared_ptr<std::unordered_set<int>> result(new std::unordered_set<int>());
- result->insert(values.begin(), values.end());
- return result;
-}
-
-// Forward declaration
-Status GetReader(const SchemaField& field, const std::shared_ptr<ReaderContext>& context,
- std::unique_ptr<ColumnReaderImpl>* out);
-
-// ----------------------------------------------------------------------
-// FileReaderImpl forward declaration
-
-class FileReaderImpl : public FileReader {
- public:
- FileReaderImpl(MemoryPool* pool, std::unique_ptr<ParquetFileReader> reader,
- ArrowReaderProperties properties)
- : pool_(pool),
- reader_(std::move(reader)),
- reader_properties_(std::move(properties)) {}
-
- Status Init() {
- return SchemaManifest::Make(reader_->metadata()->schema(),
- reader_->metadata()->key_value_metadata(),
- reader_properties_, &manifest_);
- }
-
- FileColumnIteratorFactory SomeRowGroupsFactory(std::vector<int> row_groups) {
- return [row_groups](int i, ParquetFileReader* reader) {
- return new FileColumnIterator(i, reader, row_groups);
- };
- }
-
- FileColumnIteratorFactory AllRowGroupsFactory() {
- return SomeRowGroupsFactory(Iota(reader_->metadata()->num_row_groups()));
- }
-
- Status BoundsCheckColumn(int column) {
- if (column < 0 || column >= this->num_columns()) {
- return Status::Invalid("Column index out of bounds (got ", column,
- ", should be "
- "between 0 and ",
- this->num_columns() - 1, ")");
- }
- return Status::OK();
- }
-
- Status BoundsCheckRowGroup(int row_group) {
- // row group indices check
- if (row_group < 0 || row_group >= num_row_groups()) {
- return Status::Invalid("Some index in row_group_indices is ", row_group,
- ", which is either < 0 or >= num_row_groups(",
- num_row_groups(), ")");
- }
- return Status::OK();
- }
-
- Status BoundsCheck(const std::vector<int>& row_groups,
- const std::vector<int>& column_indices) {
- for (int i : row_groups) {
- RETURN_NOT_OK(BoundsCheckRowGroup(i));
- }
- for (int i : column_indices) {
- RETURN_NOT_OK(BoundsCheckColumn(i));
- }
- return Status::OK();
- }
-
- std::shared_ptr<RowGroupReader> RowGroup(int row_group_index) override;
-
- Status ReadTable(const std::vector<int>& indices,
- std::shared_ptr<Table>* out) override {
- return ReadRowGroups(Iota(reader_->metadata()->num_row_groups()), indices, out);
- }
-
- Status GetFieldReader(int i,
- const std::shared_ptr<std::unordered_set<int>>& included_leaves,
- const std::vector<int>& row_groups,
- std::unique_ptr<ColumnReaderImpl>* out) {
- auto ctx = std::make_shared<ReaderContext>();
- ctx->reader = reader_.get();
- ctx->pool = pool_;
- ctx->iterator_factory = SomeRowGroupsFactory(row_groups);
- ctx->filter_leaves = true;
- ctx->included_leaves = included_leaves;
- return GetReader(manifest_.schema_fields[i], ctx, out);
- }
-
- Status GetFieldReaders(const std::vector<int>& column_indices,
- const std::vector<int>& row_groups,
- std::vector<std::shared_ptr<ColumnReaderImpl>>* out,
- std::shared_ptr<::arrow::Schema>* out_schema) {
- // We only need to read schema fields which have columns indicated
- // in the indices vector
- ARROW_ASSIGN_OR_RAISE(std::vector<int> field_indices,
- manifest_.GetFieldIndices(column_indices));
-
- auto included_leaves = VectorToSharedSet(column_indices);
-
- out->resize(field_indices.size());
- ::arrow::FieldVector out_fields(field_indices.size());
- for (size_t i = 0; i < out->size(); ++i) {
- std::unique_ptr<ColumnReaderImpl> reader;
- RETURN_NOT_OK(
- GetFieldReader(field_indices[i], included_leaves, row_groups, &reader));
-
- out_fields[i] = reader->field();
- out->at(i) = std::move(reader);
- }
-
- *out_schema = ::arrow::schema(std::move(out_fields), manifest_.schema_metadata);
- return Status::OK();
- }
-
- Status GetColumn(int i, FileColumnIteratorFactory iterator_factory,
- std::unique_ptr<ColumnReader>* out);
-
- Status GetColumn(int i, std::unique_ptr<ColumnReader>* out) override {
- return GetColumn(i, AllRowGroupsFactory(), out);
- }
-
- Status GetSchema(std::shared_ptr<::arrow::Schema>* out) override {
- return FromParquetSchema(reader_->metadata()->schema(), reader_properties_,
- reader_->metadata()->key_value_metadata(), out);
- }
-
- Status ReadSchemaField(int i, std::shared_ptr<ChunkedArray>* out) override {
- auto included_leaves = VectorToSharedSet(Iota(reader_->metadata()->num_columns()));
- std::vector<int> row_groups = Iota(reader_->metadata()->num_row_groups());
-
- std::unique_ptr<ColumnReaderImpl> reader;
- RETURN_NOT_OK(GetFieldReader(i, included_leaves, row_groups, &reader));
-
- return ReadColumn(i, row_groups, reader.get(), out);
- }
-
- Status ReadColumn(int i, const std::vector<int>& row_groups, ColumnReader* reader,
- std::shared_ptr<ChunkedArray>* out) {
- BEGIN_PARQUET_CATCH_EXCEPTIONS
- // TODO(wesm): This calculation doesn't make much sense when we have repeated
- // schema nodes
- int64_t records_to_read = 0;
- for (auto row_group : row_groups) {
- // Can throw exception
- records_to_read +=
- reader_->metadata()->RowGroup(row_group)->ColumnChunk(i)->num_values();
- }
- return reader->NextBatch(records_to_read, out);
- END_PARQUET_CATCH_EXCEPTIONS
- }
-
- Status ReadColumn(int i, const std::vector<int>& row_groups,
- std::shared_ptr<ChunkedArray>* out) {
- std::unique_ptr<ColumnReader> flat_column_reader;
- RETURN_NOT_OK(GetColumn(i, SomeRowGroupsFactory(row_groups), &flat_column_reader));
- return ReadColumn(i, row_groups, flat_column_reader.get(), out);
- }
-
- Status ReadColumn(int i, std::shared_ptr<ChunkedArray>* out) override {
- return ReadColumn(i, Iota(reader_->metadata()->num_row_groups()), out);
- }
-
- Status ReadTable(std::shared_ptr<Table>* table) override {
- return ReadTable(Iota(reader_->metadata()->num_columns()), table);
- }
-
- Status ReadRowGroups(const std::vector<int>& row_groups,
- const std::vector<int>& indices,
- std::shared_ptr<Table>* table) override;
-
- // Helper method used by ReadRowGroups - read the given row groups/columns, skipping
- // bounds checks and pre-buffering. Takes a shared_ptr to self to keep the reader
- // alive in async contexts.
- Future<std::shared_ptr<Table>> DecodeRowGroups(
- std::shared_ptr<FileReaderImpl> self, const std::vector<int>& row_groups,
- const std::vector<int>& column_indices, ::arrow::internal::Executor* cpu_executor);
-
- Status ReadRowGroups(const std::vector<int>& row_groups,
- std::shared_ptr<Table>* table) override {
- return ReadRowGroups(row_groups, Iota(reader_->metadata()->num_columns()), table);
- }
-
- Status ReadRowGroup(int row_group_index, const std::vector<int>& column_indices,
- std::shared_ptr<Table>* out) override {
- return ReadRowGroups({row_group_index}, column_indices, out);
- }
-
- Status ReadRowGroup(int i, std::shared_ptr<Table>* table) override {
- return ReadRowGroup(i, Iota(reader_->metadata()->num_columns()), table);
- }
-
- Status GetRecordBatchReader(const std::vector<int>& row_group_indices,
- const std::vector<int>& column_indices,
- std::unique_ptr<RecordBatchReader>* out) override;
-
- Status GetRecordBatchReader(const std::vector<int>& row_group_indices,
- std::unique_ptr<RecordBatchReader>* out) override {
- return GetRecordBatchReader(row_group_indices,
- Iota(reader_->metadata()->num_columns()), out);
- }
-
- ::arrow::Result<::arrow::AsyncGenerator<std::shared_ptr<::arrow::RecordBatch>>>
- GetRecordBatchGenerator(std::shared_ptr<FileReader> reader,
- const std::vector<int> row_group_indices,
- const std::vector<int> column_indices,
- ::arrow::internal::Executor* cpu_executor) override;
-
- int num_columns() const { return reader_->metadata()->num_columns(); }
-
- ParquetFileReader* parquet_reader() const override { return reader_.get(); }
-
- int num_row_groups() const override { return reader_->metadata()->num_row_groups(); }
-
- void set_use_threads(bool use_threads) override {
- reader_properties_.set_use_threads(use_threads);
- }
-
- void set_batch_size(int64_t batch_size) override {
- reader_properties_.set_batch_size(batch_size);
- }
-
- const ArrowReaderProperties& properties() const override { return reader_properties_; }
-
- const SchemaManifest& manifest() const override { return manifest_; }
-
- Status ScanContents(std::vector<int> columns, const int32_t column_batch_size,
- int64_t* num_rows) override {
- BEGIN_PARQUET_CATCH_EXCEPTIONS
- *num_rows = ScanFileContents(columns, column_batch_size, reader_.get());
- return Status::OK();
- END_PARQUET_CATCH_EXCEPTIONS
- }
-
- MemoryPool* pool_;
- std::unique_ptr<ParquetFileReader> reader_;
- ArrowReaderProperties reader_properties_;
-
- SchemaManifest manifest_;
-};
-
-class RowGroupRecordBatchReader : public ::arrow::RecordBatchReader {
- public:
- RowGroupRecordBatchReader(::arrow::RecordBatchIterator batches,
- std::shared_ptr<::arrow::Schema> schema)
- : batches_(std::move(batches)), schema_(std::move(schema)) {}
-
- ~RowGroupRecordBatchReader() override {}
-
- Status ReadNext(std::shared_ptr<::arrow::RecordBatch>* out) override {
- return batches_.Next().Value(out);
- }
-
- std::shared_ptr<::arrow::Schema> schema() const override { return schema_; }
-
- private:
- ::arrow::Iterator<std::shared_ptr<::arrow::RecordBatch>> batches_;
- std::shared_ptr<::arrow::Schema> schema_;
-};
-
-class ColumnChunkReaderImpl : public ColumnChunkReader {
- public:
- ColumnChunkReaderImpl(FileReaderImpl* impl, int row_group_index, int column_index)
- : impl_(impl), column_index_(column_index), row_group_index_(row_group_index) {}
-
- Status Read(std::shared_ptr<::arrow::ChunkedArray>* out) override {
- return impl_->ReadColumn(column_index_, {row_group_index_}, out);
- }
-
- private:
- FileReaderImpl* impl_;
- int column_index_;
- int row_group_index_;
-};
-
-class RowGroupReaderImpl : public RowGroupReader {
- public:
- RowGroupReaderImpl(FileReaderImpl* impl, int row_group_index)
- : impl_(impl), row_group_index_(row_group_index) {}
-
- std::shared_ptr<ColumnChunkReader> Column(int column_index) override {
- return std::shared_ptr<ColumnChunkReader>(
- new ColumnChunkReaderImpl(impl_, row_group_index_, column_index));
- }
-
- Status ReadTable(const std::vector<int>& column_indices,
- std::shared_ptr<::arrow::Table>* out) override {
- return impl_->ReadRowGroup(row_group_index_, column_indices, out);
- }
-
- Status ReadTable(std::shared_ptr<::arrow::Table>* out) override {
- return impl_->ReadRowGroup(row_group_index_, out);
- }
-
- private:
- FileReaderImpl* impl_;
- int row_group_index_;
-};
-
-// ----------------------------------------------------------------------
-// Column reader implementations
-
-// Leaf reader is for primitive arrays and primitive children of nested arrays
-class LeafReader : public ColumnReaderImpl {
- public:
- LeafReader(std::shared_ptr<ReaderContext> ctx, std::shared_ptr<Field> field,
- std::unique_ptr<FileColumnIterator> input,
- ::parquet::internal::LevelInfo leaf_info)
- : ctx_(std::move(ctx)),
- field_(std::move(field)),
- input_(std::move(input)),
- descr_(input_->descr()) {
- record_reader_ = RecordReader::Make(
- descr_, leaf_info, ctx_->pool, field_->type()->id() == ::arrow::Type::DICTIONARY);
- NextRowGroup();
- }
-
- Status GetDefLevels(const int16_t** data, int64_t* length) final {
- *data = record_reader_->def_levels();
- *length = record_reader_->levels_position();
- return Status::OK();
- }
-
- Status GetRepLevels(const int16_t** data, int64_t* length) final {
- *data = record_reader_->rep_levels();
- *length = record_reader_->levels_position();
- return Status::OK();
- }
-
- bool IsOrHasRepeatedChild() const final { return false; }
-
- Status LoadBatch(int64_t records_to_read) final {
- BEGIN_PARQUET_CATCH_EXCEPTIONS
- out_ = nullptr;
- record_reader_->Reset();
- // Pre-allocation gives much better performance for flat columns
- record_reader_->Reserve(records_to_read);
- while (records_to_read > 0) {
- if (!record_reader_->HasMoreData()) {
- break;
- }
- int64_t records_read = record_reader_->ReadRecords(records_to_read);
- records_to_read -= records_read;
- if (records_read == 0) {
- NextRowGroup();
- }
- }
- RETURN_NOT_OK(TransferColumnData(record_reader_.get(), field_->type(), descr_,
- ctx_->pool, &out_));
- return Status::OK();
- END_PARQUET_CATCH_EXCEPTIONS
- }
-
- ::arrow::Status BuildArray(int64_t length_upper_bound,
- std::shared_ptr<::arrow::ChunkedArray>* out) final {
- *out = out_;
- return Status::OK();
- }
-
- const std::shared_ptr<Field> field() override { return field_; }
-
- private:
- std::shared_ptr<ChunkedArray> out_;
- void NextRowGroup() {
- std::unique_ptr<PageReader> page_reader = input_->NextChunk();
- record_reader_->SetPageReader(std::move(page_reader));
- }
-
- std::shared_ptr<ReaderContext> ctx_;
- std::shared_ptr<Field> field_;
- std::unique_ptr<FileColumnIterator> input_;
- const ColumnDescriptor* descr_;
- std::shared_ptr<RecordReader> record_reader_;
-};
-
-// Column reader for extension arrays
-class ExtensionReader : public ColumnReaderImpl {
- public:
- ExtensionReader(std::shared_ptr<Field> field,
- std::unique_ptr<ColumnReaderImpl> storage_reader)
- : field_(std::move(field)), storage_reader_(std::move(storage_reader)) {}
-
- Status GetDefLevels(const int16_t** data, int64_t* length) override {
- return storage_reader_->GetDefLevels(data, length);
- }
-
- Status GetRepLevels(const int16_t** data, int64_t* length) override {
- return storage_reader_->GetRepLevels(data, length);
- }
-
- Status LoadBatch(int64_t number_of_records) final {
- return storage_reader_->LoadBatch(number_of_records);
- }
-
- Status BuildArray(int64_t length_upper_bound,
- std::shared_ptr<ChunkedArray>* out) override {
- std::shared_ptr<ChunkedArray> storage;
- RETURN_NOT_OK(storage_reader_->BuildArray(length_upper_bound, &storage));
- *out = ExtensionType::WrapArray(field_->type(), storage);
- return Status::OK();
- }
-
- bool IsOrHasRepeatedChild() const final {
- return storage_reader_->IsOrHasRepeatedChild();
- }
-
- const std::shared_ptr<Field> field() override { return field_; }
-
- private:
- std::shared_ptr<Field> field_;
- std::unique_ptr<ColumnReaderImpl> storage_reader_;
-};
-
-template <typename IndexType>
-class ListReader : public ColumnReaderImpl {
- public:
- ListReader(std::shared_ptr<ReaderContext> ctx, std::shared_ptr<Field> field,
- ::parquet::internal::LevelInfo level_info,
- std::unique_ptr<ColumnReaderImpl> child_reader)
- : ctx_(std::move(ctx)),
- field_(std::move(field)),
- level_info_(level_info),
- item_reader_(std::move(child_reader)) {}
-
- Status GetDefLevels(const int16_t** data, int64_t* length) override {
- return item_reader_->GetDefLevels(data, length);
- }
-
- Status GetRepLevels(const int16_t** data, int64_t* length) override {
- return item_reader_->GetRepLevels(data, length);
- }
-
- bool IsOrHasRepeatedChild() const final { return true; }
-
- Status LoadBatch(int64_t number_of_records) final {
- return item_reader_->LoadBatch(number_of_records);
- }
-
- virtual ::arrow::Result<std::shared_ptr<ChunkedArray>> AssembleArray(
- std::shared_ptr<ArrayData> data) {
- if (field_->type()->id() == ::arrow::Type::MAP) {
- // Error out if data is not map-compliant instead of aborting in MakeArray below
- RETURN_NOT_OK(::arrow::MapArray::ValidateChildData(data->child_data));
- }
- std::shared_ptr<Array> result = ::arrow::MakeArray(data);
- return std::make_shared<ChunkedArray>(result);
- }
-
- Status BuildArray(int64_t length_upper_bound,
- std::shared_ptr<ChunkedArray>* out) override {
- const int16_t* def_levels;
- const int16_t* rep_levels;
- int64_t num_levels;
- RETURN_NOT_OK(item_reader_->GetDefLevels(&def_levels, &num_levels));
- RETURN_NOT_OK(item_reader_->GetRepLevels(&rep_levels, &num_levels));
-
- std::shared_ptr<ResizableBuffer> validity_buffer;
- ::parquet::internal::ValidityBitmapInputOutput validity_io;
- validity_io.values_read_upper_bound = length_upper_bound;
- if (field_->nullable()) {
- ARROW_ASSIGN_OR_RAISE(
- validity_buffer,
- AllocateResizableBuffer(BitUtil::BytesForBits(length_upper_bound), ctx_->pool));
- validity_io.valid_bits = validity_buffer->mutable_data();
- }
- ARROW_ASSIGN_OR_RAISE(
- std::shared_ptr<ResizableBuffer> offsets_buffer,
- AllocateResizableBuffer(
- sizeof(IndexType) * std::max(int64_t{1}, length_upper_bound + 1),
- ctx_->pool));
- // Ensure zero initialization in case we have reached a zero length list (and
- // because first entry is always zero).
- IndexType* offset_data = reinterpret_cast<IndexType*>(offsets_buffer->mutable_data());
- offset_data[0] = 0;
- BEGIN_PARQUET_CATCH_EXCEPTIONS
- ::parquet::internal::DefRepLevelsToList(def_levels, rep_levels, num_levels,
- level_info_, &validity_io, offset_data);
- END_PARQUET_CATCH_EXCEPTIONS
-
- RETURN_NOT_OK(item_reader_->BuildArray(offset_data[validity_io.values_read], out));
-
- // Resize to actual number of elements returned.
- RETURN_NOT_OK(
- offsets_buffer->Resize((validity_io.values_read + 1) * sizeof(IndexType)));
- if (validity_buffer != nullptr) {
- RETURN_NOT_OK(
- validity_buffer->Resize(BitUtil::BytesForBits(validity_io.values_read)));
- validity_buffer->ZeroPadding();
- }
- ARROW_ASSIGN_OR_RAISE(std::shared_ptr<ArrayData> item_chunk, ChunksToSingle(**out));
-
- std::vector<std::shared_ptr<Buffer>> buffers{
- validity_io.null_count > 0 ? validity_buffer : nullptr, offsets_buffer};
- auto data = std::make_shared<ArrayData>(
- field_->type(),
- /*length=*/validity_io.values_read, std::move(buffers),
- std::vector<std::shared_ptr<ArrayData>>{item_chunk}, validity_io.null_count);
-
- ARROW_ASSIGN_OR_RAISE(*out, AssembleArray(std::move(data)));
- return Status::OK();
- }
-
- const std::shared_ptr<Field> field() override { return field_; }
-
- private:
- std::shared_ptr<ReaderContext> ctx_;
- std::shared_ptr<Field> field_;
- ::parquet::internal::LevelInfo level_info_;
- std::unique_ptr<ColumnReaderImpl> item_reader_;
-};
-
-class PARQUET_NO_EXPORT FixedSizeListReader : public ListReader<int32_t> {
- public:
- FixedSizeListReader(std::shared_ptr<ReaderContext> ctx, std::shared_ptr<Field> field,
- ::parquet::internal::LevelInfo level_info,
- std::unique_ptr<ColumnReaderImpl> child_reader)
- : ListReader(std::move(ctx), std::move(field), level_info,
- std::move(child_reader)) {}
- ::arrow::Result<std::shared_ptr<ChunkedArray>> AssembleArray(
- std::shared_ptr<ArrayData> data) final {
- DCHECK_EQ(data->buffers.size(), 2);
- DCHECK_EQ(field()->type()->id(), ::arrow::Type::FIXED_SIZE_LIST);
- const auto& type = checked_cast<::arrow::FixedSizeListType&>(*field()->type());
- const int32_t* offsets = reinterpret_cast<const int32_t*>(data->buffers[1]->data());
- for (int x = 1; x <= data->length; x++) {
- int32_t size = offsets[x] - offsets[x - 1];
- if (size != type.list_size()) {
- return Status::Invalid("Expected all lists to be of size=", type.list_size(),
- " but index ", x, " had size=", size);
- }
- }
- data->buffers.resize(1);
- std::shared_ptr<Array> result = ::arrow::MakeArray(data);
- return std::make_shared<ChunkedArray>(result);
- }
-};
-
-class PARQUET_NO_EXPORT StructReader : public ColumnReaderImpl {
- public:
- explicit StructReader(std::shared_ptr<ReaderContext> ctx,
- std::shared_ptr<Field> filtered_field,
- ::parquet::internal::LevelInfo level_info,
- std::vector<std::unique_ptr<ColumnReaderImpl>> children)
- : ctx_(std::move(ctx)),
- filtered_field_(std::move(filtered_field)),
- level_info_(level_info),
- children_(std::move(children)) {
- // There could be a mix of children: some might be repeated, some might not be.
- // If possible use one that isn't since that will be guaranteed to have the least
- // number of levels to reconstruct a nullable bitmap.
- auto result = std::find_if(children_.begin(), children_.end(),
- [](const std::unique_ptr<ColumnReaderImpl>& child) {
- return !child->IsOrHasRepeatedChild();
- });
- if (result != children_.end()) {
- def_rep_level_child_ = result->get();
- has_repeated_child_ = false;
- } else if (!children_.empty()) {
- def_rep_level_child_ = children_.front().get();
- has_repeated_child_ = true;
- }
- }
-
- bool IsOrHasRepeatedChild() const final { return has_repeated_child_; }
-
- Status LoadBatch(int64_t records_to_read) override {
- for (const std::unique_ptr<ColumnReaderImpl>& reader : children_) {
- RETURN_NOT_OK(reader->LoadBatch(records_to_read));
- }
- return Status::OK();
- }
- Status BuildArray(int64_t length_upper_bound,
- std::shared_ptr<ChunkedArray>* out) override;
- Status GetDefLevels(const int16_t** data, int64_t* length) override;
- Status GetRepLevels(const int16_t** data, int64_t* length) override;
- const std::shared_ptr<Field> field() override { return filtered_field_; }
-
- private:
- const std::shared_ptr<ReaderContext> ctx_;
- const std::shared_ptr<Field> filtered_field_;
- const ::parquet::internal::LevelInfo level_info_;
- const std::vector<std::unique_ptr<ColumnReaderImpl>> children_;
- ColumnReaderImpl* def_rep_level_child_ = nullptr;
- bool has_repeated_child_;
-};
-
-Status StructReader::GetDefLevels(const int16_t** data, int64_t* length) {
- *data = nullptr;
- if (children_.size() == 0) {
- *length = 0;
- return Status::Invalid("StructReader had no children");
- }
-
- // This method should only be called when this struct or one of its parents
- // is optional/repeated, or it has a repeated child.
- // Meaning all children must have rep/def levels associated
- // with them.
- RETURN_NOT_OK(def_rep_level_child_->GetDefLevels(data, length));
- return Status::OK();
-}
-
-Status StructReader::GetRepLevels(const int16_t** data, int64_t* length) {
- *data = nullptr;
- if (children_.size() == 0) {
- *length = 0;
- return Status::Invalid("StructReader had no children");
- }
-
- // This method should only be called when this struct or one of its parents
- // is optional/repeated, or it has a repeated child.
- // Meaning all children must have rep/def levels associated
- // with them.
- RETURN_NOT_OK(def_rep_level_child_->GetRepLevels(data, length));
- return Status::OK();
-}
-
-Status StructReader::BuildArray(int64_t length_upper_bound,
- std::shared_ptr<ChunkedArray>* out) {
- std::vector<std::shared_ptr<ArrayData>> children_array_data;
- std::shared_ptr<ResizableBuffer> null_bitmap;
-
- ::parquet::internal::ValidityBitmapInputOutput validity_io;
- validity_io.values_read_upper_bound = length_upper_bound;
- // This simplifies accounting below.
- validity_io.values_read = length_upper_bound;
-
- BEGIN_PARQUET_CATCH_EXCEPTIONS
- const int16_t* def_levels;
- const int16_t* rep_levels;
- int64_t num_levels;
-
- if (has_repeated_child_) {
- ARROW_ASSIGN_OR_RAISE(
- null_bitmap,
- AllocateResizableBuffer(BitUtil::BytesForBits(length_upper_bound), ctx_->pool));
- validity_io.valid_bits = null_bitmap->mutable_data();
- RETURN_NOT_OK(GetDefLevels(&def_levels, &num_levels));
- RETURN_NOT_OK(GetRepLevels(&rep_levels, &num_levels));
- DefRepLevelsToBitmap(def_levels, rep_levels, num_levels, level_info_, &validity_io);
- } else if (filtered_field_->nullable()) {
- ARROW_ASSIGN_OR_RAISE(
- null_bitmap,
- AllocateResizableBuffer(BitUtil::BytesForBits(length_upper_bound), ctx_->pool));
- validity_io.valid_bits = null_bitmap->mutable_data();
- RETURN_NOT_OK(GetDefLevels(&def_levels, &num_levels));
- DefLevelsToBitmap(def_levels, num_levels, level_info_, &validity_io);
- }
-
- // Ensure all values are initialized.
- if (null_bitmap) {
- RETURN_NOT_OK(null_bitmap->Resize(BitUtil::BytesForBits(validity_io.values_read)));
- null_bitmap->ZeroPadding();
- }
-
- END_PARQUET_CATCH_EXCEPTIONS
- // Gather children arrays and def levels
- for (auto& child : children_) {
- std::shared_ptr<ChunkedArray> field;
- RETURN_NOT_OK(child->BuildArray(validity_io.values_read, &field));
- ARROW_ASSIGN_OR_RAISE(std::shared_ptr<ArrayData> array_data, ChunksToSingle(*field));
- children_array_data.push_back(std::move(array_data));
- }
-
- if (!filtered_field_->nullable() && !has_repeated_child_) {
- validity_io.values_read = children_array_data.front()->length;
- }
-
- std::vector<std::shared_ptr<Buffer>> buffers{validity_io.null_count > 0 ? null_bitmap
- : nullptr};
- auto data =
- std::make_shared<ArrayData>(filtered_field_->type(),
- /*length=*/validity_io.values_read, std::move(buffers),
- std::move(children_array_data));
- std::shared_ptr<Array> result = ::arrow::MakeArray(data);
-
- *out = std::make_shared<ChunkedArray>(result);
- return Status::OK();
-}
-
-// ----------------------------------------------------------------------
-// File reader implementation
-
-Status GetReader(const SchemaField& field, const std::shared_ptr<Field>& arrow_field,
- const std::shared_ptr<ReaderContext>& ctx,
- std::unique_ptr<ColumnReaderImpl>* out) {
- BEGIN_PARQUET_CATCH_EXCEPTIONS
-
- auto type_id = arrow_field->type()->id();
-
- if (type_id == ::arrow::Type::EXTENSION) {
- auto storage_field = arrow_field->WithType(
- checked_cast<const ExtensionType&>(*arrow_field->type()).storage_type());
- RETURN_NOT_OK(GetReader(field, storage_field, ctx, out));
- out->reset(new ExtensionReader(arrow_field, std::move(*out)));
- return Status::OK();
- }
-
- if (field.children.size() == 0) {
- if (!field.is_leaf()) {
- return Status::Invalid("Parquet non-leaf node has no children");
- }
- if (!ctx->IncludesLeaf(field.column_index)) {
- *out = nullptr;
- return Status::OK();
- }
- std::unique_ptr<FileColumnIterator> input(
- ctx->iterator_factory(field.column_index, ctx->reader));
- out->reset(new LeafReader(ctx, arrow_field, std::move(input), field.level_info));
- } else if (type_id == ::arrow::Type::LIST || type_id == ::arrow::Type::MAP ||
- type_id == ::arrow::Type::FIXED_SIZE_LIST ||
- type_id == ::arrow::Type::LARGE_LIST) {
- auto list_field = arrow_field;
- auto child = &field.children[0];
- std::unique_ptr<ColumnReaderImpl> child_reader;
- RETURN_NOT_OK(GetReader(*child, ctx, &child_reader));
- if (child_reader == nullptr) {
- *out = nullptr;
- return Status::OK();
- }
- if (type_id == ::arrow::Type::LIST ||
- type_id == ::arrow::Type::MAP) { // Map can be reconstructed as list of structs.
- if (type_id == ::arrow::Type::MAP &&
- child_reader->field()->type()->num_fields() != 2) {
- // This case applies if either key or value is filtered.
- list_field = list_field->WithType(::arrow::list(child_reader->field()));
- }
- out->reset(new ListReader<int32_t>(ctx, list_field, field.level_info,
- std::move(child_reader)));
- } else if (type_id == ::arrow::Type::LARGE_LIST) {
- out->reset(new ListReader<int64_t>(ctx, list_field, field.level_info,
- std::move(child_reader)));
-
- } else if (type_id == ::arrow::Type::FIXED_SIZE_LIST) {
- out->reset(new FixedSizeListReader(ctx, list_field, field.level_info,
- std::move(child_reader)));
- } else {
- return Status::UnknownError("Unknown list type: ", field.field->ToString());
- }
- } else if (type_id == ::arrow::Type::STRUCT) {
- std::vector<std::shared_ptr<Field>> child_fields;
- std::vector<std::unique_ptr<ColumnReaderImpl>> child_readers;
- for (const auto& child : field.children) {
- std::unique_ptr<ColumnReaderImpl> child_reader;
- RETURN_NOT_OK(GetReader(child, ctx, &child_reader));
- if (!child_reader) {
- // If all children were pruned, then we do not try to read this field
- continue;
- }
- child_fields.push_back(child.field);
- child_readers.emplace_back(std::move(child_reader));
- }
- if (child_fields.size() == 0) {
- *out = nullptr;
- return Status::OK();
- }
- auto filtered_field =
- ::arrow::field(arrow_field->name(), ::arrow::struct_(child_fields),
- arrow_field->nullable(), arrow_field->metadata());
- out->reset(new StructReader(ctx, filtered_field, field.level_info,
- std::move(child_readers)));
- } else {
- return Status::Invalid("Unsupported nested type: ", arrow_field->ToString());
- }
- return Status::OK();
-
- END_PARQUET_CATCH_EXCEPTIONS
-}
-
-Status GetReader(const SchemaField& field, const std::shared_ptr<ReaderContext>& ctx,
- std::unique_ptr<ColumnReaderImpl>* out) {
- return GetReader(field, field.field, ctx, out);
-}
-
-} // namespace
-
-Status FileReaderImpl::GetRecordBatchReader(const std::vector<int>& row_groups,
- const std::vector<int>& column_indices,
- std::unique_ptr<RecordBatchReader>* out) {
- RETURN_NOT_OK(BoundsCheck(row_groups, column_indices));
-
- if (reader_properties_.pre_buffer()) {
- // PARQUET-1698/PARQUET-1820: pre-buffer row groups/column chunks if enabled
- BEGIN_PARQUET_CATCH_EXCEPTIONS
- reader_->PreBuffer(row_groups, column_indices, reader_properties_.io_context(),
- reader_properties_.cache_options());
- END_PARQUET_CATCH_EXCEPTIONS
- }
-
- std::vector<std::shared_ptr<ColumnReaderImpl>> readers;
- std::shared_ptr<::arrow::Schema> batch_schema;
- RETURN_NOT_OK(GetFieldReaders(column_indices, row_groups, &readers, &batch_schema));
-
- if (readers.empty()) {
- // Just generate all batches right now; they're cheap since they have no columns.
- int64_t batch_size = properties().batch_size();
- auto max_sized_batch =
- ::arrow::RecordBatch::Make(batch_schema, batch_size, ::arrow::ArrayVector{});
-
- ::arrow::RecordBatchVector batches;
-
- for (int row_group : row_groups) {
- int64_t num_rows = parquet_reader()->metadata()->RowGroup(row_group)->num_rows();
-
- batches.insert(batches.end(), num_rows / batch_size, max_sized_batch);
-
- if (int64_t trailing_rows = num_rows % batch_size) {
- batches.push_back(max_sized_batch->Slice(0, trailing_rows));
- }
- }
-
- *out = ::arrow::internal::make_unique<RowGroupRecordBatchReader>(
- ::arrow::MakeVectorIterator(std::move(batches)), std::move(batch_schema));
-
- return Status::OK();
- }
-
- int64_t num_rows = 0;
- for (int row_group : row_groups) {
- num_rows += parquet_reader()->metadata()->RowGroup(row_group)->num_rows();
- }
-
- using ::arrow::RecordBatchIterator;
-
- // NB: This lambda will be invoked outside the scope of this call to
- // `GetRecordBatchReader()`, so it must capture `readers` and `batch_schema` by value.
- // `this` is a non-owning pointer so we are relying on the parent FileReader outliving
- // this RecordBatchReader.
- ::arrow::Iterator<RecordBatchIterator> batches = ::arrow::MakeFunctionIterator(
- [readers, batch_schema, num_rows,
- this]() mutable -> ::arrow::Result<RecordBatchIterator> {
- ::arrow::ChunkedArrayVector columns(readers.size());
-
- // don't reserve more rows than necessary
- int64_t batch_size = std::min(properties().batch_size(), num_rows);
- num_rows -= batch_size;
-
- RETURN_NOT_OK(::arrow::internal::OptionalParallelFor(
- reader_properties_.use_threads(), static_cast<int>(readers.size()),
- [&](int i) { return readers[i]->NextBatch(batch_size, &columns[i]); }));
-
- for (const auto& column : columns) {
- if (column == nullptr || column->length() == 0) {
- return ::arrow::IterationTraits<RecordBatchIterator>::End();
- }
- }
-
- auto table = ::arrow::Table::Make(batch_schema, std::move(columns));
- auto table_reader = std::make_shared<::arrow::TableBatchReader>(*table);
-
- // NB: explicitly preserve table so that table_reader doesn't outlive it
- return ::arrow::MakeFunctionIterator(
- [table, table_reader] { return table_reader->Next(); });
- });
-
- *out = ::arrow::internal::make_unique<RowGroupRecordBatchReader>(
- ::arrow::MakeFlattenIterator(std::move(batches)), std::move(batch_schema));
-
- return Status::OK();
-}
-
-/// Given a file reader and a list of row groups, this is a generator of record
-/// batch generators (where each sub-generator is the contents of a single row group).
-class RowGroupGenerator {
- public:
- using RecordBatchGenerator =
- ::arrow::AsyncGenerator<std::shared_ptr<::arrow::RecordBatch>>;
-
- explicit RowGroupGenerator(std::shared_ptr<FileReaderImpl> arrow_reader,
- ::arrow::internal::Executor* cpu_executor,
- std::vector<int> row_groups, std::vector<int> column_indices)
- : arrow_reader_(std::move(arrow_reader)),
- cpu_executor_(cpu_executor),
- row_groups_(std::move(row_groups)),
- column_indices_(std::move(column_indices)),
- index_(0) {}
-
- ::arrow::Future<RecordBatchGenerator> operator()() {
- if (index_ >= row_groups_.size()) {
- return ::arrow::AsyncGeneratorEnd<RecordBatchGenerator>();
- }
- int row_group = row_groups_[index_++];
- std::vector<int> column_indices = column_indices_;
- auto reader = arrow_reader_;
- if (!reader->properties().pre_buffer()) {
- return SubmitRead(cpu_executor_, reader, row_group, column_indices);
- }
- auto ready = reader->parquet_reader()->WhenBuffered({row_group}, column_indices);
- if (cpu_executor_) ready = cpu_executor_->TransferAlways(ready);
- return ready.Then([=]() -> ::arrow::Future<RecordBatchGenerator> {
- return ReadOneRowGroup(cpu_executor_, reader, row_group, column_indices);
- });
- }
-
- private:
- // Synchronous fallback for when pre-buffer isn't enabled.
- //
- // Making the Parquet reader truly asynchronous requires heavy refactoring, so the
- // generator piggybacks on ReadRangeCache. The lazy ReadRangeCache can be used for
- // async I/O without forcing readahead.
- static ::arrow::Future<RecordBatchGenerator> SubmitRead(
- ::arrow::internal::Executor* cpu_executor, std::shared_ptr<FileReaderImpl> self,
- const int row_group, const std::vector<int>& column_indices) {
- if (!cpu_executor) {
- return ReadOneRowGroup(cpu_executor, self, row_group, column_indices);
- }
- // If we have an executor, then force transfer (even if I/O was complete)
- return ::arrow::DeferNotOk(cpu_executor->Submit(ReadOneRowGroup, cpu_executor, self,
- row_group, column_indices));
- }
-
- static ::arrow::Future<RecordBatchGenerator> ReadOneRowGroup(
- ::arrow::internal::Executor* cpu_executor, std::shared_ptr<FileReaderImpl> self,
- const int row_group, const std::vector<int>& column_indices) {
-    // Skips bounds checks/pre-buffering, since we've done that already
- return self->DecodeRowGroups(self, {row_group}, column_indices, cpu_executor)
- .Then([](const std::shared_ptr<Table>& table)
- -> ::arrow::Result<RecordBatchGenerator> {
- ::arrow::TableBatchReader table_reader(*table);
- ::arrow::RecordBatchVector batches;
- RETURN_NOT_OK(table_reader.ReadAll(&batches));
- return ::arrow::MakeVectorGenerator(std::move(batches));
- });
- }
-
- std::shared_ptr<FileReaderImpl> arrow_reader_;
- ::arrow::internal::Executor* cpu_executor_;
- std::vector<int> row_groups_;
- std::vector<int> column_indices_;
- size_t index_;
-};
-
-::arrow::Result<::arrow::AsyncGenerator<std::shared_ptr<::arrow::RecordBatch>>>
-FileReaderImpl::GetRecordBatchGenerator(std::shared_ptr<FileReader> reader,
- const std::vector<int> row_group_indices,
- const std::vector<int> column_indices,
- ::arrow::internal::Executor* cpu_executor) {
- RETURN_NOT_OK(BoundsCheck(row_group_indices, column_indices));
- if (reader_properties_.pre_buffer()) {
- BEGIN_PARQUET_CATCH_EXCEPTIONS
- reader_->PreBuffer(row_group_indices, column_indices, reader_properties_.io_context(),
- reader_properties_.cache_options());
- END_PARQUET_CATCH_EXCEPTIONS
- }
- ::arrow::AsyncGenerator<RowGroupGenerator::RecordBatchGenerator> row_group_generator =
- RowGroupGenerator(::arrow::internal::checked_pointer_cast<FileReaderImpl>(reader),
- cpu_executor, row_group_indices, column_indices);
- return ::arrow::MakeConcatenatedGenerator(std::move(row_group_generator));
-}
-
-Status FileReaderImpl::GetColumn(int i, FileColumnIteratorFactory iterator_factory,
- std::unique_ptr<ColumnReader>* out) {
- RETURN_NOT_OK(BoundsCheckColumn(i));
- auto ctx = std::make_shared<ReaderContext>();
- ctx->reader = reader_.get();
- ctx->pool = pool_;
- ctx->iterator_factory = iterator_factory;
- ctx->filter_leaves = false;
- std::unique_ptr<ColumnReaderImpl> result;
- RETURN_NOT_OK(GetReader(manifest_.schema_fields[i], ctx, &result));
- out->reset(result.release());
- return Status::OK();
-}
-
-Status FileReaderImpl::ReadRowGroups(const std::vector<int>& row_groups,
- const std::vector<int>& column_indices,
- std::shared_ptr<Table>* out) {
- RETURN_NOT_OK(BoundsCheck(row_groups, column_indices));
-
- // PARQUET-1698/PARQUET-1820: pre-buffer row groups/column chunks if enabled
- if (reader_properties_.pre_buffer()) {
- BEGIN_PARQUET_CATCH_EXCEPTIONS
- parquet_reader()->PreBuffer(row_groups, column_indices,
- reader_properties_.io_context(),
- reader_properties_.cache_options());
- END_PARQUET_CATCH_EXCEPTIONS
- }
-
- auto fut = DecodeRowGroups(/*self=*/nullptr, row_groups, column_indices,
- /*cpu_executor=*/nullptr);
- ARROW_ASSIGN_OR_RAISE(*out, fut.MoveResult());
- return Status::OK();
-}
-
-Future<std::shared_ptr<Table>> FileReaderImpl::DecodeRowGroups(
- std::shared_ptr<FileReaderImpl> self, const std::vector<int>& row_groups,
- const std::vector<int>& column_indices, ::arrow::internal::Executor* cpu_executor) {
-  // `self` is used solely to keep `this` alive in an async context - but since
-  // this method is also used in a sync context, we use `this` rather than `self`.
- std::vector<std::shared_ptr<ColumnReaderImpl>> readers;
- std::shared_ptr<::arrow::Schema> result_schema;
- RETURN_NOT_OK(GetFieldReaders(column_indices, row_groups, &readers, &result_schema));
- // OptionalParallelForAsync requires an executor
- if (!cpu_executor) cpu_executor = ::arrow::internal::GetCpuThreadPool();
-
- auto read_column = [row_groups, self, this](size_t i,
- std::shared_ptr<ColumnReaderImpl> reader)
- -> ::arrow::Result<std::shared_ptr<::arrow::ChunkedArray>> {
- std::shared_ptr<::arrow::ChunkedArray> column;
- RETURN_NOT_OK(ReadColumn(static_cast<int>(i), row_groups, reader.get(), &column));
- return column;
- };
- auto make_table = [result_schema, row_groups, self,
- this](const ::arrow::ChunkedArrayVector& columns)
- -> ::arrow::Result<std::shared_ptr<Table>> {
- int64_t num_rows = 0;
- if (!columns.empty()) {
- num_rows = columns[0]->length();
- } else {
- for (int i : row_groups) {
- num_rows += parquet_reader()->metadata()->RowGroup(i)->num_rows();
- }
- }
- auto table = Table::Make(std::move(result_schema), columns, num_rows);
- RETURN_NOT_OK(table->Validate());
- return table;
- };
- return ::arrow::internal::OptionalParallelForAsync(reader_properties_.use_threads(),
- std::move(readers), read_column,
- cpu_executor)
- .Then(std::move(make_table));
-}
-
-std::shared_ptr<RowGroupReader> FileReaderImpl::RowGroup(int row_group_index) {
- return std::make_shared<RowGroupReaderImpl>(this, row_group_index);
-}
-
-// ----------------------------------------------------------------------
-// Public factory functions
-
-Status FileReader::GetRecordBatchReader(const std::vector<int>& row_group_indices,
- std::shared_ptr<RecordBatchReader>* out) {
- std::unique_ptr<RecordBatchReader> tmp;
- ARROW_RETURN_NOT_OK(GetRecordBatchReader(row_group_indices, &tmp));
- out->reset(tmp.release());
- return Status::OK();
-}
-
-Status FileReader::GetRecordBatchReader(const std::vector<int>& row_group_indices,
- const std::vector<int>& column_indices,
- std::shared_ptr<RecordBatchReader>* out) {
- std::unique_ptr<RecordBatchReader> tmp;
- ARROW_RETURN_NOT_OK(GetRecordBatchReader(row_group_indices, column_indices, &tmp));
- out->reset(tmp.release());
- return Status::OK();
-}
-
-Status FileReader::Make(::arrow::MemoryPool* pool,
- std::unique_ptr<ParquetFileReader> reader,
- const ArrowReaderProperties& properties,
- std::unique_ptr<FileReader>* out) {
- out->reset(new FileReaderImpl(pool, std::move(reader), properties));
- return static_cast<FileReaderImpl*>(out->get())->Init();
-}
-
-Status FileReader::Make(::arrow::MemoryPool* pool,
- std::unique_ptr<ParquetFileReader> reader,
- std::unique_ptr<FileReader>* out) {
- return Make(pool, std::move(reader), default_arrow_reader_properties(), out);
-}
-
-FileReaderBuilder::FileReaderBuilder()
- : pool_(::arrow::default_memory_pool()),
- properties_(default_arrow_reader_properties()) {}
-
-Status FileReaderBuilder::Open(std::shared_ptr<::arrow::io::RandomAccessFile> file,
- const ReaderProperties& properties,
- std::shared_ptr<FileMetaData> metadata) {
- PARQUET_CATCH_NOT_OK(raw_reader_ = ParquetReader::Open(std::move(file), properties,
- std::move(metadata)));
- return Status::OK();
-}
-
-FileReaderBuilder* FileReaderBuilder::memory_pool(::arrow::MemoryPool* pool) {
- pool_ = pool;
- return this;
-}
-
-FileReaderBuilder* FileReaderBuilder::properties(
- const ArrowReaderProperties& arg_properties) {
- properties_ = arg_properties;
- return this;
-}
-
-Status FileReaderBuilder::Build(std::unique_ptr<FileReader>* out) {
- return FileReader::Make(pool_, std::move(raw_reader_), properties_, out);
-}
-
-Status OpenFile(std::shared_ptr<::arrow::io::RandomAccessFile> file, MemoryPool* pool,
- std::unique_ptr<FileReader>* reader) {
- FileReaderBuilder builder;
- RETURN_NOT_OK(builder.Open(std::move(file)));
- return builder.memory_pool(pool)->Build(reader);
-}
-
-namespace internal {
-
-Status FuzzReader(std::unique_ptr<FileReader> reader) {
- auto st = Status::OK();
- for (int i = 0; i < reader->num_row_groups(); ++i) {
- std::shared_ptr<Table> table;
- auto row_group_status = reader->ReadRowGroup(i, &table);
- if (row_group_status.ok()) {
- row_group_status &= table->ValidateFull();
- }
- st &= row_group_status;
- }
- return st;
-}
-
-Status FuzzReader(const uint8_t* data, int64_t size) {
- auto buffer = std::make_shared<::arrow::Buffer>(data, size);
- auto file = std::make_shared<::arrow::io::BufferReader>(buffer);
- FileReaderBuilder builder;
- RETURN_NOT_OK(builder.Open(std::move(file)));
-
- std::unique_ptr<FileReader> reader;
- RETURN_NOT_OK(builder.Build(&reader));
- return FuzzReader(std::move(reader));
-}
-
-} // namespace internal
-
-} // namespace arrow
-} // namespace parquet
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "parquet/arrow/reader.h"
+
+#include <algorithm>
+#include <cstring>
+#include <unordered_set>
+#include <utility>
+#include <vector>
+
+#include "arrow/array.h"
+#include "arrow/buffer.h"
+#include "arrow/extension_type.h"
+#include "arrow/io/memory.h"
+#include "arrow/record_batch.h"
+#include "arrow/table.h"
+#include "arrow/type.h"
+#include "arrow/util/async_generator.h"
+#include "arrow/util/bit_util.h"
+#include "arrow/util/future.h"
+#include "arrow/util/iterator.h"
+#include "arrow/util/logging.h"
+#include "arrow/util/make_unique.h"
+#include "arrow/util/parallel.h"
+#include "arrow/util/range.h"
+#include "parquet/arrow/reader_internal.h"
+#include "parquet/column_reader.h"
+#include "parquet/exception.h"
+#include "parquet/file_reader.h"
+#include "parquet/metadata.h"
+#include "parquet/properties.h"
+#include "parquet/schema.h"
+
+using arrow::Array;
+using arrow::ArrayData;
+using arrow::BooleanArray;
+using arrow::ChunkedArray;
+using arrow::DataType;
+using arrow::ExtensionType;
+using arrow::Field;
+using arrow::Future;
+using arrow::Int32Array;
+using arrow::ListArray;
+using arrow::MemoryPool;
+using arrow::RecordBatchReader;
+using arrow::ResizableBuffer;
+using arrow::Status;
+using arrow::StructArray;
+using arrow::Table;
+using arrow::TimestampArray;
+
+using arrow::internal::checked_cast;
+using arrow::internal::Iota;
+
+// Help reduce verbosity
+using ParquetReader = parquet::ParquetFileReader;
+
+using parquet::internal::RecordReader;
+
+namespace BitUtil = arrow::BitUtil;
+
+namespace parquet {
+namespace arrow {
+namespace {
+
+::arrow::Result<std::shared_ptr<ArrayData>> ChunksToSingle(const ChunkedArray& chunked) {
+ switch (chunked.num_chunks()) {
+ case 0: {
+ ARROW_ASSIGN_OR_RAISE(std::shared_ptr<Array> array,
+ ::arrow::MakeArrayOfNull(chunked.type(), 0));
+ return array->data();
+ }
+ case 1:
+ return chunked.chunk(0)->data();
+ default:
+      // ARROW-3762(wesm): If the item reader yields a chunked array, we reject
+      // it, as this is not yet implemented.
+ return Status::NotImplemented(
+ "Nested data conversions not implemented for chunked array outputs");
+ }
+}
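+
+// A minimal sketch of the contract above (hedged; `chunked` is a hypothetical
+// input): zero chunks yield an empty ArrayData of the same type, exactly one
+// chunk is passed through, and multiple chunks are rejected:
+//
+//   ::arrow::ChunkedArray chunked(::arrow::ArrayVector{}, ::arrow::int32());
+//   auto maybe_data = ChunksToSingle(chunked);  // -> ArrayData with length 0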
+
+} // namespace
+
+class ColumnReaderImpl : public ColumnReader {
+ public:
+ virtual Status GetDefLevels(const int16_t** data, int64_t* length) = 0;
+ virtual Status GetRepLevels(const int16_t** data, int64_t* length) = 0;
+ virtual const std::shared_ptr<Field> field() = 0;
+
+ ::arrow::Status NextBatch(int64_t batch_size,
+ std::shared_ptr<::arrow::ChunkedArray>* out) final {
+ RETURN_NOT_OK(LoadBatch(batch_size));
+ RETURN_NOT_OK(BuildArray(batch_size, out));
+ for (int x = 0; x < (*out)->num_chunks(); x++) {
+ RETURN_NOT_OK((*out)->chunk(x)->Validate());
+ }
+ return Status::OK();
+ }
+
+ virtual ::arrow::Status LoadBatch(int64_t num_records) = 0;
+
+ virtual ::arrow::Status BuildArray(int64_t length_upper_bound,
+ std::shared_ptr<::arrow::ChunkedArray>* out) = 0;
+ virtual bool IsOrHasRepeatedChild() const = 0;
+};
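+
+// A hedged usage sketch for the interface above: callers typically drive it
+// via NextBatch(), which is LoadBatch() followed by BuildArray() plus
+// per-chunk validation (`reader` here is a hypothetical ColumnReaderImpl*):
+//
+//   std::shared_ptr<::arrow::ChunkedArray> chunked;
+//   RETURN_NOT_OK(reader->NextBatch(/*batch_size=*/4096, &chunked));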
+
+namespace {
+
+std::shared_ptr<std::unordered_set<int>> VectorToSharedSet(
+ const std::vector<int>& values) {
+ std::shared_ptr<std::unordered_set<int>> result(new std::unordered_set<int>());
+ result->insert(values.begin(), values.end());
+ return result;
+}
+
+// Forward declaration
+Status GetReader(const SchemaField& field, const std::shared_ptr<ReaderContext>& context,
+ std::unique_ptr<ColumnReaderImpl>* out);
+
+// ----------------------------------------------------------------------
+// FileReaderImpl forward declaration
+
+class FileReaderImpl : public FileReader {
+ public:
+ FileReaderImpl(MemoryPool* pool, std::unique_ptr<ParquetFileReader> reader,
+ ArrowReaderProperties properties)
+ : pool_(pool),
+ reader_(std::move(reader)),
+ reader_properties_(std::move(properties)) {}
+
+ Status Init() {
+ return SchemaManifest::Make(reader_->metadata()->schema(),
+ reader_->metadata()->key_value_metadata(),
+ reader_properties_, &manifest_);
+ }
+
+ FileColumnIteratorFactory SomeRowGroupsFactory(std::vector<int> row_groups) {
+ return [row_groups](int i, ParquetFileReader* reader) {
+ return new FileColumnIterator(i, reader, row_groups);
+ };
+ }
+
+ FileColumnIteratorFactory AllRowGroupsFactory() {
+ return SomeRowGroupsFactory(Iota(reader_->metadata()->num_row_groups()));
+ }
+
+ Status BoundsCheckColumn(int column) {
+ if (column < 0 || column >= this->num_columns()) {
+ return Status::Invalid("Column index out of bounds (got ", column,
+ ", should be "
+ "between 0 and ",
+ this->num_columns() - 1, ")");
+ }
+ return Status::OK();
+ }
+
+ Status BoundsCheckRowGroup(int row_group) {
+ // row group indices check
+ if (row_group < 0 || row_group >= num_row_groups()) {
+ return Status::Invalid("Some index in row_group_indices is ", row_group,
+ ", which is either < 0 or >= num_row_groups(",
+ num_row_groups(), ")");
+ }
+ return Status::OK();
+ }
+
+ Status BoundsCheck(const std::vector<int>& row_groups,
+ const std::vector<int>& column_indices) {
+ for (int i : row_groups) {
+ RETURN_NOT_OK(BoundsCheckRowGroup(i));
+ }
+ for (int i : column_indices) {
+ RETURN_NOT_OK(BoundsCheckColumn(i));
+ }
+ return Status::OK();
+ }
+
+ std::shared_ptr<RowGroupReader> RowGroup(int row_group_index) override;
+
+ Status ReadTable(const std::vector<int>& indices,
+ std::shared_ptr<Table>* out) override {
+ return ReadRowGroups(Iota(reader_->metadata()->num_row_groups()), indices, out);
+ }
+
+ Status GetFieldReader(int i,
+ const std::shared_ptr<std::unordered_set<int>>& included_leaves,
+ const std::vector<int>& row_groups,
+ std::unique_ptr<ColumnReaderImpl>* out) {
+ auto ctx = std::make_shared<ReaderContext>();
+ ctx->reader = reader_.get();
+ ctx->pool = pool_;
+ ctx->iterator_factory = SomeRowGroupsFactory(row_groups);
+ ctx->filter_leaves = true;
+ ctx->included_leaves = included_leaves;
+ return GetReader(manifest_.schema_fields[i], ctx, out);
+ }
+
+ Status GetFieldReaders(const std::vector<int>& column_indices,
+ const std::vector<int>& row_groups,
+ std::vector<std::shared_ptr<ColumnReaderImpl>>* out,
+ std::shared_ptr<::arrow::Schema>* out_schema) {
+ // We only need to read schema fields which have columns indicated
+ // in the indices vector
+ ARROW_ASSIGN_OR_RAISE(std::vector<int> field_indices,
+ manifest_.GetFieldIndices(column_indices));
+
+ auto included_leaves = VectorToSharedSet(column_indices);
+
+ out->resize(field_indices.size());
+ ::arrow::FieldVector out_fields(field_indices.size());
+ for (size_t i = 0; i < out->size(); ++i) {
+ std::unique_ptr<ColumnReaderImpl> reader;
+ RETURN_NOT_OK(
+ GetFieldReader(field_indices[i], included_leaves, row_groups, &reader));
+
+ out_fields[i] = reader->field();
+ out->at(i) = std::move(reader);
+ }
+
+ *out_schema = ::arrow::schema(std::move(out_fields), manifest_.schema_metadata);
+ return Status::OK();
+ }
+
+ Status GetColumn(int i, FileColumnIteratorFactory iterator_factory,
+ std::unique_ptr<ColumnReader>* out);
+
+ Status GetColumn(int i, std::unique_ptr<ColumnReader>* out) override {
+ return GetColumn(i, AllRowGroupsFactory(), out);
+ }
+
+ Status GetSchema(std::shared_ptr<::arrow::Schema>* out) override {
+ return FromParquetSchema(reader_->metadata()->schema(), reader_properties_,
+ reader_->metadata()->key_value_metadata(), out);
+ }
+
+ Status ReadSchemaField(int i, std::shared_ptr<ChunkedArray>* out) override {
+ auto included_leaves = VectorToSharedSet(Iota(reader_->metadata()->num_columns()));
+ std::vector<int> row_groups = Iota(reader_->metadata()->num_row_groups());
+
+ std::unique_ptr<ColumnReaderImpl> reader;
+ RETURN_NOT_OK(GetFieldReader(i, included_leaves, row_groups, &reader));
+
+ return ReadColumn(i, row_groups, reader.get(), out);
+ }
+
+ Status ReadColumn(int i, const std::vector<int>& row_groups, ColumnReader* reader,
+ std::shared_ptr<ChunkedArray>* out) {
+ BEGIN_PARQUET_CATCH_EXCEPTIONS
+ // TODO(wesm): This calculation doesn't make much sense when we have repeated
+ // schema nodes
+ int64_t records_to_read = 0;
+ for (auto row_group : row_groups) {
+ // Can throw exception
+ records_to_read +=
+ reader_->metadata()->RowGroup(row_group)->ColumnChunk(i)->num_values();
+ }
+ return reader->NextBatch(records_to_read, out);
+ END_PARQUET_CATCH_EXCEPTIONS
+ }
+
+ Status ReadColumn(int i, const std::vector<int>& row_groups,
+ std::shared_ptr<ChunkedArray>* out) {
+ std::unique_ptr<ColumnReader> flat_column_reader;
+ RETURN_NOT_OK(GetColumn(i, SomeRowGroupsFactory(row_groups), &flat_column_reader));
+ return ReadColumn(i, row_groups, flat_column_reader.get(), out);
+ }
+
+ Status ReadColumn(int i, std::shared_ptr<ChunkedArray>* out) override {
+ return ReadColumn(i, Iota(reader_->metadata()->num_row_groups()), out);
+ }
+
+ Status ReadTable(std::shared_ptr<Table>* table) override {
+ return ReadTable(Iota(reader_->metadata()->num_columns()), table);
+ }
+
+ Status ReadRowGroups(const std::vector<int>& row_groups,
+ const std::vector<int>& indices,
+ std::shared_ptr<Table>* table) override;
+
+ // Helper method used by ReadRowGroups - read the given row groups/columns, skipping
+ // bounds checks and pre-buffering. Takes a shared_ptr to self to keep the reader
+ // alive in async contexts.
+ Future<std::shared_ptr<Table>> DecodeRowGroups(
+ std::shared_ptr<FileReaderImpl> self, const std::vector<int>& row_groups,
+ const std::vector<int>& column_indices, ::arrow::internal::Executor* cpu_executor);
+
+ Status ReadRowGroups(const std::vector<int>& row_groups,
+ std::shared_ptr<Table>* table) override {
+ return ReadRowGroups(row_groups, Iota(reader_->metadata()->num_columns()), table);
+ }
+
+ Status ReadRowGroup(int row_group_index, const std::vector<int>& column_indices,
+ std::shared_ptr<Table>* out) override {
+ return ReadRowGroups({row_group_index}, column_indices, out);
+ }
+
+ Status ReadRowGroup(int i, std::shared_ptr<Table>* table) override {
+ return ReadRowGroup(i, Iota(reader_->metadata()->num_columns()), table);
+ }
+
+ Status GetRecordBatchReader(const std::vector<int>& row_group_indices,
+ const std::vector<int>& column_indices,
+ std::unique_ptr<RecordBatchReader>* out) override;
+
+ Status GetRecordBatchReader(const std::vector<int>& row_group_indices,
+ std::unique_ptr<RecordBatchReader>* out) override {
+ return GetRecordBatchReader(row_group_indices,
+ Iota(reader_->metadata()->num_columns()), out);
+ }
+
+ ::arrow::Result<::arrow::AsyncGenerator<std::shared_ptr<::arrow::RecordBatch>>>
+ GetRecordBatchGenerator(std::shared_ptr<FileReader> reader,
+ const std::vector<int> row_group_indices,
+ const std::vector<int> column_indices,
+ ::arrow::internal::Executor* cpu_executor) override;
+
+ int num_columns() const { return reader_->metadata()->num_columns(); }
+
+ ParquetFileReader* parquet_reader() const override { return reader_.get(); }
+
+ int num_row_groups() const override { return reader_->metadata()->num_row_groups(); }
+
+ void set_use_threads(bool use_threads) override {
+ reader_properties_.set_use_threads(use_threads);
+ }
+
+ void set_batch_size(int64_t batch_size) override {
+ reader_properties_.set_batch_size(batch_size);
+ }
+
+ const ArrowReaderProperties& properties() const override { return reader_properties_; }
+
+ const SchemaManifest& manifest() const override { return manifest_; }
+
+ Status ScanContents(std::vector<int> columns, const int32_t column_batch_size,
+ int64_t* num_rows) override {
+ BEGIN_PARQUET_CATCH_EXCEPTIONS
+ *num_rows = ScanFileContents(columns, column_batch_size, reader_.get());
+ return Status::OK();
+ END_PARQUET_CATCH_EXCEPTIONS
+ }
+
+ MemoryPool* pool_;
+ std::unique_ptr<ParquetFileReader> reader_;
+ ArrowReaderProperties reader_properties_;
+
+ SchemaManifest manifest_;
+};
+
+class RowGroupRecordBatchReader : public ::arrow::RecordBatchReader {
+ public:
+ RowGroupRecordBatchReader(::arrow::RecordBatchIterator batches,
+ std::shared_ptr<::arrow::Schema> schema)
+ : batches_(std::move(batches)), schema_(std::move(schema)) {}
+
+ ~RowGroupRecordBatchReader() override {}
+
+ Status ReadNext(std::shared_ptr<::arrow::RecordBatch>* out) override {
+ return batches_.Next().Value(out);
+ }
+
+ std::shared_ptr<::arrow::Schema> schema() const override { return schema_; }
+
+ private:
+ ::arrow::Iterator<std::shared_ptr<::arrow::RecordBatch>> batches_;
+ std::shared_ptr<::arrow::Schema> schema_;
+};
+
+class ColumnChunkReaderImpl : public ColumnChunkReader {
+ public:
+ ColumnChunkReaderImpl(FileReaderImpl* impl, int row_group_index, int column_index)
+ : impl_(impl), column_index_(column_index), row_group_index_(row_group_index) {}
+
+ Status Read(std::shared_ptr<::arrow::ChunkedArray>* out) override {
+ return impl_->ReadColumn(column_index_, {row_group_index_}, out);
+ }
+
+ private:
+ FileReaderImpl* impl_;
+ int column_index_;
+ int row_group_index_;
+};
+
+class RowGroupReaderImpl : public RowGroupReader {
+ public:
+ RowGroupReaderImpl(FileReaderImpl* impl, int row_group_index)
+ : impl_(impl), row_group_index_(row_group_index) {}
+
+ std::shared_ptr<ColumnChunkReader> Column(int column_index) override {
+ return std::shared_ptr<ColumnChunkReader>(
+ new ColumnChunkReaderImpl(impl_, row_group_index_, column_index));
+ }
+
+ Status ReadTable(const std::vector<int>& column_indices,
+ std::shared_ptr<::arrow::Table>* out) override {
+ return impl_->ReadRowGroup(row_group_index_, column_indices, out);
+ }
+
+ Status ReadTable(std::shared_ptr<::arrow::Table>* out) override {
+ return impl_->ReadRowGroup(row_group_index_, out);
+ }
+
+ private:
+ FileReaderImpl* impl_;
+ int row_group_index_;
+};
+
+// ----------------------------------------------------------------------
+// Column reader implementations
+
+// Leaf reader is for primitive arrays and primitive children of nested arrays
+class LeafReader : public ColumnReaderImpl {
+ public:
+ LeafReader(std::shared_ptr<ReaderContext> ctx, std::shared_ptr<Field> field,
+ std::unique_ptr<FileColumnIterator> input,
+ ::parquet::internal::LevelInfo leaf_info)
+ : ctx_(std::move(ctx)),
+ field_(std::move(field)),
+ input_(std::move(input)),
+ descr_(input_->descr()) {
+ record_reader_ = RecordReader::Make(
+ descr_, leaf_info, ctx_->pool, field_->type()->id() == ::arrow::Type::DICTIONARY);
+ NextRowGroup();
+ }
+
+ Status GetDefLevels(const int16_t** data, int64_t* length) final {
+ *data = record_reader_->def_levels();
+ *length = record_reader_->levels_position();
+ return Status::OK();
+ }
+
+ Status GetRepLevels(const int16_t** data, int64_t* length) final {
+ *data = record_reader_->rep_levels();
+ *length = record_reader_->levels_position();
+ return Status::OK();
+ }
+
+ bool IsOrHasRepeatedChild() const final { return false; }
+
+ Status LoadBatch(int64_t records_to_read) final {
+ BEGIN_PARQUET_CATCH_EXCEPTIONS
+ out_ = nullptr;
+ record_reader_->Reset();
+ // Pre-allocation gives much better performance for flat columns
+ record_reader_->Reserve(records_to_read);
+ while (records_to_read > 0) {
+ if (!record_reader_->HasMoreData()) {
+ break;
+ }
+ int64_t records_read = record_reader_->ReadRecords(records_to_read);
+ records_to_read -= records_read;
+ if (records_read == 0) {
+ NextRowGroup();
+ }
+ }
+ RETURN_NOT_OK(TransferColumnData(record_reader_.get(), field_->type(), descr_,
+ ctx_->pool, &out_));
+ return Status::OK();
+ END_PARQUET_CATCH_EXCEPTIONS
+ }
+
+ ::arrow::Status BuildArray(int64_t length_upper_bound,
+ std::shared_ptr<::arrow::ChunkedArray>* out) final {
+ *out = out_;
+ return Status::OK();
+ }
+
+ const std::shared_ptr<Field> field() override { return field_; }
+
+ private:
+ std::shared_ptr<ChunkedArray> out_;
+ void NextRowGroup() {
+ std::unique_ptr<PageReader> page_reader = input_->NextChunk();
+ record_reader_->SetPageReader(std::move(page_reader));
+ }
+
+ std::shared_ptr<ReaderContext> ctx_;
+ std::shared_ptr<Field> field_;
+ std::unique_ptr<FileColumnIterator> input_;
+ const ColumnDescriptor* descr_;
+ std::shared_ptr<RecordReader> record_reader_;
+};
+
+// Column reader for extension arrays
+class ExtensionReader : public ColumnReaderImpl {
+ public:
+ ExtensionReader(std::shared_ptr<Field> field,
+ std::unique_ptr<ColumnReaderImpl> storage_reader)
+ : field_(std::move(field)), storage_reader_(std::move(storage_reader)) {}
+
+ Status GetDefLevels(const int16_t** data, int64_t* length) override {
+ return storage_reader_->GetDefLevels(data, length);
+ }
+
+ Status GetRepLevels(const int16_t** data, int64_t* length) override {
+ return storage_reader_->GetRepLevels(data, length);
+ }
+
+ Status LoadBatch(int64_t number_of_records) final {
+ return storage_reader_->LoadBatch(number_of_records);
+ }
+
+ Status BuildArray(int64_t length_upper_bound,
+ std::shared_ptr<ChunkedArray>* out) override {
+ std::shared_ptr<ChunkedArray> storage;
+ RETURN_NOT_OK(storage_reader_->BuildArray(length_upper_bound, &storage));
+ *out = ExtensionType::WrapArray(field_->type(), storage);
+ return Status::OK();
+ }
+
+ bool IsOrHasRepeatedChild() const final {
+ return storage_reader_->IsOrHasRepeatedChild();
+ }
+
+ const std::shared_ptr<Field> field() override { return field_; }
+
+ private:
+ std::shared_ptr<Field> field_;
+ std::unique_ptr<ColumnReaderImpl> storage_reader_;
+};
+
+template <typename IndexType>
+class ListReader : public ColumnReaderImpl {
+ public:
+ ListReader(std::shared_ptr<ReaderContext> ctx, std::shared_ptr<Field> field,
+ ::parquet::internal::LevelInfo level_info,
+ std::unique_ptr<ColumnReaderImpl> child_reader)
+ : ctx_(std::move(ctx)),
+ field_(std::move(field)),
+ level_info_(level_info),
+ item_reader_(std::move(child_reader)) {}
+
+ Status GetDefLevels(const int16_t** data, int64_t* length) override {
+ return item_reader_->GetDefLevels(data, length);
+ }
+
+ Status GetRepLevels(const int16_t** data, int64_t* length) override {
+ return item_reader_->GetRepLevels(data, length);
+ }
+
+ bool IsOrHasRepeatedChild() const final { return true; }
+
+ Status LoadBatch(int64_t number_of_records) final {
+ return item_reader_->LoadBatch(number_of_records);
+ }
+
+ virtual ::arrow::Result<std::shared_ptr<ChunkedArray>> AssembleArray(
+ std::shared_ptr<ArrayData> data) {
+ if (field_->type()->id() == ::arrow::Type::MAP) {
+ // Error out if data is not map-compliant instead of aborting in MakeArray below
+ RETURN_NOT_OK(::arrow::MapArray::ValidateChildData(data->child_data));
+ }
+ std::shared_ptr<Array> result = ::arrow::MakeArray(data);
+ return std::make_shared<ChunkedArray>(result);
+ }
+
+ Status BuildArray(int64_t length_upper_bound,
+ std::shared_ptr<ChunkedArray>* out) override {
+ const int16_t* def_levels;
+ const int16_t* rep_levels;
+ int64_t num_levels;
+ RETURN_NOT_OK(item_reader_->GetDefLevels(&def_levels, &num_levels));
+ RETURN_NOT_OK(item_reader_->GetRepLevels(&rep_levels, &num_levels));
+
+ std::shared_ptr<ResizableBuffer> validity_buffer;
+ ::parquet::internal::ValidityBitmapInputOutput validity_io;
+ validity_io.values_read_upper_bound = length_upper_bound;
+ if (field_->nullable()) {
+ ARROW_ASSIGN_OR_RAISE(
+ validity_buffer,
+ AllocateResizableBuffer(BitUtil::BytesForBits(length_upper_bound), ctx_->pool));
+ validity_io.valid_bits = validity_buffer->mutable_data();
+ }
+ ARROW_ASSIGN_OR_RAISE(
+ std::shared_ptr<ResizableBuffer> offsets_buffer,
+ AllocateResizableBuffer(
+ sizeof(IndexType) * std::max(int64_t{1}, length_upper_bound + 1),
+ ctx_->pool));
+  // Ensure zero initialization in case we have reached a zero-length list (and
+  // because the first entry is always zero).
+ IndexType* offset_data = reinterpret_cast<IndexType*>(offsets_buffer->mutable_data());
+ offset_data[0] = 0;
+ BEGIN_PARQUET_CATCH_EXCEPTIONS
+ ::parquet::internal::DefRepLevelsToList(def_levels, rep_levels, num_levels,
+ level_info_, &validity_io, offset_data);
+ END_PARQUET_CATCH_EXCEPTIONS
+
+ RETURN_NOT_OK(item_reader_->BuildArray(offset_data[validity_io.values_read], out));
+
+ // Resize to actual number of elements returned.
+ RETURN_NOT_OK(
+ offsets_buffer->Resize((validity_io.values_read + 1) * sizeof(IndexType)));
+ if (validity_buffer != nullptr) {
+ RETURN_NOT_OK(
+ validity_buffer->Resize(BitUtil::BytesForBits(validity_io.values_read)));
+ validity_buffer->ZeroPadding();
+ }
+ ARROW_ASSIGN_OR_RAISE(std::shared_ptr<ArrayData> item_chunk, ChunksToSingle(**out));
+
+ std::vector<std::shared_ptr<Buffer>> buffers{
+ validity_io.null_count > 0 ? validity_buffer : nullptr, offsets_buffer};
+ auto data = std::make_shared<ArrayData>(
+ field_->type(),
+ /*length=*/validity_io.values_read, std::move(buffers),
+ std::vector<std::shared_ptr<ArrayData>>{item_chunk}, validity_io.null_count);
+
+ ARROW_ASSIGN_OR_RAISE(*out, AssembleArray(std::move(data)));
+ return Status::OK();
+ }
+
+ const std::shared_ptr<Field> field() override { return field_; }
+
+ private:
+ std::shared_ptr<ReaderContext> ctx_;
+ std::shared_ptr<Field> field_;
+ ::parquet::internal::LevelInfo level_info_;
+ std::unique_ptr<ColumnReaderImpl> item_reader_;
+};
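+
+// A worked example of the level decoding performed in ListReader::BuildArray,
+// assuming the usual three-level Parquet encoding of a nullable
+// list<int32 not null> column (def level 0 = null list, 1 = empty list,
+// 2 = element present; rep level 0 starts a new list, 1 continues one):
+//
+//   values      [[1, 2], [], null, [3]]
+//   def levels   2  2    1   0     2
+//   rep levels   0  1    0   0     0
+//   -> offsets  [0, 2, 2, 2, 3], validity bitmap 1 1 0 1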
+
+class PARQUET_NO_EXPORT FixedSizeListReader : public ListReader<int32_t> {
+ public:
+ FixedSizeListReader(std::shared_ptr<ReaderContext> ctx, std::shared_ptr<Field> field,
+ ::parquet::internal::LevelInfo level_info,
+ std::unique_ptr<ColumnReaderImpl> child_reader)
+ : ListReader(std::move(ctx), std::move(field), level_info,
+ std::move(child_reader)) {}
+ ::arrow::Result<std::shared_ptr<ChunkedArray>> AssembleArray(
+ std::shared_ptr<ArrayData> data) final {
+ DCHECK_EQ(data->buffers.size(), 2);
+ DCHECK_EQ(field()->type()->id(), ::arrow::Type::FIXED_SIZE_LIST);
+ const auto& type = checked_cast<::arrow::FixedSizeListType&>(*field()->type());
+ const int32_t* offsets = reinterpret_cast<const int32_t*>(data->buffers[1]->data());
+ for (int x = 1; x <= data->length; x++) {
+ int32_t size = offsets[x] - offsets[x - 1];
+ if (size != type.list_size()) {
+ return Status::Invalid("Expected all lists to be of size=", type.list_size(),
+ " but index ", x, " had size=", size);
+ }
+ }
+ data->buffers.resize(1);
+ std::shared_ptr<Array> result = ::arrow::MakeArray(data);
+ return std::make_shared<ChunkedArray>(result);
+ }
+};
+
+class PARQUET_NO_EXPORT StructReader : public ColumnReaderImpl {
+ public:
+ explicit StructReader(std::shared_ptr<ReaderContext> ctx,
+ std::shared_ptr<Field> filtered_field,
+ ::parquet::internal::LevelInfo level_info,
+ std::vector<std::unique_ptr<ColumnReaderImpl>> children)
+ : ctx_(std::move(ctx)),
+ filtered_field_(std::move(filtered_field)),
+ level_info_(level_info),
+ children_(std::move(children)) {
+    // There could be a mix of children: some might be repeated and some might
+    // not be. If possible, use one that isn't, since it is guaranteed to have
+    // the fewest levels needed to reconstruct a nullable bitmap.
+ auto result = std::find_if(children_.begin(), children_.end(),
+ [](const std::unique_ptr<ColumnReaderImpl>& child) {
+ return !child->IsOrHasRepeatedChild();
+ });
+ if (result != children_.end()) {
+ def_rep_level_child_ = result->get();
+ has_repeated_child_ = false;
+ } else if (!children_.empty()) {
+ def_rep_level_child_ = children_.front().get();
+ has_repeated_child_ = true;
+ }
+ }
+
+ bool IsOrHasRepeatedChild() const final { return has_repeated_child_; }
+
+ Status LoadBatch(int64_t records_to_read) override {
+ for (const std::unique_ptr<ColumnReaderImpl>& reader : children_) {
+ RETURN_NOT_OK(reader->LoadBatch(records_to_read));
+ }
+ return Status::OK();
+ }
+ Status BuildArray(int64_t length_upper_bound,
+ std::shared_ptr<ChunkedArray>* out) override;
+ Status GetDefLevels(const int16_t** data, int64_t* length) override;
+ Status GetRepLevels(const int16_t** data, int64_t* length) override;
+ const std::shared_ptr<Field> field() override { return filtered_field_; }
+
+ private:
+ const std::shared_ptr<ReaderContext> ctx_;
+ const std::shared_ptr<Field> filtered_field_;
+ const ::parquet::internal::LevelInfo level_info_;
+ const std::vector<std::unique_ptr<ColumnReaderImpl>> children_;
+ ColumnReaderImpl* def_rep_level_child_ = nullptr;
+  // Initialized defensively in case this reader is constructed with no children.
+  bool has_repeated_child_ = false;
+};
+
+Status StructReader::GetDefLevels(const int16_t** data, int64_t* length) {
+ *data = nullptr;
+ if (children_.size() == 0) {
+ *length = 0;
+ return Status::Invalid("StructReader had no children");
+ }
+
+  // This method should only be called when this struct, or one of its parents,
+  // is optional/repeated, or when it has a repeated child; this means all
+  // children must have rep/def levels associated with them.
+ RETURN_NOT_OK(def_rep_level_child_->GetDefLevels(data, length));
+ return Status::OK();
+}
+
+Status StructReader::GetRepLevels(const int16_t** data, int64_t* length) {
+ *data = nullptr;
+ if (children_.size() == 0) {
+ *length = 0;
+ return Status::Invalid("StructReader had no childre");
+ }
+
+ // This method should only be called when this struct or one of its parents
+ // are optional/repeated or it has repeated child.
+ // Meaning all children must have rep/def levels associated
+ // with them.
+ RETURN_NOT_OK(def_rep_level_child_->GetRepLevels(data, length));
+ return Status::OK();
+}
+
+Status StructReader::BuildArray(int64_t length_upper_bound,
+ std::shared_ptr<ChunkedArray>* out) {
+ std::vector<std::shared_ptr<ArrayData>> children_array_data;
+ std::shared_ptr<ResizableBuffer> null_bitmap;
+
+ ::parquet::internal::ValidityBitmapInputOutput validity_io;
+ validity_io.values_read_upper_bound = length_upper_bound;
+ // This simplifies accounting below.
+ validity_io.values_read = length_upper_bound;
+
+ BEGIN_PARQUET_CATCH_EXCEPTIONS
+ const int16_t* def_levels;
+ const int16_t* rep_levels;
+ int64_t num_levels;
+
+ if (has_repeated_child_) {
+ ARROW_ASSIGN_OR_RAISE(
+ null_bitmap,
+ AllocateResizableBuffer(BitUtil::BytesForBits(length_upper_bound), ctx_->pool));
+ validity_io.valid_bits = null_bitmap->mutable_data();
+ RETURN_NOT_OK(GetDefLevels(&def_levels, &num_levels));
+ RETURN_NOT_OK(GetRepLevels(&rep_levels, &num_levels));
+ DefRepLevelsToBitmap(def_levels, rep_levels, num_levels, level_info_, &validity_io);
+ } else if (filtered_field_->nullable()) {
+ ARROW_ASSIGN_OR_RAISE(
+ null_bitmap,
+ AllocateResizableBuffer(BitUtil::BytesForBits(length_upper_bound), ctx_->pool));
+ validity_io.valid_bits = null_bitmap->mutable_data();
+ RETURN_NOT_OK(GetDefLevels(&def_levels, &num_levels));
+ DefLevelsToBitmap(def_levels, num_levels, level_info_, &validity_io);
+ }
+
+ // Ensure all values are initialized.
+ if (null_bitmap) {
+ RETURN_NOT_OK(null_bitmap->Resize(BitUtil::BytesForBits(validity_io.values_read)));
+ null_bitmap->ZeroPadding();
+ }
+
+ END_PARQUET_CATCH_EXCEPTIONS
+  // Gather child arrays and def levels
+ for (auto& child : children_) {
+ std::shared_ptr<ChunkedArray> field;
+ RETURN_NOT_OK(child->BuildArray(validity_io.values_read, &field));
+ ARROW_ASSIGN_OR_RAISE(std::shared_ptr<ArrayData> array_data, ChunksToSingle(*field));
+ children_array_data.push_back(std::move(array_data));
+ }
+
+ if (!filtered_field_->nullable() && !has_repeated_child_) {
+ validity_io.values_read = children_array_data.front()->length;
+ }
+
+ std::vector<std::shared_ptr<Buffer>> buffers{validity_io.null_count > 0 ? null_bitmap
+ : nullptr};
+ auto data =
+ std::make_shared<ArrayData>(filtered_field_->type(),
+ /*length=*/validity_io.values_read, std::move(buffers),
+ std::move(children_array_data));
+ std::shared_ptr<Array> result = ::arrow::MakeArray(data);
+
+ *out = std::make_shared<ChunkedArray>(result);
+ return Status::OK();
+}
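+
+// A minimal worked example for the non-repeated struct path above: for an
+// optional struct<a: optional int32>, def level 0 marks a null struct, so
+// DefLevelsToBitmap turns
+//
+//   values      [{a: 1}, null, {a: null}]
+//   def levels   2       0     1
+//
+// into the struct validity bitmap 1 0 1, while the child reader independently
+// rebuilds `a` as [1, null, null] over the same length.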
+
+// ----------------------------------------------------------------------
+// File reader implementation
+
+Status GetReader(const SchemaField& field, const std::shared_ptr<Field>& arrow_field,
+ const std::shared_ptr<ReaderContext>& ctx,
+ std::unique_ptr<ColumnReaderImpl>* out) {
+ BEGIN_PARQUET_CATCH_EXCEPTIONS
+
+ auto type_id = arrow_field->type()->id();
+
+ if (type_id == ::arrow::Type::EXTENSION) {
+ auto storage_field = arrow_field->WithType(
+ checked_cast<const ExtensionType&>(*arrow_field->type()).storage_type());
+ RETURN_NOT_OK(GetReader(field, storage_field, ctx, out));
+ out->reset(new ExtensionReader(arrow_field, std::move(*out)));
+ return Status::OK();
+ }
+
+ if (field.children.size() == 0) {
+ if (!field.is_leaf()) {
+ return Status::Invalid("Parquet non-leaf node has no children");
+ }
+ if (!ctx->IncludesLeaf(field.column_index)) {
+ *out = nullptr;
+ return Status::OK();
+ }
+ std::unique_ptr<FileColumnIterator> input(
+ ctx->iterator_factory(field.column_index, ctx->reader));
+ out->reset(new LeafReader(ctx, arrow_field, std::move(input), field.level_info));
+ } else if (type_id == ::arrow::Type::LIST || type_id == ::arrow::Type::MAP ||
+ type_id == ::arrow::Type::FIXED_SIZE_LIST ||
+ type_id == ::arrow::Type::LARGE_LIST) {
+ auto list_field = arrow_field;
+ auto child = &field.children[0];
+ std::unique_ptr<ColumnReaderImpl> child_reader;
+ RETURN_NOT_OK(GetReader(*child, ctx, &child_reader));
+ if (child_reader == nullptr) {
+ *out = nullptr;
+ return Status::OK();
+ }
+ if (type_id == ::arrow::Type::LIST ||
+        type_id == ::arrow::Type::MAP) {  // Map can be reconstructed as a list of structs.
+ if (type_id == ::arrow::Type::MAP &&
+ child_reader->field()->type()->num_fields() != 2) {
+ // This case applies if either key or value is filtered.
+ list_field = list_field->WithType(::arrow::list(child_reader->field()));
+ }
+ out->reset(new ListReader<int32_t>(ctx, list_field, field.level_info,
+ std::move(child_reader)));
+ } else if (type_id == ::arrow::Type::LARGE_LIST) {
+ out->reset(new ListReader<int64_t>(ctx, list_field, field.level_info,
+ std::move(child_reader)));
+
+ } else if (type_id == ::arrow::Type::FIXED_SIZE_LIST) {
+ out->reset(new FixedSizeListReader(ctx, list_field, field.level_info,
+ std::move(child_reader)));
+ } else {
+ return Status::UnknownError("Unknown list type: ", field.field->ToString());
+ }
+ } else if (type_id == ::arrow::Type::STRUCT) {
+ std::vector<std::shared_ptr<Field>> child_fields;
+ std::vector<std::unique_ptr<ColumnReaderImpl>> child_readers;
+ for (const auto& child : field.children) {
+ std::unique_ptr<ColumnReaderImpl> child_reader;
+ RETURN_NOT_OK(GetReader(child, ctx, &child_reader));
+ if (!child_reader) {
+ // If all children were pruned, then we do not try to read this field
+ continue;
+ }
+ child_fields.push_back(child.field);
+ child_readers.emplace_back(std::move(child_reader));
+ }
+ if (child_fields.size() == 0) {
+ *out = nullptr;
+ return Status::OK();
+ }
+ auto filtered_field =
+ ::arrow::field(arrow_field->name(), ::arrow::struct_(child_fields),
+ arrow_field->nullable(), arrow_field->metadata());
+ out->reset(new StructReader(ctx, filtered_field, field.level_info,
+ std::move(child_readers)));
+ } else {
+ return Status::Invalid("Unsupported nested type: ", arrow_field->ToString());
+ }
+ return Status::OK();
+
+ END_PARQUET_CATCH_EXCEPTIONS
+}
+
+Status GetReader(const SchemaField& field, const std::shared_ptr<ReaderContext>& ctx,
+ std::unique_ptr<ColumnReaderImpl>* out) {
+ return GetReader(field, field.field, ctx, out);
+}
+
+} // namespace
+
+Status FileReaderImpl::GetRecordBatchReader(const std::vector<int>& row_groups,
+ const std::vector<int>& column_indices,
+ std::unique_ptr<RecordBatchReader>* out) {
+ RETURN_NOT_OK(BoundsCheck(row_groups, column_indices));
+
+ if (reader_properties_.pre_buffer()) {
+ // PARQUET-1698/PARQUET-1820: pre-buffer row groups/column chunks if enabled
+ BEGIN_PARQUET_CATCH_EXCEPTIONS
+ reader_->PreBuffer(row_groups, column_indices, reader_properties_.io_context(),
+ reader_properties_.cache_options());
+ END_PARQUET_CATCH_EXCEPTIONS
+ }
+
+ std::vector<std::shared_ptr<ColumnReaderImpl>> readers;
+ std::shared_ptr<::arrow::Schema> batch_schema;
+ RETURN_NOT_OK(GetFieldReaders(column_indices, row_groups, &readers, &batch_schema));
+
+ if (readers.empty()) {
+ // Just generate all batches right now; they're cheap since they have no columns.
+ int64_t batch_size = properties().batch_size();
+ auto max_sized_batch =
+ ::arrow::RecordBatch::Make(batch_schema, batch_size, ::arrow::ArrayVector{});
+
+ ::arrow::RecordBatchVector batches;
+
+ for (int row_group : row_groups) {
+ int64_t num_rows = parquet_reader()->metadata()->RowGroup(row_group)->num_rows();
+
+ batches.insert(batches.end(), num_rows / batch_size, max_sized_batch);
+
+ if (int64_t trailing_rows = num_rows % batch_size) {
+ batches.push_back(max_sized_batch->Slice(0, trailing_rows));
+ }
+ }
+
+ *out = ::arrow::internal::make_unique<RowGroupRecordBatchReader>(
+ ::arrow::MakeVectorIterator(std::move(batches)), std::move(batch_schema));
+
+ return Status::OK();
+ }
+
+ int64_t num_rows = 0;
+ for (int row_group : row_groups) {
+ num_rows += parquet_reader()->metadata()->RowGroup(row_group)->num_rows();
+ }
+
+ using ::arrow::RecordBatchIterator;
+
+ // NB: This lambda will be invoked outside the scope of this call to
+ // `GetRecordBatchReader()`, so it must capture `readers` and `batch_schema` by value.
+ // `this` is a non-owning pointer so we are relying on the parent FileReader outliving
+ // this RecordBatchReader.
+ ::arrow::Iterator<RecordBatchIterator> batches = ::arrow::MakeFunctionIterator(
+ [readers, batch_schema, num_rows,
+ this]() mutable -> ::arrow::Result<RecordBatchIterator> {
+ ::arrow::ChunkedArrayVector columns(readers.size());
+
+ // don't reserve more rows than necessary
+ int64_t batch_size = std::min(properties().batch_size(), num_rows);
+ num_rows -= batch_size;
+
+ RETURN_NOT_OK(::arrow::internal::OptionalParallelFor(
+ reader_properties_.use_threads(), static_cast<int>(readers.size()),
+ [&](int i) { return readers[i]->NextBatch(batch_size, &columns[i]); }));
+
+ for (const auto& column : columns) {
+ if (column == nullptr || column->length() == 0) {
+ return ::arrow::IterationTraits<RecordBatchIterator>::End();
+ }
+ }
+
+ auto table = ::arrow::Table::Make(batch_schema, std::move(columns));
+ auto table_reader = std::make_shared<::arrow::TableBatchReader>(*table);
+
+ // NB: explicitly preserve table so that table_reader doesn't outlive it
+ return ::arrow::MakeFunctionIterator(
+ [table, table_reader] { return table_reader->Next(); });
+ });
+
+ *out = ::arrow::internal::make_unique<RowGroupRecordBatchReader>(
+ ::arrow::MakeFlattenIterator(std::move(batches)), std::move(batch_schema));
+
+ return Status::OK();
+}
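+
+// A hedged usage sketch for the batch reader built above (`reader` is a
+// hypothetical std::unique_ptr<FileReader>; the indices are examples only):
+//
+//   std::unique_ptr<::arrow::RecordBatchReader> rb_reader;
+//   RETURN_NOT_OK(reader->GetRecordBatchReader(/*row_groups=*/{0, 1},
+//                                              /*column_indices=*/{0, 2},
+//                                              &rb_reader));
+//   std::shared_ptr<::arrow::RecordBatch> batch;
+//   while (true) {
+//     RETURN_NOT_OK(rb_reader->ReadNext(&batch));
+//     if (batch == nullptr) break;  // end of stream
+//     // ... consume batch ...
+//   }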
+
+/// Given a file reader and a list of row groups, this is a generator of record
+/// batch generators (where each sub-generator is the contents of a single row group).
+class RowGroupGenerator {
+ public:
+ using RecordBatchGenerator =
+ ::arrow::AsyncGenerator<std::shared_ptr<::arrow::RecordBatch>>;
+
+ explicit RowGroupGenerator(std::shared_ptr<FileReaderImpl> arrow_reader,
+ ::arrow::internal::Executor* cpu_executor,
+ std::vector<int> row_groups, std::vector<int> column_indices)
+ : arrow_reader_(std::move(arrow_reader)),
+ cpu_executor_(cpu_executor),
+ row_groups_(std::move(row_groups)),
+ column_indices_(std::move(column_indices)),
+ index_(0) {}
+
+ ::arrow::Future<RecordBatchGenerator> operator()() {
+ if (index_ >= row_groups_.size()) {
+ return ::arrow::AsyncGeneratorEnd<RecordBatchGenerator>();
+ }
+ int row_group = row_groups_[index_++];
+ std::vector<int> column_indices = column_indices_;
+ auto reader = arrow_reader_;
+ if (!reader->properties().pre_buffer()) {
+ return SubmitRead(cpu_executor_, reader, row_group, column_indices);
+ }
+ auto ready = reader->parquet_reader()->WhenBuffered({row_group}, column_indices);
+ if (cpu_executor_) ready = cpu_executor_->TransferAlways(ready);
+ return ready.Then([=]() -> ::arrow::Future<RecordBatchGenerator> {
+ return ReadOneRowGroup(cpu_executor_, reader, row_group, column_indices);
+ });
+ }
+
+ private:
+ // Synchronous fallback for when pre-buffer isn't enabled.
+ //
+ // Making the Parquet reader truly asynchronous requires heavy refactoring, so the
+ // generator piggybacks on ReadRangeCache. The lazy ReadRangeCache can be used for
+ // async I/O without forcing readahead.
+ static ::arrow::Future<RecordBatchGenerator> SubmitRead(
+ ::arrow::internal::Executor* cpu_executor, std::shared_ptr<FileReaderImpl> self,
+ const int row_group, const std::vector<int>& column_indices) {
+ if (!cpu_executor) {
+ return ReadOneRowGroup(cpu_executor, self, row_group, column_indices);
+ }
+ // If we have an executor, then force transfer (even if I/O was complete)
+ return ::arrow::DeferNotOk(cpu_executor->Submit(ReadOneRowGroup, cpu_executor, self,
+ row_group, column_indices));
+ }
+
+ static ::arrow::Future<RecordBatchGenerator> ReadOneRowGroup(
+ ::arrow::internal::Executor* cpu_executor, std::shared_ptr<FileReaderImpl> self,
+ const int row_group, const std::vector<int>& column_indices) {
+    // Skips bounds checks/pre-buffering, since we've done that already
+ return self->DecodeRowGroups(self, {row_group}, column_indices, cpu_executor)
+ .Then([](const std::shared_ptr<Table>& table)
+ -> ::arrow::Result<RecordBatchGenerator> {
+ ::arrow::TableBatchReader table_reader(*table);
+ ::arrow::RecordBatchVector batches;
+ RETURN_NOT_OK(table_reader.ReadAll(&batches));
+ return ::arrow::MakeVectorGenerator(std::move(batches));
+ });
+ }
+
+ std::shared_ptr<FileReaderImpl> arrow_reader_;
+ ::arrow::internal::Executor* cpu_executor_;
+ std::vector<int> row_groups_;
+ std::vector<int> column_indices_;
+ size_t index_;
+};
+
+::arrow::Result<::arrow::AsyncGenerator<std::shared_ptr<::arrow::RecordBatch>>>
+FileReaderImpl::GetRecordBatchGenerator(std::shared_ptr<FileReader> reader,
+ const std::vector<int> row_group_indices,
+ const std::vector<int> column_indices,
+ ::arrow::internal::Executor* cpu_executor) {
+ RETURN_NOT_OK(BoundsCheck(row_group_indices, column_indices));
+ if (reader_properties_.pre_buffer()) {
+ BEGIN_PARQUET_CATCH_EXCEPTIONS
+ reader_->PreBuffer(row_group_indices, column_indices, reader_properties_.io_context(),
+ reader_properties_.cache_options());
+ END_PARQUET_CATCH_EXCEPTIONS
+ }
+ ::arrow::AsyncGenerator<RowGroupGenerator::RecordBatchGenerator> row_group_generator =
+ RowGroupGenerator(::arrow::internal::checked_pointer_cast<FileReaderImpl>(reader),
+ cpu_executor, row_group_indices, column_indices);
+ return ::arrow::MakeConcatenatedGenerator(std::move(row_group_generator));
+}
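+
+// A hedged consumption sketch for the generator above, assuming
+// ::arrow::CollectAsyncGenerator from arrow/util/async_generator.h (which
+// drains an AsyncGenerator into a Future of a vector); `reader` here is a
+// hypothetical std::shared_ptr<FileReader>:
+//
+//   ARROW_ASSIGN_OR_RAISE(
+//       auto gen, reader->GetRecordBatchGenerator(reader,
+//                                                 /*row_group_indices=*/{0},
+//                                                 /*column_indices=*/{0},
+//                                                 /*cpu_executor=*/nullptr));
+//   ARROW_ASSIGN_OR_RAISE(
+//       auto batches, ::arrow::CollectAsyncGenerator(std::move(gen)).result());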
+
+Status FileReaderImpl::GetColumn(int i, FileColumnIteratorFactory iterator_factory,
+ std::unique_ptr<ColumnReader>* out) {
+ RETURN_NOT_OK(BoundsCheckColumn(i));
+ auto ctx = std::make_shared<ReaderContext>();
+ ctx->reader = reader_.get();
+ ctx->pool = pool_;
+ ctx->iterator_factory = iterator_factory;
+ ctx->filter_leaves = false;
+ std::unique_ptr<ColumnReaderImpl> result;
+ RETURN_NOT_OK(GetReader(manifest_.schema_fields[i], ctx, &result));
+ out->reset(result.release());
+ return Status::OK();
+}
+
+Status FileReaderImpl::ReadRowGroups(const std::vector<int>& row_groups,
+ const std::vector<int>& column_indices,
+ std::shared_ptr<Table>* out) {
+ RETURN_NOT_OK(BoundsCheck(row_groups, column_indices));
+
+ // PARQUET-1698/PARQUET-1820: pre-buffer row groups/column chunks if enabled
+ if (reader_properties_.pre_buffer()) {
+ BEGIN_PARQUET_CATCH_EXCEPTIONS
+ parquet_reader()->PreBuffer(row_groups, column_indices,
+ reader_properties_.io_context(),
+ reader_properties_.cache_options());
+ END_PARQUET_CATCH_EXCEPTIONS
+ }
+
+ auto fut = DecodeRowGroups(/*self=*/nullptr, row_groups, column_indices,
+ /*cpu_executor=*/nullptr);
+ ARROW_ASSIGN_OR_RAISE(*out, fut.MoveResult());
+ return Status::OK();
+}
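+
+// A hedged usage sketch (`reader` is a hypothetical FileReader*):
+//
+//   std::shared_ptr<Table> table;
+//   RETURN_NOT_OK(reader->ReadRowGroups(/*row_groups=*/{0},
+//                                       /*column_indices=*/{0, 1}, &table));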
+
+Future<std::shared_ptr<Table>> FileReaderImpl::DecodeRowGroups(
+ std::shared_ptr<FileReaderImpl> self, const std::vector<int>& row_groups,
+ const std::vector<int>& column_indices, ::arrow::internal::Executor* cpu_executor) {
+  // `self` is used solely to keep `this` alive in an async context - but since
+  // this method is also used in a sync context, we use `this` rather than `self`.
+ std::vector<std::shared_ptr<ColumnReaderImpl>> readers;
+ std::shared_ptr<::arrow::Schema> result_schema;
+ RETURN_NOT_OK(GetFieldReaders(column_indices, row_groups, &readers, &result_schema));
+ // OptionalParallelForAsync requires an executor
+ if (!cpu_executor) cpu_executor = ::arrow::internal::GetCpuThreadPool();
+
+ auto read_column = [row_groups, self, this](size_t i,
+ std::shared_ptr<ColumnReaderImpl> reader)
+ -> ::arrow::Result<std::shared_ptr<::arrow::ChunkedArray>> {
+ std::shared_ptr<::arrow::ChunkedArray> column;
+ RETURN_NOT_OK(ReadColumn(static_cast<int>(i), row_groups, reader.get(), &column));
+ return column;
+ };
+ auto make_table = [result_schema, row_groups, self,
+ this](const ::arrow::ChunkedArrayVector& columns)
+ -> ::arrow::Result<std::shared_ptr<Table>> {
+ int64_t num_rows = 0;
+ if (!columns.empty()) {
+ num_rows = columns[0]->length();
+ } else {
+ for (int i : row_groups) {
+ num_rows += parquet_reader()->metadata()->RowGroup(i)->num_rows();
+ }
+ }
+ auto table = Table::Make(std::move(result_schema), columns, num_rows);
+ RETURN_NOT_OK(table->Validate());
+ return table;
+ };
+ return ::arrow::internal::OptionalParallelForAsync(reader_properties_.use_threads(),
+ std::move(readers), read_column,
+ cpu_executor)
+ .Then(std::move(make_table));
+}
+
+std::shared_ptr<RowGroupReader> FileReaderImpl::RowGroup(int row_group_index) {
+ return std::make_shared<RowGroupReaderImpl>(this, row_group_index);
+}
+
+// ----------------------------------------------------------------------
+// Public factory functions
+
+Status FileReader::GetRecordBatchReader(const std::vector<int>& row_group_indices,
+ std::shared_ptr<RecordBatchReader>* out) {
+ std::unique_ptr<RecordBatchReader> tmp;
+ ARROW_RETURN_NOT_OK(GetRecordBatchReader(row_group_indices, &tmp));
+ out->reset(tmp.release());
+ return Status::OK();
+}
+
+Status FileReader::GetRecordBatchReader(const std::vector<int>& row_group_indices,
+ const std::vector<int>& column_indices,
+ std::shared_ptr<RecordBatchReader>* out) {
+ std::unique_ptr<RecordBatchReader> tmp;
+ ARROW_RETURN_NOT_OK(GetRecordBatchReader(row_group_indices, column_indices, &tmp));
+ out->reset(tmp.release());
+ return Status::OK();
+}
+
+Status FileReader::Make(::arrow::MemoryPool* pool,
+ std::unique_ptr<ParquetFileReader> reader,
+ const ArrowReaderProperties& properties,
+ std::unique_ptr<FileReader>* out) {
+ out->reset(new FileReaderImpl(pool, std::move(reader), properties));
+ return static_cast<FileReaderImpl*>(out->get())->Init();
+}
+
+Status FileReader::Make(::arrow::MemoryPool* pool,
+ std::unique_ptr<ParquetFileReader> reader,
+ std::unique_ptr<FileReader>* out) {
+ return Make(pool, std::move(reader), default_arrow_reader_properties(), out);
+}
+
+FileReaderBuilder::FileReaderBuilder()
+ : pool_(::arrow::default_memory_pool()),
+ properties_(default_arrow_reader_properties()) {}
+
+Status FileReaderBuilder::Open(std::shared_ptr<::arrow::io::RandomAccessFile> file,
+ const ReaderProperties& properties,
+ std::shared_ptr<FileMetaData> metadata) {
+ PARQUET_CATCH_NOT_OK(raw_reader_ = ParquetReader::Open(std::move(file), properties,
+ std::move(metadata)));
+ return Status::OK();
+}
+
+FileReaderBuilder* FileReaderBuilder::memory_pool(::arrow::MemoryPool* pool) {
+ pool_ = pool;
+ return this;
+}
+
+FileReaderBuilder* FileReaderBuilder::properties(
+ const ArrowReaderProperties& arg_properties) {
+ properties_ = arg_properties;
+ return this;
+}
+
+Status FileReaderBuilder::Build(std::unique_ptr<FileReader>* out) {
+ return FileReader::Make(pool_, std::move(raw_reader_), properties_, out);
+}
+
+Status OpenFile(std::shared_ptr<::arrow::io::RandomAccessFile> file, MemoryPool* pool,
+ std::unique_ptr<FileReader>* reader) {
+ FileReaderBuilder builder;
+ RETURN_NOT_OK(builder.Open(std::move(file)));
+ return builder.memory_pool(pool)->Build(reader);
+}
+
+namespace internal {
+
+Status FuzzReader(std::unique_ptr<FileReader> reader) {
+ auto st = Status::OK();
+ for (int i = 0; i < reader->num_row_groups(); ++i) {
+ std::shared_ptr<Table> table;
+ auto row_group_status = reader->ReadRowGroup(i, &table);
+ if (row_group_status.ok()) {
+ row_group_status &= table->ValidateFull();
+ }
+ st &= row_group_status;
+ }
+ return st;
+}
+
+Status FuzzReader(const uint8_t* data, int64_t size) {
+ auto buffer = std::make_shared<::arrow::Buffer>(data, size);
+ auto file = std::make_shared<::arrow::io::BufferReader>(buffer);
+ FileReaderBuilder builder;
+ RETURN_NOT_OK(builder.Open(std::move(file)));
+
+ std::unique_ptr<FileReader> reader;
+ RETURN_NOT_OK(builder.Build(&reader));
+ return FuzzReader(std::move(reader));
+}
+
+} // namespace internal
+
+} // namespace arrow
+} // namespace parquet
diff --git a/contrib/libs/apache/arrow/cpp/src/parquet/arrow/reader.h b/contrib/libs/apache/arrow/cpp/src/parquet/arrow/reader.h
index e8a2dd889da..2d6a5ef2c3e 100644
--- a/contrib/libs/apache/arrow/cpp/src/parquet/arrow/reader.h
+++ b/contrib/libs/apache/arrow/cpp/src/parquet/arrow/reader.h
@@ -1,343 +1,343 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-#pragma once
-
-#include <cstdint>
-// N.B. we don't include async_generator.h as it's relatively heavy
-#include <functional>
-#include <memory>
-#include <vector>
-
-#include "parquet/file_reader.h"
-#include "parquet/platform.h"
-#include "parquet/properties.h"
-
-namespace arrow {
-
-class ChunkedArray;
-class KeyValueMetadata;
-class RecordBatchReader;
-struct Scalar;
-class Schema;
-class Table;
-class RecordBatch;
-
-} // namespace arrow
-
-namespace parquet {
-
-class FileMetaData;
-class SchemaDescriptor;
-
-namespace arrow {
-
-class ColumnChunkReader;
-class ColumnReader;
-struct SchemaManifest;
-class RowGroupReader;
-
-/// \brief Arrow read adapter class for deserializing Parquet files as Arrow row batches.
-///
-/// This interface caters for different use cases and thus provides different
-/// entry points. In its simplest form, it caters for a user who wants to
-/// read the whole Parquet file at once with the `FileReader::ReadTable` method.
-///
-/// More advanced users who also want to implement parallelism on top of each
-/// single Parquet file should do this on the RowGroup level. For this, they can
-/// call `FileReader::RowGroup(i)->ReadTable` to receive only the specified
-/// RowGroup as a table.
-///
-/// In the most advanced situation, where a consumer wants to independently read
-/// RowGroups in parallel and consume each column individually, they can call
-/// `FileReader::RowGroup(i)->Column(j)->Read` and receive an
-/// `arrow::ChunkedArray` instance.
-///
-/// The parquet format supports an optional integer field_id which can be assigned
-/// to a field. Arrow will convert these field IDs to a metadata key named
-/// PARQUET:field_id on the appropriate field.
-// TODO(wesm): nested data does not always make sense with this user
-// interface unless you are only reading a single leaf node from a branch of
-// a table. For example:
-//
-// repeated group data {
-// optional group record {
-// optional int32 val1;
-// optional byte_array val2;
-// optional bool val3;
-// }
-// optional int32 val4;
-// }
-//
-// In the Parquet file, there are 4 leaf nodes:
-//
-// * data.record.val1
-// * data.record.val2
-// * data.record.val3
-// * data.val4
-//
-// When materializing this data in an Arrow array, we would have:
-//
-// data: list<struct<
-// record: struct<
-// val1: int32,
-// val2: string (= list<uint8>),
-// val3: bool,
-// >,
-// val4: int32
-// >>
-//
-// However, in the Parquet format, each leaf node has its own repetition and
-// definition levels describing the structure of the intermediate nodes in
-// this array structure. Thus, we will need to scan the leaf data for a group
-// of leaf nodes part of the same type tree to create a single result Arrow
-// nested array structure.
-//
-// This is additionally complicated by "chunky" repeated fields or very large
-// byte arrays
-class PARQUET_EXPORT FileReader {
- public:
- /// Factory function to create a FileReader from a ParquetFileReader and properties
- static ::arrow::Status Make(::arrow::MemoryPool* pool,
- std::unique_ptr<ParquetFileReader> reader,
- const ArrowReaderProperties& properties,
- std::unique_ptr<FileReader>* out);
-
- /// Factory function to create a FileReader from a ParquetFileReader
- static ::arrow::Status Make(::arrow::MemoryPool* pool,
- std::unique_ptr<ParquetFileReader> reader,
- std::unique_ptr<FileReader>* out);
-
- // Since the distribution of columns amongst a Parquet file's row groups may
- // be uneven (the number of values in each column chunk can be different), we
- // provide a column-oriented read interface. The ColumnReader hides the
- // details of paging through the file's row groups and yielding
- // fully-materialized arrow::Array instances
- //
- // Returns error status if the column of interest is not flat.
- virtual ::arrow::Status GetColumn(int i, std::unique_ptr<ColumnReader>* out) = 0;
-
- /// \brief Return arrow schema for all the columns.
- virtual ::arrow::Status GetSchema(std::shared_ptr<::arrow::Schema>* out) = 0;
-
- /// \brief Read column as a whole into a chunked array.
- ///
- /// The indicated column index is relative to the schema
- virtual ::arrow::Status ReadColumn(int i,
- std::shared_ptr<::arrow::ChunkedArray>* out) = 0;
-
- // NOTE: Experimental API
- // Reads a specific top level schema field into an Array
-  // The index i refers to the index of the top level schema field, which may
- // be nested or flat - e.g.
- //
- // 0 foo.bar
- // foo.bar.baz
- // foo.qux
- // 1 foo2
- // 2 foo3
- //
- // i=0 will read the entire foo struct, i=1 the foo2 primitive column etc
- virtual ::arrow::Status ReadSchemaField(
- int i, std::shared_ptr<::arrow::ChunkedArray>* out) = 0;
-
- /// \brief Return a RecordBatchReader of row groups selected from row_group_indices.
- ///
- /// Note that the ordering in row_group_indices matters. FileReaders must outlive
- /// their RecordBatchReaders.
- ///
- /// \returns error Status if row_group_indices contains an invalid index
- virtual ::arrow::Status GetRecordBatchReader(
- const std::vector<int>& row_group_indices,
- std::unique_ptr<::arrow::RecordBatchReader>* out) = 0;
-
- ::arrow::Status GetRecordBatchReader(const std::vector<int>& row_group_indices,
- std::shared_ptr<::arrow::RecordBatchReader>* out);
-
- /// \brief Return a RecordBatchReader of row groups selected from
- /// row_group_indices, whose columns are selected by column_indices.
- ///
-  /// Note that the ordering in row_group_indices and column_indices
-  /// matters. FileReaders must outlive their RecordBatchReaders.
- ///
- /// \returns error Status if either row_group_indices or column_indices
- /// contains an invalid index
- virtual ::arrow::Status GetRecordBatchReader(
- const std::vector<int>& row_group_indices, const std::vector<int>& column_indices,
- std::unique_ptr<::arrow::RecordBatchReader>* out) = 0;
-
- /// \brief Return a generator of record batches.
- ///
- /// The FileReader must outlive the generator, so this requires that you pass in a
- /// shared_ptr.
- ///
- /// \returns error Result if either row_group_indices or column_indices contains an
- /// invalid index
- virtual ::arrow::Result<
- std::function<::arrow::Future<std::shared_ptr<::arrow::RecordBatch>>()>>
- GetRecordBatchGenerator(std::shared_ptr<FileReader> reader,
- const std::vector<int> row_group_indices,
- const std::vector<int> column_indices,
- ::arrow::internal::Executor* cpu_executor = NULLPTR) = 0;
-
- ::arrow::Status GetRecordBatchReader(const std::vector<int>& row_group_indices,
- const std::vector<int>& column_indices,
- std::shared_ptr<::arrow::RecordBatchReader>* out);
-
- /// Read all columns into a Table
- virtual ::arrow::Status ReadTable(std::shared_ptr<::arrow::Table>* out) = 0;
-
- /// \brief Read the given columns into a Table
- ///
- /// The indicated column indices are relative to the schema
- virtual ::arrow::Status ReadTable(const std::vector<int>& column_indices,
- std::shared_ptr<::arrow::Table>* out) = 0;
-
- virtual ::arrow::Status ReadRowGroup(int i, const std::vector<int>& column_indices,
- std::shared_ptr<::arrow::Table>* out) = 0;
-
- virtual ::arrow::Status ReadRowGroup(int i, std::shared_ptr<::arrow::Table>* out) = 0;
-
- virtual ::arrow::Status ReadRowGroups(const std::vector<int>& row_groups,
- const std::vector<int>& column_indices,
- std::shared_ptr<::arrow::Table>* out) = 0;
-
- virtual ::arrow::Status ReadRowGroups(const std::vector<int>& row_groups,
- std::shared_ptr<::arrow::Table>* out) = 0;
-
- /// \brief Scan file contents with one thread, return number of rows
- virtual ::arrow::Status ScanContents(std::vector<int> columns,
- const int32_t column_batch_size,
- int64_t* num_rows) = 0;
-
-  /// \brief Return a reader for the RowGroup; this object must not outlive the
-  /// FileReader.
- virtual std::shared_ptr<RowGroupReader> RowGroup(int row_group_index) = 0;
-
- /// \brief The number of row groups in the file
- virtual int num_row_groups() const = 0;
-
- virtual ParquetFileReader* parquet_reader() const = 0;
-
- /// Set whether to use multiple threads during reads of multiple columns.
- /// By default only one thread is used.
- virtual void set_use_threads(bool use_threads) = 0;
-
- /// Set number of records to read per batch for the RecordBatchReader.
- virtual void set_batch_size(int64_t batch_size) = 0;
-
- virtual const ArrowReaderProperties& properties() const = 0;
-
- virtual const SchemaManifest& manifest() const = 0;
-
- virtual ~FileReader() = default;
-};
-
-class RowGroupReader {
- public:
- virtual ~RowGroupReader() = default;
- virtual std::shared_ptr<ColumnChunkReader> Column(int column_index) = 0;
- virtual ::arrow::Status ReadTable(const std::vector<int>& column_indices,
- std::shared_ptr<::arrow::Table>* out) = 0;
- virtual ::arrow::Status ReadTable(std::shared_ptr<::arrow::Table>* out) = 0;
-
- private:
- struct Iterator;
-};
-
-class ColumnChunkReader {
- public:
- virtual ~ColumnChunkReader() = default;
- virtual ::arrow::Status Read(std::shared_ptr<::arrow::ChunkedArray>* out) = 0;
-};
-
-// At this point, the column reader is a stream iterator. It only knows how to
-// read the next batch of values for a particular column from the file until it
-// runs out.
-//
-// We also do not expose any internal Parquet details, such as row groups. This
-// might change in the future.
-class PARQUET_EXPORT ColumnReader {
- public:
- virtual ~ColumnReader() = default;
-
-  // Scan the next array of the indicated size. The actual size of the
-  // returned array may be less than the passed size depending on how much
-  // data is available in the file.
- //
- // When all the data in the file has been exhausted, the result is set to
- // nullptr.
- //
- // Returns Status::OK on a successful read, including if you have exhausted
- // the data available in the file.
- virtual ::arrow::Status NextBatch(int64_t batch_size,
- std::shared_ptr<::arrow::ChunkedArray>* out) = 0;
-};
-
-/// \brief Experimental helper class for bindings (like Python) that struggle
-/// with either std::move or C++ exceptions
-class PARQUET_EXPORT FileReaderBuilder {
- public:
- FileReaderBuilder();
-
- /// Create FileReaderBuilder from Arrow file and optional properties / metadata
- ::arrow::Status Open(std::shared_ptr<::arrow::io::RandomAccessFile> file,
- const ReaderProperties& properties = default_reader_properties(),
- std::shared_ptr<FileMetaData> metadata = NULLPTR);
-
- ParquetFileReader* raw_reader() { return raw_reader_.get(); }
-
- /// Set Arrow MemoryPool for memory allocation
- FileReaderBuilder* memory_pool(::arrow::MemoryPool* pool);
- /// Set Arrow reader properties
- FileReaderBuilder* properties(const ArrowReaderProperties& arg_properties);
- /// Build FileReader instance
- ::arrow::Status Build(std::unique_ptr<FileReader>* out);
-
- private:
- ::arrow::MemoryPool* pool_;
- ArrowReaderProperties properties_;
- std::unique_ptr<ParquetFileReader> raw_reader_;
-};
-
-/// \defgroup parquet-arrow-reader-factories Factory functions for Parquet Arrow readers
-///
-/// @{
-
-/// \brief Build FileReader from Arrow file and MemoryPool
-///
-/// Advanced settings are supported through the FileReaderBuilder class.
-PARQUET_EXPORT
-::arrow::Status OpenFile(std::shared_ptr<::arrow::io::RandomAccessFile>,
- ::arrow::MemoryPool* allocator,
- std::unique_ptr<FileReader>* reader);
-
-/// @}
-
-PARQUET_EXPORT
-::arrow::Status StatisticsAsScalars(const Statistics& Statistics,
- std::shared_ptr<::arrow::Scalar>* min,
- std::shared_ptr<::arrow::Scalar>* max);
-
-namespace internal {
-
-PARQUET_EXPORT
-::arrow::Status FuzzReader(const uint8_t* data, int64_t size);
-
-} // namespace internal
-} // namespace arrow
-} // namespace parquet
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+
+#include <cstdint>
+// N.B. we don't include async_generator.h as it's relatively heavy
+#include <functional>
+#include <memory>
+#include <vector>
+
+#include "parquet/file_reader.h"
+#include "parquet/platform.h"
+#include "parquet/properties.h"
+
+namespace arrow {
+
+class ChunkedArray;
+class KeyValueMetadata;
+class RecordBatchReader;
+struct Scalar;
+class Schema;
+class Table;
+class RecordBatch;
+
+} // namespace arrow
+
+namespace parquet {
+
+class FileMetaData;
+class SchemaDescriptor;
+
+namespace arrow {
+
+class ColumnChunkReader;
+class ColumnReader;
+struct SchemaManifest;
+class RowGroupReader;
+
+/// \brief Arrow read adapter class for deserializing Parquet files as Arrow row batches.
+///
+/// This interface caters for different use cases and thus provides different
+/// entry points. In its simplest form, it caters for a user who wants to
+/// read the whole Parquet file at once with the `FileReader::ReadTable` method.
+///
+/// More advanced users who also want to implement parallelism on top of each
+/// single Parquet file should do this on the RowGroup level. For this, they can
+/// call `FileReader::RowGroup(i)->ReadTable` to receive only the specified
+/// RowGroup as a table.
+///
+/// In the most advanced situation, where a consumer wants to independently read
+/// RowGroups in parallel and consume each column individually, they can call
+/// `FileReader::RowGroup(i)->Column(j)->Read` and receive an
+/// `arrow::ChunkedArray` instance.
+///
+/// The parquet format supports an optional integer field_id which can be assigned
+/// to a field. Arrow will convert these field IDs to a metadata key named
+/// PARQUET:field_id on the appropriate field.
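+///
+/// A minimal usage sketch of the three access patterns above (a hedged
+/// example: it assumes an already-constructed `reader` of type
+/// std::unique_ptr<FileReader> and a file with at least one row group):
+///
+/// \code
+/// std::shared_ptr<::arrow::Table> whole_file;
+/// ARROW_RETURN_NOT_OK(reader->ReadTable(&whole_file));
+///
+/// std::shared_ptr<::arrow::Table> one_group;
+/// ARROW_RETURN_NOT_OK(reader->RowGroup(0)->ReadTable(&one_group));
+///
+/// std::shared_ptr<::arrow::ChunkedArray> one_column;
+/// ARROW_RETURN_NOT_OK(reader->RowGroup(0)->Column(0)->Read(&one_column));
+/// \endcode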
+// TODO(wesm): nested data does not always make sense with this user
+// interface unless you are only reading a single leaf node from a branch of
+// a table. For example:
+//
+// repeated group data {
+// optional group record {
+// optional int32 val1;
+// optional byte_array val2;
+// optional bool val3;
+// }
+// optional int32 val4;
+// }
+//
+// In the Parquet file, there are 4 leaf nodes:
+//
+// * data.record.val1
+// * data.record.val2
+// * data.record.val3
+// * data.val4
+//
+// When materializing this data in an Arrow array, we would have:
+//
+// data: list<struct<
+// record: struct<
+// val1: int32,
+// val2: string (= list<uint8>),
+// val3: bool,
+// >,
+// val4: int32
+// >>
+//
+// However, in the Parquet format, each leaf node has its own repetition and
+// definition levels describing the structure of the intermediate nodes in
+// this array structure. Thus, we will need to scan the leaf data for a group
+// of leaf nodes part of the same type tree to create a single result Arrow
+// nested array structure.
+//
+// This is additionally complicated by "chunky" repeated fields or very large
+// byte arrays
+class PARQUET_EXPORT FileReader {
+ public:
+ /// Factory function to create a FileReader from a ParquetFileReader and properties
+ static ::arrow::Status Make(::arrow::MemoryPool* pool,
+ std::unique_ptr<ParquetFileReader> reader,
+ const ArrowReaderProperties& properties,
+ std::unique_ptr<FileReader>* out);
+
+ /// Factory function to create a FileReader from a ParquetFileReader
+ static ::arrow::Status Make(::arrow::MemoryPool* pool,
+ std::unique_ptr<ParquetFileReader> reader,
+ std::unique_ptr<FileReader>* out);
+
+ // Since the distribution of columns amongst a Parquet file's row groups may
+ // be uneven (the number of values in each column chunk can be different), we
+ // provide a column-oriented read interface. The ColumnReader hides the
+ // details of paging through the file's row groups and yielding
+ // fully-materialized arrow::Array instances
+ //
+ // Returns error status if the column of interest is not flat.
+ virtual ::arrow::Status GetColumn(int i, std::unique_ptr<ColumnReader>* out) = 0;
+
+ /// \brief Return arrow schema for all the columns.
+ virtual ::arrow::Status GetSchema(std::shared_ptr<::arrow::Schema>* out) = 0;
+
+ /// \brief Read column as a whole into a chunked array.
+ ///
+ /// The indicated column index is relative to the schema
+ virtual ::arrow::Status ReadColumn(int i,
+ std::shared_ptr<::arrow::ChunkedArray>* out) = 0;
+
+ // NOTE: Experimental API
+ // Reads a specific top level schema field into an Array
+  // The index i refers to the index of the top level schema field, which may
+ // be nested or flat - e.g.
+ //
+ // 0 foo.bar
+ // foo.bar.baz
+ // foo.qux
+ // 1 foo2
+ // 2 foo3
+ //
+ // i=0 will read the entire foo struct, i=1 the foo2 primitive column etc
+ virtual ::arrow::Status ReadSchemaField(
+ int i, std::shared_ptr<::arrow::ChunkedArray>* out) = 0;
+
+ /// \brief Return a RecordBatchReader of row groups selected from row_group_indices.
+ ///
+ /// Note that the ordering in row_group_indices matters. FileReaders must outlive
+ /// their RecordBatchReaders.
+ ///
+ /// \returns error Status if row_group_indices contains an invalid index
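+  ///
+  /// For example (a minimal sketch; assumes a valid `reader` and at least
+  /// two row groups):
+  ///
+  /// \code
+  /// std::unique_ptr<::arrow::RecordBatchReader> batch_reader;
+  /// ARROW_RETURN_NOT_OK(reader->GetRecordBatchReader({0, 1}, &batch_reader));
+  /// std::shared_ptr<::arrow::RecordBatch> batch;
+  /// do {
+  ///   ARROW_RETURN_NOT_OK(batch_reader->ReadNext(&batch));
+  /// } while (batch != nullptr);  // nullptr marks the end of the stream
+  /// \endcode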
+ virtual ::arrow::Status GetRecordBatchReader(
+ const std::vector<int>& row_group_indices,
+ std::unique_ptr<::arrow::RecordBatchReader>* out) = 0;
+
+ ::arrow::Status GetRecordBatchReader(const std::vector<int>& row_group_indices,
+ std::shared_ptr<::arrow::RecordBatchReader>* out);
+
+ /// \brief Return a RecordBatchReader of row groups selected from
+ /// row_group_indices, whose columns are selected by column_indices.
+ ///
+  /// Note that the ordering in row_group_indices and column_indices
+  /// matters. FileReaders must outlive their RecordBatchReaders.
+ ///
+ /// \returns error Status if either row_group_indices or column_indices
+ /// contains an invalid index
+ virtual ::arrow::Status GetRecordBatchReader(
+ const std::vector<int>& row_group_indices, const std::vector<int>& column_indices,
+ std::unique_ptr<::arrow::RecordBatchReader>* out) = 0;
+
+ /// \brief Return a generator of record batches.
+ ///
+ /// The FileReader must outlive the generator, so this requires that you pass in a
+ /// shared_ptr.
+ ///
+ /// \returns error Result if either row_group_indices or column_indices contains an
+ /// invalid index
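+  ///
+  /// A sketch under those assumptions (`reader` is held in a
+  /// std::shared_ptr<FileReader> so the generator can keep it alive):
+  ///
+  /// \code
+  /// ARROW_ASSIGN_OR_RAISE(
+  ///     auto generator, reader->GetRecordBatchGenerator(reader, {0}, {0, 1}));
+  /// auto fut = generator();  // a Future yielding the next RecordBatch
+  /// \endcode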
+ virtual ::arrow::Result<
+ std::function<::arrow::Future<std::shared_ptr<::arrow::RecordBatch>>()>>
+ GetRecordBatchGenerator(std::shared_ptr<FileReader> reader,
+ const std::vector<int> row_group_indices,
+ const std::vector<int> column_indices,
+ ::arrow::internal::Executor* cpu_executor = NULLPTR) = 0;
+
+ ::arrow::Status GetRecordBatchReader(const std::vector<int>& row_group_indices,
+ const std::vector<int>& column_indices,
+ std::shared_ptr<::arrow::RecordBatchReader>* out);
+
+ /// Read all columns into a Table
+ virtual ::arrow::Status ReadTable(std::shared_ptr<::arrow::Table>* out) = 0;
+
+ /// \brief Read the given columns into a Table
+ ///
+ /// The indicated column indices are relative to the schema
+ virtual ::arrow::Status ReadTable(const std::vector<int>& column_indices,
+ std::shared_ptr<::arrow::Table>* out) = 0;
+
+ virtual ::arrow::Status ReadRowGroup(int i, const std::vector<int>& column_indices,
+ std::shared_ptr<::arrow::Table>* out) = 0;
+
+ virtual ::arrow::Status ReadRowGroup(int i, std::shared_ptr<::arrow::Table>* out) = 0;
+
+ virtual ::arrow::Status ReadRowGroups(const std::vector<int>& row_groups,
+ const std::vector<int>& column_indices,
+ std::shared_ptr<::arrow::Table>* out) = 0;
+
+ virtual ::arrow::Status ReadRowGroups(const std::vector<int>& row_groups,
+ std::shared_ptr<::arrow::Table>* out) = 0;
+
+ /// \brief Scan file contents with one thread, return number of rows
+ virtual ::arrow::Status ScanContents(std::vector<int> columns,
+ const int32_t column_batch_size,
+ int64_t* num_rows) = 0;
+
+  /// \brief Return a reader for the RowGroup; this object must not outlive the
+  /// FileReader.
+ virtual std::shared_ptr<RowGroupReader> RowGroup(int row_group_index) = 0;
+
+ /// \brief The number of row groups in the file
+ virtual int num_row_groups() const = 0;
+
+ virtual ParquetFileReader* parquet_reader() const = 0;
+
+ /// Set whether to use multiple threads during reads of multiple columns.
+ /// By default only one thread is used.
+ virtual void set_use_threads(bool use_threads) = 0;
+
+ /// Set number of records to read per batch for the RecordBatchReader.
+ virtual void set_batch_size(int64_t batch_size) = 0;
+
+ virtual const ArrowReaderProperties& properties() const = 0;
+
+ virtual const SchemaManifest& manifest() const = 0;
+
+ virtual ~FileReader() = default;
+};
+
+class RowGroupReader {
+ public:
+ virtual ~RowGroupReader() = default;
+ virtual std::shared_ptr<ColumnChunkReader> Column(int column_index) = 0;
+ virtual ::arrow::Status ReadTable(const std::vector<int>& column_indices,
+ std::shared_ptr<::arrow::Table>* out) = 0;
+ virtual ::arrow::Status ReadTable(std::shared_ptr<::arrow::Table>* out) = 0;
+
+ private:
+ struct Iterator;
+};
+
+class ColumnChunkReader {
+ public:
+ virtual ~ColumnChunkReader() = default;
+ virtual ::arrow::Status Read(std::shared_ptr<::arrow::ChunkedArray>* out) = 0;
+};
+
+// At this point, the column reader is a stream iterator. It only knows how to
+// read the next batch of values for a particular column from the file until it
+// runs out.
+//
+// We also do not expose any internal Parquet details, such as row groups. This
+// might change in the future.
+class PARQUET_EXPORT ColumnReader {
+ public:
+ virtual ~ColumnReader() = default;
+
+  // Scan the next array of the indicated size. The actual size of the
+  // returned array may be less than the passed size depending on how much
+  // data is available in the file.
+ //
+ // When all the data in the file has been exhausted, the result is set to
+ // nullptr.
+ //
+ // Returns Status::OK on a successful read, including if you have exhausted
+ // the data available in the file.
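+  //
+  // For example (a minimal sketch; `column` is assumed to come from
+  // FileReader::GetColumn):
+  //
+  //   std::shared_ptr<::arrow::ChunkedArray> chunk;
+  //   do {
+  //     ARROW_RETURN_NOT_OK(column->NextBatch(4096, &chunk));
+  //   } while (chunk != nullptr);  // nullptr signals exhausted data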
+ virtual ::arrow::Status NextBatch(int64_t batch_size,
+ std::shared_ptr<::arrow::ChunkedArray>* out) = 0;
+};
+
+/// \brief Experimental helper class for bindings (like Python) that struggle
+/// with either std::move or C++ exceptions
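+///
+/// A minimal sketch (assuming `file` is an open
+/// ::arrow::io::RandomAccessFile):
+///
+/// \code
+/// FileReaderBuilder builder;
+/// ARROW_RETURN_NOT_OK(builder.Open(file));
+/// std::unique_ptr<FileReader> reader;
+/// ARROW_RETURN_NOT_OK(builder.memory_pool(::arrow::default_memory_pool())
+///                         ->properties(default_arrow_reader_properties())
+///                         ->Build(&reader));
+/// \endcode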
+class PARQUET_EXPORT FileReaderBuilder {
+ public:
+ FileReaderBuilder();
+
+ /// Create FileReaderBuilder from Arrow file and optional properties / metadata
+ ::arrow::Status Open(std::shared_ptr<::arrow::io::RandomAccessFile> file,
+ const ReaderProperties& properties = default_reader_properties(),
+ std::shared_ptr<FileMetaData> metadata = NULLPTR);
+
+ ParquetFileReader* raw_reader() { return raw_reader_.get(); }
+
+ /// Set Arrow MemoryPool for memory allocation
+ FileReaderBuilder* memory_pool(::arrow::MemoryPool* pool);
+ /// Set Arrow reader properties
+ FileReaderBuilder* properties(const ArrowReaderProperties& arg_properties);
+ /// Build FileReader instance
+ ::arrow::Status Build(std::unique_ptr<FileReader>* out);
+
+ private:
+ ::arrow::MemoryPool* pool_;
+ ArrowReaderProperties properties_;
+ std::unique_ptr<ParquetFileReader> raw_reader_;
+};
+
+/// \defgroup parquet-arrow-reader-factories Factory functions for Parquet Arrow readers
+///
+/// @{
+
+/// \brief Build FileReader from Arrow file and MemoryPool
+///
+/// Advanced settings are supported through the FileReaderBuilder class.
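+///
+/// For example (a sketch; `file` is an open ::arrow::io::RandomAccessFile):
+///
+/// \code
+/// std::unique_ptr<FileReader> reader;
+/// ARROW_RETURN_NOT_OK(
+///     OpenFile(file, ::arrow::default_memory_pool(), &reader));
+/// \endcode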
+PARQUET_EXPORT
+::arrow::Status OpenFile(std::shared_ptr<::arrow::io::RandomAccessFile>,
+ ::arrow::MemoryPool* allocator,
+ std::unique_ptr<FileReader>* reader);
+
+/// @}
+
+PARQUET_EXPORT
+::arrow::Status StatisticsAsScalars(const Statistics& Statistics,
+ std::shared_ptr<::arrow::Scalar>* min,
+ std::shared_ptr<::arrow::Scalar>* max);
+
+namespace internal {
+
+PARQUET_EXPORT
+::arrow::Status FuzzReader(const uint8_t* data, int64_t size);
+
+} // namespace internal
+} // namespace arrow
+} // namespace parquet
diff --git a/contrib/libs/apache/arrow/cpp/src/parquet/arrow/reader_internal.cc b/contrib/libs/apache/arrow/cpp/src/parquet/arrow/reader_internal.cc
index 3fbbfa8da26..f13687079d4 100644
--- a/contrib/libs/apache/arrow/cpp/src/parquet/arrow/reader_internal.cc
+++ b/contrib/libs/apache/arrow/cpp/src/parquet/arrow/reader_internal.cc
@@ -1,791 +1,791 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-#include "parquet/arrow/reader_internal.h"
-
-#include <algorithm>
-#include <climits>
-#include <cstdint>
-#include <cstring>
-#include <memory>
-#include <string>
-#include <type_traits>
-#include <vector>
-
-#include "arrow/array.h"
-#include "arrow/compute/api.h"
-#include "arrow/datum.h"
-#include "arrow/io/memory.h"
-#include "arrow/ipc/reader.h"
-#include "arrow/ipc/writer.h"
-#include "arrow/scalar.h"
-#include "arrow/status.h"
-#include "arrow/table.h"
-#include "arrow/type.h"
-#include "arrow/type_traits.h"
-#include "arrow/util/base64.h"
-#include "arrow/util/bit_util.h"
-#include "arrow/util/checked_cast.h"
-#include "arrow/util/endian.h"
-#include "arrow/util/int_util_internal.h"
-#include "arrow/util/logging.h"
-#include "arrow/util/string_view.h"
-#include "arrow/util/ubsan.h"
-#include "arrow/visitor_inline.h"
-#include "parquet/arrow/reader.h"
-#include "parquet/arrow/schema.h"
-#include "parquet/arrow/schema_internal.h"
-#include "parquet/column_reader.h"
-#include "parquet/platform.h"
-#include "parquet/properties.h"
-#include "parquet/schema.h"
-#include "parquet/statistics.h"
-#include "parquet/types.h"
-// Required after "arrow/util/int_util_internal.h" (for OPTIONAL)
-#include "parquet/windows_compatibility.h"
-
-using arrow::Array;
-using arrow::BooleanArray;
-using arrow::ChunkedArray;
-using arrow::DataType;
-using arrow::Datum;
-using arrow::Decimal128;
-using arrow::Decimal128Array;
-using arrow::Decimal128Type;
-using arrow::Decimal256;
-using arrow::Decimal256Array;
-using arrow::Decimal256Type;
-using arrow::Field;
-using arrow::Int32Array;
-using arrow::ListArray;
-using arrow::MemoryPool;
-using arrow::ResizableBuffer;
-using arrow::Status;
-using arrow::StructArray;
-using arrow::Table;
-using arrow::TimestampArray;
-
-using ::arrow::BitUtil::FromBigEndian;
-using ::arrow::internal::checked_cast;
-using ::arrow::internal::checked_pointer_cast;
-using ::arrow::internal::SafeLeftShift;
-using ::arrow::util::SafeLoadAs;
-
-using parquet::internal::BinaryRecordReader;
-using parquet::internal::DictionaryRecordReader;
-using parquet::internal::RecordReader;
-using parquet::schema::GroupNode;
-using parquet::schema::Node;
-using parquet::schema::PrimitiveNode;
-using ParquetType = parquet::Type;
-
-namespace BitUtil = arrow::BitUtil;
-
-namespace parquet {
-namespace arrow {
-namespace {
-
-template <typename ArrowType>
-using ArrayType = typename ::arrow::TypeTraits<ArrowType>::ArrayType;
-
-template <typename CType, typename StatisticsType>
-Status MakeMinMaxScalar(const StatisticsType& statistics,
- std::shared_ptr<::arrow::Scalar>* min,
- std::shared_ptr<::arrow::Scalar>* max) {
- *min = ::arrow::MakeScalar(static_cast<CType>(statistics.min()));
- *max = ::arrow::MakeScalar(static_cast<CType>(statistics.max()));
- return Status::OK();
-}
-
-template <typename CType, typename StatisticsType>
-Status MakeMinMaxTypedScalar(const StatisticsType& statistics,
- std::shared_ptr<DataType> type,
- std::shared_ptr<::arrow::Scalar>* min,
- std::shared_ptr<::arrow::Scalar>* max) {
- ARROW_ASSIGN_OR_RAISE(*min, ::arrow::MakeScalar(type, statistics.min()));
- ARROW_ASSIGN_OR_RAISE(*max, ::arrow::MakeScalar(type, statistics.max()));
- return Status::OK();
-}
-
-template <typename StatisticsType>
-Status MakeMinMaxIntegralScalar(const StatisticsType& statistics,
- const ::arrow::DataType& arrow_type,
- std::shared_ptr<::arrow::Scalar>* min,
- std::shared_ptr<::arrow::Scalar>* max) {
- const auto column_desc = statistics.descr();
- const auto& logical_type = column_desc->logical_type();
- const auto& integer = checked_pointer_cast<const IntLogicalType>(logical_type);
- const bool is_signed = integer->is_signed();
-
- switch (integer->bit_width()) {
- case 8:
- return is_signed ? MakeMinMaxScalar<int8_t>(statistics, min, max)
- : MakeMinMaxScalar<uint8_t>(statistics, min, max);
- case 16:
- return is_signed ? MakeMinMaxScalar<int16_t>(statistics, min, max)
- : MakeMinMaxScalar<uint16_t>(statistics, min, max);
- case 32:
- return is_signed ? MakeMinMaxScalar<int32_t>(statistics, min, max)
- : MakeMinMaxScalar<uint32_t>(statistics, min, max);
- case 64:
- return is_signed ? MakeMinMaxScalar<int64_t>(statistics, min, max)
- : MakeMinMaxScalar<uint64_t>(statistics, min, max);
- }
-
- return Status::OK();
-}
-
-static Status FromInt32Statistics(const Int32Statistics& statistics,
- const LogicalType& logical_type,
- std::shared_ptr<::arrow::Scalar>* min,
- std::shared_ptr<::arrow::Scalar>* max) {
- ARROW_ASSIGN_OR_RAISE(auto type, FromInt32(logical_type));
-
- switch (logical_type.type()) {
- case LogicalType::Type::INT:
- return MakeMinMaxIntegralScalar(statistics, *type, min, max);
- break;
- case LogicalType::Type::DATE:
- case LogicalType::Type::TIME:
- case LogicalType::Type::NONE:
- return MakeMinMaxTypedScalar<int32_t>(statistics, type, min, max);
- break;
- default:
- break;
- }
-
- return Status::NotImplemented("Cannot extract statistics for type ");
-}
-
-static Status FromInt64Statistics(const Int64Statistics& statistics,
- const LogicalType& logical_type,
- std::shared_ptr<::arrow::Scalar>* min,
- std::shared_ptr<::arrow::Scalar>* max) {
- ARROW_ASSIGN_OR_RAISE(auto type, FromInt64(logical_type));
-
- switch (logical_type.type()) {
- case LogicalType::Type::INT:
- return MakeMinMaxIntegralScalar(statistics, *type, min, max);
- break;
- case LogicalType::Type::TIME:
- case LogicalType::Type::TIMESTAMP:
- case LogicalType::Type::NONE:
- return MakeMinMaxTypedScalar<int64_t>(statistics, type, min, max);
- break;
- default:
- break;
- }
-
- return Status::NotImplemented("Cannot extract statistics for type ");
-}
-
-template <typename DecimalType>
-Result<std::shared_ptr<::arrow::Scalar>> FromBigEndianString(
- const std::string& data, std::shared_ptr<DataType> arrow_type) {
- ARROW_ASSIGN_OR_RAISE(
- DecimalType decimal,
- DecimalType::FromBigEndian(reinterpret_cast<const uint8_t*>(data.data()),
- static_cast<int32_t>(data.size())));
- return ::arrow::MakeScalar(std::move(arrow_type), decimal);
-}
-
-// Extracts Min and Max scalars from byte-like types (i.e. types where the
-// decimal is encoded as big-endian bytes).
-Status ExtractDecimalMinMaxFromBytesType(const Statistics& statistics,
- const LogicalType& logical_type,
- std::shared_ptr<::arrow::Scalar>* min,
- std::shared_ptr<::arrow::Scalar>* max) {
- const DecimalLogicalType& decimal_type =
- checked_cast<const DecimalLogicalType&>(logical_type);
-
- Result<std::shared_ptr<DataType>> maybe_type =
- Decimal128Type::Make(decimal_type.precision(), decimal_type.scale());
- std::shared_ptr<DataType> arrow_type;
- if (maybe_type.ok()) {
- arrow_type = maybe_type.ValueOrDie();
- ARROW_ASSIGN_OR_RAISE(
- *min, FromBigEndianString<Decimal128>(statistics.EncodeMin(), arrow_type));
- ARROW_ASSIGN_OR_RAISE(*max, FromBigEndianString<Decimal128>(statistics.EncodeMax(),
- std::move(arrow_type)));
- return Status::OK();
- }
- // Fallback to see if Decimal256 can represent the type.
- ARROW_ASSIGN_OR_RAISE(
- arrow_type, Decimal256Type::Make(decimal_type.precision(), decimal_type.scale()));
- ARROW_ASSIGN_OR_RAISE(
- *min, FromBigEndianString<Decimal256>(statistics.EncodeMin(), arrow_type));
- ARROW_ASSIGN_OR_RAISE(*max, FromBigEndianString<Decimal256>(statistics.EncodeMax(),
- std::move(arrow_type)));
-
- return Status::OK();
-}
-
-Status ByteArrayStatisticsAsScalars(const Statistics& statistics,
- std::shared_ptr<::arrow::Scalar>* min,
- std::shared_ptr<::arrow::Scalar>* max) {
- auto logical_type = statistics.descr()->logical_type();
- if (logical_type->type() == LogicalType::Type::DECIMAL) {
- return ExtractDecimalMinMaxFromBytesType(statistics, *logical_type, min, max);
- }
- std::shared_ptr<::arrow::DataType> type;
- if (statistics.descr()->physical_type() == Type::FIXED_LEN_BYTE_ARRAY) {
- type = ::arrow::fixed_size_binary(statistics.descr()->type_length());
- } else {
- type = logical_type->type() == LogicalType::Type::STRING ? ::arrow::utf8()
- : ::arrow::binary();
- }
- ARROW_ASSIGN_OR_RAISE(
- *min, ::arrow::MakeScalar(type, Buffer::FromString(statistics.EncodeMin())));
- ARROW_ASSIGN_OR_RAISE(
- *max, ::arrow::MakeScalar(type, Buffer::FromString(statistics.EncodeMax())));
-
- return Status::OK();
-}
-
-} // namespace
-
-Status StatisticsAsScalars(const Statistics& statistics,
- std::shared_ptr<::arrow::Scalar>* min,
- std::shared_ptr<::arrow::Scalar>* max) {
- if (!statistics.HasMinMax()) {
- return Status::Invalid("Statistics has no min max.");
- }
-
- auto column_desc = statistics.descr();
- if (column_desc == nullptr) {
- return Status::Invalid("Statistics carries no descriptor, can't infer arrow type.");
- }
-
- auto physical_type = column_desc->physical_type();
- auto logical_type = column_desc->logical_type();
- switch (physical_type) {
- case Type::BOOLEAN:
- return MakeMinMaxScalar<bool, BoolStatistics>(
- checked_cast<const BoolStatistics&>(statistics), min, max);
- case Type::FLOAT:
- return MakeMinMaxScalar<float, FloatStatistics>(
- checked_cast<const FloatStatistics&>(statistics), min, max);
- case Type::DOUBLE:
- return MakeMinMaxScalar<double, DoubleStatistics>(
- checked_cast<const DoubleStatistics&>(statistics), min, max);
- case Type::INT32:
- return FromInt32Statistics(checked_cast<const Int32Statistics&>(statistics),
- *logical_type, min, max);
- case Type::INT64:
- return FromInt64Statistics(checked_cast<const Int64Statistics&>(statistics),
- *logical_type, min, max);
- case Type::BYTE_ARRAY:
- case Type::FIXED_LEN_BYTE_ARRAY:
- return ByteArrayStatisticsAsScalars(statistics, min, max);
- default:
- return Status::NotImplemented("Extract statistics unsupported for physical_type ",
- physical_type, " unsupported.");
- }
-
- return Status::OK();
-}
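-
-// A usage sketch (illustrative only; assumes `metadata` is the file's
-// FileMetaData and column chunk 0 of row group 0 carries statistics):
-//
-//   auto stats = metadata->RowGroup(0)->ColumnChunk(0)->statistics();
-//   std::shared_ptr<::arrow::Scalar> min, max;
-//   RETURN_NOT_OK(StatisticsAsScalars(*stats, &min, &max));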
-
-// ----------------------------------------------------------------------
-// Primitive types
-
-namespace {
-
-template <typename ArrowType, typename ParquetType>
-Status TransferInt(RecordReader* reader, MemoryPool* pool,
- const std::shared_ptr<DataType>& type, Datum* out) {
- using ArrowCType = typename ArrowType::c_type;
- using ParquetCType = typename ParquetType::c_type;
- int64_t length = reader->values_written();
- ARROW_ASSIGN_OR_RAISE(auto data,
- ::arrow::AllocateBuffer(length * sizeof(ArrowCType), pool));
-
- auto values = reinterpret_cast<const ParquetCType*>(reader->values());
- auto out_ptr = reinterpret_cast<ArrowCType*>(data->mutable_data());
- std::copy(values, values + length, out_ptr);
- *out = std::make_shared<ArrayType<ArrowType>>(
- type, length, std::move(data), reader->ReleaseIsValid(), reader->null_count());
- return Status::OK();
-}
-
-std::shared_ptr<Array> TransferZeroCopy(RecordReader* reader,
- const std::shared_ptr<DataType>& type) {
- std::vector<std::shared_ptr<Buffer>> buffers = {reader->ReleaseIsValid(),
- reader->ReleaseValues()};
- auto data = std::make_shared<::arrow::ArrayData>(type, reader->values_written(),
- buffers, reader->null_count());
- return ::arrow::MakeArray(data);
-}
-
-Status TransferBool(RecordReader* reader, MemoryPool* pool, Datum* out) {
- int64_t length = reader->values_written();
-
- const int64_t buffer_size = BitUtil::BytesForBits(length);
- ARROW_ASSIGN_OR_RAISE(auto data, ::arrow::AllocateBuffer(buffer_size, pool));
-
- // Transfer boolean values to packed bitmap
- auto values = reinterpret_cast<const bool*>(reader->values());
- uint8_t* data_ptr = data->mutable_data();
- memset(data_ptr, 0, buffer_size);
-
- for (int64_t i = 0; i < length; i++) {
- if (values[i]) {
- ::arrow::BitUtil::SetBit(data_ptr, i);
- }
- }
-
- *out = std::make_shared<BooleanArray>(length, std::move(data), reader->ReleaseIsValid(),
- reader->null_count());
- return Status::OK();
-}
-
-Status TransferInt96(RecordReader* reader, MemoryPool* pool,
- const std::shared_ptr<DataType>& type, Datum* out,
- const ::arrow::TimeUnit::type int96_arrow_time_unit) {
- int64_t length = reader->values_written();
- auto values = reinterpret_cast<const Int96*>(reader->values());
- ARROW_ASSIGN_OR_RAISE(auto data,
- ::arrow::AllocateBuffer(length * sizeof(int64_t), pool));
- auto data_ptr = reinterpret_cast<int64_t*>(data->mutable_data());
- for (int64_t i = 0; i < length; i++) {
- if (values[i].value[2] == 0) {
- // Happens for null entries: avoid triggering UBSAN as that Int96 timestamp
- // isn't representable as a 64-bit Unix timestamp.
- *data_ptr++ = 0;
- } else {
- switch (int96_arrow_time_unit) {
- case ::arrow::TimeUnit::NANO:
- *data_ptr++ = Int96GetNanoSeconds(values[i]);
- break;
- case ::arrow::TimeUnit::MICRO:
- *data_ptr++ = Int96GetMicroSeconds(values[i]);
- break;
- case ::arrow::TimeUnit::MILLI:
- *data_ptr++ = Int96GetMilliSeconds(values[i]);
- break;
- case ::arrow::TimeUnit::SECOND:
- *data_ptr++ = Int96GetSeconds(values[i]);
- break;
- }
- }
- }
- *out = std::make_shared<TimestampArray>(type, length, std::move(data),
- reader->ReleaseIsValid(), reader->null_count());
- return Status::OK();
-}
-
-Status TransferDate64(RecordReader* reader, MemoryPool* pool,
- const std::shared_ptr<DataType>& type, Datum* out) {
- int64_t length = reader->values_written();
- auto values = reinterpret_cast<const int32_t*>(reader->values());
-
- ARROW_ASSIGN_OR_RAISE(auto data,
- ::arrow::AllocateBuffer(length * sizeof(int64_t), pool));
- auto out_ptr = reinterpret_cast<int64_t*>(data->mutable_data());
-
- for (int64_t i = 0; i < length; i++) {
- *out_ptr++ = static_cast<int64_t>(values[i]) * kMillisecondsPerDay;
- }
-
- *out = std::make_shared<::arrow::Date64Array>(
- type, length, std::move(data), reader->ReleaseIsValid(), reader->null_count());
- return Status::OK();
-}
-
-// ----------------------------------------------------------------------
-// Binary, direct to dictionary-encoded
-
-Status TransferDictionary(RecordReader* reader,
- const std::shared_ptr<DataType>& logical_value_type,
- std::shared_ptr<ChunkedArray>* out) {
- auto dict_reader = dynamic_cast<DictionaryRecordReader*>(reader);
- DCHECK(dict_reader);
- *out = dict_reader->GetResult();
- if (!logical_value_type->Equals(*(*out)->type())) {
- ARROW_ASSIGN_OR_RAISE(*out, (*out)->View(logical_value_type));
- }
- return Status::OK();
-}
-
-Status TransferBinary(RecordReader* reader, MemoryPool* pool,
- const std::shared_ptr<DataType>& logical_value_type,
- std::shared_ptr<ChunkedArray>* out) {
- if (reader->read_dictionary()) {
- return TransferDictionary(
- reader, ::arrow::dictionary(::arrow::int32(), logical_value_type), out);
- }
- ::arrow::compute::ExecContext ctx(pool);
- ::arrow::compute::CastOptions cast_options;
- cast_options.allow_invalid_utf8 = true; // avoid spending time validating UTF8 data
-
- auto binary_reader = dynamic_cast<BinaryRecordReader*>(reader);
- DCHECK(binary_reader);
- auto chunks = binary_reader->GetBuilderChunks();
- for (auto& chunk : chunks) {
- if (!chunk->type()->Equals(*logical_value_type)) {
- // XXX: if a LargeBinary chunk is larger than 2GB, the MSBs of offsets
- // will be lost because they are first created as int32 and then cast to int64.
- ARROW_ASSIGN_OR_RAISE(
- chunk, ::arrow::compute::Cast(*chunk, logical_value_type, cast_options, &ctx));
- }
- }
- *out = std::make_shared<ChunkedArray>(chunks, logical_value_type);
- return Status::OK();
-}
-
-// ----------------------------------------------------------------------
-// INT32 / INT64 / BYTE_ARRAY / FIXED_LEN_BYTE_ARRAY -> Decimal128 || Decimal256
-
-template <typename DecimalType>
-Status RawBytesToDecimalBytes(const uint8_t* value, int32_t byte_width,
- uint8_t* out_buf) {
- ARROW_ASSIGN_OR_RAISE(DecimalType t, DecimalType::FromBigEndian(value, byte_width));
- t.ToBytes(out_buf);
- return ::arrow::Status::OK();
-}
-
-template <typename DecimalArrayType>
-struct DecimalTypeTrait;
-
-template <>
-struct DecimalTypeTrait<::arrow::Decimal128Array> {
- using value = ::arrow::Decimal128;
-};
-
-template <>
-struct DecimalTypeTrait<::arrow::Decimal256Array> {
- using value = ::arrow::Decimal256;
-};
-
-template <typename DecimalArrayType, typename ParquetType>
-struct DecimalConverter {
- static inline Status ConvertToDecimal(const Array& array,
- const std::shared_ptr<DataType>&,
- MemoryPool* pool, std::shared_ptr<Array>*) {
- return Status::NotImplemented("not implemented");
- }
-};
-
-template <typename DecimalArrayType>
-struct DecimalConverter<DecimalArrayType, FLBAType> {
- static inline Status ConvertToDecimal(const Array& array,
- const std::shared_ptr<DataType>& type,
- MemoryPool* pool, std::shared_ptr<Array>* out) {
- const auto& fixed_size_binary_array =
- checked_cast<const ::arrow::FixedSizeBinaryArray&>(array);
-
- // The byte width of each decimal value
- const int32_t type_length =
- checked_cast<const ::arrow::DecimalType&>(*type).byte_width();
-
- // number of elements in the entire array
- const int64_t length = fixed_size_binary_array.length();
-
- // Get the byte width of the values in the FixedSizeBinaryArray. Most of the time
- // this will be different from the decimal array width because we write the minimum
- // number of bytes necessary to represent a given precision
- const int32_t byte_width =
- checked_cast<const ::arrow::FixedSizeBinaryType&>(*fixed_size_binary_array.type())
- .byte_width();
- // allocate memory for the decimal array
- ARROW_ASSIGN_OR_RAISE(auto data, ::arrow::AllocateBuffer(length * type_length, pool));
-
- // raw bytes that we can write to
- uint8_t* out_ptr = data->mutable_data();
-
- // convert each FixedSizeBinary value to valid decimal bytes
- const int64_t null_count = fixed_size_binary_array.null_count();
-
- using DecimalType = typename DecimalTypeTrait<DecimalArrayType>::value;
- if (null_count > 0) {
- for (int64_t i = 0; i < length; ++i, out_ptr += type_length) {
- if (!fixed_size_binary_array.IsNull(i)) {
- RETURN_NOT_OK(RawBytesToDecimalBytes<DecimalType>(
- fixed_size_binary_array.GetValue(i), byte_width, out_ptr));
- } else {
- std::memset(out_ptr, 0, type_length);
- }
- }
- } else {
- for (int64_t i = 0; i < length; ++i, out_ptr += type_length) {
- RETURN_NOT_OK(RawBytesToDecimalBytes<DecimalType>(
- fixed_size_binary_array.GetValue(i), byte_width, out_ptr));
- }
- }
-
- *out = std::make_shared<DecimalArrayType>(
- type, length, std::move(data), fixed_size_binary_array.null_bitmap(), null_count);
-
- return Status::OK();
- }
-};
-
-template <typename DecimalArrayType>
-struct DecimalConverter<DecimalArrayType, ByteArrayType> {
- static inline Status ConvertToDecimal(const Array& array,
- const std::shared_ptr<DataType>& type,
- MemoryPool* pool, std::shared_ptr<Array>* out) {
- const auto& binary_array = checked_cast<const ::arrow::BinaryArray&>(array);
- const int64_t length = binary_array.length();
-
- const auto& decimal_type = checked_cast<const ::arrow::DecimalType&>(*type);
- const int64_t type_length = decimal_type.byte_width();
-
- ARROW_ASSIGN_OR_RAISE(auto data, ::arrow::AllocateBuffer(length * type_length, pool));
-
- // raw bytes that we can write to
- uint8_t* out_ptr = data->mutable_data();
-
- const int64_t null_count = binary_array.null_count();
-
- // convert each BinaryArray value to valid decimal bytes
- for (int64_t i = 0; i < length; i++, out_ptr += type_length) {
- int32_t record_len = 0;
- const uint8_t* record_loc = binary_array.GetValue(i, &record_len);
-
- if (record_len < 0 || record_len > type_length) {
- return Status::Invalid("Invalid BYTE_ARRAY length for ", type->ToString());
- }
-
- auto out_ptr_view = reinterpret_cast<uint64_t*>(out_ptr);
- out_ptr_view[0] = 0;
- out_ptr_view[1] = 0;
-
-      // convert only the non-null rows when nulls are present;
-      // otherwise convert every row
- if ((null_count > 0 && !binary_array.IsNull(i)) || null_count <= 0) {
- using DecimalType = typename DecimalTypeTrait<DecimalArrayType>::value;
- RETURN_NOT_OK(
- RawBytesToDecimalBytes<DecimalType>(record_loc, record_len, out_ptr));
- }
- }
- *out = std::make_shared<DecimalArrayType>(type, length, std::move(data),
- binary_array.null_bitmap(), null_count);
- return Status::OK();
- }
-};
-
-/// \brief Convert an Int32 or Int64 array into a Decimal128Array
-/// The parquet spec allows systems to write decimals as int32 or int64 if the values
-/// are small enough to fit in 4 bytes or 8 bytes, respectively.
-/// This function implements the conversion from int32 and int64 arrays to decimal arrays.
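-///
-/// For example (illustrative only): a column declared DECIMAL(9, 2) may be
-/// stored as INT32, so the physical value 12345 denotes the decimal 123.45;
-/// below, each such integer is sign-extended into a 16-byte Decimal128.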
-template <
- typename ParquetIntegerType,
- typename = ::arrow::enable_if_t<std::is_same<ParquetIntegerType, Int32Type>::value ||
- std::is_same<ParquetIntegerType, Int64Type>::value>>
-static Status DecimalIntegerTransfer(RecordReader* reader, MemoryPool* pool,
- const std::shared_ptr<DataType>& type, Datum* out) {
- // Decimal128 and Decimal256 are only Arrow constructs. Parquet does not
- // specifically distinguish between decimal byte widths.
- // Decimal256 isn't relevant here because the Arrow-Parquet C++ bindings never
- // write Decimal values as integers and if the decimal value can fit in an
- // integer it is wasteful to use Decimal256. Put another way, the only
- // way an integer column could be construed as Decimal256 is if an arrow
- // schema was stored as metadata in the file indicating the column was
- // Decimal256. The current Arrow-Parquet C++ bindings will never do this.
- DCHECK(type->id() == ::arrow::Type::DECIMAL128);
-
- const int64_t length = reader->values_written();
-
- using ElementType = typename ParquetIntegerType::c_type;
- static_assert(std::is_same<ElementType, int32_t>::value ||
- std::is_same<ElementType, int64_t>::value,
- "ElementType must be int32_t or int64_t");
-
- const auto values = reinterpret_cast<const ElementType*>(reader->values());
-
- const auto& decimal_type = checked_cast<const ::arrow::DecimalType&>(*type);
- const int64_t type_length = decimal_type.byte_width();
-
- ARROW_ASSIGN_OR_RAISE(auto data, ::arrow::AllocateBuffer(length * type_length, pool));
- uint8_t* out_ptr = data->mutable_data();
-
- using ::arrow::BitUtil::FromLittleEndian;
-
- for (int64_t i = 0; i < length; ++i, out_ptr += type_length) {
- // sign/zero extend int32_t values, otherwise a no-op
- const auto value = static_cast<int64_t>(values[i]);
-
- ::arrow::Decimal128 decimal(value);
- decimal.ToBytes(out_ptr);
- }
-
- if (reader->nullable_values()) {
- std::shared_ptr<ResizableBuffer> is_valid = reader->ReleaseIsValid();
- *out = std::make_shared<Decimal128Array>(type, length, std::move(data), is_valid,
- reader->null_count());
- } else {
- *out = std::make_shared<Decimal128Array>(type, length, std::move(data));
- }
- return Status::OK();
-}
-
-/// \brief Convert an arrow::BinaryArray to an arrow::Decimal{128,256}Array
-/// We do this by:
-/// 1. Creating an arrow::BinaryArray from the RecordReader's builder
-/// 2. Allocating a buffer for the arrow::Decimal{128,256}Array
-/// 3. Converting the big-endian bytes in each BinaryArray entry to two integers
-/// representing the high and low bits of each decimal value.
-template <typename DecimalArrayType, typename ParquetType>
-Status TransferDecimal(RecordReader* reader, MemoryPool* pool,
- const std::shared_ptr<DataType>& type, Datum* out) {
- auto binary_reader = dynamic_cast<BinaryRecordReader*>(reader);
- DCHECK(binary_reader);
- ::arrow::ArrayVector chunks = binary_reader->GetBuilderChunks();
- for (size_t i = 0; i < chunks.size(); ++i) {
- std::shared_ptr<Array> chunk_as_decimal;
- auto fn = &DecimalConverter<DecimalArrayType, ParquetType>::ConvertToDecimal;
- RETURN_NOT_OK(fn(*chunks[i], type, pool, &chunk_as_decimal));
- // Replace the chunk, which will hopefully also free memory as we go
- chunks[i] = chunk_as_decimal;
- }
- *out = std::make_shared<ChunkedArray>(chunks, type);
- return Status::OK();
-}
-
-} // namespace
-
-#define TRANSFER_INT32(ENUM, ArrowType) \
- case ::arrow::Type::ENUM: { \
- Status s = TransferInt<ArrowType, Int32Type>(reader, pool, value_type, &result); \
- RETURN_NOT_OK(s); \
- } break;
-
-#define TRANSFER_INT64(ENUM, ArrowType) \
- case ::arrow::Type::ENUM: { \
- Status s = TransferInt<ArrowType, Int64Type>(reader, pool, value_type, &result); \
- RETURN_NOT_OK(s); \
- } break;
-
-Status TransferColumnData(RecordReader* reader, std::shared_ptr<DataType> value_type,
- const ColumnDescriptor* descr, MemoryPool* pool,
- std::shared_ptr<ChunkedArray>* out) {
- Datum result;
- std::shared_ptr<ChunkedArray> chunked_result;
- switch (value_type->id()) {
- case ::arrow::Type::DICTIONARY: {
- RETURN_NOT_OK(TransferDictionary(reader, value_type, &chunked_result));
- result = chunked_result;
- } break;
- case ::arrow::Type::NA: {
- result = std::make_shared<::arrow::NullArray>(reader->values_written());
- break;
- }
- case ::arrow::Type::INT32:
- case ::arrow::Type::INT64:
- case ::arrow::Type::FLOAT:
- case ::arrow::Type::DOUBLE:
- result = TransferZeroCopy(reader, value_type);
- break;
- case ::arrow::Type::BOOL:
- RETURN_NOT_OK(TransferBool(reader, pool, &result));
- break;
- TRANSFER_INT32(UINT8, ::arrow::UInt8Type);
- TRANSFER_INT32(INT8, ::arrow::Int8Type);
- TRANSFER_INT32(UINT16, ::arrow::UInt16Type);
- TRANSFER_INT32(INT16, ::arrow::Int16Type);
- TRANSFER_INT32(UINT32, ::arrow::UInt32Type);
- TRANSFER_INT64(UINT64, ::arrow::UInt64Type);
- TRANSFER_INT32(DATE32, ::arrow::Date32Type);
- TRANSFER_INT32(TIME32, ::arrow::Time32Type);
- TRANSFER_INT64(TIME64, ::arrow::Time64Type);
- case ::arrow::Type::DATE64:
- RETURN_NOT_OK(TransferDate64(reader, pool, value_type, &result));
- break;
- case ::arrow::Type::FIXED_SIZE_BINARY:
- case ::arrow::Type::BINARY:
- case ::arrow::Type::STRING:
- case ::arrow::Type::LARGE_BINARY:
- case ::arrow::Type::LARGE_STRING: {
- RETURN_NOT_OK(TransferBinary(reader, pool, value_type, &chunked_result));
- result = chunked_result;
- } break;
- case ::arrow::Type::DECIMAL128: {
- switch (descr->physical_type()) {
- case ::parquet::Type::INT32: {
- auto fn = DecimalIntegerTransfer<Int32Type>;
- RETURN_NOT_OK(fn(reader, pool, value_type, &result));
- } break;
- case ::parquet::Type::INT64: {
- auto fn = &DecimalIntegerTransfer<Int64Type>;
- RETURN_NOT_OK(fn(reader, pool, value_type, &result));
- } break;
- case ::parquet::Type::BYTE_ARRAY: {
- auto fn = &TransferDecimal<Decimal128Array, ByteArrayType>;
- RETURN_NOT_OK(fn(reader, pool, value_type, &result));
- } break;
- case ::parquet::Type::FIXED_LEN_BYTE_ARRAY: {
- auto fn = &TransferDecimal<Decimal128Array, FLBAType>;
- RETURN_NOT_OK(fn(reader, pool, value_type, &result));
- } break;
- default:
- return Status::Invalid(
- "Physical type for decimal128 must be int32, int64, byte array, or fixed "
- "length binary");
- }
- } break;
- case ::arrow::Type::DECIMAL256:
- switch (descr->physical_type()) {
- case ::parquet::Type::BYTE_ARRAY: {
- auto fn = &TransferDecimal<Decimal256Array, ByteArrayType>;
- RETURN_NOT_OK(fn(reader, pool, value_type, &result));
- } break;
- case ::parquet::Type::FIXED_LEN_BYTE_ARRAY: {
- auto fn = &TransferDecimal<Decimal256Array, FLBAType>;
- RETURN_NOT_OK(fn(reader, pool, value_type, &result));
- } break;
- default:
- return Status::Invalid(
- "Physical type for decimal256 must be fixed length binary");
- }
- break;
-
- case ::arrow::Type::TIMESTAMP: {
- const ::arrow::TimestampType& timestamp_type =
- checked_cast<::arrow::TimestampType&>(*value_type);
- if (descr->physical_type() == ::parquet::Type::INT96) {
- RETURN_NOT_OK(
- TransferInt96(reader, pool, value_type, &result, timestamp_type.unit()));
- } else {
- switch (timestamp_type.unit()) {
- case ::arrow::TimeUnit::MILLI:
- case ::arrow::TimeUnit::MICRO:
- case ::arrow::TimeUnit::NANO:
- result = TransferZeroCopy(reader, value_type);
- break;
- default:
- return Status::NotImplemented("TimeUnit not supported");
- }
- }
- } break;
- default:
- return Status::NotImplemented("No support for reading columns of type ",
- value_type->ToString());
- }
-
- if (result.kind() == Datum::ARRAY) {
- *out = std::make_shared<ChunkedArray>(result.make_array());
- } else if (result.kind() == Datum::CHUNKED_ARRAY) {
- *out = result.chunked_array();
- } else {
- DCHECK(false) << "Should be impossible, result was " << result.ToString();
- }
-
- return Status::OK();
-}
-
-} // namespace arrow
-} // namespace parquet
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "parquet/arrow/reader_internal.h"
+
+#include <algorithm>
+#include <climits>
+#include <cstdint>
+#include <cstring>
+#include <memory>
+#include <string>
+#include <type_traits>
+#include <vector>
+
+#include "arrow/array.h"
+#include "arrow/compute/api.h"
+#include "arrow/datum.h"
+#include "arrow/io/memory.h"
+#include "arrow/ipc/reader.h"
+#include "arrow/ipc/writer.h"
+#include "arrow/scalar.h"
+#include "arrow/status.h"
+#include "arrow/table.h"
+#include "arrow/type.h"
+#include "arrow/type_traits.h"
+#include "arrow/util/base64.h"
+#include "arrow/util/bit_util.h"
+#include "arrow/util/checked_cast.h"
+#include "arrow/util/endian.h"
+#include "arrow/util/int_util_internal.h"
+#include "arrow/util/logging.h"
+#include "arrow/util/string_view.h"
+#include "arrow/util/ubsan.h"
+#include "arrow/visitor_inline.h"
+#include "parquet/arrow/reader.h"
+#include "parquet/arrow/schema.h"
+#include "parquet/arrow/schema_internal.h"
+#include "parquet/column_reader.h"
+#include "parquet/platform.h"
+#include "parquet/properties.h"
+#include "parquet/schema.h"
+#include "parquet/statistics.h"
+#include "parquet/types.h"
+// Required after "arrow/util/int_util_internal.h" (for OPTIONAL)
+#include "parquet/windows_compatibility.h"
+
+using arrow::Array;
+using arrow::BooleanArray;
+using arrow::ChunkedArray;
+using arrow::DataType;
+using arrow::Datum;
+using arrow::Decimal128;
+using arrow::Decimal128Array;
+using arrow::Decimal128Type;
+using arrow::Decimal256;
+using arrow::Decimal256Array;
+using arrow::Decimal256Type;
+using arrow::Field;
+using arrow::Int32Array;
+using arrow::ListArray;
+using arrow::MemoryPool;
+using arrow::ResizableBuffer;
+using arrow::Status;
+using arrow::StructArray;
+using arrow::Table;
+using arrow::TimestampArray;
+
+using ::arrow::BitUtil::FromBigEndian;
+using ::arrow::internal::checked_cast;
+using ::arrow::internal::checked_pointer_cast;
+using ::arrow::internal::SafeLeftShift;
+using ::arrow::util::SafeLoadAs;
+
+using parquet::internal::BinaryRecordReader;
+using parquet::internal::DictionaryRecordReader;
+using parquet::internal::RecordReader;
+using parquet::schema::GroupNode;
+using parquet::schema::Node;
+using parquet::schema::PrimitiveNode;
+using ParquetType = parquet::Type;
+
+namespace BitUtil = arrow::BitUtil;
+
+namespace parquet {
+namespace arrow {
+namespace {
+
+template <typename ArrowType>
+using ArrayType = typename ::arrow::TypeTraits<ArrowType>::ArrayType;
+
+template <typename CType, typename StatisticsType>
+Status MakeMinMaxScalar(const StatisticsType& statistics,
+ std::shared_ptr<::arrow::Scalar>* min,
+ std::shared_ptr<::arrow::Scalar>* max) {
+ *min = ::arrow::MakeScalar(static_cast<CType>(statistics.min()));
+ *max = ::arrow::MakeScalar(static_cast<CType>(statistics.max()));
+ return Status::OK();
+}
+
+template <typename CType, typename StatisticsType>
+Status MakeMinMaxTypedScalar(const StatisticsType& statistics,
+ std::shared_ptr<DataType> type,
+ std::shared_ptr<::arrow::Scalar>* min,
+ std::shared_ptr<::arrow::Scalar>* max) {
+ ARROW_ASSIGN_OR_RAISE(*min, ::arrow::MakeScalar(type, statistics.min()));
+ ARROW_ASSIGN_OR_RAISE(*max, ::arrow::MakeScalar(type, statistics.max()));
+ return Status::OK();
+}
+
+template <typename StatisticsType>
+Status MakeMinMaxIntegralScalar(const StatisticsType& statistics,
+ const ::arrow::DataType& arrow_type,
+ std::shared_ptr<::arrow::Scalar>* min,
+ std::shared_ptr<::arrow::Scalar>* max) {
+ const auto column_desc = statistics.descr();
+ const auto& logical_type = column_desc->logical_type();
+ const auto& integer = checked_pointer_cast<const IntLogicalType>(logical_type);
+ const bool is_signed = integer->is_signed();
+
+ switch (integer->bit_width()) {
+ case 8:
+ return is_signed ? MakeMinMaxScalar<int8_t>(statistics, min, max)
+ : MakeMinMaxScalar<uint8_t>(statistics, min, max);
+ case 16:
+ return is_signed ? MakeMinMaxScalar<int16_t>(statistics, min, max)
+ : MakeMinMaxScalar<uint16_t>(statistics, min, max);
+ case 32:
+ return is_signed ? MakeMinMaxScalar<int32_t>(statistics, min, max)
+ : MakeMinMaxScalar<uint32_t>(statistics, min, max);
+ case 64:
+ return is_signed ? MakeMinMaxScalar<int64_t>(statistics, min, max)
+ : MakeMinMaxScalar<uint64_t>(statistics, min, max);
+ }
+
+ return Status::OK();
+}
+
+static Status FromInt32Statistics(const Int32Statistics& statistics,
+ const LogicalType& logical_type,
+ std::shared_ptr<::arrow::Scalar>* min,
+ std::shared_ptr<::arrow::Scalar>* max) {
+ ARROW_ASSIGN_OR_RAISE(auto type, FromInt32(logical_type));
+
+ switch (logical_type.type()) {
+ case LogicalType::Type::INT:
+ return MakeMinMaxIntegralScalar(statistics, *type, min, max);
+ break;
+ case LogicalType::Type::DATE:
+ case LogicalType::Type::TIME:
+ case LogicalType::Type::NONE:
+ return MakeMinMaxTypedScalar<int32_t>(statistics, type, min, max);
+ break;
+ default:
+ break;
+ }
+
+ return Status::NotImplemented("Cannot extract statistics for type ");
+}
+
+static Status FromInt64Statistics(const Int64Statistics& statistics,
+ const LogicalType& logical_type,
+ std::shared_ptr<::arrow::Scalar>* min,
+ std::shared_ptr<::arrow::Scalar>* max) {
+ ARROW_ASSIGN_OR_RAISE(auto type, FromInt64(logical_type));
+
+ switch (logical_type.type()) {
+ case LogicalType::Type::INT:
+ return MakeMinMaxIntegralScalar(statistics, *type, min, max);
+ break;
+ case LogicalType::Type::TIME:
+ case LogicalType::Type::TIMESTAMP:
+ case LogicalType::Type::NONE:
+ return MakeMinMaxTypedScalar<int64_t>(statistics, type, min, max);
+ break;
+ default:
+ break;
+ }
+
+ return Status::NotImplemented("Cannot extract statistics for type ");
+}
+
+template <typename DecimalType>
+Result<std::shared_ptr<::arrow::Scalar>> FromBigEndianString(
+ const std::string& data, std::shared_ptr<DataType> arrow_type) {
+ ARROW_ASSIGN_OR_RAISE(
+ DecimalType decimal,
+ DecimalType::FromBigEndian(reinterpret_cast<const uint8_t*>(data.data()),
+ static_cast<int32_t>(data.size())));
+ return ::arrow::MakeScalar(std::move(arrow_type), decimal);
+}
+
+// Extracts Min and Max scalars from bytes-like types (i.e. types where the
+// decimal is encoded as a big-endian byte string).
+Status ExtractDecimalMinMaxFromBytesType(const Statistics& statistics,
+ const LogicalType& logical_type,
+ std::shared_ptr<::arrow::Scalar>* min,
+ std::shared_ptr<::arrow::Scalar>* max) {
+ const DecimalLogicalType& decimal_type =
+ checked_cast<const DecimalLogicalType&>(logical_type);
+
+ Result<std::shared_ptr<DataType>> maybe_type =
+ Decimal128Type::Make(decimal_type.precision(), decimal_type.scale());
+ std::shared_ptr<DataType> arrow_type;
+ if (maybe_type.ok()) {
+ arrow_type = maybe_type.ValueOrDie();
+ ARROW_ASSIGN_OR_RAISE(
+ *min, FromBigEndianString<Decimal128>(statistics.EncodeMin(), arrow_type));
+ ARROW_ASSIGN_OR_RAISE(*max, FromBigEndianString<Decimal128>(statistics.EncodeMax(),
+ std::move(arrow_type)));
+ return Status::OK();
+ }
+ // Fallback to see if Decimal256 can represent the type.
+ ARROW_ASSIGN_OR_RAISE(
+ arrow_type, Decimal256Type::Make(decimal_type.precision(), decimal_type.scale()));
+ ARROW_ASSIGN_OR_RAISE(
+ *min, FromBigEndianString<Decimal256>(statistics.EncodeMin(), arrow_type));
+ ARROW_ASSIGN_OR_RAISE(*max, FromBigEndianString<Decimal256>(statistics.EncodeMax(),
+ std::move(arrow_type)));
+
+ return Status::OK();
+}
+
+Status ByteArrayStatisticsAsScalars(const Statistics& statistics,
+ std::shared_ptr<::arrow::Scalar>* min,
+ std::shared_ptr<::arrow::Scalar>* max) {
+ auto logical_type = statistics.descr()->logical_type();
+ if (logical_type->type() == LogicalType::Type::DECIMAL) {
+ return ExtractDecimalMinMaxFromBytesType(statistics, *logical_type, min, max);
+ }
+ std::shared_ptr<::arrow::DataType> type;
+ if (statistics.descr()->physical_type() == Type::FIXED_LEN_BYTE_ARRAY) {
+ type = ::arrow::fixed_size_binary(statistics.descr()->type_length());
+ } else {
+ type = logical_type->type() == LogicalType::Type::STRING ? ::arrow::utf8()
+ : ::arrow::binary();
+ }
+ ARROW_ASSIGN_OR_RAISE(
+ *min, ::arrow::MakeScalar(type, Buffer::FromString(statistics.EncodeMin())));
+ ARROW_ASSIGN_OR_RAISE(
+ *max, ::arrow::MakeScalar(type, Buffer::FromString(statistics.EncodeMax())));
+
+ return Status::OK();
+}
+
+} // namespace
+
+Status StatisticsAsScalars(const Statistics& statistics,
+ std::shared_ptr<::arrow::Scalar>* min,
+ std::shared_ptr<::arrow::Scalar>* max) {
+ if (!statistics.HasMinMax()) {
+ return Status::Invalid("Statistics has no min max.");
+ }
+
+ auto column_desc = statistics.descr();
+ if (column_desc == nullptr) {
+ return Status::Invalid("Statistics carries no descriptor, can't infer arrow type.");
+ }
+
+ auto physical_type = column_desc->physical_type();
+ auto logical_type = column_desc->logical_type();
+ switch (physical_type) {
+ case Type::BOOLEAN:
+ return MakeMinMaxScalar<bool, BoolStatistics>(
+ checked_cast<const BoolStatistics&>(statistics), min, max);
+ case Type::FLOAT:
+ return MakeMinMaxScalar<float, FloatStatistics>(
+ checked_cast<const FloatStatistics&>(statistics), min, max);
+ case Type::DOUBLE:
+ return MakeMinMaxScalar<double, DoubleStatistics>(
+ checked_cast<const DoubleStatistics&>(statistics), min, max);
+ case Type::INT32:
+ return FromInt32Statistics(checked_cast<const Int32Statistics&>(statistics),
+ *logical_type, min, max);
+ case Type::INT64:
+ return FromInt64Statistics(checked_cast<const Int64Statistics&>(statistics),
+ *logical_type, min, max);
+ case Type::BYTE_ARRAY:
+ case Type::FIXED_LEN_BYTE_ARRAY:
+ return ByteArrayStatisticsAsScalars(statistics, min, max);
+ default:
+ return Status::NotImplemented("Extract statistics unsupported for physical_type ",
+ physical_type, " unsupported.");
+ }
+
+ return Status::OK();
+}
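+
+// A minimal usage sketch (hypothetical caller code; `chunk_metadata` stands in
+// for a ColumnChunkMetaData obtained from a row group's metadata):
+//
+//   std::shared_ptr<::arrow::Scalar> min, max;
+//   RETURN_NOT_OK(
+//       StatisticsAsScalars(*chunk_metadata->statistics(), &min, &max));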
+
+// ----------------------------------------------------------------------
+// Primitive types
+
+namespace {
+
+template <typename ArrowType, typename ParquetType>
+Status TransferInt(RecordReader* reader, MemoryPool* pool,
+ const std::shared_ptr<DataType>& type, Datum* out) {
+ using ArrowCType = typename ArrowType::c_type;
+ using ParquetCType = typename ParquetType::c_type;
+ int64_t length = reader->values_written();
+ ARROW_ASSIGN_OR_RAISE(auto data,
+ ::arrow::AllocateBuffer(length * sizeof(ArrowCType), pool));
+
+ auto values = reinterpret_cast<const ParquetCType*>(reader->values());
+ auto out_ptr = reinterpret_cast<ArrowCType*>(data->mutable_data());
+ std::copy(values, values + length, out_ptr);
+ *out = std::make_shared<ArrayType<ArrowType>>(
+ type, length, std::move(data), reader->ReleaseIsValid(), reader->null_count());
+ return Status::OK();
+}
+
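+// For columns whose Parquet physical layout already matches the Arrow layout
+// (INT32, INT64, FLOAT, DOUBLE, and non-INT96 timestamps), the record
+// reader's validity and value buffers are handed to the ArrayData directly,
+// with no copy.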
+std::shared_ptr<Array> TransferZeroCopy(RecordReader* reader,
+ const std::shared_ptr<DataType>& type) {
+ std::vector<std::shared_ptr<Buffer>> buffers = {reader->ReleaseIsValid(),
+ reader->ReleaseValues()};
+ auto data = std::make_shared<::arrow::ArrayData>(type, reader->values_written(),
+ buffers, reader->null_count());
+ return ::arrow::MakeArray(data);
+}
+
+Status TransferBool(RecordReader* reader, MemoryPool* pool, Datum* out) {
+ int64_t length = reader->values_written();
+
+ const int64_t buffer_size = BitUtil::BytesForBits(length);
+ ARROW_ASSIGN_OR_RAISE(auto data, ::arrow::AllocateBuffer(buffer_size, pool));
+
+ // Transfer boolean values to packed bitmap
+ auto values = reinterpret_cast<const bool*>(reader->values());
+ uint8_t* data_ptr = data->mutable_data();
+ memset(data_ptr, 0, buffer_size);
+
+ for (int64_t i = 0; i < length; i++) {
+ if (values[i]) {
+ ::arrow::BitUtil::SetBit(data_ptr, i);
+ }
+ }
+
+ *out = std::make_shared<BooleanArray>(length, std::move(data), reader->ReleaseIsValid(),
+ reader->null_count());
+ return Status::OK();
+}
+
+Status TransferInt96(RecordReader* reader, MemoryPool* pool,
+ const std::shared_ptr<DataType>& type, Datum* out,
+ const ::arrow::TimeUnit::type int96_arrow_time_unit) {
+ int64_t length = reader->values_written();
+ auto values = reinterpret_cast<const Int96*>(reader->values());
+ ARROW_ASSIGN_OR_RAISE(auto data,
+ ::arrow::AllocateBuffer(length * sizeof(int64_t), pool));
+ auto data_ptr = reinterpret_cast<int64_t*>(data->mutable_data());
+ for (int64_t i = 0; i < length; i++) {
+ if (values[i].value[2] == 0) {
+ // Happens for null entries: avoid triggering UBSAN as that Int96 timestamp
+ // isn't representable as a 64-bit Unix timestamp.
+ *data_ptr++ = 0;
+ } else {
+ switch (int96_arrow_time_unit) {
+ case ::arrow::TimeUnit::NANO:
+ *data_ptr++ = Int96GetNanoSeconds(values[i]);
+ break;
+ case ::arrow::TimeUnit::MICRO:
+ *data_ptr++ = Int96GetMicroSeconds(values[i]);
+ break;
+ case ::arrow::TimeUnit::MILLI:
+ *data_ptr++ = Int96GetMilliSeconds(values[i]);
+ break;
+ case ::arrow::TimeUnit::SECOND:
+ *data_ptr++ = Int96GetSeconds(values[i]);
+ break;
+ }
+ }
+ }
+ *out = std::make_shared<TimestampArray>(type, length, std::move(data),
+ reader->ReleaseIsValid(), reader->null_count());
+ return Status::OK();
+}
+
+Status TransferDate64(RecordReader* reader, MemoryPool* pool,
+ const std::shared_ptr<DataType>& type, Datum* out) {
+ int64_t length = reader->values_written();
+ auto values = reinterpret_cast<const int32_t*>(reader->values());
+
+ ARROW_ASSIGN_OR_RAISE(auto data,
+ ::arrow::AllocateBuffer(length * sizeof(int64_t), pool));
+ auto out_ptr = reinterpret_cast<int64_t*>(data->mutable_data());
+
+ for (int64_t i = 0; i < length; i++) {
+ *out_ptr++ = static_cast<int64_t>(values[i]) * kMillisecondsPerDay;
+ }
+
+ *out = std::make_shared<::arrow::Date64Array>(
+ type, length, std::move(data), reader->ReleaseIsValid(), reader->null_count());
+ return Status::OK();
+}
+
+// ----------------------------------------------------------------------
+// Binary, direct to dictionary-encoded
+
+Status TransferDictionary(RecordReader* reader,
+ const std::shared_ptr<DataType>& logical_value_type,
+ std::shared_ptr<ChunkedArray>* out) {
+ auto dict_reader = dynamic_cast<DictionaryRecordReader*>(reader);
+ DCHECK(dict_reader);
+ *out = dict_reader->GetResult();
+ if (!logical_value_type->Equals(*(*out)->type())) {
+ ARROW_ASSIGN_OR_RAISE(*out, (*out)->View(logical_value_type));
+ }
+ return Status::OK();
+}
+
+Status TransferBinary(RecordReader* reader, MemoryPool* pool,
+ const std::shared_ptr<DataType>& logical_value_type,
+ std::shared_ptr<ChunkedArray>* out) {
+ if (reader->read_dictionary()) {
+ return TransferDictionary(
+ reader, ::arrow::dictionary(::arrow::int32(), logical_value_type), out);
+ }
+ ::arrow::compute::ExecContext ctx(pool);
+ ::arrow::compute::CastOptions cast_options;
+ cast_options.allow_invalid_utf8 = true; // avoid spending time validating UTF8 data
+
+ auto binary_reader = dynamic_cast<BinaryRecordReader*>(reader);
+ DCHECK(binary_reader);
+ auto chunks = binary_reader->GetBuilderChunks();
+ for (auto& chunk : chunks) {
+ if (!chunk->type()->Equals(*logical_value_type)) {
+ // XXX: if a LargeBinary chunk is larger than 2GB, the MSBs of offsets
+ // will be lost because they are first created as int32 and then cast to int64.
+ ARROW_ASSIGN_OR_RAISE(
+ chunk, ::arrow::compute::Cast(*chunk, logical_value_type, cast_options, &ctx));
+ }
+ }
+ *out = std::make_shared<ChunkedArray>(chunks, logical_value_type);
+ return Status::OK();
+}
+
+// ----------------------------------------------------------------------
+// INT32 / INT64 / BYTE_ARRAY / FIXED_LEN_BYTE_ARRAY -> Decimal128 || Decimal256
+
+template <typename DecimalType>
+Status RawBytesToDecimalBytes(const uint8_t* value, int32_t byte_width,
+ uint8_t* out_buf) {
+ ARROW_ASSIGN_OR_RAISE(DecimalType t, DecimalType::FromBigEndian(value, byte_width));
+ t.ToBytes(out_buf);
+ return ::arrow::Status::OK();
+}
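+
+// For example, the 2-byte big-endian input {0x01, 0x2C} parses as the integer
+// 300, which ToBytes() then writes out in the decimal type's full-width
+// little-endian two's-complement representation.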
+
+template <typename DecimalArrayType>
+struct DecimalTypeTrait;
+
+template <>
+struct DecimalTypeTrait<::arrow::Decimal128Array> {
+ using value = ::arrow::Decimal128;
+};
+
+template <>
+struct DecimalTypeTrait<::arrow::Decimal256Array> {
+ using value = ::arrow::Decimal256;
+};
+
+template <typename DecimalArrayType, typename ParquetType>
+struct DecimalConverter {
+ static inline Status ConvertToDecimal(const Array& array,
+ const std::shared_ptr<DataType>&,
+ MemoryPool* pool, std::shared_ptr<Array>*) {
+ return Status::NotImplemented("not implemented");
+ }
+};
+
+template <typename DecimalArrayType>
+struct DecimalConverter<DecimalArrayType, FLBAType> {
+ static inline Status ConvertToDecimal(const Array& array,
+ const std::shared_ptr<DataType>& type,
+ MemoryPool* pool, std::shared_ptr<Array>* out) {
+ const auto& fixed_size_binary_array =
+ checked_cast<const ::arrow::FixedSizeBinaryArray&>(array);
+
+ // The byte width of each decimal value
+ const int32_t type_length =
+ checked_cast<const ::arrow::DecimalType&>(*type).byte_width();
+
+ // number of elements in the entire array
+ const int64_t length = fixed_size_binary_array.length();
+
+ // Get the byte width of the values in the FixedSizeBinaryArray. Most of the time
+ // this will be different from the decimal array width because we write the minimum
+ // number of bytes necessary to represent a given precision
+ const int32_t byte_width =
+ checked_cast<const ::arrow::FixedSizeBinaryType&>(*fixed_size_binary_array.type())
+ .byte_width();
+ // allocate memory for the decimal array
+ ARROW_ASSIGN_OR_RAISE(auto data, ::arrow::AllocateBuffer(length * type_length, pool));
+
+ // raw bytes that we can write to
+ uint8_t* out_ptr = data->mutable_data();
+
+ // convert each FixedSizeBinary value to valid decimal bytes
+ const int64_t null_count = fixed_size_binary_array.null_count();
+
+ using DecimalType = typename DecimalTypeTrait<DecimalArrayType>::value;
+ if (null_count > 0) {
+ for (int64_t i = 0; i < length; ++i, out_ptr += type_length) {
+ if (!fixed_size_binary_array.IsNull(i)) {
+ RETURN_NOT_OK(RawBytesToDecimalBytes<DecimalType>(
+ fixed_size_binary_array.GetValue(i), byte_width, out_ptr));
+ } else {
+ std::memset(out_ptr, 0, type_length);
+ }
+ }
+ } else {
+ for (int64_t i = 0; i < length; ++i, out_ptr += type_length) {
+ RETURN_NOT_OK(RawBytesToDecimalBytes<DecimalType>(
+ fixed_size_binary_array.GetValue(i), byte_width, out_ptr));
+ }
+ }
+
+ *out = std::make_shared<DecimalArrayType>(
+ type, length, std::move(data), fixed_size_binary_array.null_bitmap(), null_count);
+
+ return Status::OK();
+ }
+};
+
+template <typename DecimalArrayType>
+struct DecimalConverter<DecimalArrayType, ByteArrayType> {
+ static inline Status ConvertToDecimal(const Array& array,
+ const std::shared_ptr<DataType>& type,
+ MemoryPool* pool, std::shared_ptr<Array>* out) {
+ const auto& binary_array = checked_cast<const ::arrow::BinaryArray&>(array);
+ const int64_t length = binary_array.length();
+
+ const auto& decimal_type = checked_cast<const ::arrow::DecimalType&>(*type);
+ const int64_t type_length = decimal_type.byte_width();
+
+ ARROW_ASSIGN_OR_RAISE(auto data, ::arrow::AllocateBuffer(length * type_length, pool));
+
+ // raw bytes that we can write to
+ uint8_t* out_ptr = data->mutable_data();
+
+ const int64_t null_count = binary_array.null_count();
+
+ // convert each BinaryArray value to valid decimal bytes
+ for (int64_t i = 0; i < length; i++, out_ptr += type_length) {
+ int32_t record_len = 0;
+ const uint8_t* record_loc = binary_array.GetValue(i, &record_len);
+
+ if (record_len < 0 || record_len > type_length) {
+ return Status::Invalid("Invalid BYTE_ARRAY length for ", type->ToString());
+ }
+
+      // Zero the full output width so that null slots hold well-defined bytes
+      // (two uint64 stores would cover only 16 of a Decimal256's 32 bytes).
+      std::memset(out_ptr, 0, type_length);
+
+      // Convert only the rows that are not null when nulls are present;
+      // otherwise convert every row.
+ if ((null_count > 0 && !binary_array.IsNull(i)) || null_count <= 0) {
+ using DecimalType = typename DecimalTypeTrait<DecimalArrayType>::value;
+ RETURN_NOT_OK(
+ RawBytesToDecimalBytes<DecimalType>(record_loc, record_len, out_ptr));
+ }
+ }
+ *out = std::make_shared<DecimalArrayType>(type, length, std::move(data),
+ binary_array.null_bitmap(), null_count);
+ return Status::OK();
+ }
+};
+
+/// \brief Convert an Int32 or Int64 array into a Decimal128Array
+/// The Parquet spec allows systems to write decimals as int32 or int64 when the
+/// values are small enough to fit in 4 bytes or 8 bytes, respectively.
+/// This function implements the conversion from int32 and int64 arrays to decimal arrays.
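+/// (Per the Parquet format spec, INT32 can back decimals of precision <= 9 and
+/// INT64 decimals of precision <= 18.)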
+template <
+ typename ParquetIntegerType,
+ typename = ::arrow::enable_if_t<std::is_same<ParquetIntegerType, Int32Type>::value ||
+ std::is_same<ParquetIntegerType, Int64Type>::value>>
+static Status DecimalIntegerTransfer(RecordReader* reader, MemoryPool* pool,
+ const std::shared_ptr<DataType>& type, Datum* out) {
+ // Decimal128 and Decimal256 are only Arrow constructs. Parquet does not
+ // specifically distinguish between decimal byte widths.
+ // Decimal256 isn't relevant here because the Arrow-Parquet C++ bindings never
+ // write Decimal values as integers and if the decimal value can fit in an
+ // integer it is wasteful to use Decimal256. Put another way, the only
+ // way an integer column could be construed as Decimal256 is if an arrow
+ // schema was stored as metadata in the file indicating the column was
+ // Decimal256. The current Arrow-Parquet C++ bindings will never do this.
+ DCHECK(type->id() == ::arrow::Type::DECIMAL128);
+
+ const int64_t length = reader->values_written();
+
+ using ElementType = typename ParquetIntegerType::c_type;
+ static_assert(std::is_same<ElementType, int32_t>::value ||
+ std::is_same<ElementType, int64_t>::value,
+ "ElementType must be int32_t or int64_t");
+
+ const auto values = reinterpret_cast<const ElementType*>(reader->values());
+
+ const auto& decimal_type = checked_cast<const ::arrow::DecimalType&>(*type);
+ const int64_t type_length = decimal_type.byte_width();
+
+ ARROW_ASSIGN_OR_RAISE(auto data, ::arrow::AllocateBuffer(length * type_length, pool));
+ uint8_t* out_ptr = data->mutable_data();
+
+ using ::arrow::BitUtil::FromLittleEndian;
+
+ for (int64_t i = 0; i < length; ++i, out_ptr += type_length) {
+ // sign/zero extend int32_t values, otherwise a no-op
+ const auto value = static_cast<int64_t>(values[i]);
+
+ ::arrow::Decimal128 decimal(value);
+ decimal.ToBytes(out_ptr);
+ }
+
+ if (reader->nullable_values()) {
+ std::shared_ptr<ResizableBuffer> is_valid = reader->ReleaseIsValid();
+ *out = std::make_shared<Decimal128Array>(type, length, std::move(data), is_valid,
+ reader->null_count());
+ } else {
+ *out = std::make_shared<Decimal128Array>(type, length, std::move(data));
+ }
+ return Status::OK();
+}
+
+/// \brief Convert an arrow::BinaryArray to an arrow::Decimal{128,256}Array
+/// We do this by:
+/// 1. Creating an arrow::BinaryArray from the RecordReader's builder
+/// 2. Allocating a buffer for the arrow::Decimal{128,256}Array
+/// 3. Converting the big-endian bytes in each BinaryArray entry to two integers
+/// representing the high and low bits of each decimal value.
+template <typename DecimalArrayType, typename ParquetType>
+Status TransferDecimal(RecordReader* reader, MemoryPool* pool,
+ const std::shared_ptr<DataType>& type, Datum* out) {
+ auto binary_reader = dynamic_cast<BinaryRecordReader*>(reader);
+ DCHECK(binary_reader);
+ ::arrow::ArrayVector chunks = binary_reader->GetBuilderChunks();
+ for (size_t i = 0; i < chunks.size(); ++i) {
+ std::shared_ptr<Array> chunk_as_decimal;
+ auto fn = &DecimalConverter<DecimalArrayType, ParquetType>::ConvertToDecimal;
+ RETURN_NOT_OK(fn(*chunks[i], type, pool, &chunk_as_decimal));
+ // Replace the chunk, which will hopefully also free memory as we go
+ chunks[i] = chunk_as_decimal;
+ }
+ *out = std::make_shared<ChunkedArray>(chunks, type);
+ return Status::OK();
+}
+
+} // namespace
+
+#define TRANSFER_INT32(ENUM, ArrowType) \
+ case ::arrow::Type::ENUM: { \
+ Status s = TransferInt<ArrowType, Int32Type>(reader, pool, value_type, &result); \
+ RETURN_NOT_OK(s); \
+ } break;
+
+#define TRANSFER_INT64(ENUM, ArrowType) \
+ case ::arrow::Type::ENUM: { \
+ Status s = TransferInt<ArrowType, Int64Type>(reader, pool, value_type, &result); \
+ RETURN_NOT_OK(s); \
+ } break;
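+
+// Each TRANSFER_INT32 / TRANSFER_INT64 use below expands to a `case` label in
+// TransferColumnData's switch, copying from the Parquet storage integer
+// (int32/int64) into the requested Arrow integer type.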
+
+Status TransferColumnData(RecordReader* reader, std::shared_ptr<DataType> value_type,
+ const ColumnDescriptor* descr, MemoryPool* pool,
+ std::shared_ptr<ChunkedArray>* out) {
+ Datum result;
+ std::shared_ptr<ChunkedArray> chunked_result;
+ switch (value_type->id()) {
+ case ::arrow::Type::DICTIONARY: {
+ RETURN_NOT_OK(TransferDictionary(reader, value_type, &chunked_result));
+ result = chunked_result;
+ } break;
+ case ::arrow::Type::NA: {
+ result = std::make_shared<::arrow::NullArray>(reader->values_written());
+ break;
+ }
+ case ::arrow::Type::INT32:
+ case ::arrow::Type::INT64:
+ case ::arrow::Type::FLOAT:
+ case ::arrow::Type::DOUBLE:
+ result = TransferZeroCopy(reader, value_type);
+ break;
+ case ::arrow::Type::BOOL:
+ RETURN_NOT_OK(TransferBool(reader, pool, &result));
+ break;
+ TRANSFER_INT32(UINT8, ::arrow::UInt8Type);
+ TRANSFER_INT32(INT8, ::arrow::Int8Type);
+ TRANSFER_INT32(UINT16, ::arrow::UInt16Type);
+ TRANSFER_INT32(INT16, ::arrow::Int16Type);
+ TRANSFER_INT32(UINT32, ::arrow::UInt32Type);
+ TRANSFER_INT64(UINT64, ::arrow::UInt64Type);
+ TRANSFER_INT32(DATE32, ::arrow::Date32Type);
+ TRANSFER_INT32(TIME32, ::arrow::Time32Type);
+ TRANSFER_INT64(TIME64, ::arrow::Time64Type);
+ case ::arrow::Type::DATE64:
+ RETURN_NOT_OK(TransferDate64(reader, pool, value_type, &result));
+ break;
+ case ::arrow::Type::FIXED_SIZE_BINARY:
+ case ::arrow::Type::BINARY:
+ case ::arrow::Type::STRING:
+ case ::arrow::Type::LARGE_BINARY:
+ case ::arrow::Type::LARGE_STRING: {
+ RETURN_NOT_OK(TransferBinary(reader, pool, value_type, &chunked_result));
+ result = chunked_result;
+ } break;
+ case ::arrow::Type::DECIMAL128: {
+ switch (descr->physical_type()) {
+ case ::parquet::Type::INT32: {
+ auto fn = DecimalIntegerTransfer<Int32Type>;
+ RETURN_NOT_OK(fn(reader, pool, value_type, &result));
+ } break;
+ case ::parquet::Type::INT64: {
+ auto fn = &DecimalIntegerTransfer<Int64Type>;
+ RETURN_NOT_OK(fn(reader, pool, value_type, &result));
+ } break;
+ case ::parquet::Type::BYTE_ARRAY: {
+ auto fn = &TransferDecimal<Decimal128Array, ByteArrayType>;
+ RETURN_NOT_OK(fn(reader, pool, value_type, &result));
+ } break;
+ case ::parquet::Type::FIXED_LEN_BYTE_ARRAY: {
+ auto fn = &TransferDecimal<Decimal128Array, FLBAType>;
+ RETURN_NOT_OK(fn(reader, pool, value_type, &result));
+ } break;
+ default:
+ return Status::Invalid(
+ "Physical type for decimal128 must be int32, int64, byte array, or fixed "
+ "length binary");
+ }
+ } break;
+ case ::arrow::Type::DECIMAL256:
+ switch (descr->physical_type()) {
+ case ::parquet::Type::BYTE_ARRAY: {
+ auto fn = &TransferDecimal<Decimal256Array, ByteArrayType>;
+ RETURN_NOT_OK(fn(reader, pool, value_type, &result));
+ } break;
+ case ::parquet::Type::FIXED_LEN_BYTE_ARRAY: {
+ auto fn = &TransferDecimal<Decimal256Array, FLBAType>;
+ RETURN_NOT_OK(fn(reader, pool, value_type, &result));
+ } break;
+ default:
+ return Status::Invalid(
+ "Physical type for decimal256 must be fixed length binary");
+ }
+ break;
+
+ case ::arrow::Type::TIMESTAMP: {
+ const ::arrow::TimestampType& timestamp_type =
+ checked_cast<::arrow::TimestampType&>(*value_type);
+ if (descr->physical_type() == ::parquet::Type::INT96) {
+ RETURN_NOT_OK(
+ TransferInt96(reader, pool, value_type, &result, timestamp_type.unit()));
+ } else {
+ switch (timestamp_type.unit()) {
+ case ::arrow::TimeUnit::MILLI:
+ case ::arrow::TimeUnit::MICRO:
+ case ::arrow::TimeUnit::NANO:
+ result = TransferZeroCopy(reader, value_type);
+ break;
+ default:
+ return Status::NotImplemented("TimeUnit not supported");
+ }
+ }
+ } break;
+ default:
+ return Status::NotImplemented("No support for reading columns of type ",
+ value_type->ToString());
+ }
+
+ if (result.kind() == Datum::ARRAY) {
+ *out = std::make_shared<ChunkedArray>(result.make_array());
+ } else if (result.kind() == Datum::CHUNKED_ARRAY) {
+ *out = result.chunked_array();
+ } else {
+ DCHECK(false) << "Should be impossible, result was " << result.ToString();
+ }
+
+ return Status::OK();
+}
+
+} // namespace arrow
+} // namespace parquet
diff --git a/contrib/libs/apache/arrow/cpp/src/parquet/arrow/reader_internal.h b/contrib/libs/apache/arrow/cpp/src/parquet/arrow/reader_internal.h
index cd54e499aa5..ad0b781576f 100644
--- a/contrib/libs/apache/arrow/cpp/src/parquet/arrow/reader_internal.h
+++ b/contrib/libs/apache/arrow/cpp/src/parquet/arrow/reader_internal.h
@@ -1,122 +1,122 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-#pragma once
-
-#include <algorithm>
-#include <cstdint>
-#include <deque>
-#include <functional>
-#include <memory>
-#include <unordered_set>
-#include <utility>
-#include <vector>
-
-#include "parquet/arrow/schema.h"
-#include "parquet/column_reader.h"
-#include "parquet/file_reader.h"
-#include "parquet/metadata.h"
-#include "parquet/platform.h"
-#include "parquet/schema.h"
-
-namespace arrow {
-
-class Array;
-class ChunkedArray;
-class DataType;
-class Field;
-class KeyValueMetadata;
-class Schema;
-
-} // namespace arrow
-
-using arrow::Status;
-
-namespace parquet {
-
-class ArrowReaderProperties;
-
-namespace arrow {
-
-class ColumnReaderImpl;
-
-// ----------------------------------------------------------------------
-// Iteration utilities
-
-// Abstraction to decouple row group iteration details from the ColumnReader,
-// so we can read only a single row group if we want
-class FileColumnIterator {
- public:
- explicit FileColumnIterator(int column_index, ParquetFileReader* reader,
- std::vector<int> row_groups)
- : column_index_(column_index),
- reader_(reader),
- schema_(reader->metadata()->schema()),
- row_groups_(row_groups.begin(), row_groups.end()) {}
-
- virtual ~FileColumnIterator() {}
-
- std::unique_ptr<::parquet::PageReader> NextChunk() {
- if (row_groups_.empty()) {
- return nullptr;
- }
-
- auto row_group_reader = reader_->RowGroup(row_groups_.front());
- row_groups_.pop_front();
- return row_group_reader->GetColumnPageReader(column_index_);
- }
-
- const SchemaDescriptor* schema() const { return schema_; }
-
- const ColumnDescriptor* descr() const { return schema_->Column(column_index_); }
-
- std::shared_ptr<FileMetaData> metadata() const { return reader_->metadata(); }
-
- int column_index() const { return column_index_; }
-
- protected:
- int column_index_;
- ParquetFileReader* reader_;
- const SchemaDescriptor* schema_;
- std::deque<int> row_groups_;
-};
-
-using FileColumnIteratorFactory =
- std::function<FileColumnIterator*(int, ParquetFileReader*)>;
-
-Status TransferColumnData(::parquet::internal::RecordReader* reader,
- std::shared_ptr<::arrow::DataType> value_type,
- const ColumnDescriptor* descr, ::arrow::MemoryPool* pool,
- std::shared_ptr<::arrow::ChunkedArray>* out);
-
-struct ReaderContext {
- ParquetFileReader* reader;
- ::arrow::MemoryPool* pool;
- FileColumnIteratorFactory iterator_factory;
- bool filter_leaves;
- std::shared_ptr<std::unordered_set<int>> included_leaves;
-
- bool IncludesLeaf(int leaf_index) const {
- if (this->filter_leaves) {
- return this->included_leaves->find(leaf_index) != this->included_leaves->end();
- }
- return true;
- }
-};
-
-} // namespace arrow
-} // namespace parquet
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+
+#include <algorithm>
+#include <cstdint>
+#include <deque>
+#include <functional>
+#include <memory>
+#include <unordered_set>
+#include <utility>
+#include <vector>
+
+#include "parquet/arrow/schema.h"
+#include "parquet/column_reader.h"
+#include "parquet/file_reader.h"
+#include "parquet/metadata.h"
+#include "parquet/platform.h"
+#include "parquet/schema.h"
+
+namespace arrow {
+
+class Array;
+class ChunkedArray;
+class DataType;
+class Field;
+class KeyValueMetadata;
+class Schema;
+
+} // namespace arrow
+
+using arrow::Status;
+
+namespace parquet {
+
+class ArrowReaderProperties;
+
+namespace arrow {
+
+class ColumnReaderImpl;
+
+// ----------------------------------------------------------------------
+// Iteration utilities
+
+// Abstraction to decouple row group iteration details from the ColumnReader,
+// so we can read only a single row group if we want
+class FileColumnIterator {
+ public:
+ explicit FileColumnIterator(int column_index, ParquetFileReader* reader,
+ std::vector<int> row_groups)
+ : column_index_(column_index),
+ reader_(reader),
+ schema_(reader->metadata()->schema()),
+ row_groups_(row_groups.begin(), row_groups.end()) {}
+
+ virtual ~FileColumnIterator() {}
+
+ std::unique_ptr<::parquet::PageReader> NextChunk() {
+ if (row_groups_.empty()) {
+ return nullptr;
+ }
+
+ auto row_group_reader = reader_->RowGroup(row_groups_.front());
+ row_groups_.pop_front();
+ return row_group_reader->GetColumnPageReader(column_index_);
+ }
+
+ const SchemaDescriptor* schema() const { return schema_; }
+
+ const ColumnDescriptor* descr() const { return schema_->Column(column_index_); }
+
+ std::shared_ptr<FileMetaData> metadata() const { return reader_->metadata(); }
+
+ int column_index() const { return column_index_; }
+
+ protected:
+ int column_index_;
+ ParquetFileReader* reader_;
+ const SchemaDescriptor* schema_;
+ std::deque<int> row_groups_;
+};
+
+using FileColumnIteratorFactory =
+ std::function<FileColumnIterator*(int, ParquetFileReader*)>;
+
+Status TransferColumnData(::parquet::internal::RecordReader* reader,
+ std::shared_ptr<::arrow::DataType> value_type,
+ const ColumnDescriptor* descr, ::arrow::MemoryPool* pool,
+ std::shared_ptr<::arrow::ChunkedArray>* out);
+
+struct ReaderContext {
+ ParquetFileReader* reader;
+ ::arrow::MemoryPool* pool;
+ FileColumnIteratorFactory iterator_factory;
+ bool filter_leaves;
+ std::shared_ptr<std::unordered_set<int>> included_leaves;
+
+ bool IncludesLeaf(int leaf_index) const {
+ if (this->filter_leaves) {
+ return this->included_leaves->find(leaf_index) != this->included_leaves->end();
+ }
+ return true;
+ }
+};
+
+} // namespace arrow
+} // namespace parquet
diff --git a/contrib/libs/apache/arrow/cpp/src/parquet/arrow/schema.cc b/contrib/libs/apache/arrow/cpp/src/parquet/arrow/schema.cc
index 454b0e2289a..eb7fd628dfc 100644
--- a/contrib/libs/apache/arrow/cpp/src/parquet/arrow/schema.cc
+++ b/contrib/libs/apache/arrow/cpp/src/parquet/arrow/schema.cc
@@ -1,1087 +1,1087 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-#include "parquet/arrow/schema.h"
-
-#include <functional>
-#include <string>
-#include <vector>
-
-#include "arrow/extension_type.h"
-#include "arrow/io/memory.h"
-#include "arrow/ipc/api.h"
-#include "arrow/result_internal.h"
-#include "arrow/type.h"
-#include "arrow/util/base64.h"
-#include "arrow/util/checked_cast.h"
-#include "arrow/util/key_value_metadata.h"
-#include "arrow/util/logging.h"
-#include "arrow/util/value_parsing.h"
-
-#include "parquet/arrow/schema_internal.h"
-#include "parquet/exception.h"
-#include "parquet/properties.h"
-#include "parquet/types.h"
-
-using arrow::DecimalType;
-using arrow::Field;
-using arrow::FieldVector;
-using arrow::KeyValueMetadata;
-using arrow::Status;
-using arrow::internal::checked_cast;
-
-using ArrowType = arrow::DataType;
-using ArrowTypeId = arrow::Type;
-
-using parquet::Repetition;
-using parquet::schema::GroupNode;
-using parquet::schema::Node;
-using parquet::schema::NodePtr;
-using parquet::schema::PrimitiveNode;
-
-using ParquetType = parquet::Type;
-using parquet::ConvertedType;
-using parquet::LogicalType;
-
-using parquet::internal::LevelInfo;
-
-namespace parquet {
-
-namespace arrow {
-
-// ----------------------------------------------------------------------
-// Parquet to Arrow schema conversion
-
-namespace {
-
-Repetition::type RepetitionFromNullable(bool is_nullable) {
- return is_nullable ? Repetition::OPTIONAL : Repetition::REQUIRED;
-}
-
-Status FieldToNode(const std::string& name, const std::shared_ptr<Field>& field,
- const WriterProperties& properties,
- const ArrowWriterProperties& arrow_properties, NodePtr* out);
-
-Status ListToNode(const std::shared_ptr<::arrow::BaseListType>& type,
- const std::string& name, bool nullable,
- const WriterProperties& properties,
- const ArrowWriterProperties& arrow_properties, NodePtr* out) {
- NodePtr element;
- std::string value_name =
- arrow_properties.compliant_nested_types() ? "element" : type->value_field()->name();
- RETURN_NOT_OK(FieldToNode(value_name, type->value_field(), properties, arrow_properties,
- &element));
-
- NodePtr list = GroupNode::Make("list", Repetition::REPEATED, {element});
- *out = GroupNode::Make(name, RepetitionFromNullable(nullable), {list},
- LogicalType::List());
- return Status::OK();
-}
-
-Status MapToNode(const std::shared_ptr<::arrow::MapType>& type, const std::string& name,
- bool nullable, const WriterProperties& properties,
- const ArrowWriterProperties& arrow_properties, NodePtr* out) {
- // TODO: Should we offer a non-compliant mode that forwards the type names?
- NodePtr key_node;
- RETURN_NOT_OK(
- FieldToNode("key", type->key_field(), properties, arrow_properties, &key_node));
-
- NodePtr value_node;
- RETURN_NOT_OK(FieldToNode("value", type->item_field(), properties, arrow_properties,
- &value_node));
-
- NodePtr key_value =
- GroupNode::Make("key_value", Repetition::REPEATED, {key_node, value_node});
- *out = GroupNode::Make(name, RepetitionFromNullable(nullable), {key_value},
- LogicalType::Map());
- return Status::OK();
-}
-
-Status StructToNode(const std::shared_ptr<::arrow::StructType>& type,
- const std::string& name, bool nullable,
- const WriterProperties& properties,
- const ArrowWriterProperties& arrow_properties, NodePtr* out) {
- std::vector<NodePtr> children(type->num_fields());
- if (type->num_fields() != 0) {
- for (int i = 0; i < type->num_fields(); i++) {
- RETURN_NOT_OK(FieldToNode(type->field(i)->name(), type->field(i), properties,
- arrow_properties, &children[i]));
- }
- } else {
- // XXX (ARROW-10928) We could add a dummy primitive node but that would
- // require special handling when writing and reading, to avoid column index
- // mismatches.
- return Status::NotImplemented("Cannot write struct type '", name,
- "' with no child field to Parquet. "
- "Consider adding a dummy child field.");
- }
-
- *out = GroupNode::Make(name, RepetitionFromNullable(nullable), std::move(children));
- return Status::OK();
-}
-
-static std::shared_ptr<const LogicalType> TimestampLogicalTypeFromArrowTimestamp(
- const ::arrow::TimestampType& timestamp_type, ::arrow::TimeUnit::type time_unit) {
- const bool utc = !(timestamp_type.timezone().empty());
- // ARROW-5878(wesm): for forward compatibility reasons, and because
- // there's no other way to signal to old readers that values are
- // timestamps, we force the ConvertedType field to be set to the
- // corresponding TIMESTAMP_* value. This does cause some ambiguity
- // as Parquet readers have not been consistent about the
- // interpretation of TIMESTAMP_* values as being UTC-normalized.
- switch (time_unit) {
- case ::arrow::TimeUnit::MILLI:
- return LogicalType::Timestamp(utc, LogicalType::TimeUnit::MILLIS,
- /*is_from_converted_type=*/false,
- /*force_set_converted_type=*/true);
- case ::arrow::TimeUnit::MICRO:
- return LogicalType::Timestamp(utc, LogicalType::TimeUnit::MICROS,
- /*is_from_converted_type=*/false,
- /*force_set_converted_type=*/true);
- case ::arrow::TimeUnit::NANO:
- return LogicalType::Timestamp(utc, LogicalType::TimeUnit::NANOS);
- case ::arrow::TimeUnit::SECOND:
- // No equivalent parquet logical type.
- break;
- }
- return LogicalType::None();
-}
-
-static Status GetTimestampMetadata(const ::arrow::TimestampType& type,
- const WriterProperties& properties,
- const ArrowWriterProperties& arrow_properties,
- ParquetType::type* physical_type,
- std::shared_ptr<const LogicalType>* logical_type) {
- const bool coerce = arrow_properties.coerce_timestamps_enabled();
- const auto target_unit =
- coerce ? arrow_properties.coerce_timestamps_unit() : type.unit();
-
- // The user is explicitly asking for Impala int96 encoding, there is no
- // logical type.
- if (arrow_properties.support_deprecated_int96_timestamps()) {
- *physical_type = ParquetType::INT96;
- return Status::OK();
- }
-
- *physical_type = ParquetType::INT64;
- *logical_type = TimestampLogicalTypeFromArrowTimestamp(type, target_unit);
-
- // The user is explicitly asking for timestamp data to be converted to the
- // specified units (target_unit).
- if (coerce) {
- if (properties.version() == ::parquet::ParquetVersion::PARQUET_1_0) {
- switch (target_unit) {
- case ::arrow::TimeUnit::MILLI:
- case ::arrow::TimeUnit::MICRO:
- break;
- case ::arrow::TimeUnit::NANO:
- case ::arrow::TimeUnit::SECOND:
- return Status::NotImplemented(
- "For Parquet version 1.0 files, can only coerce Arrow timestamps to "
- "milliseconds or microseconds");
- }
- } else {
- switch (target_unit) {
- case ::arrow::TimeUnit::MILLI:
- case ::arrow::TimeUnit::MICRO:
- case ::arrow::TimeUnit::NANO:
- break;
- case ::arrow::TimeUnit::SECOND:
- return Status::NotImplemented(
- "For Parquet files, can only coerce Arrow timestamps to milliseconds, "
- "microseconds, or nanoseconds");
- }
- }
- return Status::OK();
- }
-
- // The user implicitly wants timestamp data to retain its original time units,
- // however the ConvertedType field used to indicate logical types for Parquet
- // version 1.0 fields does not allow for nanosecond time units and so nanoseconds
- // must be coerced to microseconds.
- if (properties.version() == ::parquet::ParquetVersion::PARQUET_1_0 &&
- type.unit() == ::arrow::TimeUnit::NANO) {
- *logical_type =
- TimestampLogicalTypeFromArrowTimestamp(type, ::arrow::TimeUnit::MICRO);
- return Status::OK();
- }
-
- // The user implicitly wants timestamp data to retain its original time units,
- // however the Arrow seconds time unit can not be represented (annotated) in
- // any version of Parquet and so must be coerced to milliseconds.
- if (type.unit() == ::arrow::TimeUnit::SECOND) {
- *logical_type =
- TimestampLogicalTypeFromArrowTimestamp(type, ::arrow::TimeUnit::MILLI);
- return Status::OK();
- }
-
- return Status::OK();
-}
-
-static constexpr char FIELD_ID_KEY[] = "PARQUET:field_id";
-
-std::shared_ptr<::arrow::KeyValueMetadata> FieldIdMetadata(int field_id) {
- if (field_id >= 0) {
- return ::arrow::key_value_metadata({FIELD_ID_KEY}, {std::to_string(field_id)});
- } else {
- return nullptr;
- }
-}
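-
-// For example, FieldIdMetadata(7) yields {"PARQUET:field_id": "7"}, while a
-// negative id (meaning "unset") yields no metadata at all.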
-
-int FieldIdFromMetadata(
- const std::shared_ptr<const ::arrow::KeyValueMetadata>& metadata) {
- if (!metadata) {
- return -1;
- }
- int key = metadata->FindKey(FIELD_ID_KEY);
- if (key < 0) {
- return -1;
- }
- std::string field_id_str = metadata->value(key);
- int field_id;
- if (::arrow::internal::ParseValue<::arrow::Int32Type>(
- field_id_str.c_str(), field_id_str.length(), &field_id)) {
- if (field_id < 0) {
-      // Thrift should convert any negative value to null, but normalize to -1
-      // here in case downstream logic checks the value.
- return -1;
- }
- return field_id;
- } else {
- return -1;
- }
-}
-
-Status FieldToNode(const std::string& name, const std::shared_ptr<Field>& field,
- const WriterProperties& properties,
- const ArrowWriterProperties& arrow_properties, NodePtr* out) {
- std::shared_ptr<const LogicalType> logical_type = LogicalType::None();
- ParquetType::type type;
- Repetition::type repetition = RepetitionFromNullable(field->nullable());
-
- int length = -1;
- int precision = -1;
- int scale = -1;
-
- switch (field->type()->id()) {
- case ArrowTypeId::NA: {
- type = ParquetType::INT32;
- logical_type = LogicalType::Null();
- if (repetition != Repetition::OPTIONAL) {
- return Status::Invalid("NullType Arrow field must be nullable");
- }
- } break;
- case ArrowTypeId::BOOL:
- type = ParquetType::BOOLEAN;
- break;
- case ArrowTypeId::UINT8:
- type = ParquetType::INT32;
- logical_type = LogicalType::Int(8, false);
- break;
- case ArrowTypeId::INT8:
- type = ParquetType::INT32;
- logical_type = LogicalType::Int(8, true);
- break;
- case ArrowTypeId::UINT16:
- type = ParquetType::INT32;
- logical_type = LogicalType::Int(16, false);
- break;
- case ArrowTypeId::INT16:
- type = ParquetType::INT32;
- logical_type = LogicalType::Int(16, true);
- break;
- case ArrowTypeId::UINT32:
- if (properties.version() == ::parquet::ParquetVersion::PARQUET_1_0) {
- type = ParquetType::INT64;
- } else {
- type = ParquetType::INT32;
- logical_type = LogicalType::Int(32, false);
- }
- break;
- case ArrowTypeId::INT32:
- type = ParquetType::INT32;
- break;
- case ArrowTypeId::UINT64:
- type = ParquetType::INT64;
- logical_type = LogicalType::Int(64, false);
- break;
- case ArrowTypeId::INT64:
- type = ParquetType::INT64;
- break;
- case ArrowTypeId::FLOAT:
- type = ParquetType::FLOAT;
- break;
- case ArrowTypeId::DOUBLE:
- type = ParquetType::DOUBLE;
- break;
- case ArrowTypeId::LARGE_STRING:
- case ArrowTypeId::STRING:
- type = ParquetType::BYTE_ARRAY;
- logical_type = LogicalType::String();
- break;
- case ArrowTypeId::LARGE_BINARY:
- case ArrowTypeId::BINARY:
- type = ParquetType::BYTE_ARRAY;
- break;
- case ArrowTypeId::FIXED_SIZE_BINARY: {
- type = ParquetType::FIXED_LEN_BYTE_ARRAY;
- const auto& fixed_size_binary_type =
- static_cast<const ::arrow::FixedSizeBinaryType&>(*field->type());
- length = fixed_size_binary_type.byte_width();
- } break;
- case ArrowTypeId::DECIMAL128:
- case ArrowTypeId::DECIMAL256: {
- type = ParquetType::FIXED_LEN_BYTE_ARRAY;
- const auto& decimal_type = static_cast<const ::arrow::DecimalType&>(*field->type());
- precision = decimal_type.precision();
- scale = decimal_type.scale();
- length = DecimalType::DecimalSize(precision);
- PARQUET_CATCH_NOT_OK(logical_type = LogicalType::Decimal(precision, scale));
- } break;
- case ArrowTypeId::DATE32:
- type = ParquetType::INT32;
- logical_type = LogicalType::Date();
- break;
- case ArrowTypeId::DATE64:
- type = ParquetType::INT32;
- logical_type = LogicalType::Date();
- break;
- case ArrowTypeId::TIMESTAMP:
- RETURN_NOT_OK(
- GetTimestampMetadata(static_cast<::arrow::TimestampType&>(*field->type()),
- properties, arrow_properties, &type, &logical_type));
- break;
- case ArrowTypeId::TIME32:
- type = ParquetType::INT32;
- logical_type =
- LogicalType::Time(/*is_adjusted_to_utc=*/true, LogicalType::TimeUnit::MILLIS);
- break;
- case ArrowTypeId::TIME64: {
- type = ParquetType::INT64;
- auto time_type = static_cast<::arrow::Time64Type*>(field->type().get());
- if (time_type->unit() == ::arrow::TimeUnit::NANO) {
- logical_type =
- LogicalType::Time(/*is_adjusted_to_utc=*/true, LogicalType::TimeUnit::NANOS);
- } else {
- logical_type =
- LogicalType::Time(/*is_adjusted_to_utc=*/true, LogicalType::TimeUnit::MICROS);
- }
- } break;
- case ArrowTypeId::STRUCT: {
- auto struct_type = std::static_pointer_cast<::arrow::StructType>(field->type());
- return StructToNode(struct_type, name, field->nullable(), properties,
- arrow_properties, out);
- }
- case ArrowTypeId::FIXED_SIZE_LIST:
- case ArrowTypeId::LARGE_LIST:
- case ArrowTypeId::LIST: {
- auto list_type = std::static_pointer_cast<::arrow::BaseListType>(field->type());
- return ListToNode(list_type, name, field->nullable(), properties, arrow_properties,
- out);
- }
- case ArrowTypeId::DICTIONARY: {
- // Parquet has no Dictionary type, dictionary-encoded is handled on
- // the encoding, not the schema level.
- const ::arrow::DictionaryType& dict_type =
- static_cast<const ::arrow::DictionaryType&>(*field->type());
- std::shared_ptr<::arrow::Field> unpacked_field = ::arrow::field(
- name, dict_type.value_type(), field->nullable(), field->metadata());
- return FieldToNode(name, unpacked_field, properties, arrow_properties, out);
- }
- case ArrowTypeId::EXTENSION: {
- auto ext_type = std::static_pointer_cast<::arrow::ExtensionType>(field->type());
- std::shared_ptr<::arrow::Field> storage_field = ::arrow::field(
- name, ext_type->storage_type(), field->nullable(), field->metadata());
- return FieldToNode(name, storage_field, properties, arrow_properties, out);
- }
- case ArrowTypeId::MAP: {
- auto map_type = std::static_pointer_cast<::arrow::MapType>(field->type());
- return MapToNode(map_type, name, field->nullable(), properties, arrow_properties,
- out);
- }
-
- default: {
-      // TODO: DENSE_UNION, SPARSE_UNION, JSON_SCALAR, DECIMAL_TEXT, VARCHAR
- return Status::NotImplemented(
- "Unhandled type for Arrow to Parquet schema conversion: ",
- field->type()->ToString());
- }
- }
-
- int field_id = FieldIdFromMetadata(field->metadata());
- PARQUET_CATCH_NOT_OK(*out = PrimitiveNode::Make(name, repetition, logical_type, type,
- length, field_id));
-
- return Status::OK();
-}
-
-struct SchemaTreeContext {
- SchemaManifest* manifest;
- ArrowReaderProperties properties;
- const SchemaDescriptor* schema;
-
- void LinkParent(const SchemaField* child, const SchemaField* parent) {
- manifest->child_to_parent[child] = parent;
- }
-
- void RecordLeaf(const SchemaField* leaf) {
- manifest->column_index_to_field[leaf->column_index] = leaf;
- }
-};
-
-bool IsDictionaryReadSupported(const ArrowType& type) {
- // Only supported currently for BYTE_ARRAY types
- return type.id() == ::arrow::Type::BINARY || type.id() == ::arrow::Type::STRING;
-}
-
-// ----------------------------------------------------------------------
-// Schema logic
-
-::arrow::Result<std::shared_ptr<ArrowType>> GetTypeForNode(
- int column_index, const schema::PrimitiveNode& primitive_node,
- SchemaTreeContext* ctx) {
- ASSIGN_OR_RAISE(
- std::shared_ptr<ArrowType> storage_type,
- GetArrowType(primitive_node, ctx->properties.coerce_int96_timestamp_unit()));
- if (ctx->properties.read_dictionary(column_index) &&
- IsDictionaryReadSupported(*storage_type)) {
- return ::arrow::dictionary(::arrow::int32(), storage_type);
- }
- return storage_type;
-}
-
-Status NodeToSchemaField(const Node& node, LevelInfo current_levels,
- SchemaTreeContext* ctx, const SchemaField* parent,
- SchemaField* out);
-
-Status GroupToSchemaField(const GroupNode& node, LevelInfo current_levels,
- SchemaTreeContext* ctx, const SchemaField* parent,
- SchemaField* out);
-
-Status PopulateLeaf(int column_index, const std::shared_ptr<Field>& field,
- LevelInfo current_levels, SchemaTreeContext* ctx,
- const SchemaField* parent, SchemaField* out) {
- out->field = field;
- out->column_index = column_index;
- out->level_info = current_levels;
- ctx->RecordLeaf(out);
- ctx->LinkParent(out, parent);
- return Status::OK();
-}
-
-// Special case mentioned in the format spec:
-// If the name is array or ends in _tuple, this should be a list of struct
-// even for single child elements.
-bool HasStructListName(const GroupNode& node) {
- ::arrow::util::string_view name{node.name()};
- return name == "array" || name.ends_with("_tuple");
-}
-
-Status GroupToStruct(const GroupNode& node, LevelInfo current_levels,
- SchemaTreeContext* ctx, const SchemaField* parent,
- SchemaField* out) {
- std::vector<std::shared_ptr<Field>> arrow_fields;
- out->children.resize(node.field_count());
- // All level increments for the node are expected to happen by callers.
-  // This is required because repeated elements need to have their own
- // SchemaField.
-
- for (int i = 0; i < node.field_count(); i++) {
- RETURN_NOT_OK(
- NodeToSchemaField(*node.field(i), current_levels, ctx, out, &out->children[i]));
- arrow_fields.push_back(out->children[i].field);
- }
- auto struct_type = ::arrow::struct_(arrow_fields);
- out->field = ::arrow::field(node.name(), struct_type, node.is_optional(),
- FieldIdMetadata(node.field_id()));
- out->level_info = current_levels;
- return Status::OK();
-}
-
-Status ListToSchemaField(const GroupNode& group, LevelInfo current_levels,
- SchemaTreeContext* ctx, const SchemaField* parent,
- SchemaField* out);
-
-Status MapToSchemaField(const GroupNode& group, LevelInfo current_levels,
- SchemaTreeContext* ctx, const SchemaField* parent,
- SchemaField* out) {
- if (group.field_count() != 1) {
- return Status::Invalid("MAP-annotated groups must have a single child.");
- }
- if (group.is_repeated()) {
- return Status::Invalid("MAP-annotated groups must not be repeated.");
- }
-
- const Node& key_value_node = *group.field(0);
-
- if (!key_value_node.is_repeated()) {
- return Status::Invalid(
- "Non-repeated key value in a MAP-annotated group are not supported.");
- }
-
- if (!key_value_node.is_group()) {
- return Status::Invalid("Key-value node must be a group.");
- }
-
- const GroupNode& key_value = checked_cast<const GroupNode&>(key_value_node);
- if (key_value.field_count() != 1 && key_value.field_count() != 2) {
- return Status::Invalid("Key-value map node must have 1 or 2 child elements. Found: ",
- key_value.field_count());
- }
- const Node& key_node = *key_value.field(0);
- if (!key_node.is_required()) {
- return Status::Invalid("Map keys must be annotated as required.");
- }
-  // Arrow doesn't support one-column maps (i.e. sets). The options are either
-  // to make the values column nullable or to process the map as a list. We
-  // choose the latter as it is simpler.
- if (key_value.field_count() == 1) {
- return ListToSchemaField(group, current_levels, ctx, parent, out);
- }
-
- current_levels.Increment(group);
- int16_t repeated_ancestor_def_level = current_levels.IncrementRepeated();
-
- out->children.resize(1);
- SchemaField* key_value_field = &out->children[0];
-
- key_value_field->children.resize(2);
- SchemaField* key_field = &key_value_field->children[0];
- SchemaField* value_field = &key_value_field->children[1];
-
- ctx->LinkParent(out, parent);
- ctx->LinkParent(key_value_field, out);
- ctx->LinkParent(key_field, key_value_field);
- ctx->LinkParent(value_field, key_value_field);
-
- // required/optional group name=whatever {
-  //   repeated group name=key_values {
- // required TYPE key;
- // required/optional TYPE value;
- // }
- // }
- //
-
- RETURN_NOT_OK(NodeToSchemaField(*key_value.field(0), current_levels, ctx,
- key_value_field, key_field));
- RETURN_NOT_OK(NodeToSchemaField(*key_value.field(1), current_levels, ctx,
- key_value_field, value_field));
-
- key_value_field->field = ::arrow::field(
- group.name(), ::arrow::struct_({key_field->field, value_field->field}),
- /*nullable=*/false, FieldIdMetadata(key_value.field_id()));
- key_value_field->level_info = current_levels;
-
- out->field = ::arrow::field(group.name(),
- ::arrow::map(key_field->field->type(), value_field->field),
- group.is_optional(), FieldIdMetadata(group.field_id()));
- out->level_info = current_levels;
-  // At this point current_levels contains the def level for this list,
- // we need to reset to the prior parent.
- out->level_info.repeated_ancestor_def_level = repeated_ancestor_def_level;
- return Status::OK();
-}
-
-Status ListToSchemaField(const GroupNode& group, LevelInfo current_levels,
- SchemaTreeContext* ctx, const SchemaField* parent,
- SchemaField* out) {
- if (group.field_count() != 1) {
- return Status::Invalid("LIST-annotated groups must have a single child.");
- }
- if (group.is_repeated()) {
- return Status::Invalid("LIST-annotated groups must not be repeated.");
- }
- current_levels.Increment(group);
-
- out->children.resize(group.field_count());
- SchemaField* child_field = &out->children[0];
-
- ctx->LinkParent(out, parent);
- ctx->LinkParent(child_field, out);
-
- const Node& list_node = *group.field(0);
-
- if (!list_node.is_repeated()) {
- return Status::Invalid(
- "Non-repeated nodes in a LIST-annotated group are not supported.");
- }
-
- int16_t repeated_ancestor_def_level = current_levels.IncrementRepeated();
- if (list_node.is_group()) {
- // Resolve 3-level encoding
- //
- // required/optional group name=whatever {
- // repeated group name=list {
- // required/optional TYPE item;
- // }
- // }
- //
- // yields list<item: TYPE ?nullable> ?nullable
- //
- // We distinguish the special case that we have
- //
- // required/optional group name=whatever {
- // repeated group name=array or $SOMETHING_tuple {
- // required/optional TYPE item;
- // }
- // }
- //
- // In this latter case, the inner type of the list should be a struct
- // rather than a primitive value
- //
- // yields list<item: struct<item: TYPE ?nullable> not null> ?nullable
- const auto& list_group = static_cast<const GroupNode&>(list_node);
- // Special case mentioned in the format spec:
- // If the name is array or ends in _tuple, this should be a list of struct
- // even for single child elements.
- if (list_group.field_count() == 1 && !HasStructListName(list_group)) {
- // List of primitive type
- RETURN_NOT_OK(
- NodeToSchemaField(*list_group.field(0), current_levels, ctx, out, child_field));
- } else {
- RETURN_NOT_OK(GroupToStruct(list_group, current_levels, ctx, out, child_field));
- }
- } else {
- // Two-level list encoding
- //
- // required/optional group LIST {
- // repeated TYPE;
- // }
- const auto& primitive_node = static_cast<const PrimitiveNode&>(list_node);
- int column_index = ctx->schema->GetColumnIndex(primitive_node);
- ASSIGN_OR_RAISE(std::shared_ptr<ArrowType> type,
- GetTypeForNode(column_index, primitive_node, ctx));
- auto item_field = ::arrow::field(list_node.name(), type, /*nullable=*/false,
- FieldIdMetadata(list_node.field_id()));
- RETURN_NOT_OK(
- PopulateLeaf(column_index, item_field, current_levels, ctx, out, child_field));
- }
- out->field = ::arrow::field(group.name(), ::arrow::list(child_field->field),
- group.is_optional(), FieldIdMetadata(group.field_id()));
- out->level_info = current_levels;
-  // At this point current_levels contains the def level for this list,
- // we need to reset to the prior parent.
- out->level_info.repeated_ancestor_def_level = repeated_ancestor_def_level;
- return Status::OK();
-}
-
-Status GroupToSchemaField(const GroupNode& node, LevelInfo current_levels,
- SchemaTreeContext* ctx, const SchemaField* parent,
- SchemaField* out) {
- if (node.logical_type()->is_list()) {
- return ListToSchemaField(node, current_levels, ctx, parent, out);
- } else if (node.logical_type()->is_map()) {
- return MapToSchemaField(node, current_levels, ctx, parent, out);
- }
- std::shared_ptr<ArrowType> type;
- if (node.is_repeated()) {
- // Simple repeated struct
- //
- // repeated group $NAME {
- // r/o TYPE[0] f0
- // r/o TYPE[1] f1
- // }
- out->children.resize(1);
-
- int16_t repeated_ancestor_def_level = current_levels.IncrementRepeated();
- RETURN_NOT_OK(GroupToStruct(node, current_levels, ctx, out, &out->children[0]));
- out->field = ::arrow::field(node.name(), ::arrow::list(out->children[0].field),
- /*nullable=*/false, FieldIdMetadata(node.field_id()));
-
- ctx->LinkParent(&out->children[0], out);
- out->level_info = current_levels;
-    // At this point current_levels contains this list as the def level, so we
-    // need to use the previous ancestor of this list.
- out->level_info.repeated_ancestor_def_level = repeated_ancestor_def_level;
- return Status::OK();
- } else {
- current_levels.Increment(node);
- return GroupToStruct(node, current_levels, ctx, parent, out);
- }
-}
-
-Status NodeToSchemaField(const Node& node, LevelInfo current_levels,
- SchemaTreeContext* ctx, const SchemaField* parent,
- SchemaField* out) {
- // Workhorse function for converting a Parquet schema node to an Arrow
- // type. Handles different conventions for nested data.
-
- ctx->LinkParent(out, parent);
-
-  // Now, walk the schema and create a SchemaField for each leaf node
- if (node.is_group()) {
- // A nested field, but we don't know what kind yet
- return GroupToSchemaField(static_cast<const GroupNode&>(node), current_levels, ctx,
- parent, out);
- } else {
- // Either a normal flat primitive type, or a list type encoded with 1-level
- // list encoding. Note that the 3-level encoding is the form recommended by
-    // the Parquet specification, but technically we can have either
- //
- // required/optional $TYPE $FIELD_NAME
- //
- // or
- //
- // repeated $TYPE $FIELD_NAME
- const auto& primitive_node = static_cast<const PrimitiveNode&>(node);
- int column_index = ctx->schema->GetColumnIndex(primitive_node);
- ASSIGN_OR_RAISE(std::shared_ptr<ArrowType> type,
- GetTypeForNode(column_index, primitive_node, ctx));
- if (node.is_repeated()) {
- // One-level list encoding, e.g.
- // a: repeated int32;
- int16_t repeated_ancestor_def_level = current_levels.IncrementRepeated();
- out->children.resize(1);
- auto child_field = ::arrow::field(node.name(), type, /*nullable=*/false);
- RETURN_NOT_OK(PopulateLeaf(column_index, child_field, current_levels, ctx, out,
- &out->children[0]));
-
- out->field = ::arrow::field(node.name(), ::arrow::list(child_field),
- /*nullable=*/false, FieldIdMetadata(node.field_id()));
- out->level_info = current_levels;
-      // At this point current_levels has considered this list the ancestor, so
-      // restore the actual ancestor.
- out->level_info.repeated_ancestor_def_level = repeated_ancestor_def_level;
- return Status::OK();
- } else {
- current_levels.Increment(node);
- // A normal (required/optional) primitive node
- return PopulateLeaf(column_index,
- ::arrow::field(node.name(), type, node.is_optional(),
- FieldIdMetadata(node.field_id())),
- current_levels, ctx, parent, out);
- }
- }
-}
-
-// Get the original Arrow schema, as serialized in the Parquet metadata
-Status GetOriginSchema(const std::shared_ptr<const KeyValueMetadata>& metadata,
- std::shared_ptr<const KeyValueMetadata>* clean_metadata,
- std::shared_ptr<::arrow::Schema>* out) {
- if (metadata == nullptr) {
- *out = nullptr;
- *clean_metadata = nullptr;
- return Status::OK();
- }
-
- static const std::string kArrowSchemaKey = "ARROW:schema";
- int schema_index = metadata->FindKey(kArrowSchemaKey);
- if (schema_index == -1) {
- *out = nullptr;
- *clean_metadata = metadata;
- return Status::OK();
- }
-
- // The original Arrow schema was serialized using the store_schema option.
- // We deserialize it here and use it to inform read options such as
- // dictionary-encoded fields.
- auto decoded = ::arrow::util::base64_decode(metadata->value(schema_index));
- auto schema_buf = std::make_shared<Buffer>(decoded);
-
- ::arrow::ipc::DictionaryMemo dict_memo;
- ::arrow::io::BufferReader input(schema_buf);
-
- ARROW_ASSIGN_OR_RAISE(*out, ::arrow::ipc::ReadSchema(&input, &dict_memo));
-
- if (metadata->size() > 1) {
- // Copy the metadata without the schema key
- auto new_metadata = ::arrow::key_value_metadata({}, {});
- new_metadata->reserve(metadata->size() - 1);
- for (int64_t i = 0; i < metadata->size(); ++i) {
- if (i == schema_index) continue;
- new_metadata->Append(metadata->key(i), metadata->value(i));
- }
- *clean_metadata = new_metadata;
- } else {
- // No other keys, let metadata be null
- *clean_metadata = nullptr;
- }
- return Status::OK();
-}
-
-// Restore original Arrow field information that was serialized as Parquet metadata
-// but that is not necessarily present in the field reconstituted from Parquet
-// data (for example, Parquet timestamp types don't carry timezone information).
-
-Result<bool> ApplyOriginalMetadata(const Field& origin_field, SchemaField* inferred);
-
-std::function<std::shared_ptr<::arrow::DataType>(FieldVector)> GetNestedFactory(
- const ArrowType& origin_type, const ArrowType& inferred_type) {
- switch (inferred_type.id()) {
- case ::arrow::Type::STRUCT:
- if (origin_type.id() == ::arrow::Type::STRUCT) {
- return ::arrow::struct_;
- }
- break;
- case ::arrow::Type::LIST:
- if (origin_type.id() == ::arrow::Type::LIST) {
- return [](FieldVector fields) {
- DCHECK_EQ(fields.size(), 1);
- return ::arrow::list(std::move(fields[0]));
- };
- }
- if (origin_type.id() == ::arrow::Type::LARGE_LIST) {
- return [](FieldVector fields) {
- DCHECK_EQ(fields.size(), 1);
- return ::arrow::large_list(std::move(fields[0]));
- };
- }
- if (origin_type.id() == ::arrow::Type::FIXED_SIZE_LIST) {
- const auto list_size =
- checked_cast<const ::arrow::FixedSizeListType&>(origin_type).list_size();
- return [list_size](FieldVector fields) {
- DCHECK_EQ(fields.size(), 1);
- return ::arrow::fixed_size_list(std::move(fields[0]), list_size);
- };
- }
- break;
- default:
- break;
- }
- return {};
-}
-
-Result<bool> ApplyOriginalStorageMetadata(const Field& origin_field,
- SchemaField* inferred) {
- bool modified = false;
-
- auto origin_type = origin_field.type();
- auto inferred_type = inferred->field->type();
-
- const int num_children = inferred_type->num_fields();
-
- if (num_children > 0 && origin_type->num_fields() == num_children) {
- DCHECK_EQ(static_cast<int>(inferred->children.size()), num_children);
- const auto factory = GetNestedFactory(*origin_type, *inferred_type);
- if (factory) {
- // The type may be modified (e.g. LargeList) while the children stay the same
- modified |= origin_type->id() != inferred_type->id();
-
- // Apply original metadata recursively to children
- for (int i = 0; i < inferred_type->num_fields(); ++i) {
- ARROW_ASSIGN_OR_RAISE(
- const bool child_modified,
- ApplyOriginalMetadata(*origin_type->field(i), &inferred->children[i]));
- modified |= child_modified;
- }
- if (modified) {
- // Recreate this field using the modified child fields
- ::arrow::FieldVector modified_children(inferred_type->num_fields());
- for (int i = 0; i < inferred_type->num_fields(); ++i) {
- modified_children[i] = inferred->children[i].field;
- }
- inferred->field =
- inferred->field->WithType(factory(std::move(modified_children)));
- }
- }
- }
-
- if (origin_type->id() == ::arrow::Type::TIMESTAMP &&
- inferred_type->id() == ::arrow::Type::TIMESTAMP) {
- // Restore time zone, if any
- const auto& ts_type = checked_cast<const ::arrow::TimestampType&>(*inferred_type);
- const auto& ts_origin_type =
- checked_cast<const ::arrow::TimestampType&>(*origin_type);
-
- // If the data is tz-aware, then set the original time zone, since Parquet
- // has no native storage for timezones
- if (ts_type.timezone() == "UTC" && ts_origin_type.timezone() != "") {
- if (ts_type.unit() == ts_origin_type.unit()) {
- inferred->field = inferred->field->WithType(origin_type);
- } else {
- auto ts_type_new = ::arrow::timestamp(ts_type.unit(), ts_origin_type.timezone());
- inferred->field = inferred->field->WithType(ts_type_new);
- }
- }
- modified = true;
- }
-
- if (origin_type->id() == ::arrow::Type::DICTIONARY &&
- inferred_type->id() != ::arrow::Type::DICTIONARY &&
- IsDictionaryReadSupported(*inferred_type)) {
-    // Direct dictionary reads are only supported for a couple of primitive types,
- // so no need to recurse on value types.
- const auto& dict_origin_type =
- checked_cast<const ::arrow::DictionaryType&>(*origin_type);
- inferred->field = inferred->field->WithType(
- ::arrow::dictionary(::arrow::int32(), inferred_type, dict_origin_type.ordered()));
- modified = true;
- }
-
- if ((origin_type->id() == ::arrow::Type::LARGE_BINARY &&
- inferred_type->id() == ::arrow::Type::BINARY) ||
- (origin_type->id() == ::arrow::Type::LARGE_STRING &&
- inferred_type->id() == ::arrow::Type::STRING)) {
- // Read back binary-like arrays with the intended offset width.
- inferred->field = inferred->field->WithType(origin_type);
- modified = true;
- }
-
- if (origin_type->id() == ::arrow::Type::DECIMAL256 &&
- inferred_type->id() == ::arrow::Type::DECIMAL128) {
- inferred->field = inferred->field->WithType(origin_type);
- modified = true;
- }
-
- // Restore field metadata
- std::shared_ptr<const KeyValueMetadata> field_metadata = origin_field.metadata();
- if (field_metadata != nullptr) {
- if (inferred->field->metadata()) {
- // Prefer the metadata keys (like field_id) from the current metadata
- field_metadata = field_metadata->Merge(*inferred->field->metadata());
- }
- inferred->field = inferred->field->WithMetadata(field_metadata);
- modified = true;
- }
-
- return modified;
-}
-
-Result<bool> ApplyOriginalMetadata(const Field& origin_field, SchemaField* inferred) {
- bool modified = false;
-
- auto origin_type = origin_field.type();
- auto inferred_type = inferred->field->type();
-
- if (origin_type->id() == ::arrow::Type::EXTENSION) {
- const auto& ex_type = checked_cast<const ::arrow::ExtensionType&>(*origin_type);
- auto origin_storage_field = origin_field.WithType(ex_type.storage_type());
-
- // Apply metadata recursively to storage type
- RETURN_NOT_OK(ApplyOriginalStorageMetadata(*origin_storage_field, inferred));
-
- // Restore extension type, if the storage type is the same as inferred
- // from the Parquet type
- if (ex_type.storage_type()->Equals(*inferred->field->type())) {
- inferred->field = inferred->field->WithType(origin_type);
- }
- modified = true;
- } else {
- ARROW_ASSIGN_OR_RAISE(modified, ApplyOriginalStorageMetadata(origin_field, inferred));
- }
-
- return modified;
-}
-
-} // namespace
-
-Status FieldToNode(const std::shared_ptr<Field>& field,
- const WriterProperties& properties,
- const ArrowWriterProperties& arrow_properties, NodePtr* out) {
- return FieldToNode(field->name(), field, properties, arrow_properties, out);
-}
-
-Status ToParquetSchema(const ::arrow::Schema* arrow_schema,
- const WriterProperties& properties,
- const ArrowWriterProperties& arrow_properties,
- std::shared_ptr<SchemaDescriptor>* out) {
- std::vector<NodePtr> nodes(arrow_schema->num_fields());
- for (int i = 0; i < arrow_schema->num_fields(); i++) {
- RETURN_NOT_OK(
- FieldToNode(arrow_schema->field(i), properties, arrow_properties, &nodes[i]));
- }
-
- NodePtr schema = GroupNode::Make("schema", Repetition::REQUIRED, nodes);
- *out = std::make_shared<::parquet::SchemaDescriptor>();
- PARQUET_CATCH_NOT_OK((*out)->Init(schema));
-
- return Status::OK();
-}
-
-Status ToParquetSchema(const ::arrow::Schema* arrow_schema,
- const WriterProperties& properties,
- std::shared_ptr<SchemaDescriptor>* out) {
- return ToParquetSchema(arrow_schema, properties, *default_arrow_writer_properties(),
- out);
-}
-
-Status FromParquetSchema(
- const SchemaDescriptor* schema, const ArrowReaderProperties& properties,
- const std::shared_ptr<const KeyValueMetadata>& key_value_metadata,
- std::shared_ptr<::arrow::Schema>* out) {
- SchemaManifest manifest;
- RETURN_NOT_OK(SchemaManifest::Make(schema, key_value_metadata, properties, &manifest));
- std::vector<std::shared_ptr<Field>> fields(manifest.schema_fields.size());
-
- for (int i = 0; i < static_cast<int>(fields.size()); i++) {
- const auto& schema_field = manifest.schema_fields[i];
- fields[i] = schema_field.field;
- }
- if (manifest.origin_schema) {
- // ARROW-8980: If the ARROW:schema was in the input metadata, then
- // manifest.origin_schema will have it scrubbed out
- *out = ::arrow::schema(fields, manifest.origin_schema->metadata());
- } else {
- *out = ::arrow::schema(fields, key_value_metadata);
- }
- return Status::OK();
-}
-
-Status FromParquetSchema(const SchemaDescriptor* parquet_schema,
- const ArrowReaderProperties& properties,
- std::shared_ptr<::arrow::Schema>* out) {
- return FromParquetSchema(parquet_schema, properties, nullptr, out);
-}
-
-Status FromParquetSchema(const SchemaDescriptor* parquet_schema,
- std::shared_ptr<::arrow::Schema>* out) {
- ArrowReaderProperties properties;
- return FromParquetSchema(parquet_schema, properties, nullptr, out);
-}
-
-Status SchemaManifest::Make(const SchemaDescriptor* schema,
- const std::shared_ptr<const KeyValueMetadata>& metadata,
- const ArrowReaderProperties& properties,
- SchemaManifest* manifest) {
- SchemaTreeContext ctx;
- ctx.manifest = manifest;
- ctx.properties = properties;
- ctx.schema = schema;
- const GroupNode& schema_node = *schema->group_node();
- manifest->descr = schema;
- manifest->schema_fields.resize(schema_node.field_count());
-
- // Try to deserialize original Arrow schema
- RETURN_NOT_OK(
- GetOriginSchema(metadata, &manifest->schema_metadata, &manifest->origin_schema));
- // Ignore original schema if it's not compatible with the Parquet schema
- if (manifest->origin_schema != nullptr &&
- manifest->origin_schema->num_fields() != schema_node.field_count()) {
- manifest->origin_schema = nullptr;
- }
-
- for (int i = 0; i < static_cast<int>(schema_node.field_count()); ++i) {
- SchemaField* out_field = &manifest->schema_fields[i];
- RETURN_NOT_OK(NodeToSchemaField(*schema_node.field(i), LevelInfo(), &ctx,
- /*parent=*/nullptr, out_field));
-
- // TODO(wesm): as follow up to ARROW-3246, we should really pass the origin
- // schema (if any) through all functions in the schema reconstruction, but
- // I'm being lazy and just setting dictionary fields at the top level for
- // now
- if (manifest->origin_schema == nullptr) {
- continue;
- }
-
- auto origin_field = manifest->origin_schema->field(i);
- RETURN_NOT_OK(ApplyOriginalMetadata(*origin_field, out_field));
- }
- return Status::OK();
-}
-
-} // namespace arrow
-} // namespace parquet
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "parquet/arrow/schema.h"
+
+#include <functional>
+#include <string>
+#include <vector>
+
+#include "arrow/extension_type.h"
+#include "arrow/io/memory.h"
+#include "arrow/ipc/api.h"
+#include "arrow/result_internal.h"
+#include "arrow/type.h"
+#include "arrow/util/base64.h"
+#include "arrow/util/checked_cast.h"
+#include "arrow/util/key_value_metadata.h"
+#include "arrow/util/logging.h"
+#include "arrow/util/value_parsing.h"
+
+#include "parquet/arrow/schema_internal.h"
+#include "parquet/exception.h"
+#include "parquet/properties.h"
+#include "parquet/types.h"
+
+using arrow::DecimalType;
+using arrow::Field;
+using arrow::FieldVector;
+using arrow::KeyValueMetadata;
+using arrow::Status;
+using arrow::internal::checked_cast;
+
+using ArrowType = arrow::DataType;
+using ArrowTypeId = arrow::Type;
+
+using parquet::Repetition;
+using parquet::schema::GroupNode;
+using parquet::schema::Node;
+using parquet::schema::NodePtr;
+using parquet::schema::PrimitiveNode;
+
+using ParquetType = parquet::Type;
+using parquet::ConvertedType;
+using parquet::LogicalType;
+
+using parquet::internal::LevelInfo;
+
+namespace parquet {
+
+namespace arrow {
+
+// ----------------------------------------------------------------------
+// Parquet to Arrow schema conversion
+
+namespace {
+
+Repetition::type RepetitionFromNullable(bool is_nullable) {
+ return is_nullable ? Repetition::OPTIONAL : Repetition::REQUIRED;
+}
+
+Status FieldToNode(const std::string& name, const std::shared_ptr<Field>& field,
+ const WriterProperties& properties,
+ const ArrowWriterProperties& arrow_properties, NodePtr* out);
+
+Status ListToNode(const std::shared_ptr<::arrow::BaseListType>& type,
+ const std::string& name, bool nullable,
+ const WriterProperties& properties,
+ const ArrowWriterProperties& arrow_properties, NodePtr* out) {
+ NodePtr element;
+ std::string value_name =
+ arrow_properties.compliant_nested_types() ? "element" : type->value_field()->name();
+ RETURN_NOT_OK(FieldToNode(value_name, type->value_field(), properties, arrow_properties,
+ &element));
+
+ NodePtr list = GroupNode::Make("list", Repetition::REPEATED, {element});
+ *out = GroupNode::Make(name, RepetitionFromNullable(nullable), {list},
+ LogicalType::List());
+ return Status::OK();
+}
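+
+// For example, with compliant_nested_types() enabled, an Arrow field
+// `f: list<int32>` (nullable, nullable values) maps to the 3-level form
+// (illustrative sketch):
+//
+//   optional group f (LIST) {
+//     repeated group list {
+//       optional int32 element;
+//     }
+//   }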
+
+Status MapToNode(const std::shared_ptr<::arrow::MapType>& type, const std::string& name,
+ bool nullable, const WriterProperties& properties,
+ const ArrowWriterProperties& arrow_properties, NodePtr* out) {
+ // TODO: Should we offer a non-compliant mode that forwards the type names?
+ NodePtr key_node;
+ RETURN_NOT_OK(
+ FieldToNode("key", type->key_field(), properties, arrow_properties, &key_node));
+
+ NodePtr value_node;
+ RETURN_NOT_OK(FieldToNode("value", type->item_field(), properties, arrow_properties,
+ &value_node));
+
+ NodePtr key_value =
+ GroupNode::Make("key_value", Repetition::REPEATED, {key_node, value_node});
+ *out = GroupNode::Make(name, RepetitionFromNullable(nullable), {key_value},
+ LogicalType::Map());
+ return Status::OK();
+}
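+
+// For example, an Arrow field `m: map<string, int32>` (nullable) maps to
+// (illustrative sketch):
+//
+//   optional group m (MAP) {
+//     repeated group key_value {
+//       required binary key (STRING);
+//       optional int32 value;
+//     }
+//   }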
+
+Status StructToNode(const std::shared_ptr<::arrow::StructType>& type,
+ const std::string& name, bool nullable,
+ const WriterProperties& properties,
+ const ArrowWriterProperties& arrow_properties, NodePtr* out) {
+ std::vector<NodePtr> children(type->num_fields());
+ if (type->num_fields() != 0) {
+ for (int i = 0; i < type->num_fields(); i++) {
+ RETURN_NOT_OK(FieldToNode(type->field(i)->name(), type->field(i), properties,
+ arrow_properties, &children[i]));
+ }
+ } else {
+ // XXX (ARROW-10928) We could add a dummy primitive node but that would
+ // require special handling when writing and reading, to avoid column index
+ // mismatches.
+ return Status::NotImplemented("Cannot write struct type '", name,
+ "' with no child field to Parquet. "
+ "Consider adding a dummy child field.");
+ }
+
+ *out = GroupNode::Make(name, RepetitionFromNullable(nullable), std::move(children));
+ return Status::OK();
+}
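+
+// For example, a non-nullable Arrow field `s: struct<a: int32, b: string>`
+// maps to (illustrative sketch):
+//
+//   required group s {
+//     optional int32 a;
+//     optional binary b (STRING);
+//   }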
+
+static std::shared_ptr<const LogicalType> TimestampLogicalTypeFromArrowTimestamp(
+ const ::arrow::TimestampType& timestamp_type, ::arrow::TimeUnit::type time_unit) {
+ const bool utc = !(timestamp_type.timezone().empty());
+ // ARROW-5878(wesm): for forward compatibility reasons, and because
+ // there's no other way to signal to old readers that values are
+ // timestamps, we force the ConvertedType field to be set to the
+ // corresponding TIMESTAMP_* value. This does cause some ambiguity
+ // as Parquet readers have not been consistent about the
+ // interpretation of TIMESTAMP_* values as being UTC-normalized.
+ switch (time_unit) {
+ case ::arrow::TimeUnit::MILLI:
+ return LogicalType::Timestamp(utc, LogicalType::TimeUnit::MILLIS,
+ /*is_from_converted_type=*/false,
+ /*force_set_converted_type=*/true);
+ case ::arrow::TimeUnit::MICRO:
+ return LogicalType::Timestamp(utc, LogicalType::TimeUnit::MICROS,
+ /*is_from_converted_type=*/false,
+ /*force_set_converted_type=*/true);
+ case ::arrow::TimeUnit::NANO:
+ return LogicalType::Timestamp(utc, LogicalType::TimeUnit::NANOS);
+ case ::arrow::TimeUnit::SECOND:
+ // No equivalent parquet logical type.
+ break;
+ }
+ return LogicalType::None();
+}
+
+static Status GetTimestampMetadata(const ::arrow::TimestampType& type,
+ const WriterProperties& properties,
+ const ArrowWriterProperties& arrow_properties,
+ ParquetType::type* physical_type,
+ std::shared_ptr<const LogicalType>* logical_type) {
+ const bool coerce = arrow_properties.coerce_timestamps_enabled();
+ const auto target_unit =
+ coerce ? arrow_properties.coerce_timestamps_unit() : type.unit();
+
+  // The user is explicitly asking for Impala int96 encoding; there is no
+  // corresponding logical type.
+ if (arrow_properties.support_deprecated_int96_timestamps()) {
+ *physical_type = ParquetType::INT96;
+ return Status::OK();
+ }
+
+ *physical_type = ParquetType::INT64;
+ *logical_type = TimestampLogicalTypeFromArrowTimestamp(type, target_unit);
+
+ // The user is explicitly asking for timestamp data to be converted to the
+ // specified units (target_unit).
+ if (coerce) {
+ if (properties.version() == ::parquet::ParquetVersion::PARQUET_1_0) {
+ switch (target_unit) {
+ case ::arrow::TimeUnit::MILLI:
+ case ::arrow::TimeUnit::MICRO:
+ break;
+ case ::arrow::TimeUnit::NANO:
+ case ::arrow::TimeUnit::SECOND:
+ return Status::NotImplemented(
+ "For Parquet version 1.0 files, can only coerce Arrow timestamps to "
+ "milliseconds or microseconds");
+ }
+ } else {
+ switch (target_unit) {
+ case ::arrow::TimeUnit::MILLI:
+ case ::arrow::TimeUnit::MICRO:
+ case ::arrow::TimeUnit::NANO:
+ break;
+ case ::arrow::TimeUnit::SECOND:
+ return Status::NotImplemented(
+ "For Parquet files, can only coerce Arrow timestamps to milliseconds, "
+ "microseconds, or nanoseconds");
+ }
+ }
+ return Status::OK();
+ }
+
+ // The user implicitly wants timestamp data to retain its original time units,
+ // however the ConvertedType field used to indicate logical types for Parquet
+ // version 1.0 fields does not allow for nanosecond time units and so nanoseconds
+ // must be coerced to microseconds.
+ if (properties.version() == ::parquet::ParquetVersion::PARQUET_1_0 &&
+ type.unit() == ::arrow::TimeUnit::NANO) {
+ *logical_type =
+ TimestampLogicalTypeFromArrowTimestamp(type, ::arrow::TimeUnit::MICRO);
+ return Status::OK();
+ }
+
+ // The user implicitly wants timestamp data to retain its original time units,
+ // however the Arrow seconds time unit can not be represented (annotated) in
+ // any version of Parquet and so must be coerced to milliseconds.
+ if (type.unit() == ::arrow::TimeUnit::SECOND) {
+ *logical_type =
+ TimestampLogicalTypeFromArrowTimestamp(type, ::arrow::TimeUnit::MILLI);
+ return Status::OK();
+ }
+
+ return Status::OK();
+}
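+
+// For example (no coercion requested): a timestamp(SECOND) column is annotated
+// as Timestamp(MILLIS) because Parquet has no seconds unit, and under
+// PARQUET_1_0 a timestamp(NANO) column is annotated as Timestamp(MICROS).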
+
+static constexpr char FIELD_ID_KEY[] = "PARQUET:field_id";
+
+std::shared_ptr<::arrow::KeyValueMetadata> FieldIdMetadata(int field_id) {
+ if (field_id >= 0) {
+ return ::arrow::key_value_metadata({FIELD_ID_KEY}, {std::to_string(field_id)});
+ } else {
+ return nullptr;
+ }
+}
+
+int FieldIdFromMetadata(
+ const std::shared_ptr<const ::arrow::KeyValueMetadata>& metadata) {
+ if (!metadata) {
+ return -1;
+ }
+ int key = metadata->FindKey(FIELD_ID_KEY);
+ if (key < 0) {
+ return -1;
+ }
+ std::string field_id_str = metadata->value(key);
+ int field_id;
+ if (::arrow::internal::ParseValue<::arrow::Int32Type>(
+ field_id_str.c_str(), field_id_str.length(), &field_id)) {
+ if (field_id < 0) {
+      // Thrift should convert any negative value to null, but normalize to -1
+      // here in case later logic checks this value.
+ return -1;
+ }
+ return field_id;
+ } else {
+ return -1;
+ }
+}
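+
+// For example, a field whose metadata contains {"PARQUET:field_id": "42"}
+// round-trips as Parquet field_id 42; a missing key, an unparsable value, or
+// a negative value all normalize to -1 (meaning "no field id").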
+
+Status FieldToNode(const std::string& name, const std::shared_ptr<Field>& field,
+ const WriterProperties& properties,
+ const ArrowWriterProperties& arrow_properties, NodePtr* out) {
+ std::shared_ptr<const LogicalType> logical_type = LogicalType::None();
+ ParquetType::type type;
+ Repetition::type repetition = RepetitionFromNullable(field->nullable());
+
+ int length = -1;
+ int precision = -1;
+ int scale = -1;
+
+ switch (field->type()->id()) {
+ case ArrowTypeId::NA: {
+ type = ParquetType::INT32;
+ logical_type = LogicalType::Null();
+ if (repetition != Repetition::OPTIONAL) {
+ return Status::Invalid("NullType Arrow field must be nullable");
+ }
+ } break;
+ case ArrowTypeId::BOOL:
+ type = ParquetType::BOOLEAN;
+ break;
+ case ArrowTypeId::UINT8:
+ type = ParquetType::INT32;
+ logical_type = LogicalType::Int(8, false);
+ break;
+ case ArrowTypeId::INT8:
+ type = ParquetType::INT32;
+ logical_type = LogicalType::Int(8, true);
+ break;
+ case ArrowTypeId::UINT16:
+ type = ParquetType::INT32;
+ logical_type = LogicalType::Int(16, false);
+ break;
+ case ArrowTypeId::INT16:
+ type = ParquetType::INT32;
+ logical_type = LogicalType::Int(16, true);
+ break;
+ case ArrowTypeId::UINT32:
+ if (properties.version() == ::parquet::ParquetVersion::PARQUET_1_0) {
+ type = ParquetType::INT64;
+ } else {
+ type = ParquetType::INT32;
+ logical_type = LogicalType::Int(32, false);
+ }
+ break;
+ case ArrowTypeId::INT32:
+ type = ParquetType::INT32;
+ break;
+ case ArrowTypeId::UINT64:
+ type = ParquetType::INT64;
+ logical_type = LogicalType::Int(64, false);
+ break;
+ case ArrowTypeId::INT64:
+ type = ParquetType::INT64;
+ break;
+ case ArrowTypeId::FLOAT:
+ type = ParquetType::FLOAT;
+ break;
+ case ArrowTypeId::DOUBLE:
+ type = ParquetType::DOUBLE;
+ break;
+ case ArrowTypeId::LARGE_STRING:
+ case ArrowTypeId::STRING:
+ type = ParquetType::BYTE_ARRAY;
+ logical_type = LogicalType::String();
+ break;
+ case ArrowTypeId::LARGE_BINARY:
+ case ArrowTypeId::BINARY:
+ type = ParquetType::BYTE_ARRAY;
+ break;
+ case ArrowTypeId::FIXED_SIZE_BINARY: {
+ type = ParquetType::FIXED_LEN_BYTE_ARRAY;
+ const auto& fixed_size_binary_type =
+ static_cast<const ::arrow::FixedSizeBinaryType&>(*field->type());
+ length = fixed_size_binary_type.byte_width();
+ } break;
+ case ArrowTypeId::DECIMAL128:
+ case ArrowTypeId::DECIMAL256: {
+ type = ParquetType::FIXED_LEN_BYTE_ARRAY;
+ const auto& decimal_type = static_cast<const ::arrow::DecimalType&>(*field->type());
+ precision = decimal_type.precision();
+ scale = decimal_type.scale();
+ length = DecimalType::DecimalSize(precision);
+ PARQUET_CATCH_NOT_OK(logical_type = LogicalType::Decimal(precision, scale));
+ } break;
+ case ArrowTypeId::DATE32:
+ type = ParquetType::INT32;
+ logical_type = LogicalType::Date();
+ break;
+ case ArrowTypeId::DATE64:
+ type = ParquetType::INT32;
+ logical_type = LogicalType::Date();
+ break;
+ case ArrowTypeId::TIMESTAMP:
+ RETURN_NOT_OK(
+ GetTimestampMetadata(static_cast<::arrow::TimestampType&>(*field->type()),
+ properties, arrow_properties, &type, &logical_type));
+ break;
+ case ArrowTypeId::TIME32:
+ type = ParquetType::INT32;
+ logical_type =
+ LogicalType::Time(/*is_adjusted_to_utc=*/true, LogicalType::TimeUnit::MILLIS);
+ break;
+ case ArrowTypeId::TIME64: {
+ type = ParquetType::INT64;
+ auto time_type = static_cast<::arrow::Time64Type*>(field->type().get());
+ if (time_type->unit() == ::arrow::TimeUnit::NANO) {
+ logical_type =
+ LogicalType::Time(/*is_adjusted_to_utc=*/true, LogicalType::TimeUnit::NANOS);
+ } else {
+ logical_type =
+ LogicalType::Time(/*is_adjusted_to_utc=*/true, LogicalType::TimeUnit::MICROS);
+ }
+ } break;
+ case ArrowTypeId::STRUCT: {
+ auto struct_type = std::static_pointer_cast<::arrow::StructType>(field->type());
+ return StructToNode(struct_type, name, field->nullable(), properties,
+ arrow_properties, out);
+ }
+ case ArrowTypeId::FIXED_SIZE_LIST:
+ case ArrowTypeId::LARGE_LIST:
+ case ArrowTypeId::LIST: {
+ auto list_type = std::static_pointer_cast<::arrow::BaseListType>(field->type());
+ return ListToNode(list_type, name, field->nullable(), properties, arrow_properties,
+ out);
+ }
+ case ArrowTypeId::DICTIONARY: {
+      // Parquet has no Dictionary type; dictionary encoding is handled at the
+      // encoding level, not the schema level.
+ const ::arrow::DictionaryType& dict_type =
+ static_cast<const ::arrow::DictionaryType&>(*field->type());
+ std::shared_ptr<::arrow::Field> unpacked_field = ::arrow::field(
+ name, dict_type.value_type(), field->nullable(), field->metadata());
+ return FieldToNode(name, unpacked_field, properties, arrow_properties, out);
+ }
+ case ArrowTypeId::EXTENSION: {
+ auto ext_type = std::static_pointer_cast<::arrow::ExtensionType>(field->type());
+ std::shared_ptr<::arrow::Field> storage_field = ::arrow::field(
+ name, ext_type->storage_type(), field->nullable(), field->metadata());
+ return FieldToNode(name, storage_field, properties, arrow_properties, out);
+ }
+ case ArrowTypeId::MAP: {
+ auto map_type = std::static_pointer_cast<::arrow::MapType>(field->type());
+ return MapToNode(map_type, name, field->nullable(), properties, arrow_properties,
+ out);
+ }
+
+ default: {
+      // TODO: DENSE_UNION, SPARSE_UNION, JSON_SCALAR, DECIMAL_TEXT, VARCHAR
+ return Status::NotImplemented(
+ "Unhandled type for Arrow to Parquet schema conversion: ",
+ field->type()->ToString());
+ }
+ }
+
+ int field_id = FieldIdFromMetadata(field->metadata());
+ PARQUET_CATCH_NOT_OK(*out = PrimitiveNode::Make(name, repetition, logical_type, type,
+ length, field_id));
+
+ return Status::OK();
+}
+
+struct SchemaTreeContext {
+ SchemaManifest* manifest;
+ ArrowReaderProperties properties;
+ const SchemaDescriptor* schema;
+
+ void LinkParent(const SchemaField* child, const SchemaField* parent) {
+ manifest->child_to_parent[child] = parent;
+ }
+
+ void RecordLeaf(const SchemaField* leaf) {
+ manifest->column_index_to_field[leaf->column_index] = leaf;
+ }
+};
+
+bool IsDictionaryReadSupported(const ArrowType& type) {
+ // Only supported currently for BYTE_ARRAY types
+ return type.id() == ::arrow::Type::BINARY || type.id() == ::arrow::Type::STRING;
+}
+
+// ----------------------------------------------------------------------
+// Schema logic
+
+::arrow::Result<std::shared_ptr<ArrowType>> GetTypeForNode(
+ int column_index, const schema::PrimitiveNode& primitive_node,
+ SchemaTreeContext* ctx) {
+ ASSIGN_OR_RAISE(
+ std::shared_ptr<ArrowType> storage_type,
+ GetArrowType(primitive_node, ctx->properties.coerce_int96_timestamp_unit()));
+ if (ctx->properties.read_dictionary(column_index) &&
+ IsDictionaryReadSupported(*storage_type)) {
+ return ::arrow::dictionary(::arrow::int32(), storage_type);
+ }
+ return storage_type;
+}
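+
+// For example, if ArrowReaderProperties::set_read_dictionary(i, true) was
+// called for a BYTE_ARRAY (STRING) leaf at column i, the node maps to
+// dictionary<values=utf8, indices=int32> instead of plain utf8.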
+
+Status NodeToSchemaField(const Node& node, LevelInfo current_levels,
+ SchemaTreeContext* ctx, const SchemaField* parent,
+ SchemaField* out);
+
+Status GroupToSchemaField(const GroupNode& node, LevelInfo current_levels,
+ SchemaTreeContext* ctx, const SchemaField* parent,
+ SchemaField* out);
+
+Status PopulateLeaf(int column_index, const std::shared_ptr<Field>& field,
+ LevelInfo current_levels, SchemaTreeContext* ctx,
+ const SchemaField* parent, SchemaField* out) {
+ out->field = field;
+ out->column_index = column_index;
+ out->level_info = current_levels;
+ ctx->RecordLeaf(out);
+ ctx->LinkParent(out, parent);
+ return Status::OK();
+}
+
+// Special case mentioned in the format spec:
+// If the name is array or ends in _tuple, this should be a list of struct
+// even for single child elements.
+bool HasStructListName(const GroupNode& node) {
+ ::arrow::util::string_view name{node.name()};
+ return name == "array" || name.ends_with("_tuple");
+}
+
+Status GroupToStruct(const GroupNode& node, LevelInfo current_levels,
+ SchemaTreeContext* ctx, const SchemaField* parent,
+ SchemaField* out) {
+ std::vector<std::shared_ptr<Field>> arrow_fields;
+ out->children.resize(node.field_count());
+ // All level increments for the node are expected to happen by callers.
+  // This is required because repeated elements need to have their own
+ // SchemaField.
+
+ for (int i = 0; i < node.field_count(); i++) {
+ RETURN_NOT_OK(
+ NodeToSchemaField(*node.field(i), current_levels, ctx, out, &out->children[i]));
+ arrow_fields.push_back(out->children[i].field);
+ }
+ auto struct_type = ::arrow::struct_(arrow_fields);
+ out->field = ::arrow::field(node.name(), struct_type, node.is_optional(),
+ FieldIdMetadata(node.field_id()));
+ out->level_info = current_levels;
+ return Status::OK();
+}
+
+Status ListToSchemaField(const GroupNode& group, LevelInfo current_levels,
+ SchemaTreeContext* ctx, const SchemaField* parent,
+ SchemaField* out);
+
+Status MapToSchemaField(const GroupNode& group, LevelInfo current_levels,
+ SchemaTreeContext* ctx, const SchemaField* parent,
+ SchemaField* out) {
+ if (group.field_count() != 1) {
+ return Status::Invalid("MAP-annotated groups must have a single child.");
+ }
+ if (group.is_repeated()) {
+ return Status::Invalid("MAP-annotated groups must not be repeated.");
+ }
+
+ const Node& key_value_node = *group.field(0);
+
+ if (!key_value_node.is_repeated()) {
+ return Status::Invalid(
+ "Non-repeated key value in a MAP-annotated group are not supported.");
+ }
+
+ if (!key_value_node.is_group()) {
+ return Status::Invalid("Key-value node must be a group.");
+ }
+
+ const GroupNode& key_value = checked_cast<const GroupNode&>(key_value_node);
+ if (key_value.field_count() != 1 && key_value.field_count() != 2) {
+ return Status::Invalid("Key-value map node must have 1 or 2 child elements. Found: ",
+ key_value.field_count());
+ }
+ const Node& key_node = *key_value.field(0);
+ if (!key_node.is_required()) {
+ return Status::Invalid("Map keys must be annotated as required.");
+ }
+  // Arrow doesn't support one-column maps (i.e. sets). The options are either
+  // to make the values column nullable or to process the map as a list. We
+  // choose the latter as it is simpler.
+ if (key_value.field_count() == 1) {
+ return ListToSchemaField(group, current_levels, ctx, parent, out);
+ }
+
+ current_levels.Increment(group);
+ int16_t repeated_ancestor_def_level = current_levels.IncrementRepeated();
+
+ out->children.resize(1);
+ SchemaField* key_value_field = &out->children[0];
+
+ key_value_field->children.resize(2);
+ SchemaField* key_field = &key_value_field->children[0];
+ SchemaField* value_field = &key_value_field->children[1];
+
+ ctx->LinkParent(out, parent);
+ ctx->LinkParent(key_value_field, out);
+ ctx->LinkParent(key_field, key_value_field);
+ ctx->LinkParent(value_field, key_value_field);
+
+ // required/optional group name=whatever {
+  //   repeated group name=key_values {
+ // required TYPE key;
+ // required/optional TYPE value;
+ // }
+ // }
+ //
+
+ RETURN_NOT_OK(NodeToSchemaField(*key_value.field(0), current_levels, ctx,
+ key_value_field, key_field));
+ RETURN_NOT_OK(NodeToSchemaField(*key_value.field(1), current_levels, ctx,
+ key_value_field, value_field));
+
+ key_value_field->field = ::arrow::field(
+ group.name(), ::arrow::struct_({key_field->field, value_field->field}),
+ /*nullable=*/false, FieldIdMetadata(key_value.field_id()));
+ key_value_field->level_info = current_levels;
+
+ out->field = ::arrow::field(group.name(),
+ ::arrow::map(key_field->field->type(), value_field->field),
+ group.is_optional(), FieldIdMetadata(group.field_id()));
+ out->level_info = current_levels;
+  // At this point current_levels contains the def level for this list,
+ // we need to reset to the prior parent.
+ out->level_info.repeated_ancestor_def_level = repeated_ancestor_def_level;
+ return Status::OK();
+}
+
+Status ListToSchemaField(const GroupNode& group, LevelInfo current_levels,
+ SchemaTreeContext* ctx, const SchemaField* parent,
+ SchemaField* out) {
+ if (group.field_count() != 1) {
+ return Status::Invalid("LIST-annotated groups must have a single child.");
+ }
+ if (group.is_repeated()) {
+ return Status::Invalid("LIST-annotated groups must not be repeated.");
+ }
+ current_levels.Increment(group);
+
+ out->children.resize(group.field_count());
+ SchemaField* child_field = &out->children[0];
+
+ ctx->LinkParent(out, parent);
+ ctx->LinkParent(child_field, out);
+
+ const Node& list_node = *group.field(0);
+
+ if (!list_node.is_repeated()) {
+ return Status::Invalid(
+ "Non-repeated nodes in a LIST-annotated group are not supported.");
+ }
+
+ int16_t repeated_ancestor_def_level = current_levels.IncrementRepeated();
+ if (list_node.is_group()) {
+ // Resolve 3-level encoding
+ //
+ // required/optional group name=whatever {
+ // repeated group name=list {
+ // required/optional TYPE item;
+ // }
+ // }
+ //
+ // yields list<item: TYPE ?nullable> ?nullable
+ //
+ // We distinguish the special case that we have
+ //
+ // required/optional group name=whatever {
+ // repeated group name=array or $SOMETHING_tuple {
+ // required/optional TYPE item;
+ // }
+ // }
+ //
+ // In this latter case, the inner type of the list should be a struct
+ // rather than a primitive value
+ //
+ // yields list<item: struct<item: TYPE ?nullable> not null> ?nullable
+ const auto& list_group = static_cast<const GroupNode&>(list_node);
+ // Special case mentioned in the format spec:
+ // If the name is array or ends in _tuple, this should be a list of struct
+ // even for single child elements.
+ if (list_group.field_count() == 1 && !HasStructListName(list_group)) {
+ // List of primitive type
+ RETURN_NOT_OK(
+ NodeToSchemaField(*list_group.field(0), current_levels, ctx, out, child_field));
+ } else {
+ RETURN_NOT_OK(GroupToStruct(list_group, current_levels, ctx, out, child_field));
+ }
+ } else {
+ // Two-level list encoding
+ //
+ // required/optional group LIST {
+ // repeated TYPE;
+ // }
+ const auto& primitive_node = static_cast<const PrimitiveNode&>(list_node);
+ int column_index = ctx->schema->GetColumnIndex(primitive_node);
+ ASSIGN_OR_RAISE(std::shared_ptr<ArrowType> type,
+ GetTypeForNode(column_index, primitive_node, ctx));
+ auto item_field = ::arrow::field(list_node.name(), type, /*nullable=*/false,
+ FieldIdMetadata(list_node.field_id()));
+ RETURN_NOT_OK(
+ PopulateLeaf(column_index, item_field, current_levels, ctx, out, child_field));
+ }
+ out->field = ::arrow::field(group.name(), ::arrow::list(child_field->field),
+ group.is_optional(), FieldIdMetadata(group.field_id()));
+ out->level_info = current_levels;
+  // At this point current_levels contains the def level for this list,
+ // we need to reset to the prior parent.
+ out->level_info.repeated_ancestor_def_level = repeated_ancestor_def_level;
+ return Status::OK();
+}
+
+Status GroupToSchemaField(const GroupNode& node, LevelInfo current_levels,
+ SchemaTreeContext* ctx, const SchemaField* parent,
+ SchemaField* out) {
+ if (node.logical_type()->is_list()) {
+ return ListToSchemaField(node, current_levels, ctx, parent, out);
+ } else if (node.logical_type()->is_map()) {
+ return MapToSchemaField(node, current_levels, ctx, parent, out);
+ }
+ std::shared_ptr<ArrowType> type;
+ if (node.is_repeated()) {
+ // Simple repeated struct
+ //
+ // repeated group $NAME {
+ // r/o TYPE[0] f0
+ // r/o TYPE[1] f1
+ // }
+ out->children.resize(1);
+
+ int16_t repeated_ancestor_def_level = current_levels.IncrementRepeated();
+ RETURN_NOT_OK(GroupToStruct(node, current_levels, ctx, out, &out->children[0]));
+ out->field = ::arrow::field(node.name(), ::arrow::list(out->children[0].field),
+ /*nullable=*/false, FieldIdMetadata(node.field_id()));
+
+ ctx->LinkParent(&out->children[0], out);
+ out->level_info = current_levels;
+    // At this point current_levels contains this list as the def level, so we
+    // need to use the previous ancestor of this list.
+ out->level_info.repeated_ancestor_def_level = repeated_ancestor_def_level;
+ return Status::OK();
+ } else {
+ current_levels.Increment(node);
+ return GroupToStruct(node, current_levels, ctx, parent, out);
+ }
+}
+
+Status NodeToSchemaField(const Node& node, LevelInfo current_levels,
+ SchemaTreeContext* ctx, const SchemaField* parent,
+ SchemaField* out) {
+ // Workhorse function for converting a Parquet schema node to an Arrow
+ // type. Handles different conventions for nested data.
+
+ ctx->LinkParent(out, parent);
+
+  // Now, walk the schema and create a SchemaField for each leaf node
+ if (node.is_group()) {
+ // A nested field, but we don't know what kind yet
+ return GroupToSchemaField(static_cast<const GroupNode&>(node), current_levels, ctx,
+ parent, out);
+ } else {
+ // Either a normal flat primitive type, or a list type encoded with 1-level
+ // list encoding. Note that the 3-level encoding is the form recommended by
+    // the Parquet specification, but technically we can have either
+ //
+ // required/optional $TYPE $FIELD_NAME
+ //
+ // or
+ //
+ // repeated $TYPE $FIELD_NAME
+ const auto& primitive_node = static_cast<const PrimitiveNode&>(node);
+ int column_index = ctx->schema->GetColumnIndex(primitive_node);
+ ASSIGN_OR_RAISE(std::shared_ptr<ArrowType> type,
+ GetTypeForNode(column_index, primitive_node, ctx));
+ if (node.is_repeated()) {
+ // One-level list encoding, e.g.
+ // a: repeated int32;
+ int16_t repeated_ancestor_def_level = current_levels.IncrementRepeated();
+ out->children.resize(1);
+ auto child_field = ::arrow::field(node.name(), type, /*nullable=*/false);
+ RETURN_NOT_OK(PopulateLeaf(column_index, child_field, current_levels, ctx, out,
+ &out->children[0]));
+
+ out->field = ::arrow::field(node.name(), ::arrow::list(child_field),
+ /*nullable=*/false, FieldIdMetadata(node.field_id()));
+ out->level_info = current_levels;
+      // At this point current_levels has considered this list the ancestor, so
+      // restore the actual ancestor.
+ out->level_info.repeated_ancestor_def_level = repeated_ancestor_def_level;
+ return Status::OK();
+ } else {
+ current_levels.Increment(node);
+ // A normal (required/optional) primitive node
+ return PopulateLeaf(column_index,
+ ::arrow::field(node.name(), type, node.is_optional(),
+ FieldIdMetadata(node.field_id())),
+ current_levels, ctx, parent, out);
+ }
+ }
+}
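+
+// For example, the one-level field `repeated int32 a;` reads back as
+// `a: list<a: int32 not null> not null` (illustrative).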
+
+// Get the original Arrow schema, as serialized in the Parquet metadata
+Status GetOriginSchema(const std::shared_ptr<const KeyValueMetadata>& metadata,
+ std::shared_ptr<const KeyValueMetadata>* clean_metadata,
+ std::shared_ptr<::arrow::Schema>* out) {
+ if (metadata == nullptr) {
+ *out = nullptr;
+ *clean_metadata = nullptr;
+ return Status::OK();
+ }
+
+ static const std::string kArrowSchemaKey = "ARROW:schema";
+ int schema_index = metadata->FindKey(kArrowSchemaKey);
+ if (schema_index == -1) {
+ *out = nullptr;
+ *clean_metadata = metadata;
+ return Status::OK();
+ }
+
+ // The original Arrow schema was serialized using the store_schema option.
+ // We deserialize it here and use it to inform read options such as
+ // dictionary-encoded fields.
+ auto decoded = ::arrow::util::base64_decode(metadata->value(schema_index));
+ auto schema_buf = std::make_shared<Buffer>(decoded);
+
+ ::arrow::ipc::DictionaryMemo dict_memo;
+ ::arrow::io::BufferReader input(schema_buf);
+
+ ARROW_ASSIGN_OR_RAISE(*out, ::arrow::ipc::ReadSchema(&input, &dict_memo));
+
+ if (metadata->size() > 1) {
+ // Copy the metadata without the schema key
+ auto new_metadata = ::arrow::key_value_metadata({}, {});
+ new_metadata->reserve(metadata->size() - 1);
+ for (int64_t i = 0; i < metadata->size(); ++i) {
+ if (i == schema_index) continue;
+ new_metadata->Append(metadata->key(i), metadata->value(i));
+ }
+ *clean_metadata = new_metadata;
+ } else {
+ // No other keys, let metadata be null
+ *clean_metadata = nullptr;
+ }
+ return Status::OK();
+}
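+
+// For example, metadata {"ARROW:schema": <base64 IPC schema>, "foo": "bar"}
+// yields the deserialized origin schema plus clean metadata {"foo": "bar"};
+// if "ARROW:schema" was the only key, *clean_metadata is set to null.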
+
+// Restore original Arrow field information that was serialized as Parquet metadata
+// but that is not necessarily present in the field reconstituted from Parquet
+// data (for example, Parquet timestamp types don't carry timezone information).
+
+Result<bool> ApplyOriginalMetadata(const Field& origin_field, SchemaField* inferred);
+
+std::function<std::shared_ptr<::arrow::DataType>(FieldVector)> GetNestedFactory(
+ const ArrowType& origin_type, const ArrowType& inferred_type) {
+ switch (inferred_type.id()) {
+ case ::arrow::Type::STRUCT:
+ if (origin_type.id() == ::arrow::Type::STRUCT) {
+ return ::arrow::struct_;
+ }
+ break;
+ case ::arrow::Type::LIST:
+ if (origin_type.id() == ::arrow::Type::LIST) {
+ return [](FieldVector fields) {
+ DCHECK_EQ(fields.size(), 1);
+ return ::arrow::list(std::move(fields[0]));
+ };
+ }
+ if (origin_type.id() == ::arrow::Type::LARGE_LIST) {
+ return [](FieldVector fields) {
+ DCHECK_EQ(fields.size(), 1);
+ return ::arrow::large_list(std::move(fields[0]));
+ };
+ }
+ if (origin_type.id() == ::arrow::Type::FIXED_SIZE_LIST) {
+ const auto list_size =
+ checked_cast<const ::arrow::FixedSizeListType&>(origin_type).list_size();
+ return [list_size](FieldVector fields) {
+ DCHECK_EQ(fields.size(), 1);
+ return ::arrow::fixed_size_list(std::move(fields[0]), list_size);
+ };
+ }
+ break;
+ default:
+ break;
+ }
+ return {};
+}
+
+Result<bool> ApplyOriginalStorageMetadata(const Field& origin_field,
+ SchemaField* inferred) {
+ bool modified = false;
+
+ auto origin_type = origin_field.type();
+ auto inferred_type = inferred->field->type();
+
+ const int num_children = inferred_type->num_fields();
+
+ if (num_children > 0 && origin_type->num_fields() == num_children) {
+ DCHECK_EQ(static_cast<int>(inferred->children.size()), num_children);
+ const auto factory = GetNestedFactory(*origin_type, *inferred_type);
+ if (factory) {
+ // The type may be modified (e.g. LargeList) while the children stay the same
+ modified |= origin_type->id() != inferred_type->id();
+
+ // Apply original metadata recursively to children
+ for (int i = 0; i < inferred_type->num_fields(); ++i) {
+ ARROW_ASSIGN_OR_RAISE(
+ const bool child_modified,
+ ApplyOriginalMetadata(*origin_type->field(i), &inferred->children[i]));
+ modified |= child_modified;
+ }
+ if (modified) {
+ // Recreate this field using the modified child fields
+ ::arrow::FieldVector modified_children(inferred_type->num_fields());
+ for (int i = 0; i < inferred_type->num_fields(); ++i) {
+ modified_children[i] = inferred->children[i].field;
+ }
+ inferred->field =
+ inferred->field->WithType(factory(std::move(modified_children)));
+ }
+ }
+ }
+
+ if (origin_type->id() == ::arrow::Type::TIMESTAMP &&
+ inferred_type->id() == ::arrow::Type::TIMESTAMP) {
+ // Restore time zone, if any
+ const auto& ts_type = checked_cast<const ::arrow::TimestampType&>(*inferred_type);
+ const auto& ts_origin_type =
+ checked_cast<const ::arrow::TimestampType&>(*origin_type);
+
+ // If the data is tz-aware, then set the original time zone, since Parquet
+ // has no native storage for timezones
+ if (ts_type.timezone() == "UTC" && ts_origin_type.timezone() != "") {
+ if (ts_type.unit() == ts_origin_type.unit()) {
+ inferred->field = inferred->field->WithType(origin_type);
+ } else {
+ auto ts_type_new = ::arrow::timestamp(ts_type.unit(), ts_origin_type.timezone());
+ inferred->field = inferred->field->WithType(ts_type_new);
+ }
+ }
+ modified = true;
+ }
+
+ if (origin_type->id() == ::arrow::Type::DICTIONARY &&
+ inferred_type->id() != ::arrow::Type::DICTIONARY &&
+ IsDictionaryReadSupported(*inferred_type)) {
+    // Direct dictionary reads are only supported for a couple of primitive types,
+ // so no need to recurse on value types.
+ const auto& dict_origin_type =
+ checked_cast<const ::arrow::DictionaryType&>(*origin_type);
+ inferred->field = inferred->field->WithType(
+ ::arrow::dictionary(::arrow::int32(), inferred_type, dict_origin_type.ordered()));
+ modified = true;
+ }
+
+ if ((origin_type->id() == ::arrow::Type::LARGE_BINARY &&
+ inferred_type->id() == ::arrow::Type::BINARY) ||
+ (origin_type->id() == ::arrow::Type::LARGE_STRING &&
+ inferred_type->id() == ::arrow::Type::STRING)) {
+ // Read back binary-like arrays with the intended offset width.
+ inferred->field = inferred->field->WithType(origin_type);
+ modified = true;
+ }
+
+ if (origin_type->id() == ::arrow::Type::DECIMAL256 &&
+ inferred_type->id() == ::arrow::Type::DECIMAL128) {
+ inferred->field = inferred->field->WithType(origin_type);
+ modified = true;
+ }
+
+ // Restore field metadata
+ std::shared_ptr<const KeyValueMetadata> field_metadata = origin_field.metadata();
+ if (field_metadata != nullptr) {
+ if (inferred->field->metadata()) {
+ // Prefer the metadata keys (like field_id) from the current metadata
+ field_metadata = field_metadata->Merge(*inferred->field->metadata());
+ }
+ inferred->field = inferred->field->WithMetadata(field_metadata);
+ modified = true;
+ }
+
+ return modified;
+}
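+
+// For example, an origin field timestamp(NANO, "America/New_York") stored in
+// Parquet as Timestamp(MICROS, adjustedToUTC=true) is inferred back as
+// timestamp(MICRO, "UTC") and restored as timestamp(MICRO, "America/New_York"):
+// the unit follows the file, the time zone follows the original schema.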
+
+Result<bool> ApplyOriginalMetadata(const Field& origin_field, SchemaField* inferred) {
+ bool modified = false;
+
+ auto origin_type = origin_field.type();
+ auto inferred_type = inferred->field->type();
+
+ if (origin_type->id() == ::arrow::Type::EXTENSION) {
+ const auto& ex_type = checked_cast<const ::arrow::ExtensionType&>(*origin_type);
+ auto origin_storage_field = origin_field.WithType(ex_type.storage_type());
+
+ // Apply metadata recursively to storage type
+ RETURN_NOT_OK(ApplyOriginalStorageMetadata(*origin_storage_field, inferred));
+
+ // Restore extension type, if the storage type is the same as inferred
+ // from the Parquet type
+ if (ex_type.storage_type()->Equals(*inferred->field->type())) {
+ inferred->field = inferred->field->WithType(origin_type);
+ }
+ modified = true;
+ } else {
+ ARROW_ASSIGN_OR_RAISE(modified, ApplyOriginalStorageMetadata(origin_field, inferred));
+ }
+
+ return modified;
+}
+
+} // namespace
+
+Status FieldToNode(const std::shared_ptr<Field>& field,
+ const WriterProperties& properties,
+ const ArrowWriterProperties& arrow_properties, NodePtr* out) {
+ return FieldToNode(field->name(), field, properties, arrow_properties, out);
+}
+
+Status ToParquetSchema(const ::arrow::Schema* arrow_schema,
+ const WriterProperties& properties,
+ const ArrowWriterProperties& arrow_properties,
+ std::shared_ptr<SchemaDescriptor>* out) {
+ std::vector<NodePtr> nodes(arrow_schema->num_fields());
+ for (int i = 0; i < arrow_schema->num_fields(); i++) {
+ RETURN_NOT_OK(
+ FieldToNode(arrow_schema->field(i), properties, arrow_properties, &nodes[i]));
+ }
+
+ NodePtr schema = GroupNode::Make("schema", Repetition::REQUIRED, nodes);
+ *out = std::make_shared<::parquet::SchemaDescriptor>();
+ PARQUET_CATCH_NOT_OK((*out)->Init(schema));
+
+ return Status::OK();
+}
+
+Status ToParquetSchema(const ::arrow::Schema* arrow_schema,
+ const WriterProperties& properties,
+ std::shared_ptr<SchemaDescriptor>* out) {
+ return ToParquetSchema(arrow_schema, properties, *default_arrow_writer_properties(),
+ out);
+}
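+
+// A minimal usage sketch (variable names hypothetical):
+//
+//   std::shared_ptr<SchemaDescriptor> descr;
+//   RETURN_NOT_OK(ToParquetSchema(arrow_schema.get(),
+//                                 *default_writer_properties(), &descr));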
+
+Status FromParquetSchema(
+ const SchemaDescriptor* schema, const ArrowReaderProperties& properties,
+ const std::shared_ptr<const KeyValueMetadata>& key_value_metadata,
+ std::shared_ptr<::arrow::Schema>* out) {
+ SchemaManifest manifest;
+ RETURN_NOT_OK(SchemaManifest::Make(schema, key_value_metadata, properties, &manifest));
+ std::vector<std::shared_ptr<Field>> fields(manifest.schema_fields.size());
+
+ for (int i = 0; i < static_cast<int>(fields.size()); i++) {
+ const auto& schema_field = manifest.schema_fields[i];
+ fields[i] = schema_field.field;
+ }
+ if (manifest.origin_schema) {
+ // ARROW-8980: If the ARROW:schema was in the input metadata, then
+ // manifest.origin_schema will have it scrubbed out
+ *out = ::arrow::schema(fields, manifest.origin_schema->metadata());
+ } else {
+ *out = ::arrow::schema(fields, key_value_metadata);
+ }
+ return Status::OK();
+}
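+
+// Minimal usage sketch (illustrative; assumes `descr` points at the
+// SchemaDescriptor of an already-opened Parquet file):
+//
+//   const SchemaDescriptor* descr = ...;
+//   ArrowReaderProperties props;
+//   std::shared_ptr<::arrow::Schema> arrow_schema;
+//   RETURN_NOT_OK(FromParquetSchema(descr, props,
+//                                   /*key_value_metadata=*/nullptr, &arrow_schema));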
+
+Status FromParquetSchema(const SchemaDescriptor* parquet_schema,
+ const ArrowReaderProperties& properties,
+ std::shared_ptr<::arrow::Schema>* out) {
+ return FromParquetSchema(parquet_schema, properties, nullptr, out);
+}
+
+Status FromParquetSchema(const SchemaDescriptor* parquet_schema,
+ std::shared_ptr<::arrow::Schema>* out) {
+ ArrowReaderProperties properties;
+ return FromParquetSchema(parquet_schema, properties, nullptr, out);
+}
+
+Status SchemaManifest::Make(const SchemaDescriptor* schema,
+ const std::shared_ptr<const KeyValueMetadata>& metadata,
+ const ArrowReaderProperties& properties,
+ SchemaManifest* manifest) {
+ SchemaTreeContext ctx;
+ ctx.manifest = manifest;
+ ctx.properties = properties;
+ ctx.schema = schema;
+ const GroupNode& schema_node = *schema->group_node();
+ manifest->descr = schema;
+ manifest->schema_fields.resize(schema_node.field_count());
+
+ // Try to deserialize original Arrow schema
+ RETURN_NOT_OK(
+ GetOriginSchema(metadata, &manifest->schema_metadata, &manifest->origin_schema));
+ // Ignore original schema if it's not compatible with the Parquet schema
+ if (manifest->origin_schema != nullptr &&
+ manifest->origin_schema->num_fields() != schema_node.field_count()) {
+ manifest->origin_schema = nullptr;
+ }
+
+ for (int i = 0; i < static_cast<int>(schema_node.field_count()); ++i) {
+ SchemaField* out_field = &manifest->schema_fields[i];
+ RETURN_NOT_OK(NodeToSchemaField(*schema_node.field(i), LevelInfo(), &ctx,
+ /*parent=*/nullptr, out_field));
+
+ // TODO(wesm): as a follow-up to ARROW-3246, we should really pass the origin
+ // schema (if any) through all functions in the schema reconstruction, but
+ // I'm being lazy and just setting dictionary fields at the top level for
+ // now
+ if (manifest->origin_schema == nullptr) {
+ continue;
+ }
+
+ auto origin_field = manifest->origin_schema->field(i);
+ RETURN_NOT_OK(ApplyOriginalMetadata(*origin_field, out_field));
+ }
+ return Status::OK();
+}
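+
+// Usage sketch (illustrative): build a manifest, then resolve the leaf field
+// for a given Parquet column index.
+//
+//   SchemaManifest manifest;
+//   RETURN_NOT_OK(SchemaManifest::Make(descr, key_value_metadata, props, &manifest));
+//   const SchemaField* leaf = nullptr;
+//   RETURN_NOT_OK(manifest.GetColumnField(0, &leaf));
+//   // leaf->field is the reconstructed ::arrow::Field for column 0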
+
+} // namespace arrow
+} // namespace parquet
diff --git a/contrib/libs/apache/arrow/cpp/src/parquet/arrow/schema.h b/contrib/libs/apache/arrow/cpp/src/parquet/arrow/schema.h
index a5c3a58176d..dd60fde4342 100644
--- a/contrib/libs/apache/arrow/cpp/src/parquet/arrow/schema.h
+++ b/contrib/libs/apache/arrow/cpp/src/parquet/arrow/schema.h
@@ -1,184 +1,184 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-#pragma once
-
-#include <cassert>
-#include <memory>
-#include <unordered_map>
-#include <unordered_set>
-#include <vector>
-
-#include "arrow/result.h"
-#include "arrow/status.h"
-#include "arrow/type.h"
-#include "arrow/type_fwd.h"
-
-#include "parquet/level_conversion.h"
-#include "parquet/platform.h"
-#include "parquet/schema.h"
-
-namespace parquet {
-
-class ArrowReaderProperties;
-class ArrowWriterProperties;
-class WriterProperties;
-
-namespace arrow {
-
-/// \defgroup arrow-to-parquet-schema-conversion Functions to convert an Arrow
-/// schema into a Parquet schema.
-///
-/// @{
-
-PARQUET_EXPORT
-::arrow::Status FieldToNode(const std::shared_ptr<::arrow::Field>& field,
- const WriterProperties& properties,
- const ArrowWriterProperties& arrow_properties,
- schema::NodePtr* out);
-
-PARQUET_EXPORT
-::arrow::Status ToParquetSchema(const ::arrow::Schema* arrow_schema,
- const WriterProperties& properties,
- const ArrowWriterProperties& arrow_properties,
- std::shared_ptr<SchemaDescriptor>* out);
-
-PARQUET_EXPORT
-::arrow::Status ToParquetSchema(const ::arrow::Schema* arrow_schema,
- const WriterProperties& properties,
- std::shared_ptr<SchemaDescriptor>* out);
-
-/// @}
-
-/// \defgroup parquet-to-arrow-schema-conversion Functions to convert a Parquet
-/// schema into an Arrow schema.
-///
-/// @{
-
-PARQUET_EXPORT
-::arrow::Status FromParquetSchema(
- const SchemaDescriptor* parquet_schema, const ArrowReaderProperties& properties,
- const std::shared_ptr<const ::arrow::KeyValueMetadata>& key_value_metadata,
- std::shared_ptr<::arrow::Schema>* out);
-
-PARQUET_EXPORT
-::arrow::Status FromParquetSchema(const SchemaDescriptor* parquet_schema,
- const ArrowReaderProperties& properties,
- std::shared_ptr<::arrow::Schema>* out);
-
-PARQUET_EXPORT
-::arrow::Status FromParquetSchema(const SchemaDescriptor* parquet_schema,
- std::shared_ptr<::arrow::Schema>* out);
-
-/// @}
-
-/// \brief Bridge between an arrow::Field and parquet column indices.
-struct PARQUET_EXPORT SchemaField {
- std::shared_ptr<::arrow::Field> field;
- std::vector<SchemaField> children;
-
- // Only set for leaf nodes
- int column_index = -1;
-
- parquet::internal::LevelInfo level_info;
-
- bool is_leaf() const { return column_index != -1; }
-};
-
-/// \brief Bridge between a parquet Schema and an arrow Schema.
-///
-/// Exposes parquet columns as a tree structure. Useful to traverse and link
-/// between arrow's Schema and parquet's Schema.
-struct PARQUET_EXPORT SchemaManifest {
- static ::arrow::Status Make(
- const SchemaDescriptor* schema,
- const std::shared_ptr<const ::arrow::KeyValueMetadata>& metadata,
- const ArrowReaderProperties& properties, SchemaManifest* manifest);
-
- const SchemaDescriptor* descr;
- std::shared_ptr<::arrow::Schema> origin_schema;
- std::shared_ptr<const ::arrow::KeyValueMetadata> schema_metadata;
- std::vector<SchemaField> schema_fields;
-
- std::unordered_map<int, const SchemaField*> column_index_to_field;
- std::unordered_map<const SchemaField*, const SchemaField*> child_to_parent;
-
- ::arrow::Status GetColumnField(int column_index, const SchemaField** out) const {
- auto it = column_index_to_field.find(column_index);
- if (it == column_index_to_field.end()) {
- return ::arrow::Status::KeyError("Column index ", column_index,
- " not found in schema manifest, may be malformed");
- }
- *out = it->second;
- return ::arrow::Status::OK();
- }
-
- const SchemaField* GetParent(const SchemaField* field) const {
- // Also returns nullptr if the field is not found
- auto it = child_to_parent.find(field);
- if (it == child_to_parent.end()) {
- return NULLPTR;
- }
- return it->second;
- }
-
- /// Coalesce a list of field indices (relative to the equivalent arrow::Schema) which
- /// correspond to the column root (first node below the parquet schema's root group) of
- /// each leaf referenced in column_indices.
- ///
- /// For example, for leaves `a.b.c`, `a.b.d.e`, and `i.j.k` (column_indices=[0,1,3])
- /// the roots are `a` and `i` (return=[0,2]).
- ///
- /// root
- /// -- a <------
- /// -- -- b | |
- /// -- -- -- c |
- /// -- -- -- d |
- /// -- -- -- -- e
- /// -- f
- /// -- -- g
- /// -- -- -- h
- /// -- i <---
- /// -- -- j |
- /// -- -- -- k
- ::arrow::Result<std::vector<int>> GetFieldIndices(
- const std::vector<int>& column_indices) const {
- const schema::GroupNode* group = descr->group_node();
- std::unordered_set<int> already_added;
-
- std::vector<int> out;
- for (int column_idx : column_indices) {
- if (column_idx < 0 || column_idx >= descr->num_columns()) {
- return ::arrow::Status::IndexError("Column index ", column_idx, " is not valid");
- }
-
- auto field_node = descr->GetColumnRoot(column_idx);
- auto field_idx = group->FieldIndex(*field_node);
- if (field_idx == -1) {
- return ::arrow::Status::IndexError("Column index ", column_idx, " is not valid");
- }
-
- if (already_added.insert(field_idx).second) {
- out.push_back(field_idx);
- }
- }
- return out;
- }
-};
-
-} // namespace arrow
-} // namespace parquet
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+
+#include <cassert>
+#include <memory>
+#include <unordered_map>
+#include <unordered_set>
+#include <vector>
+
+#include "arrow/result.h"
+#include "arrow/status.h"
+#include "arrow/type.h"
+#include "arrow/type_fwd.h"
+
+#include "parquet/level_conversion.h"
+#include "parquet/platform.h"
+#include "parquet/schema.h"
+
+namespace parquet {
+
+class ArrowReaderProperties;
+class ArrowWriterProperties;
+class WriterProperties;
+
+namespace arrow {
+
+/// \defgroup arrow-to-parquet-schema-conversion Functions to convert an Arrow
+/// schema into a Parquet schema.
+///
+/// @{
+
+PARQUET_EXPORT
+::arrow::Status FieldToNode(const std::shared_ptr<::arrow::Field>& field,
+ const WriterProperties& properties,
+ const ArrowWriterProperties& arrow_properties,
+ schema::NodePtr* out);
+
+PARQUET_EXPORT
+::arrow::Status ToParquetSchema(const ::arrow::Schema* arrow_schema,
+ const WriterProperties& properties,
+ const ArrowWriterProperties& arrow_properties,
+ std::shared_ptr<SchemaDescriptor>* out);
+
+PARQUET_EXPORT
+::arrow::Status ToParquetSchema(const ::arrow::Schema* arrow_schema,
+ const WriterProperties& properties,
+ std::shared_ptr<SchemaDescriptor>* out);
+
+/// @}
+
+/// \defgroup parquet-to-arrow-schema-conversion Functions to convert a Parquet
+/// schema into an Arrow schema.
+///
+/// @{
+
+PARQUET_EXPORT
+::arrow::Status FromParquetSchema(
+ const SchemaDescriptor* parquet_schema, const ArrowReaderProperties& properties,
+ const std::shared_ptr<const ::arrow::KeyValueMetadata>& key_value_metadata,
+ std::shared_ptr<::arrow::Schema>* out);
+
+PARQUET_EXPORT
+::arrow::Status FromParquetSchema(const SchemaDescriptor* parquet_schema,
+ const ArrowReaderProperties& properties,
+ std::shared_ptr<::arrow::Schema>* out);
+
+PARQUET_EXPORT
+::arrow::Status FromParquetSchema(const SchemaDescriptor* parquet_schema,
+ std::shared_ptr<::arrow::Schema>* out);
+
+/// @}
+
+/// \brief Bridge between an arrow::Field and parquet column indices.
+struct PARQUET_EXPORT SchemaField {
+ std::shared_ptr<::arrow::Field> field;
+ std::vector<SchemaField> children;
+
+ // Only set for leaf nodes
+ int column_index = -1;
+
+ parquet::internal::LevelInfo level_info;
+
+ bool is_leaf() const { return column_index != -1; }
+};
+
+/// \brief Bridge between a parquet Schema and an arrow Schema.
+///
+/// Exposes parquet columns as a tree structure. Useful to traverse and link
+/// between arrow's Schema and parquet's Schema.
+struct PARQUET_EXPORT SchemaManifest {
+ static ::arrow::Status Make(
+ const SchemaDescriptor* schema,
+ const std::shared_ptr<const ::arrow::KeyValueMetadata>& metadata,
+ const ArrowReaderProperties& properties, SchemaManifest* manifest);
+
+ const SchemaDescriptor* descr;
+ std::shared_ptr<::arrow::Schema> origin_schema;
+ std::shared_ptr<const ::arrow::KeyValueMetadata> schema_metadata;
+ std::vector<SchemaField> schema_fields;
+
+ std::unordered_map<int, const SchemaField*> column_index_to_field;
+ std::unordered_map<const SchemaField*, const SchemaField*> child_to_parent;
+
+ ::arrow::Status GetColumnField(int column_index, const SchemaField** out) const {
+ auto it = column_index_to_field.find(column_index);
+ if (it == column_index_to_field.end()) {
+ return ::arrow::Status::KeyError("Column index ", column_index,
+ " not found in schema manifest, may be malformed");
+ }
+ *out = it->second;
+ return ::arrow::Status::OK();
+ }
+
+ const SchemaField* GetParent(const SchemaField* field) const {
+ // Also returns nullptr if the field is not found
+ auto it = child_to_parent.find(field);
+ if (it == child_to_parent.end()) {
+ return NULLPTR;
+ }
+ return it->second;
+ }
+
+ /// Coalesce a list of field indices (relative to the equivalent arrow::Schema) which
+ /// correspond to the column root (first node below the parquet schema's root group) of
+ /// each leaf referenced in column_indices.
+ ///
+ /// For example, for leaves `a.b.c`, `a.b.d.e`, and `i.j.k` (column_indices=[0,1,3])
+ /// the roots are `a` and `i` (return=[0,2]).
+ ///
+ /// root
+ /// -- a <------
+ /// -- -- b | |
+ /// -- -- -- c |
+ /// -- -- -- d |
+ /// -- -- -- -- e
+ /// -- f
+ /// -- -- g
+ /// -- -- -- h
+ /// -- i <---
+ /// -- -- j |
+ /// -- -- -- k
+ ::arrow::Result<std::vector<int>> GetFieldIndices(
+ const std::vector<int>& column_indices) const {
+ const schema::GroupNode* group = descr->group_node();
+ std::unordered_set<int> already_added;
+
+ std::vector<int> out;
+ for (int column_idx : column_indices) {
+ if (column_idx < 0 || column_idx >= descr->num_columns()) {
+ return ::arrow::Status::IndexError("Column index ", column_idx, " is not valid");
+ }
+
+ auto field_node = descr->GetColumnRoot(column_idx);
+ auto field_idx = group->FieldIndex(*field_node);
+ if (field_idx == -1) {
+ return ::arrow::Status::IndexError("Column index ", column_idx, " is not valid");
+ }
+
+ if (already_added.insert(field_idx).second) {
+ out.push_back(field_idx);
+ }
+ }
+ return out;
+ }
+};
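+
+// Usage sketch for GetFieldIndices, using the tree drawn in the doc comment
+// above (leaves a.b.c, a.b.d.e and i.j.k):
+//
+//   ARROW_ASSIGN_OR_RAISE(std::vector<int> roots,
+//                         manifest.GetFieldIndices({0, 1, 3}));
+//   // roots == {0, 2}, i.e. the top-level fields `a` and `i`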
+
+} // namespace arrow
+} // namespace parquet
diff --git a/contrib/libs/apache/arrow/cpp/src/parquet/arrow/schema_internal.cc b/contrib/libs/apache/arrow/cpp/src/parquet/arrow/schema_internal.cc
index 13acbb3d555..064bf4f55cc 100644
--- a/contrib/libs/apache/arrow/cpp/src/parquet/arrow/schema_internal.cc
+++ b/contrib/libs/apache/arrow/cpp/src/parquet/arrow/schema_internal.cc
@@ -1,222 +1,222 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-#include "parquet/arrow/schema_internal.h"
-
-#include "arrow/type.h"
-
-using ArrowType = ::arrow::DataType;
-using ArrowTypeId = ::arrow::Type;
-using ParquetType = parquet::Type;
-
-namespace parquet {
-
-namespace arrow {
-
-using ::arrow::Result;
-using ::arrow::Status;
-using ::arrow::internal::checked_cast;
-
-Result<std::shared_ptr<ArrowType>> MakeArrowDecimal(const LogicalType& logical_type) {
- const auto& decimal = checked_cast<const DecimalLogicalType&>(logical_type);
- if (decimal.precision() <= ::arrow::Decimal128Type::kMaxPrecision) {
- return ::arrow::Decimal128Type::Make(decimal.precision(), decimal.scale());
- }
- return ::arrow::Decimal256Type::Make(decimal.precision(), decimal.scale());
-}
-
-Result<std::shared_ptr<ArrowType>> MakeArrowInt(const LogicalType& logical_type) {
- const auto& integer = checked_cast<const IntLogicalType&>(logical_type);
- switch (integer.bit_width()) {
- case 8:
- return integer.is_signed() ? ::arrow::int8() : ::arrow::uint8();
- case 16:
- return integer.is_signed() ? ::arrow::int16() : ::arrow::uint16();
- case 32:
- return integer.is_signed() ? ::arrow::int32() : ::arrow::uint32();
- default:
- return Status::TypeError(logical_type.ToString(),
- " can not annotate physical type Int32");
- }
-}
-
-Result<std::shared_ptr<ArrowType>> MakeArrowInt64(const LogicalType& logical_type) {
- const auto& integer = checked_cast<const IntLogicalType&>(logical_type);
- switch (integer.bit_width()) {
- case 64:
- return integer.is_signed() ? ::arrow::int64() : ::arrow::uint64();
- default:
- return Status::TypeError(logical_type.ToString(),
- " can not annotate physical type Int64");
- }
-}
-
-Result<std::shared_ptr<ArrowType>> MakeArrowTime32(const LogicalType& logical_type) {
- const auto& time = checked_cast<const TimeLogicalType&>(logical_type);
- switch (time.time_unit()) {
- case LogicalType::TimeUnit::MILLIS:
- return ::arrow::time32(::arrow::TimeUnit::MILLI);
- default:
- return Status::TypeError(logical_type.ToString(),
- " can not annotate physical type Time32");
- }
-}
-
-Result<std::shared_ptr<ArrowType>> MakeArrowTime64(const LogicalType& logical_type) {
- const auto& time = checked_cast<const TimeLogicalType&>(logical_type);
- switch (time.time_unit()) {
- case LogicalType::TimeUnit::MICROS:
- return ::arrow::time64(::arrow::TimeUnit::MICRO);
- case LogicalType::TimeUnit::NANOS:
- return ::arrow::time64(::arrow::TimeUnit::NANO);
- default:
- return Status::TypeError(logical_type.ToString(),
- " can not annotate physical type Time64");
- }
-}
-
-Result<std::shared_ptr<ArrowType>> MakeArrowTimestamp(const LogicalType& logical_type) {
- const auto& timestamp = checked_cast<const TimestampLogicalType&>(logical_type);
- const bool utc_normalized =
- timestamp.is_from_converted_type() ? false : timestamp.is_adjusted_to_utc();
- static const char* utc_timezone = "UTC";
- switch (timestamp.time_unit()) {
- case LogicalType::TimeUnit::MILLIS:
- return (utc_normalized ? ::arrow::timestamp(::arrow::TimeUnit::MILLI, utc_timezone)
- : ::arrow::timestamp(::arrow::TimeUnit::MILLI));
- case LogicalType::TimeUnit::MICROS:
- return (utc_normalized ? ::arrow::timestamp(::arrow::TimeUnit::MICRO, utc_timezone)
- : ::arrow::timestamp(::arrow::TimeUnit::MICRO));
- case LogicalType::TimeUnit::NANOS:
- return (utc_normalized ? ::arrow::timestamp(::arrow::TimeUnit::NANO, utc_timezone)
- : ::arrow::timestamp(::arrow::TimeUnit::NANO));
- default:
- return Status::TypeError("Unrecognized time unit in timestamp logical_type: ",
- logical_type.ToString());
- }
-}
-
-Result<std::shared_ptr<ArrowType>> FromByteArray(const LogicalType& logical_type) {
- switch (logical_type.type()) {
- case LogicalType::Type::STRING:
- return ::arrow::utf8();
- case LogicalType::Type::DECIMAL:
- return MakeArrowDecimal(logical_type);
- case LogicalType::Type::NONE:
- case LogicalType::Type::ENUM:
- case LogicalType::Type::JSON:
- case LogicalType::Type::BSON:
- return ::arrow::binary();
- default:
- return Status::NotImplemented("Unhandled logical logical_type ",
- logical_type.ToString(), " for binary array");
- }
-}
-
-Result<std::shared_ptr<ArrowType>> FromFLBA(const LogicalType& logical_type,
- int32_t physical_length) {
- switch (logical_type.type()) {
- case LogicalType::Type::DECIMAL:
- return MakeArrowDecimal(logical_type);
- case LogicalType::Type::NONE:
- case LogicalType::Type::INTERVAL:
- case LogicalType::Type::UUID:
- return ::arrow::fixed_size_binary(physical_length);
- default:
- return Status::NotImplemented("Unhandled logical logical_type ",
- logical_type.ToString(),
- " for fixed-length binary array");
- }
-}
-
-::arrow::Result<std::shared_ptr<ArrowType>> FromInt32(const LogicalType& logical_type) {
- switch (logical_type.type()) {
- case LogicalType::Type::INT:
- return MakeArrowInt(logical_type);
- case LogicalType::Type::DATE:
- return ::arrow::date32();
- case LogicalType::Type::TIME:
- return MakeArrowTime32(logical_type);
- case LogicalType::Type::DECIMAL:
- return MakeArrowDecimal(logical_type);
- case LogicalType::Type::NONE:
- return ::arrow::int32();
- default:
- return Status::NotImplemented("Unhandled logical type ", logical_type.ToString(),
- " for INT32");
- }
-}
-
-Result<std::shared_ptr<ArrowType>> FromInt64(const LogicalType& logical_type) {
- switch (logical_type.type()) {
- case LogicalType::Type::INT:
- return MakeArrowInt64(logical_type);
- case LogicalType::Type::DECIMAL:
- return MakeArrowDecimal(logical_type);
- case LogicalType::Type::TIMESTAMP:
- return MakeArrowTimestamp(logical_type);
- case LogicalType::Type::TIME:
- return MakeArrowTime64(logical_type);
- case LogicalType::Type::NONE:
- return ::arrow::int64();
- default:
- return Status::NotImplemented("Unhandled logical type ", logical_type.ToString(),
- " for INT64");
- }
-}
-
-Result<std::shared_ptr<ArrowType>> GetArrowType(
- Type::type physical_type, const LogicalType& logical_type, int type_length,
- const ::arrow::TimeUnit::type int96_arrow_time_unit) {
- if (logical_type.is_invalid() || logical_type.is_null()) {
- return ::arrow::null();
- }
-
- switch (physical_type) {
- case ParquetType::BOOLEAN:
- return ::arrow::boolean();
- case ParquetType::INT32:
- return FromInt32(logical_type);
- case ParquetType::INT64:
- return FromInt64(logical_type);
- case ParquetType::INT96:
- return ::arrow::timestamp(int96_arrow_time_unit);
- case ParquetType::FLOAT:
- return ::arrow::float32();
- case ParquetType::DOUBLE:
- return ::arrow::float64();
- case ParquetType::BYTE_ARRAY:
- return FromByteArray(logical_type);
- case ParquetType::FIXED_LEN_BYTE_ARRAY:
- return FromFLBA(logical_type, type_length);
- default: {
- // PARQUET-1565: This can occur if the file is corrupt
- return Status::IOError("Invalid physical column type: ",
- TypeToString(physical_type));
- }
- }
-}
-
-Result<std::shared_ptr<ArrowType>> GetArrowType(
- const schema::PrimitiveNode& primitive,
- const ::arrow::TimeUnit::type int96_arrow_time_unit) {
- return GetArrowType(primitive.physical_type(), *primitive.logical_type(),
- primitive.type_length(), int96_arrow_time_unit);
-}
-
-} // namespace arrow
-} // namespace parquet
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "parquet/arrow/schema_internal.h"
+
+#include "arrow/type.h"
+
+using ArrowType = ::arrow::DataType;
+using ArrowTypeId = ::arrow::Type;
+using ParquetType = parquet::Type;
+
+namespace parquet {
+
+namespace arrow {
+
+using ::arrow::Result;
+using ::arrow::Status;
+using ::arrow::internal::checked_cast;
+
+Result<std::shared_ptr<ArrowType>> MakeArrowDecimal(const LogicalType& logical_type) {
+ const auto& decimal = checked_cast<const DecimalLogicalType&>(logical_type);
+ if (decimal.precision() <= ::arrow::Decimal128Type::kMaxPrecision) {
+ return ::arrow::Decimal128Type::Make(decimal.precision(), decimal.scale());
+ }
+ return ::arrow::Decimal256Type::Make(decimal.precision(), decimal.scale());
+}
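+
+// For example (sketch): DECIMAL(38, 10) is within Decimal128Type::kMaxPrecision
+// (38) and maps to ::arrow::decimal128(38, 10); DECIMAL(39, 10) exceeds it and
+// maps to ::arrow::decimal256(39, 10).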
+
+Result<std::shared_ptr<ArrowType>> MakeArrowInt(const LogicalType& logical_type) {
+ const auto& integer = checked_cast<const IntLogicalType&>(logical_type);
+ switch (integer.bit_width()) {
+ case 8:
+ return integer.is_signed() ? ::arrow::int8() : ::arrow::uint8();
+ case 16:
+ return integer.is_signed() ? ::arrow::int16() : ::arrow::uint16();
+ case 32:
+ return integer.is_signed() ? ::arrow::int32() : ::arrow::uint32();
+ default:
+ return Status::TypeError(logical_type.ToString(),
+ " can not annotate physical type Int32");
+ }
+}
+
+Result<std::shared_ptr<ArrowType>> MakeArrowInt64(const LogicalType& logical_type) {
+ const auto& integer = checked_cast<const IntLogicalType&>(logical_type);
+ switch (integer.bit_width()) {
+ case 64:
+ return integer.is_signed() ? ::arrow::int64() : ::arrow::uint64();
+ default:
+ return Status::TypeError(logical_type.ToString(),
+ " can not annotate physical type Int64");
+ }
+}
+
+Result<std::shared_ptr<ArrowType>> MakeArrowTime32(const LogicalType& logical_type) {
+ const auto& time = checked_cast<const TimeLogicalType&>(logical_type);
+ switch (time.time_unit()) {
+ case LogicalType::TimeUnit::MILLIS:
+ return ::arrow::time32(::arrow::TimeUnit::MILLI);
+ default:
+ return Status::TypeError(logical_type.ToString(),
+ " can not annotate physical type Time32");
+ }
+}
+
+Result<std::shared_ptr<ArrowType>> MakeArrowTime64(const LogicalType& logical_type) {
+ const auto& time = checked_cast<const TimeLogicalType&>(logical_type);
+ switch (time.time_unit()) {
+ case LogicalType::TimeUnit::MICROS:
+ return ::arrow::time64(::arrow::TimeUnit::MICRO);
+ case LogicalType::TimeUnit::NANOS:
+ return ::arrow::time64(::arrow::TimeUnit::NANO);
+ default:
+ return Status::TypeError(logical_type.ToString(),
+ " can not annotate physical type Time64");
+ }
+}
+
+Result<std::shared_ptr<ArrowType>> MakeArrowTimestamp(const LogicalType& logical_type) {
+ const auto& timestamp = checked_cast<const TimestampLogicalType&>(logical_type);
+ const bool utc_normalized =
+ timestamp.is_from_converted_type() ? false : timestamp.is_adjusted_to_utc();
+ static const char* utc_timezone = "UTC";
+ switch (timestamp.time_unit()) {
+ case LogicalType::TimeUnit::MILLIS:
+ return (utc_normalized ? ::arrow::timestamp(::arrow::TimeUnit::MILLI, utc_timezone)
+ : ::arrow::timestamp(::arrow::TimeUnit::MILLI));
+ case LogicalType::TimeUnit::MICROS:
+ return (utc_normalized ? ::arrow::timestamp(::arrow::TimeUnit::MICRO, utc_timezone)
+ : ::arrow::timestamp(::arrow::TimeUnit::MICRO));
+ case LogicalType::TimeUnit::NANOS:
+ return (utc_normalized ? ::arrow::timestamp(::arrow::TimeUnit::NANO, utc_timezone)
+ : ::arrow::timestamp(::arrow::TimeUnit::NANO));
+ default:
+ return Status::TypeError("Unrecognized time unit in timestamp logical_type: ",
+ logical_type.ToString());
+ }
+}
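+
+// Mapping sketch: a MICROS timestamp with isAdjustedToUTC=true becomes
+// timestamp(MICRO, "UTC"); without the flag, or when the annotation was
+// inferred from a legacy ConvertedType, it becomes the zone-naive
+// timestamp(MICRO).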
+
+Result<std::shared_ptr<ArrowType>> FromByteArray(const LogicalType& logical_type) {
+ switch (logical_type.type()) {
+ case LogicalType::Type::STRING:
+ return ::arrow::utf8();
+ case LogicalType::Type::DECIMAL:
+ return MakeArrowDecimal(logical_type);
+ case LogicalType::Type::NONE:
+ case LogicalType::Type::ENUM:
+ case LogicalType::Type::JSON:
+ case LogicalType::Type::BSON:
+ return ::arrow::binary();
+ default:
+ return Status::NotImplemented("Unhandled logical logical_type ",
+ logical_type.ToString(), " for binary array");
+ }
+}
+
+Result<std::shared_ptr<ArrowType>> FromFLBA(const LogicalType& logical_type,
+ int32_t physical_length) {
+ switch (logical_type.type()) {
+ case LogicalType::Type::DECIMAL:
+ return MakeArrowDecimal(logical_type);
+ case LogicalType::Type::NONE:
+ case LogicalType::Type::INTERVAL:
+ case LogicalType::Type::UUID:
+ return ::arrow::fixed_size_binary(physical_length);
+ default:
+ return Status::NotImplemented("Unhandled logical logical_type ",
+ logical_type.ToString(),
+ " for fixed-length binary array");
+ }
+}
+
+::arrow::Result<std::shared_ptr<ArrowType>> FromInt32(const LogicalType& logical_type) {
+ switch (logical_type.type()) {
+ case LogicalType::Type::INT:
+ return MakeArrowInt(logical_type);
+ case LogicalType::Type::DATE:
+ return ::arrow::date32();
+ case LogicalType::Type::TIME:
+ return MakeArrowTime32(logical_type);
+ case LogicalType::Type::DECIMAL:
+ return MakeArrowDecimal(logical_type);
+ case LogicalType::Type::NONE:
+ return ::arrow::int32();
+ default:
+ return Status::NotImplemented("Unhandled logical type ", logical_type.ToString(),
+ " for INT32");
+ }
+}
+
+Result<std::shared_ptr<ArrowType>> FromInt64(const LogicalType& logical_type) {
+ switch (logical_type.type()) {
+ case LogicalType::Type::INT:
+ return MakeArrowInt64(logical_type);
+ case LogicalType::Type::DECIMAL:
+ return MakeArrowDecimal(logical_type);
+ case LogicalType::Type::TIMESTAMP:
+ return MakeArrowTimestamp(logical_type);
+ case LogicalType::Type::TIME:
+ return MakeArrowTime64(logical_type);
+ case LogicalType::Type::NONE:
+ return ::arrow::int64();
+ default:
+ return Status::NotImplemented("Unhandled logical type ", logical_type.ToString(),
+ " for INT64");
+ }
+}
+
+Result<std::shared_ptr<ArrowType>> GetArrowType(
+ Type::type physical_type, const LogicalType& logical_type, int type_length,
+ const ::arrow::TimeUnit::type int96_arrow_time_unit) {
+ if (logical_type.is_invalid() || logical_type.is_null()) {
+ return ::arrow::null();
+ }
+
+ switch (physical_type) {
+ case ParquetType::BOOLEAN:
+ return ::arrow::boolean();
+ case ParquetType::INT32:
+ return FromInt32(logical_type);
+ case ParquetType::INT64:
+ return FromInt64(logical_type);
+ case ParquetType::INT96:
+ return ::arrow::timestamp(int96_arrow_time_unit);
+ case ParquetType::FLOAT:
+ return ::arrow::float32();
+ case ParquetType::DOUBLE:
+ return ::arrow::float64();
+ case ParquetType::BYTE_ARRAY:
+ return FromByteArray(logical_type);
+ case ParquetType::FIXED_LEN_BYTE_ARRAY:
+ return FromFLBA(logical_type, type_length);
+ default: {
+ // PARQUET-1565: This can occur if the file is corrupt
+ return Status::IOError("Invalid physical column type: ",
+ TypeToString(physical_type));
+ }
+ }
+}
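+
+// Dispatch examples (sketch): INT32 annotated DECIMAL(9, 2) yields
+// decimal128(9, 2); BYTE_ARRAY annotated STRING yields utf8(); INT96 always
+// yields a timestamp in the requested int96_arrow_time_unit.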
+
+Result<std::shared_ptr<ArrowType>> GetArrowType(
+ const schema::PrimitiveNode& primitive,
+ const ::arrow::TimeUnit::type int96_arrow_time_unit) {
+ return GetArrowType(primitive.physical_type(), *primitive.logical_type(),
+ primitive.type_length(), int96_arrow_time_unit);
+}
+
+} // namespace arrow
+} // namespace parquet
diff --git a/contrib/libs/apache/arrow/cpp/src/parquet/arrow/schema_internal.h b/contrib/libs/apache/arrow/cpp/src/parquet/arrow/schema_internal.h
index c48fd7c938a..fb837c3ee6c 100644
--- a/contrib/libs/apache/arrow/cpp/src/parquet/arrow/schema_internal.h
+++ b/contrib/libs/apache/arrow/cpp/src/parquet/arrow/schema_internal.h
@@ -1,51 +1,51 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-#pragma once
-
-#include "arrow/result.h"
-#include "parquet/schema.h"
-
-namespace arrow {
-class DataType;
-}
-
-namespace parquet {
-namespace arrow {
-
-using ::arrow::Result;
-
-Result<std::shared_ptr<::arrow::DataType>> FromByteArray(const LogicalType& logical_type);
-Result<std::shared_ptr<::arrow::DataType>> FromFLBA(const LogicalType& logical_type,
- int32_t physical_length);
-Result<std::shared_ptr<::arrow::DataType>> FromInt32(const LogicalType& logical_type);
-Result<std::shared_ptr<::arrow::DataType>> FromInt64(const LogicalType& logical_type);
-
-Result<std::shared_ptr<::arrow::DataType>> GetArrowType(Type::type physical_type,
- const LogicalType& logical_type,
- int type_length);
-
-Result<std::shared_ptr<::arrow::DataType>> GetArrowType(
- Type::type physical_type, const LogicalType& logical_type, int type_length,
- ::arrow::TimeUnit::type int96_arrow_time_unit = ::arrow::TimeUnit::NANO);
-
-Result<std::shared_ptr<::arrow::DataType>> GetArrowType(
- const schema::PrimitiveNode& primitive,
- ::arrow::TimeUnit::type int96_arrow_time_unit = ::arrow::TimeUnit::NANO);
-
-} // namespace arrow
-} // namespace parquet
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+
+#include "arrow/result.h"
+#include "parquet/schema.h"
+
+namespace arrow {
+class DataType;
+}
+
+namespace parquet {
+namespace arrow {
+
+using ::arrow::Result;
+
+Result<std::shared_ptr<::arrow::DataType>> FromByteArray(const LogicalType& logical_type);
+Result<std::shared_ptr<::arrow::DataType>> FromFLBA(const LogicalType& logical_type,
+ int32_t physical_length);
+Result<std::shared_ptr<::arrow::DataType>> FromInt32(const LogicalType& logical_type);
+Result<std::shared_ptr<::arrow::DataType>> FromInt64(const LogicalType& logical_type);
+
+Result<std::shared_ptr<::arrow::DataType>> GetArrowType(Type::type physical_type,
+ const LogicalType& logical_type,
+ int type_length);
+
+Result<std::shared_ptr<::arrow::DataType>> GetArrowType(
+ Type::type physical_type, const LogicalType& logical_type, int type_length,
+ ::arrow::TimeUnit::type int96_arrow_time_unit = ::arrow::TimeUnit::NANO);
+
+Result<std::shared_ptr<::arrow::DataType>> GetArrowType(
+ const schema::PrimitiveNode& primitive,
+ ::arrow::TimeUnit::type int96_arrow_time_unit = ::arrow::TimeUnit::NANO);
+
+} // namespace arrow
+} // namespace parquet
diff --git a/contrib/libs/apache/arrow/cpp/src/parquet/arrow/writer.cc b/contrib/libs/apache/arrow/cpp/src/parquet/arrow/writer.cc
index 797069eb327..2fbebf27fce 100644
--- a/contrib/libs/apache/arrow/cpp/src/parquet/arrow/writer.cc
+++ b/contrib/libs/apache/arrow/cpp/src/parquet/arrow/writer.cc
@@ -1,482 +1,482 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-#include "parquet/arrow/writer.h"
-
-#include <algorithm>
-#include <deque>
-#include <string>
-#include <type_traits>
-#include <utility>
-#include <vector>
-
-#include "arrow/array.h"
-#include "arrow/extension_type.h"
-#include "arrow/ipc/writer.h"
-#include "arrow/table.h"
-#include "arrow/type.h"
-#include "arrow/util/base64.h"
-#include "arrow/util/checked_cast.h"
-#include "arrow/util/key_value_metadata.h"
-#include "arrow/util/logging.h"
-#include "arrow/util/make_unique.h"
-#include "arrow/visitor_inline.h"
-
-#include "parquet/arrow/path_internal.h"
-#include "parquet/arrow/reader_internal.h"
-#include "parquet/arrow/schema.h"
-#include "parquet/column_writer.h"
-#include "parquet/exception.h"
-#include "parquet/file_writer.h"
-#include "parquet/platform.h"
-#include "parquet/schema.h"
-
-using arrow::Array;
-using arrow::BinaryArray;
-using arrow::BooleanArray;
-using arrow::ChunkedArray;
-using arrow::DataType;
-using arrow::DictionaryArray;
-using arrow::ExtensionArray;
-using arrow::ExtensionType;
-using arrow::Field;
-using arrow::FixedSizeBinaryArray;
-using arrow::ListArray;
-using arrow::MemoryPool;
-using arrow::NumericArray;
-using arrow::PrimitiveArray;
-using arrow::ResizableBuffer;
-using arrow::Status;
-using arrow::Table;
-using arrow::TimeUnit;
-
-using arrow::internal::checked_cast;
-
-using parquet::ParquetFileWriter;
-using parquet::ParquetVersion;
-using parquet::schema::GroupNode;
-
-namespace parquet {
-namespace arrow {
-
-namespace {
-
-int CalculateLeafCount(const DataType* type) {
- if (type->id() == ::arrow::Type::EXTENSION) {
- type = checked_cast<const ExtensionType&>(*type).storage_type().get();
- }
- // Note num_fields() can be 0 for an empty struct type
- if (!::arrow::is_nested(type->id())) {
- // Primitive type.
- return 1;
- }
-
- int num_leaves = 0;
- for (const auto& field : type->fields()) {
- num_leaves += CalculateLeafCount(field->type().get());
- }
- return num_leaves;
-}
-
-// Determines if the |schema_field|'s root ancestor is nullable.
-bool HasNullableRoot(const SchemaManifest& schema_manifest,
- const SchemaField* schema_field) {
- DCHECK(schema_field != nullptr);
- const SchemaField* current_field = schema_field;
- bool nullable = schema_field->field->nullable();
- while (current_field != nullptr) {
- nullable = current_field->field->nullable();
- current_field = schema_manifest.GetParent(current_field);
- }
- return nullable;
-}
-
-// Manages writing nested parquet columns with support for all nested types
-// supported by parquet.
-class ArrowColumnWriterV2 {
- public:
- // Constructs a new object (use the Make() method below to construct from
- // a ChunkedArray).
- // level_builders should contain one MultipathLevelBuilder per chunk of the
- // Arrow-column to write.
- ArrowColumnWriterV2(std::vector<std::unique_ptr<MultipathLevelBuilder>> level_builders,
- int leaf_count, RowGroupWriter* row_group_writer)
- : level_builders_(std::move(level_builders)),
- leaf_count_(leaf_count),
- row_group_writer_(row_group_writer) {}
-
- // Writes out all leaf parquet columns to the RowGroupWriter that this
- // object was constructed with. Each leaf column is written fully before
- // the next column is written (i.e. no buffering is assumed).
- //
- // Columns are written in DFS order.
- Status Write(ArrowWriteContext* ctx) {
- for (int leaf_idx = 0; leaf_idx < leaf_count_; leaf_idx++) {
- ColumnWriter* column_writer;
- PARQUET_CATCH_NOT_OK(column_writer = row_group_writer_->NextColumn());
- for (auto& level_builder : level_builders_) {
- RETURN_NOT_OK(level_builder->Write(
- leaf_idx, ctx, [&](const MultipathLevelBuilderResult& result) {
- size_t visited_component_size = result.post_list_visited_elements.size();
- DCHECK_GT(visited_component_size, 0);
- if (visited_component_size != 1) {
- return Status::NotImplemented(
- "Lists with non-zero length null components are not supported");
- }
- const ElementRange& range = result.post_list_visited_elements[0];
- std::shared_ptr<Array> values_array =
- result.leaf_array->Slice(range.start, range.Size());
-
- return column_writer->WriteArrow(result.def_levels, result.rep_levels,
- result.def_rep_level_count, *values_array,
- ctx, result.leaf_is_nullable);
- }));
- }
-
- PARQUET_CATCH_NOT_OK(column_writer->Close());
- }
- return Status::OK();
- }
-
- // Make a new object by converting each chunk in |data| to a MultipathLevelBuilder.
- //
- // It is necessary to create a new builder per array because the MultipathLevelBuilder
- // extracts the data necessary for writing each leaf column at construction time
- // (it optimizes based on null count), and slicing via |offset| creates ephemeral
- // chunks which need to be tracked across each leaf column-write.
- // This decision could potentially be revisited if we wanted to use "buffered"
- // RowGroupWriters (we could construct each builder on demand in that case).
- static ::arrow::Result<std::unique_ptr<ArrowColumnWriterV2>> Make(
- const ChunkedArray& data, int64_t offset, const int64_t size,
- const SchemaManifest& schema_manifest, RowGroupWriter* row_group_writer) {
- int64_t absolute_position = 0;
- int chunk_index = 0;
- int64_t chunk_offset = 0;
- if (data.length() == 0) {
- return ::arrow::internal::make_unique<ArrowColumnWriterV2>(
- std::vector<std::unique_ptr<MultipathLevelBuilder>>{},
- CalculateLeafCount(data.type().get()), row_group_writer);
- }
- while (chunk_index < data.num_chunks() && absolute_position < offset) {
- const int64_t chunk_length = data.chunk(chunk_index)->length();
- if (absolute_position + chunk_length > offset) {
- // Relative offset into the chunk to reach the desired start offset for
- // writing
- chunk_offset = offset - absolute_position;
- break;
- } else {
- ++chunk_index;
- absolute_position += chunk_length;
- }
- }
-
- if (absolute_position >= data.length()) {
- return Status::Invalid("Cannot write data at offset past end of chunked array");
- }
-
- int64_t values_written = 0;
- std::vector<std::unique_ptr<MultipathLevelBuilder>> builders;
- const int leaf_count = CalculateLeafCount(data.type().get());
- bool is_nullable = false;
- // The row_group_writer hasn't been advanced yet, so add 1 to the current
- // column to get the one this instance will start writing to.
- int column_index = row_group_writer->current_column() + 1;
- for (int leaf_offset = 0; leaf_offset < leaf_count; ++leaf_offset) {
- const SchemaField* schema_field = nullptr;
- RETURN_NOT_OK(
- schema_manifest.GetColumnField(column_index + leaf_offset, &schema_field));
- bool nullable_root = HasNullableRoot(schema_manifest, schema_field);
- if (leaf_offset == 0) {
- is_nullable = nullable_root;
- }
-
-// Don't validate common ancestry for all leaves if not in debug.
-#ifdef NDEBUG
- break;
-#else
- if (is_nullable != nullable_root) {
- return Status::UnknownError(
- "Unexpected mismatched nullability between column index ",
- column_index + leaf_offset, " and ", column_index);
- }
-#endif
- }
- while (values_written < size) {
- const Array& chunk = *data.chunk(chunk_index);
- const int64_t available_values = chunk.length() - chunk_offset;
- const int64_t chunk_write_size = std::min(size - values_written, available_values);
-
- // The chunk offset here will be 0 except for possibly the first chunk
- // because of the advancing logic above
- std::shared_ptr<Array> array_to_write = chunk.Slice(chunk_offset, chunk_write_size);
-
- if (array_to_write->length() > 0) {
- ARROW_ASSIGN_OR_RAISE(std::unique_ptr<MultipathLevelBuilder> builder,
- MultipathLevelBuilder::Make(*array_to_write, is_nullable));
- if (leaf_count != builder->GetLeafCount()) {
- return Status::UnknownError("data type leaf_count != builder_leaf_count",
- leaf_count, " ", builder->GetLeafCount());
- }
- builders.emplace_back(std::move(builder));
- }
-
- if (chunk_write_size == available_values) {
- chunk_offset = 0;
- ++chunk_index;
- }
- values_written += chunk_write_size;
- }
- return ::arrow::internal::make_unique<ArrowColumnWriterV2>(
- std::move(builders), leaf_count, row_group_writer);
- }
-
- private:
- // One builder per column-chunk.
- std::vector<std::unique_ptr<MultipathLevelBuilder>> level_builders_;
- int leaf_count_;
- RowGroupWriter* row_group_writer_;
-};
-
-} // namespace
-
-// ----------------------------------------------------------------------
-// FileWriter implementation
-
-class FileWriterImpl : public FileWriter {
- public:
- FileWriterImpl(std::shared_ptr<::arrow::Schema> schema, MemoryPool* pool,
- std::unique_ptr<ParquetFileWriter> writer,
- std::shared_ptr<ArrowWriterProperties> arrow_properties)
- : schema_(std::move(schema)),
- writer_(std::move(writer)),
- row_group_writer_(nullptr),
- column_write_context_(pool, arrow_properties.get()),
- arrow_properties_(std::move(arrow_properties)),
- closed_(false) {}
-
- Status Init() {
- return SchemaManifest::Make(writer_->schema(), /*schema_metadata=*/nullptr,
- default_arrow_reader_properties(), &schema_manifest_);
- }
-
- Status NewRowGroup(int64_t chunk_size) override {
- if (row_group_writer_ != nullptr) {
- PARQUET_CATCH_NOT_OK(row_group_writer_->Close());
- }
- PARQUET_CATCH_NOT_OK(row_group_writer_ = writer_->AppendRowGroup());
- return Status::OK();
- }
-
- Status Close() override {
- if (!closed_) {
- // Make idempotent
- closed_ = true;
- if (row_group_writer_ != nullptr) {
- PARQUET_CATCH_NOT_OK(row_group_writer_->Close());
- }
- PARQUET_CATCH_NOT_OK(writer_->Close());
- }
- return Status::OK();
- }
-
- Status WriteColumnChunk(const Array& data) override {
- // A bit awkward here since cannot instantiate ChunkedArray from const Array&
- auto chunk = ::arrow::MakeArray(data.data());
- auto chunked_array = std::make_shared<::arrow::ChunkedArray>(chunk);
- return WriteColumnChunk(chunked_array, 0, data.length());
- }
-
- Status WriteColumnChunk(const std::shared_ptr<ChunkedArray>& data, int64_t offset,
- int64_t size) override {
- if (arrow_properties_->engine_version() == ArrowWriterProperties::V2 ||
- arrow_properties_->engine_version() == ArrowWriterProperties::V1) {
- ARROW_ASSIGN_OR_RAISE(
- std::unique_ptr<ArrowColumnWriterV2> writer,
- ArrowColumnWriterV2::Make(*data, offset, size, schema_manifest_,
- row_group_writer_));
- return writer->Write(&column_write_context_);
- }
- return Status::NotImplemented("Unknown engine version.");
- }
-
- Status WriteColumnChunk(const std::shared_ptr<::arrow::ChunkedArray>& data) override {
- return WriteColumnChunk(data, 0, data->length());
- }
-
- std::shared_ptr<::arrow::Schema> schema() const override { return schema_; }
-
- Status WriteTable(const Table& table, int64_t chunk_size) override {
- RETURN_NOT_OK(table.Validate());
-
- if (chunk_size <= 0 && table.num_rows() > 0) {
- return Status::Invalid("chunk size per row_group must be greater than 0");
- } else if (!table.schema()->Equals(*schema_, false)) {
- return Status::Invalid("table schema does not match this writer's. table:'",
- table.schema()->ToString(), "' this:'", schema_->ToString(),
- "'");
- } else if (chunk_size > this->properties().max_row_group_length()) {
- chunk_size = this->properties().max_row_group_length();
- }
-
- auto WriteRowGroup = [&](int64_t offset, int64_t size) {
- RETURN_NOT_OK(NewRowGroup(size));
- for (int i = 0; i < table.num_columns(); i++) {
- RETURN_NOT_OK(WriteColumnChunk(table.column(i), offset, size));
- }
- return Status::OK();
- };
-
- if (table.num_rows() == 0) {
- // Append a row group with 0 rows
- RETURN_NOT_OK_ELSE(WriteRowGroup(0, 0), PARQUET_IGNORE_NOT_OK(Close()));
- return Status::OK();
- }
-
- for (int chunk = 0; chunk * chunk_size < table.num_rows(); chunk++) {
- int64_t offset = chunk * chunk_size;
- RETURN_NOT_OK_ELSE(
- WriteRowGroup(offset, std::min(chunk_size, table.num_rows() - offset)),
- PARQUET_IGNORE_NOT_OK(Close()));
- }
- return Status::OK();
- }
-
- const WriterProperties& properties() const { return *writer_->properties(); }
-
- ::arrow::MemoryPool* memory_pool() const override {
- return column_write_context_.memory_pool;
- }
-
- const std::shared_ptr<FileMetaData> metadata() const override {
- return writer_->metadata();
- }
-
- private:
- friend class FileWriter;
-
- std::shared_ptr<::arrow::Schema> schema_;
-
- SchemaManifest schema_manifest_;
-
- std::unique_ptr<ParquetFileWriter> writer_;
- RowGroupWriter* row_group_writer_;
- ArrowWriteContext column_write_context_;
- std::shared_ptr<ArrowWriterProperties> arrow_properties_;
- bool closed_;
-};
-
-FileWriter::~FileWriter() {}
-
-Status FileWriter::Make(::arrow::MemoryPool* pool,
- std::unique_ptr<ParquetFileWriter> writer,
- std::shared_ptr<::arrow::Schema> schema,
- std::shared_ptr<ArrowWriterProperties> arrow_properties,
- std::unique_ptr<FileWriter>* out) {
- std::unique_ptr<FileWriterImpl> impl(new FileWriterImpl(
- std::move(schema), pool, std::move(writer), std::move(arrow_properties)));
- RETURN_NOT_OK(impl->Init());
- *out = std::move(impl);
- return Status::OK();
-}
-
-Status FileWriter::Open(const ::arrow::Schema& schema, ::arrow::MemoryPool* pool,
- std::shared_ptr<::arrow::io::OutputStream> sink,
- std::shared_ptr<WriterProperties> properties,
- std::unique_ptr<FileWriter>* writer) {
- return Open(std::move(schema), pool, std::move(sink), std::move(properties),
- default_arrow_writer_properties(), writer);
-}
-
-Status GetSchemaMetadata(const ::arrow::Schema& schema, ::arrow::MemoryPool* pool,
- const ArrowWriterProperties& properties,
- std::shared_ptr<const KeyValueMetadata>* out) {
- if (!properties.store_schema()) {
- *out = nullptr;
- return Status::OK();
- }
-
- static const std::string kArrowSchemaKey = "ARROW:schema";
- std::shared_ptr<KeyValueMetadata> result;
- if (schema.metadata()) {
- result = schema.metadata()->Copy();
- } else {
- result = ::arrow::key_value_metadata({}, {});
- }
-
- ARROW_ASSIGN_OR_RAISE(std::shared_ptr<Buffer> serialized,
- ::arrow::ipc::SerializeSchema(schema, pool));
-
- // The serialized schema is not UTF-8, which is required for Thrift
- std::string schema_as_string = serialized->ToString();
- std::string schema_base64 = ::arrow::util::base64_encode(
- reinterpret_cast<const unsigned char*>(schema_as_string.data()),
- static_cast<unsigned int>(schema_as_string.size()));
- result->Append(kArrowSchemaKey, schema_base64);
- *out = result;
- return Status::OK();
-}
-
-Status FileWriter::Open(const ::arrow::Schema& schema, ::arrow::MemoryPool* pool,
- std::shared_ptr<::arrow::io::OutputStream> sink,
- std::shared_ptr<WriterProperties> properties,
- std::shared_ptr<ArrowWriterProperties> arrow_properties,
- std::unique_ptr<FileWriter>* writer) {
- std::shared_ptr<SchemaDescriptor> parquet_schema;
- RETURN_NOT_OK(
- ToParquetSchema(&schema, *properties, *arrow_properties, &parquet_schema));
-
- auto schema_node = std::static_pointer_cast<GroupNode>(parquet_schema->schema_root());
-
- std::shared_ptr<const KeyValueMetadata> metadata;
- RETURN_NOT_OK(GetSchemaMetadata(schema, pool, *arrow_properties, &metadata));
-
- std::unique_ptr<ParquetFileWriter> base_writer;
- PARQUET_CATCH_NOT_OK(base_writer = ParquetFileWriter::Open(std::move(sink), schema_node,
- std::move(properties),
- std::move(metadata)));
-
- auto schema_ptr = std::make_shared<::arrow::Schema>(schema);
- return Make(pool, std::move(base_writer), std::move(schema_ptr),
- std::move(arrow_properties), writer);
-}
-
-Status WriteFileMetaData(const FileMetaData& file_metadata,
- ::arrow::io::OutputStream* sink) {
- PARQUET_CATCH_NOT_OK(::parquet::WriteFileMetaData(file_metadata, sink));
- return Status::OK();
-}
-
-Status WriteMetaDataFile(const FileMetaData& file_metadata,
- ::arrow::io::OutputStream* sink) {
- PARQUET_CATCH_NOT_OK(::parquet::WriteMetaDataFile(file_metadata, sink));
- return Status::OK();
-}
-
-Status WriteTable(const ::arrow::Table& table, ::arrow::MemoryPool* pool,
- std::shared_ptr<::arrow::io::OutputStream> sink, int64_t chunk_size,
- std::shared_ptr<WriterProperties> properties,
- std::shared_ptr<ArrowWriterProperties> arrow_properties) {
- std::unique_ptr<FileWriter> writer;
- RETURN_NOT_OK(FileWriter::Open(*table.schema(), pool, std::move(sink),
- std::move(properties), std::move(arrow_properties),
- &writer));
- RETURN_NOT_OK(writer->WriteTable(table, chunk_size));
- return writer->Close();
-}
-
-} // namespace arrow
-} // namespace parquet
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "parquet/arrow/writer.h"
+
+#include <algorithm>
+#include <deque>
+#include <string>
+#include <type_traits>
+#include <utility>
+#include <vector>
+
+#include "arrow/array.h"
+#include "arrow/extension_type.h"
+#include "arrow/ipc/writer.h"
+#include "arrow/table.h"
+#include "arrow/type.h"
+#include "arrow/util/base64.h"
+#include "arrow/util/checked_cast.h"
+#include "arrow/util/key_value_metadata.h"
+#include "arrow/util/logging.h"
+#include "arrow/util/make_unique.h"
+#include "arrow/visitor_inline.h"
+
+#include "parquet/arrow/path_internal.h"
+#include "parquet/arrow/reader_internal.h"
+#include "parquet/arrow/schema.h"
+#include "parquet/column_writer.h"
+#include "parquet/exception.h"
+#include "parquet/file_writer.h"
+#include "parquet/platform.h"
+#include "parquet/schema.h"
+
+using arrow::Array;
+using arrow::BinaryArray;
+using arrow::BooleanArray;
+using arrow::ChunkedArray;
+using arrow::DataType;
+using arrow::DictionaryArray;
+using arrow::ExtensionArray;
+using arrow::ExtensionType;
+using arrow::Field;
+using arrow::FixedSizeBinaryArray;
+using arrow::ListArray;
+using arrow::MemoryPool;
+using arrow::NumericArray;
+using arrow::PrimitiveArray;
+using arrow::ResizableBuffer;
+using arrow::Status;
+using arrow::Table;
+using arrow::TimeUnit;
+
+using arrow::internal::checked_cast;
+
+using parquet::ParquetFileWriter;
+using parquet::ParquetVersion;
+using parquet::schema::GroupNode;
+
+namespace parquet {
+namespace arrow {
+
+namespace {
+
+int CalculateLeafCount(const DataType* type) {
+ if (type->id() == ::arrow::Type::EXTENSION) {
+ type = checked_cast<const ExtensionType&>(*type).storage_type().get();
+ }
+ // Note num_fields() can be 0 for an empty struct type
+ if (!::arrow::is_nested(type->id())) {
+ // Primitive type.
+ return 1;
+ }
+
+ int num_leaves = 0;
+ for (const auto& field : type->fields()) {
+ num_leaves += CalculateLeafCount(field->type().get());
+ }
+ return num_leaves;
+}
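+
+// For example (sketch): struct<a: int32, b: list<item: utf8>> contributes two
+// leaf Parquet columns (`a` and `b.list.item`), so CalculateLeafCount returns 2.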
+
+// Determines if the |schema_field|'s root ancestor is nullable.
+bool HasNullableRoot(const SchemaManifest& schema_manifest,
+ const SchemaField* schema_field) {
+ DCHECK(schema_field != nullptr);
+ const SchemaField* current_field = schema_field;
+ bool nullable = schema_field->field->nullable();
+ while (current_field != nullptr) {
+ nullable = current_field->field->nullable();
+ current_field = schema_manifest.GetParent(current_field);
+ }
+ return nullable;
+}
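+
+// Example (sketch): for a leaf `c` under `a: struct<b: struct<c: int32 not null>>`
+// where only `a` is nullable, the walk above terminates at `a`, so
+// HasNullableRoot returns true even though the leaf itself is non-nullable.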
+
+// Manages writing nested parquet columns with support for all nested types
+// supported by parquet.
+class ArrowColumnWriterV2 {
+ public:
+ // Constructs a new object (use the Make() method below to construct from
+ // a ChunkedArray).
+ // level_builders should contain one MultipathLevelBuilder per chunk of the
+ // Arrow-column to write.
+ ArrowColumnWriterV2(std::vector<std::unique_ptr<MultipathLevelBuilder>> level_builders,
+ int leaf_count, RowGroupWriter* row_group_writer)
+ : level_builders_(std::move(level_builders)),
+ leaf_count_(leaf_count),
+ row_group_writer_(row_group_writer) {}
+
+ // Writes out all leaf parquet columns to the RowGroupWriter that this
+ // object was constructed with. Each leaf column is written fully before
+ // the next column is written (i.e. no buffering is assumed).
+ //
+ // Columns are written in DFS order.
+ Status Write(ArrowWriteContext* ctx) {
+ for (int leaf_idx = 0; leaf_idx < leaf_count_; leaf_idx++) {
+ ColumnWriter* column_writer;
+ PARQUET_CATCH_NOT_OK(column_writer = row_group_writer_->NextColumn());
+ for (auto& level_builder : level_builders_) {
+ RETURN_NOT_OK(level_builder->Write(
+ leaf_idx, ctx, [&](const MultipathLevelBuilderResult& result) {
+ size_t visited_component_size = result.post_list_visited_elements.size();
+ DCHECK_GT(visited_component_size, 0);
+ if (visited_component_size != 1) {
+ return Status::NotImplemented(
+ "Lists with non-zero length null components are not supported");
+ }
+ const ElementRange& range = result.post_list_visited_elements[0];
+ std::shared_ptr<Array> values_array =
+ result.leaf_array->Slice(range.start, range.Size());
+
+ return column_writer->WriteArrow(result.def_levels, result.rep_levels,
+ result.def_rep_level_count, *values_array,
+ ctx, result.leaf_is_nullable);
+ }));
+ }
+
+ PARQUET_CATCH_NOT_OK(column_writer->Close());
+ }
+ return Status::OK();
+ }
+
+ // Make a new object by converting each chunk in |data| to a MultipathLevelBuilder.
+ //
+  // It is necessary to create a new builder per array because the
+  // MultipathLevelBuilder extracts the data necessary for writing each leaf
+  // column at construction time (it optimizes based on null count), and
+  // slicing via |offset| creates ephemeral chunks that need to be tracked
+  // across each leaf-column write.
+ // This decision could potentially be revisited if we wanted to use "buffered"
+ // RowGroupWriters (we could construct each builder on demand in that case).
+ static ::arrow::Result<std::unique_ptr<ArrowColumnWriterV2>> Make(
+ const ChunkedArray& data, int64_t offset, const int64_t size,
+ const SchemaManifest& schema_manifest, RowGroupWriter* row_group_writer) {
+ int64_t absolute_position = 0;
+ int chunk_index = 0;
+ int64_t chunk_offset = 0;
+ if (data.length() == 0) {
+ return ::arrow::internal::make_unique<ArrowColumnWriterV2>(
+ std::vector<std::unique_ptr<MultipathLevelBuilder>>{},
+ CalculateLeafCount(data.type().get()), row_group_writer);
+ }
+ while (chunk_index < data.num_chunks() && absolute_position < offset) {
+ const int64_t chunk_length = data.chunk(chunk_index)->length();
+ if (absolute_position + chunk_length > offset) {
+ // Relative offset into the chunk to reach the desired start offset for
+ // writing
+ chunk_offset = offset - absolute_position;
+ break;
+ } else {
+ ++chunk_index;
+ absolute_position += chunk_length;
+ }
+ }
+
+ if (absolute_position >= data.length()) {
+ return Status::Invalid("Cannot write data at offset past end of chunked array");
+ }
+
+ int64_t values_written = 0;
+ std::vector<std::unique_ptr<MultipathLevelBuilder>> builders;
+ const int leaf_count = CalculateLeafCount(data.type().get());
+ bool is_nullable = false;
+    // The row_group_writer hasn't been advanced yet, so add 1 to the current
+    // column to get the one this instance will start writing to.
+ int column_index = row_group_writer->current_column() + 1;
+ for (int leaf_offset = 0; leaf_offset < leaf_count; ++leaf_offset) {
+ const SchemaField* schema_field = nullptr;
+ RETURN_NOT_OK(
+ schema_manifest.GetColumnField(column_index + leaf_offset, &schema_field));
+ bool nullable_root = HasNullableRoot(schema_manifest, schema_field);
+ if (leaf_offset == 0) {
+ is_nullable = nullable_root;
+ }
+
+// Don't validate common ancestry for all leaves if not in debug.
+#ifdef NDEBUG
+      break;
+#else
+      if (is_nullable != nullable_root) {
+        return Status::UnknownError(
+            "Unexpected mismatched nullability between column index ",
+            column_index + leaf_offset, " and ", column_index);
+      }
+#endif
+ }
+ while (values_written < size) {
+ const Array& chunk = *data.chunk(chunk_index);
+ const int64_t available_values = chunk.length() - chunk_offset;
+ const int64_t chunk_write_size = std::min(size - values_written, available_values);
+
+      // The chunk offset here will be 0 except possibly for the first chunk,
+      // because of the advancing logic above.
+ std::shared_ptr<Array> array_to_write = chunk.Slice(chunk_offset, chunk_write_size);
+
+ if (array_to_write->length() > 0) {
+ ARROW_ASSIGN_OR_RAISE(std::unique_ptr<MultipathLevelBuilder> builder,
+ MultipathLevelBuilder::Make(*array_to_write, is_nullable));
+ if (leaf_count != builder->GetLeafCount()) {
+ return Status::UnknownError("data type leaf_count != builder_leaf_count",
+ leaf_count, " ", builder->GetLeafCount());
+ }
+ builders.emplace_back(std::move(builder));
+ }
+
+ if (chunk_write_size == available_values) {
+ chunk_offset = 0;
+ ++chunk_index;
+ }
+ values_written += chunk_write_size;
+ }
+ return ::arrow::internal::make_unique<ArrowColumnWriterV2>(
+ std::move(builders), leaf_count, row_group_writer);
+ }
+
+ private:
+ // One builder per column-chunk.
+ std::vector<std::unique_ptr<MultipathLevelBuilder>> level_builders_;
+ int leaf_count_;
+ RowGroupWriter* row_group_writer_;
+};
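+
+// Usage sketch (illustrative), mirroring how FileWriterImpl drives this class:
+//
+//   ARROW_ASSIGN_OR_RAISE(
+//       std::unique_ptr<ArrowColumnWriterV2> writer,
+//       ArrowColumnWriterV2::Make(*chunked_array, /*offset=*/0,
+//                                 chunked_array->length(), schema_manifest,
+//                                 row_group_writer));
+//   RETURN_NOT_OK(writer->Write(&write_context));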
+
+} // namespace
+
+// ----------------------------------------------------------------------
+// FileWriter implementation
+
+class FileWriterImpl : public FileWriter {
+ public:
+ FileWriterImpl(std::shared_ptr<::arrow::Schema> schema, MemoryPool* pool,
+ std::unique_ptr<ParquetFileWriter> writer,
+ std::shared_ptr<ArrowWriterProperties> arrow_properties)
+ : schema_(std::move(schema)),
+ writer_(std::move(writer)),
+ row_group_writer_(nullptr),
+ column_write_context_(pool, arrow_properties.get()),
+ arrow_properties_(std::move(arrow_properties)),
+ closed_(false) {}
+
+ Status Init() {
+ return SchemaManifest::Make(writer_->schema(), /*schema_metadata=*/nullptr,
+ default_arrow_reader_properties(), &schema_manifest_);
+ }
+
+ Status NewRowGroup(int64_t chunk_size) override {
+ if (row_group_writer_ != nullptr) {
+ PARQUET_CATCH_NOT_OK(row_group_writer_->Close());
+ }
+ PARQUET_CATCH_NOT_OK(row_group_writer_ = writer_->AppendRowGroup());
+ return Status::OK();
+ }
+
+ Status Close() override {
+ if (!closed_) {
+ // Make idempotent
+ closed_ = true;
+ if (row_group_writer_ != nullptr) {
+ PARQUET_CATCH_NOT_OK(row_group_writer_->Close());
+ }
+ PARQUET_CATCH_NOT_OK(writer_->Close());
+ }
+ return Status::OK();
+ }
+
+ Status WriteColumnChunk(const Array& data) override {
+    // A bit awkward here since we cannot instantiate a ChunkedArray from a
+    // const Array&.
+ auto chunk = ::arrow::MakeArray(data.data());
+ auto chunked_array = std::make_shared<::arrow::ChunkedArray>(chunk);
+ return WriteColumnChunk(chunked_array, 0, data.length());
+ }
+
+ Status WriteColumnChunk(const std::shared_ptr<ChunkedArray>& data, int64_t offset,
+ int64_t size) override {
+ if (arrow_properties_->engine_version() == ArrowWriterProperties::V2 ||
+ arrow_properties_->engine_version() == ArrowWriterProperties::V1) {
+ ARROW_ASSIGN_OR_RAISE(
+ std::unique_ptr<ArrowColumnWriterV2> writer,
+ ArrowColumnWriterV2::Make(*data, offset, size, schema_manifest_,
+ row_group_writer_));
+ return writer->Write(&column_write_context_);
+ }
+ return Status::NotImplemented("Unknown engine version.");
+ }
+
+ Status WriteColumnChunk(const std::shared_ptr<::arrow::ChunkedArray>& data) override {
+ return WriteColumnChunk(data, 0, data->length());
+ }
+
+ std::shared_ptr<::arrow::Schema> schema() const override { return schema_; }
+
+ Status WriteTable(const Table& table, int64_t chunk_size) override {
+ RETURN_NOT_OK(table.Validate());
+
+ if (chunk_size <= 0 && table.num_rows() > 0) {
+ return Status::Invalid("chunk size per row_group must be greater than 0");
+ } else if (!table.schema()->Equals(*schema_, false)) {
+ return Status::Invalid("table schema does not match this writer's. table:'",
+ table.schema()->ToString(), "' this:'", schema_->ToString(),
+ "'");
+ } else if (chunk_size > this->properties().max_row_group_length()) {
+ chunk_size = this->properties().max_row_group_length();
+ }
+
+ auto WriteRowGroup = [&](int64_t offset, int64_t size) {
+ RETURN_NOT_OK(NewRowGroup(size));
+ for (int i = 0; i < table.num_columns(); i++) {
+ RETURN_NOT_OK(WriteColumnChunk(table.column(i), offset, size));
+ }
+ return Status::OK();
+ };
+
+ if (table.num_rows() == 0) {
+ // Append a row group with 0 rows
+ RETURN_NOT_OK_ELSE(WriteRowGroup(0, 0), PARQUET_IGNORE_NOT_OK(Close()));
+ return Status::OK();
+ }
+
+ for (int chunk = 0; chunk * chunk_size < table.num_rows(); chunk++) {
+ int64_t offset = chunk * chunk_size;
+ RETURN_NOT_OK_ELSE(
+ WriteRowGroup(offset, std::min(chunk_size, table.num_rows() - offset)),
+ PARQUET_IGNORE_NOT_OK(Close()));
+ }
+ return Status::OK();
+ }
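+
+  // Example (illustrative): with chunk_size = 4096, a 10000-row table is
+  // written as three row groups of 4096, 4096 and 1808 rows.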
+
+ const WriterProperties& properties() const { return *writer_->properties(); }
+
+ ::arrow::MemoryPool* memory_pool() const override {
+ return column_write_context_.memory_pool;
+ }
+
+ const std::shared_ptr<FileMetaData> metadata() const override {
+ return writer_->metadata();
+ }
+
+ private:
+ friend class FileWriter;
+
+ std::shared_ptr<::arrow::Schema> schema_;
+
+ SchemaManifest schema_manifest_;
+
+ std::unique_ptr<ParquetFileWriter> writer_;
+ RowGroupWriter* row_group_writer_;
+ ArrowWriteContext column_write_context_;
+ std::shared_ptr<ArrowWriterProperties> arrow_properties_;
+ bool closed_;
+};
+
+FileWriter::~FileWriter() {}
+
+Status FileWriter::Make(::arrow::MemoryPool* pool,
+ std::unique_ptr<ParquetFileWriter> writer,
+ std::shared_ptr<::arrow::Schema> schema,
+ std::shared_ptr<ArrowWriterProperties> arrow_properties,
+ std::unique_ptr<FileWriter>* out) {
+ std::unique_ptr<FileWriterImpl> impl(new FileWriterImpl(
+ std::move(schema), pool, std::move(writer), std::move(arrow_properties)));
+ RETURN_NOT_OK(impl->Init());
+ *out = std::move(impl);
+ return Status::OK();
+}
+
+Status FileWriter::Open(const ::arrow::Schema& schema, ::arrow::MemoryPool* pool,
+ std::shared_ptr<::arrow::io::OutputStream> sink,
+ std::shared_ptr<WriterProperties> properties,
+ std::unique_ptr<FileWriter>* writer) {
+ return Open(std::move(schema), pool, std::move(sink), std::move(properties),
+ default_arrow_writer_properties(), writer);
+}
+
+Status GetSchemaMetadata(const ::arrow::Schema& schema, ::arrow::MemoryPool* pool,
+ const ArrowWriterProperties& properties,
+ std::shared_ptr<const KeyValueMetadata>* out) {
+ if (!properties.store_schema()) {
+ *out = nullptr;
+ return Status::OK();
+ }
+
+ static const std::string kArrowSchemaKey = "ARROW:schema";
+ std::shared_ptr<KeyValueMetadata> result;
+ if (schema.metadata()) {
+ result = schema.metadata()->Copy();
+ } else {
+ result = ::arrow::key_value_metadata({}, {});
+ }
+
+ ARROW_ASSIGN_OR_RAISE(std::shared_ptr<Buffer> serialized,
+ ::arrow::ipc::SerializeSchema(schema, pool));
+
+ // The serialized schema is not UTF-8, which is required for Thrift
+ std::string schema_as_string = serialized->ToString();
+ std::string schema_base64 = ::arrow::util::base64_encode(
+ reinterpret_cast<const unsigned char*>(schema_as_string.data()),
+ static_cast<unsigned int>(schema_as_string.size()));
+ result->Append(kArrowSchemaKey, schema_base64);
+ *out = result;
+ return Status::OK();
+}
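+
+// Note: a reader that finds the "ARROW:schema" key can base64-decode the value
+// and recover the original Arrow schema from the IPC payload (e.g. with
+// ::arrow::ipc::ReadSchema); this is how store_schema() round-trips schemas.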
+
+Status FileWriter::Open(const ::arrow::Schema& schema, ::arrow::MemoryPool* pool,
+ std::shared_ptr<::arrow::io::OutputStream> sink,
+ std::shared_ptr<WriterProperties> properties,
+ std::shared_ptr<ArrowWriterProperties> arrow_properties,
+ std::unique_ptr<FileWriter>* writer) {
+ std::shared_ptr<SchemaDescriptor> parquet_schema;
+ RETURN_NOT_OK(
+ ToParquetSchema(&schema, *properties, *arrow_properties, &parquet_schema));
+
+ auto schema_node = std::static_pointer_cast<GroupNode>(parquet_schema->schema_root());
+
+ std::shared_ptr<const KeyValueMetadata> metadata;
+ RETURN_NOT_OK(GetSchemaMetadata(schema, pool, *arrow_properties, &metadata));
+
+ std::unique_ptr<ParquetFileWriter> base_writer;
+ PARQUET_CATCH_NOT_OK(base_writer = ParquetFileWriter::Open(std::move(sink), schema_node,
+ std::move(properties),
+ std::move(metadata)));
+
+ auto schema_ptr = std::make_shared<::arrow::Schema>(schema);
+ return Make(pool, std::move(base_writer), std::move(schema_ptr),
+ std::move(arrow_properties), writer);
+}
+
+Status WriteFileMetaData(const FileMetaData& file_metadata,
+ ::arrow::io::OutputStream* sink) {
+ PARQUET_CATCH_NOT_OK(::parquet::WriteFileMetaData(file_metadata, sink));
+ return Status::OK();
+}
+
+Status WriteMetaDataFile(const FileMetaData& file_metadata,
+ ::arrow::io::OutputStream* sink) {
+ PARQUET_CATCH_NOT_OK(::parquet::WriteMetaDataFile(file_metadata, sink));
+ return Status::OK();
+}
+
+Status WriteTable(const ::arrow::Table& table, ::arrow::MemoryPool* pool,
+ std::shared_ptr<::arrow::io::OutputStream> sink, int64_t chunk_size,
+ std::shared_ptr<WriterProperties> properties,
+ std::shared_ptr<ArrowWriterProperties> arrow_properties) {
+ std::unique_ptr<FileWriter> writer;
+ RETURN_NOT_OK(FileWriter::Open(*table.schema(), pool, std::move(sink),
+ std::move(properties), std::move(arrow_properties),
+ &writer));
+ RETURN_NOT_OK(writer->WriteTable(table, chunk_size));
+ return writer->Close();
+}
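+
+// Usage sketch (illustrative; assumes a table and an already-opened sink):
+//
+//   std::shared_ptr<::arrow::io::FileOutputStream> sink;
+//   PARQUET_ASSIGN_OR_THROW(
+//       sink, ::arrow::io::FileOutputStream::Open("/tmp/out.parquet"));
+//   PARQUET_THROW_NOT_OK(WriteTable(*table, ::arrow::default_memory_pool(),
+//                                   sink, /*chunk_size=*/65536));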
+
+} // namespace arrow
+} // namespace parquet
diff --git a/contrib/libs/apache/arrow/cpp/src/parquet/arrow/writer.h b/contrib/libs/apache/arrow/cpp/src/parquet/arrow/writer.h
index 43c5ede1ab5..f31f3d03def 100644
--- a/contrib/libs/apache/arrow/cpp/src/parquet/arrow/writer.h
+++ b/contrib/libs/apache/arrow/cpp/src/parquet/arrow/writer.h
@@ -1,109 +1,109 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-#pragma once
-
-#include <cstdint>
-#include <memory>
-
-#include "parquet/platform.h"
-#include "parquet/properties.h"
-
-namespace arrow {
-
-class Array;
-class ChunkedArray;
-class Schema;
-class Table;
-
-} // namespace arrow
-
-namespace parquet {
-
-class FileMetaData;
-class ParquetFileWriter;
-
-namespace arrow {
-
-/// \brief Iterative FileWriter class
-///
-/// Start a new RowGroup or Chunk with NewRowGroup.
-/// Write column-by-column the whole column chunk.
-///
-/// If PARQUET:field_id is present as a metadata key on a field, and the corresponding
-/// value is a nonnegative integer, then it will be used as the field_id in the parquet
-/// file.
-class PARQUET_EXPORT FileWriter {
- public:
- static ::arrow::Status Make(MemoryPool* pool, std::unique_ptr<ParquetFileWriter> writer,
- std::shared_ptr<::arrow::Schema> schema,
- std::shared_ptr<ArrowWriterProperties> arrow_properties,
- std::unique_ptr<FileWriter>* out);
-
- static ::arrow::Status Open(const ::arrow::Schema& schema, MemoryPool* pool,
- std::shared_ptr<::arrow::io::OutputStream> sink,
- std::shared_ptr<WriterProperties> properties,
- std::unique_ptr<FileWriter>* writer);
-
- static ::arrow::Status Open(const ::arrow::Schema& schema, MemoryPool* pool,
- std::shared_ptr<::arrow::io::OutputStream> sink,
- std::shared_ptr<WriterProperties> properties,
- std::shared_ptr<ArrowWriterProperties> arrow_properties,
- std::unique_ptr<FileWriter>* writer);
-
- virtual std::shared_ptr<::arrow::Schema> schema() const = 0;
-
- /// \brief Write a Table to Parquet.
- virtual ::arrow::Status WriteTable(const ::arrow::Table& table, int64_t chunk_size) = 0;
-
- virtual ::arrow::Status NewRowGroup(int64_t chunk_size) = 0;
- virtual ::arrow::Status WriteColumnChunk(const ::arrow::Array& data) = 0;
-
- /// \brief Write ColumnChunk in row group using slice of a ChunkedArray
- virtual ::arrow::Status WriteColumnChunk(
- const std::shared_ptr<::arrow::ChunkedArray>& data, int64_t offset,
- int64_t size) = 0;
-
- virtual ::arrow::Status WriteColumnChunk(
- const std::shared_ptr<::arrow::ChunkedArray>& data) = 0;
- virtual ::arrow::Status Close() = 0;
- virtual ~FileWriter();
-
- virtual MemoryPool* memory_pool() const = 0;
- virtual const std::shared_ptr<FileMetaData> metadata() const = 0;
-};
-
-/// \brief Write Parquet file metadata only to indicated Arrow OutputStream
-PARQUET_EXPORT
-::arrow::Status WriteFileMetaData(const FileMetaData& file_metadata,
- ::arrow::io::OutputStream* sink);
-
-/// \brief Write metadata-only Parquet file to indicated Arrow OutputStream
-PARQUET_EXPORT
-::arrow::Status WriteMetaDataFile(const FileMetaData& file_metadata,
- ::arrow::io::OutputStream* sink);
-
-/// \brief Write a Table to Parquet.
-::arrow::Status PARQUET_EXPORT
-WriteTable(const ::arrow::Table& table, MemoryPool* pool,
- std::shared_ptr<::arrow::io::OutputStream> sink, int64_t chunk_size,
- std::shared_ptr<WriterProperties> properties = default_writer_properties(),
- std::shared_ptr<ArrowWriterProperties> arrow_properties =
- default_arrow_writer_properties());
-
-} // namespace arrow
-} // namespace parquet
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+
+#include <cstdint>
+#include <memory>
+
+#include "parquet/platform.h"
+#include "parquet/properties.h"
+
+namespace arrow {
+
+class Array;
+class ChunkedArray;
+class Schema;
+class Table;
+
+} // namespace arrow
+
+namespace parquet {
+
+class FileMetaData;
+class ParquetFileWriter;
+
+namespace arrow {
+
+/// \brief Iterative FileWriter class
+///
+/// Start a new row group (or chunk) with NewRowGroup, then write each column
+/// chunk in full, one column at a time.
+///
+/// If PARQUET:field_id is present as a metadata key on a field, and the corresponding
+/// value is a nonnegative integer, then it will be used as the field_id in the parquet
+/// file.
+class PARQUET_EXPORT FileWriter {
+ public:
+ static ::arrow::Status Make(MemoryPool* pool, std::unique_ptr<ParquetFileWriter> writer,
+ std::shared_ptr<::arrow::Schema> schema,
+ std::shared_ptr<ArrowWriterProperties> arrow_properties,
+ std::unique_ptr<FileWriter>* out);
+
+ static ::arrow::Status Open(const ::arrow::Schema& schema, MemoryPool* pool,
+ std::shared_ptr<::arrow::io::OutputStream> sink,
+ std::shared_ptr<WriterProperties> properties,
+ std::unique_ptr<FileWriter>* writer);
+
+ static ::arrow::Status Open(const ::arrow::Schema& schema, MemoryPool* pool,
+ std::shared_ptr<::arrow::io::OutputStream> sink,
+ std::shared_ptr<WriterProperties> properties,
+ std::shared_ptr<ArrowWriterProperties> arrow_properties,
+ std::unique_ptr<FileWriter>* writer);
+
+ virtual std::shared_ptr<::arrow::Schema> schema() const = 0;
+
+ /// \brief Write a Table to Parquet.
+ virtual ::arrow::Status WriteTable(const ::arrow::Table& table, int64_t chunk_size) = 0;
+
+ virtual ::arrow::Status NewRowGroup(int64_t chunk_size) = 0;
+ virtual ::arrow::Status WriteColumnChunk(const ::arrow::Array& data) = 0;
+
+ /// \brief Write ColumnChunk in row group using slice of a ChunkedArray
+ virtual ::arrow::Status WriteColumnChunk(
+ const std::shared_ptr<::arrow::ChunkedArray>& data, int64_t offset,
+ int64_t size) = 0;
+
+ virtual ::arrow::Status WriteColumnChunk(
+ const std::shared_ptr<::arrow::ChunkedArray>& data) = 0;
+ virtual ::arrow::Status Close() = 0;
+ virtual ~FileWriter();
+
+ virtual MemoryPool* memory_pool() const = 0;
+ virtual const std::shared_ptr<FileMetaData> metadata() const = 0;
+};
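+
+// Usage sketch (illustrative): writing a table one row group at a time.
+//
+//   std::unique_ptr<FileWriter> writer;
+//   RETURN_NOT_OK(FileWriter::Open(*schema, pool, sink, properties,
+//                                  arrow_properties, &writer));
+//   RETURN_NOT_OK(writer->NewRowGroup(table->num_rows()));
+//   for (int i = 0; i < table->num_columns(); ++i) {
+//     RETURN_NOT_OK(writer->WriteColumnChunk(table->column(i)));
+//   }
+//   RETURN_NOT_OK(writer->Close());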
+
+/// \brief Write Parquet file metadata only to indicated Arrow OutputStream
+PARQUET_EXPORT
+::arrow::Status WriteFileMetaData(const FileMetaData& file_metadata,
+ ::arrow::io::OutputStream* sink);
+
+/// \brief Write metadata-only Parquet file to indicated Arrow OutputStream
+PARQUET_EXPORT
+::arrow::Status WriteMetaDataFile(const FileMetaData& file_metadata,
+ ::arrow::io::OutputStream* sink);
+
+/// \brief Write a Table to Parquet.
+::arrow::Status PARQUET_EXPORT
+WriteTable(const ::arrow::Table& table, MemoryPool* pool,
+ std::shared_ptr<::arrow::io::OutputStream> sink, int64_t chunk_size,
+ std::shared_ptr<WriterProperties> properties = default_writer_properties(),
+ std::shared_ptr<ArrowWriterProperties> arrow_properties =
+ default_arrow_writer_properties());
+
+} // namespace arrow
+} // namespace parquet
diff --git a/contrib/libs/apache/arrow/cpp/src/parquet/bloom_filter.cc b/contrib/libs/apache/arrow/cpp/src/parquet/bloom_filter.cc
index e56449060ef..f6f6d327d06 100644
--- a/contrib/libs/apache/arrow/cpp/src/parquet/bloom_filter.cc
+++ b/contrib/libs/apache/arrow/cpp/src/parquet/bloom_filter.cc
@@ -1,162 +1,162 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-#include <cstdint>
-#include <cstring>
-
-#include "arrow/result.h"
-#include "arrow/util/logging.h"
-#include "parquet/bloom_filter.h"
-#include "parquet/exception.h"
-#include "parquet/murmur3.h"
-
-namespace parquet {
-constexpr uint32_t BlockSplitBloomFilter::SALT[kBitsSetPerBlock];
-
-BlockSplitBloomFilter::BlockSplitBloomFilter()
- : pool_(::arrow::default_memory_pool()),
- hash_strategy_(HashStrategy::MURMUR3_X64_128),
- algorithm_(Algorithm::BLOCK) {}
-
-void BlockSplitBloomFilter::Init(uint32_t num_bytes) {
- if (num_bytes < kMinimumBloomFilterBytes) {
- num_bytes = kMinimumBloomFilterBytes;
- }
-
- // Get next power of 2 if it is not power of 2.
- if ((num_bytes & (num_bytes - 1)) != 0) {
- num_bytes = static_cast<uint32_t>(::arrow::BitUtil::NextPower2(num_bytes));
- }
-
- if (num_bytes > kMaximumBloomFilterBytes) {
- num_bytes = kMaximumBloomFilterBytes;
- }
-
- num_bytes_ = num_bytes;
- PARQUET_ASSIGN_OR_THROW(data_, ::arrow::AllocateBuffer(num_bytes_, pool_));
- memset(data_->mutable_data(), 0, num_bytes_);
-
- this->hasher_.reset(new MurmurHash3());
-}
-
-void BlockSplitBloomFilter::Init(const uint8_t* bitset, uint32_t num_bytes) {
- DCHECK(bitset != nullptr);
-
- if (num_bytes < kMinimumBloomFilterBytes || num_bytes > kMaximumBloomFilterBytes ||
- (num_bytes & (num_bytes - 1)) != 0) {
- throw ParquetException("Given length of bitset is illegal");
- }
-
- num_bytes_ = num_bytes;
- PARQUET_ASSIGN_OR_THROW(data_, ::arrow::AllocateBuffer(num_bytes_, pool_));
- memcpy(data_->mutable_data(), bitset, num_bytes_);
-
- this->hasher_.reset(new MurmurHash3());
-}
-
-BlockSplitBloomFilter BlockSplitBloomFilter::Deserialize(ArrowInputStream* input) {
- uint32_t len, hash, algorithm;
- int64_t bytes_available;
-
- PARQUET_ASSIGN_OR_THROW(bytes_available, input->Read(sizeof(uint32_t), &len));
- if (static_cast<uint32_t>(bytes_available) != sizeof(uint32_t)) {
- throw ParquetException("Failed to deserialize from input stream");
- }
-
- PARQUET_ASSIGN_OR_THROW(bytes_available, input->Read(sizeof(uint32_t), &hash));
- if (static_cast<uint32_t>(bytes_available) != sizeof(uint32_t)) {
- throw ParquetException("Failed to deserialize from input stream");
- }
- if (static_cast<HashStrategy>(hash) != HashStrategy::MURMUR3_X64_128) {
- throw ParquetException("Unsupported hash strategy");
- }
-
- PARQUET_ASSIGN_OR_THROW(bytes_available, input->Read(sizeof(uint32_t), &algorithm));
- if (static_cast<uint32_t>(bytes_available) != sizeof(uint32_t)) {
- throw ParquetException("Failed to deserialize from input stream");
- }
- if (static_cast<Algorithm>(algorithm) != BloomFilter::Algorithm::BLOCK) {
- throw ParquetException("Unsupported Bloom filter algorithm");
- }
-
- BlockSplitBloomFilter bloom_filter;
-
- PARQUET_ASSIGN_OR_THROW(auto buffer, input->Read(len));
- bloom_filter.Init(buffer->data(), len);
- return bloom_filter;
-}
-
-void BlockSplitBloomFilter::WriteTo(ArrowOutputStream* sink) const {
- DCHECK(sink != nullptr);
-
- PARQUET_THROW_NOT_OK(
- sink->Write(reinterpret_cast<const uint8_t*>(&num_bytes_), sizeof(num_bytes_)));
- PARQUET_THROW_NOT_OK(sink->Write(reinterpret_cast<const uint8_t*>(&hash_strategy_),
- sizeof(hash_strategy_)));
- PARQUET_THROW_NOT_OK(
- sink->Write(reinterpret_cast<const uint8_t*>(&algorithm_), sizeof(algorithm_)));
- PARQUET_THROW_NOT_OK(sink->Write(data_->mutable_data(), num_bytes_));
-}
-
-void BlockSplitBloomFilter::SetMask(uint32_t key, BlockMask& block_mask) const {
- for (int i = 0; i < kBitsSetPerBlock; ++i) {
- block_mask.item[i] = key * SALT[i];
- }
-
- for (int i = 0; i < kBitsSetPerBlock; ++i) {
- block_mask.item[i] = block_mask.item[i] >> 27;
- }
-
- for (int i = 0; i < kBitsSetPerBlock; ++i) {
- block_mask.item[i] = UINT32_C(0x1) << block_mask.item[i];
- }
-}
-
-bool BlockSplitBloomFilter::FindHash(uint64_t hash) const {
- const uint32_t bucket_index =
- static_cast<uint32_t>((hash >> 32) & (num_bytes_ / kBytesPerFilterBlock - 1));
- uint32_t key = static_cast<uint32_t>(hash);
- uint32_t* bitset32 = reinterpret_cast<uint32_t*>(data_->mutable_data());
-
- // Calculate mask for bucket.
- BlockMask block_mask;
- SetMask(key, block_mask);
-
- for (int i = 0; i < kBitsSetPerBlock; ++i) {
- if (0 == (bitset32[kBitsSetPerBlock * bucket_index + i] & block_mask.item[i])) {
- return false;
- }
- }
- return true;
-}
-
-void BlockSplitBloomFilter::InsertHash(uint64_t hash) {
- const uint32_t bucket_index =
- static_cast<uint32_t>(hash >> 32) & (num_bytes_ / kBytesPerFilterBlock - 1);
- uint32_t key = static_cast<uint32_t>(hash);
- uint32_t* bitset32 = reinterpret_cast<uint32_t*>(data_->mutable_data());
-
- // Calculate mask for bucket.
- BlockMask block_mask;
- SetMask(key, block_mask);
-
- for (int i = 0; i < kBitsSetPerBlock; i++) {
- bitset32[bucket_index * kBitsSetPerBlock + i] |= block_mask.item[i];
- }
-}
-
-} // namespace parquet
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include <cstdint>
+#include <cstring>
+
+#include "arrow/result.h"
+#include "arrow/util/logging.h"
+#include "parquet/bloom_filter.h"
+#include "parquet/exception.h"
+#include "parquet/murmur3.h"
+
+namespace parquet {
+constexpr uint32_t BlockSplitBloomFilter::SALT[kBitsSetPerBlock];
+
+BlockSplitBloomFilter::BlockSplitBloomFilter()
+ : pool_(::arrow::default_memory_pool()),
+ hash_strategy_(HashStrategy::MURMUR3_X64_128),
+ algorithm_(Algorithm::BLOCK) {}
+
+void BlockSplitBloomFilter::Init(uint32_t num_bytes) {
+ if (num_bytes < kMinimumBloomFilterBytes) {
+ num_bytes = kMinimumBloomFilterBytes;
+ }
+
+  // Round up to the next power of 2 if num_bytes is not a power of 2.
+ if ((num_bytes & (num_bytes - 1)) != 0) {
+ num_bytes = static_cast<uint32_t>(::arrow::BitUtil::NextPower2(num_bytes));
+ }
+
+ if (num_bytes > kMaximumBloomFilterBytes) {
+ num_bytes = kMaximumBloomFilterBytes;
+ }
+
+ num_bytes_ = num_bytes;
+ PARQUET_ASSIGN_OR_THROW(data_, ::arrow::AllocateBuffer(num_bytes_, pool_));
+ memset(data_->mutable_data(), 0, num_bytes_);
+
+ this->hasher_.reset(new MurmurHash3());
+}
+
+void BlockSplitBloomFilter::Init(const uint8_t* bitset, uint32_t num_bytes) {
+ DCHECK(bitset != nullptr);
+
+ if (num_bytes < kMinimumBloomFilterBytes || num_bytes > kMaximumBloomFilterBytes ||
+ (num_bytes & (num_bytes - 1)) != 0) {
+ throw ParquetException("Given length of bitset is illegal");
+ }
+
+ num_bytes_ = num_bytes;
+ PARQUET_ASSIGN_OR_THROW(data_, ::arrow::AllocateBuffer(num_bytes_, pool_));
+ memcpy(data_->mutable_data(), bitset, num_bytes_);
+
+ this->hasher_.reset(new MurmurHash3());
+}
+
+BlockSplitBloomFilter BlockSplitBloomFilter::Deserialize(ArrowInputStream* input) {
+ uint32_t len, hash, algorithm;
+ int64_t bytes_available;
+
+ PARQUET_ASSIGN_OR_THROW(bytes_available, input->Read(sizeof(uint32_t), &len));
+ if (static_cast<uint32_t>(bytes_available) != sizeof(uint32_t)) {
+ throw ParquetException("Failed to deserialize from input stream");
+ }
+
+ PARQUET_ASSIGN_OR_THROW(bytes_available, input->Read(sizeof(uint32_t), &hash));
+ if (static_cast<uint32_t>(bytes_available) != sizeof(uint32_t)) {
+ throw ParquetException("Failed to deserialize from input stream");
+ }
+ if (static_cast<HashStrategy>(hash) != HashStrategy::MURMUR3_X64_128) {
+ throw ParquetException("Unsupported hash strategy");
+ }
+
+ PARQUET_ASSIGN_OR_THROW(bytes_available, input->Read(sizeof(uint32_t), &algorithm));
+ if (static_cast<uint32_t>(bytes_available) != sizeof(uint32_t)) {
+ throw ParquetException("Failed to deserialize from input stream");
+ }
+ if (static_cast<Algorithm>(algorithm) != BloomFilter::Algorithm::BLOCK) {
+ throw ParquetException("Unsupported Bloom filter algorithm");
+ }
+
+ BlockSplitBloomFilter bloom_filter;
+
+ PARQUET_ASSIGN_OR_THROW(auto buffer, input->Read(len));
+ bloom_filter.Init(buffer->data(), len);
+ return bloom_filter;
+}
+
+void BlockSplitBloomFilter::WriteTo(ArrowOutputStream* sink) const {
+ DCHECK(sink != nullptr);
+
+ PARQUET_THROW_NOT_OK(
+ sink->Write(reinterpret_cast<const uint8_t*>(&num_bytes_), sizeof(num_bytes_)));
+ PARQUET_THROW_NOT_OK(sink->Write(reinterpret_cast<const uint8_t*>(&hash_strategy_),
+ sizeof(hash_strategy_)));
+ PARQUET_THROW_NOT_OK(
+ sink->Write(reinterpret_cast<const uint8_t*>(&algorithm_), sizeof(algorithm_)));
+ PARQUET_THROW_NOT_OK(sink->Write(data_->mutable_data(), num_bytes_));
+}
+
+void BlockSplitBloomFilter::SetMask(uint32_t key, BlockMask& block_mask) const {
+ for (int i = 0; i < kBitsSetPerBlock; ++i) {
+ block_mask.item[i] = key * SALT[i];
+ }
+
+ for (int i = 0; i < kBitsSetPerBlock; ++i) {
+ block_mask.item[i] = block_mask.item[i] >> 27;
+ }
+
+ for (int i = 0; i < kBitsSetPerBlock; ++i) {
+ block_mask.item[i] = UINT32_C(0x1) << block_mask.item[i];
+ }
+}
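+
+// Example (illustrative): for each of the eight 32-bit words, key * SALT[i] is
+// reduced to its top five bits (a value in [0, 31]) by the >> 27 shift, and
+// that value selects the single bit set in word i, so each block gets exactly
+// eight candidate bits.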
+
+bool BlockSplitBloomFilter::FindHash(uint64_t hash) const {
+ const uint32_t bucket_index =
+ static_cast<uint32_t>((hash >> 32) & (num_bytes_ / kBytesPerFilterBlock - 1));
+ uint32_t key = static_cast<uint32_t>(hash);
+ uint32_t* bitset32 = reinterpret_cast<uint32_t*>(data_->mutable_data());
+
+ // Calculate mask for bucket.
+ BlockMask block_mask;
+ SetMask(key, block_mask);
+
+ for (int i = 0; i < kBitsSetPerBlock; ++i) {
+ if (0 == (bitset32[kBitsSetPerBlock * bucket_index + i] & block_mask.item[i])) {
+ return false;
+ }
+ }
+ return true;
+}
+
+void BlockSplitBloomFilter::InsertHash(uint64_t hash) {
+ const uint32_t bucket_index =
+ static_cast<uint32_t>(hash >> 32) & (num_bytes_ / kBytesPerFilterBlock - 1);
+ uint32_t key = static_cast<uint32_t>(hash);
+ uint32_t* bitset32 = reinterpret_cast<uint32_t*>(data_->mutable_data());
+
+ // Calculate mask for bucket.
+ BlockMask block_mask;
+ SetMask(key, block_mask);
+
+ for (int i = 0; i < kBitsSetPerBlock; i++) {
+ bitset32[bucket_index * kBitsSetPerBlock + i] |= block_mask.item[i];
+ }
+}
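+
+// Usage sketch (illustrative): insert a value and probe for it.
+//
+//   BlockSplitBloomFilter filter;
+//   filter.Init(BlockSplitBloomFilter::OptimalNumOfBits(/*ndv=*/1000,
+//                                                       /*fpp=*/0.01) / 8);
+//   const uint64_t h = filter.Hash(int64_t{42});
+//   filter.InsertHash(h);
+//   const bool maybe_present = filter.FindHash(h);  // always true: no false
+//                                                   // negatives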
+
+} // namespace parquet
diff --git a/contrib/libs/apache/arrow/cpp/src/parquet/bloom_filter.h b/contrib/libs/apache/arrow/cpp/src/parquet/bloom_filter.h
index 218a1162674..39f9561ae5b 100644
--- a/contrib/libs/apache/arrow/cpp/src/parquet/bloom_filter.h
+++ b/contrib/libs/apache/arrow/cpp/src/parquet/bloom_filter.h
@@ -1,247 +1,247 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-#pragma once
-
-#include <cmath>
-#include <cstdint>
-#include <memory>
-
-#include "arrow/util/bit_util.h"
-#include "arrow/util/logging.h"
-#include "parquet/hasher.h"
-#include "parquet/platform.h"
-#include "parquet/types.h"
-
-namespace parquet {
-
-// A Bloom filter is a compact structure to indicate whether an item is not in a set or
-// probably in a set. The Bloom filter usually consists of a bit set that represents a
-// set of elements, a hash strategy and a Bloom filter algorithm.
-class PARQUET_EXPORT BloomFilter {
- public:
- // Maximum Bloom filter size, it sets to HDFS default block size 128MB
- // This value will be reconsidered when implementing Bloom filter producer.
- static constexpr uint32_t kMaximumBloomFilterBytes = 128 * 1024 * 1024;
-
- /// Determine whether an element exist in set or not.
- ///
- /// @param hash the element to contain.
- /// @return false if value is definitely not in set, and true means PROBABLY
- /// in set.
- virtual bool FindHash(uint64_t hash) const = 0;
-
- /// Insert element to set represented by Bloom filter bitset.
- /// @param hash the hash of value to insert into Bloom filter.
- virtual void InsertHash(uint64_t hash) = 0;
-
- /// Write this Bloom filter to an output stream. A Bloom filter structure should
- /// include bitset length, hash strategy, algorithm, and bitset.
- ///
- /// @param sink the output stream to write
- virtual void WriteTo(ArrowOutputStream* sink) const = 0;
-
- /// Get the number of bytes of bitset
- virtual uint32_t GetBitsetSize() const = 0;
-
- /// Compute hash for 32 bits value by using its plain encoding result.
- ///
- /// @param value the value to hash.
- /// @return hash result.
- virtual uint64_t Hash(int32_t value) const = 0;
-
- /// Compute hash for 64 bits value by using its plain encoding result.
- ///
- /// @param value the value to hash.
- /// @return hash result.
- virtual uint64_t Hash(int64_t value) const = 0;
-
- /// Compute hash for float value by using its plain encoding result.
- ///
- /// @param value the value to hash.
- /// @return hash result.
- virtual uint64_t Hash(float value) const = 0;
-
- /// Compute hash for double value by using its plain encoding result.
- ///
- /// @param value the value to hash.
- /// @return hash result.
- virtual uint64_t Hash(double value) const = 0;
-
- /// Compute hash for Int96 value by using its plain encoding result.
- ///
- /// @param value the value to hash.
- /// @return hash result.
- virtual uint64_t Hash(const Int96* value) const = 0;
-
- /// Compute hash for ByteArray value by using its plain encoding result.
- ///
- /// @param value the value to hash.
- /// @return hash result.
- virtual uint64_t Hash(const ByteArray* value) const = 0;
-
- /// Compute hash for fixed byte array value by using its plain encoding result.
- ///
- /// @param value the value address.
- /// @param len the value length.
- /// @return hash result.
- virtual uint64_t Hash(const FLBA* value, uint32_t len) const = 0;
-
- virtual ~BloomFilter() {}
-
- protected:
- // Hash strategy available for Bloom filter.
- enum class HashStrategy : uint32_t { MURMUR3_X64_128 = 0 };
-
- // Bloom filter algorithm.
- enum class Algorithm : uint32_t { BLOCK = 0 };
-};
-
-// The BlockSplitBloomFilter is implemented using block-based Bloom filters from
-// Putze et al.'s "Cache-,Hash- and Space-Efficient Bloom filters". The basic idea is to
-// hash the item to a tiny Bloom filter which size fit a single cache line or smaller.
-//
-// This implementation sets 8 bits in each tiny Bloom filter. Each tiny Bloom
-// filter is 32 bytes to take advantage of 32-byte SIMD instructions.
-class PARQUET_EXPORT BlockSplitBloomFilter : public BloomFilter {
- public:
- /// The constructor of BlockSplitBloomFilter. It uses murmur3_x64_128 as hash function.
- BlockSplitBloomFilter();
-
- /// Initialize the BlockSplitBloomFilter. The range of num_bytes should be within
- /// [kMinimumBloomFilterBytes, kMaximumBloomFilterBytes], it will be
- /// rounded up/down to lower/upper bound if num_bytes is out of range and also
- /// will be rounded up to a power of 2.
- ///
- /// @param num_bytes The number of bytes to store Bloom filter bitset.
- void Init(uint32_t num_bytes);
-
- /// Initialize the BlockSplitBloomFilter. It copies the bitset as underlying
- /// bitset because the given bitset may not satisfy the 32-byte alignment requirement
- /// which may lead to segfault when performing SIMD instructions. It is the caller's
- /// responsibility to free the bitset passed in. This is used when reconstructing
- /// a Bloom filter from a parquet file.
- ///
- /// @param bitset The given bitset to initialize the Bloom filter.
- /// @param num_bytes The number of bytes of given bitset.
- void Init(const uint8_t* bitset, uint32_t num_bytes);
-
- // Minimum Bloom filter size, it sets to 32 bytes to fit a tiny Bloom filter.
- static constexpr uint32_t kMinimumBloomFilterBytes = 32;
-
- /// Calculate optimal size according to the number of distinct values and false
- /// positive probability.
- ///
- /// @param ndv The number of distinct values.
- /// @param fpp The false positive probability.
- /// @return it always return a value between kMinimumBloomFilterBytes and
- /// kMaximumBloomFilterBytes, and the return value is always a power of 2
- static uint32_t OptimalNumOfBits(uint32_t ndv, double fpp) {
- DCHECK(fpp > 0.0 && fpp < 1.0);
- const double m = -8.0 * ndv / log(1 - pow(fpp, 1.0 / 8));
- uint32_t num_bits;
-
- // Handle overflow.
- if (m < 0 || m > kMaximumBloomFilterBytes << 3) {
- num_bits = static_cast<uint32_t>(kMaximumBloomFilterBytes << 3);
- } else {
- num_bits = static_cast<uint32_t>(m);
- }
-
- // Round up to lower bound
- if (num_bits < kMinimumBloomFilterBytes << 3) {
- num_bits = kMinimumBloomFilterBytes << 3;
- }
-
- // Get next power of 2 if bits is not power of 2.
- if ((num_bits & (num_bits - 1)) != 0) {
- num_bits = static_cast<uint32_t>(::arrow::BitUtil::NextPower2(num_bits));
- }
-
- // Round down to upper bound
- if (num_bits > kMaximumBloomFilterBytes << 3) {
- num_bits = kMaximumBloomFilterBytes << 3;
- }
-
- return num_bits;
- }
-
- bool FindHash(uint64_t hash) const override;
- void InsertHash(uint64_t hash) override;
- void WriteTo(ArrowOutputStream* sink) const override;
- uint32_t GetBitsetSize() const override { return num_bytes_; }
-
- uint64_t Hash(int64_t value) const override { return hasher_->Hash(value); }
- uint64_t Hash(float value) const override { return hasher_->Hash(value); }
- uint64_t Hash(double value) const override { return hasher_->Hash(value); }
- uint64_t Hash(const Int96* value) const override { return hasher_->Hash(value); }
- uint64_t Hash(const ByteArray* value) const override { return hasher_->Hash(value); }
- uint64_t Hash(int32_t value) const override { return hasher_->Hash(value); }
- uint64_t Hash(const FLBA* value, uint32_t len) const override {
- return hasher_->Hash(value, len);
- }
-
- /// Deserialize the Bloom filter from an input stream. It is used when reconstructing
- /// a Bloom filter from a parquet filter.
- ///
- /// @param input_stream The input stream from which to construct the Bloom filter
- /// @return The BlockSplitBloomFilter.
- static BlockSplitBloomFilter Deserialize(ArrowInputStream* input_stream);
-
- private:
- // Bytes in a tiny Bloom filter block.
- static constexpr int kBytesPerFilterBlock = 32;
-
- // The number of bits to be set in each tiny Bloom filter
- static constexpr int kBitsSetPerBlock = 8;
-
- // A mask structure used to set bits in each tiny Bloom filter.
- struct BlockMask {
- uint32_t item[kBitsSetPerBlock];
- };
-
- // The block-based algorithm needs eight odd SALT values to calculate eight indexes
- // of bit to set, one bit in each 32-bit word.
- static constexpr uint32_t SALT[kBitsSetPerBlock] = {
- 0x47b6137bU, 0x44974d91U, 0x8824ad5bU, 0xa2b7289dU,
- 0x705495c7U, 0x2df1424bU, 0x9efc4947U, 0x5c6bfb31U};
-
- /// Set bits in mask array according to input key.
- /// @param key the value to calculate mask values.
- /// @param mask the mask array is used to set inside a block
- void SetMask(uint32_t key, BlockMask& mask) const;
-
- // Memory pool to allocate aligned buffer for bitset
- ::arrow::MemoryPool* pool_;
-
- // The underlying buffer of bitset.
- std::shared_ptr<Buffer> data_;
-
- // The number of bytes of Bloom filter bitset.
- uint32_t num_bytes_;
-
- // Hash strategy used in this Bloom filter.
- HashStrategy hash_strategy_;
-
- // Algorithm used in this Bloom filter.
- Algorithm algorithm_;
-
- // The hash pointer points to actual hash class used.
- std::unique_ptr<Hasher> hasher_;
-};
-
-} // namespace parquet
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+
+#include <cmath>
+#include <cstdint>
+#include <memory>
+
+#include "arrow/util/bit_util.h"
+#include "arrow/util/logging.h"
+#include "parquet/hasher.h"
+#include "parquet/platform.h"
+#include "parquet/types.h"
+
+namespace parquet {
+
+// A Bloom filter is a compact structure to indicate whether an item is not in a set or
+// probably in a set. The Bloom filter usually consists of a bit set that represents a
+// set of elements, a hash strategy and a Bloom filter algorithm.
+class PARQUET_EXPORT BloomFilter {
+ public:
+  // Maximum Bloom filter size; it is set to the HDFS default block size
+  // (128 MB). This value will be reconsidered when implementing a Bloom
+  // filter producer.
+ static constexpr uint32_t kMaximumBloomFilterBytes = 128 * 1024 * 1024;
+
+  /// Determine whether an element exists in the set or not.
+  ///
+  /// @param hash the hash of the element to look up.
+  /// @return false if the value is definitely not in the set; true means the
+  /// value is PROBABLY in the set.
+ virtual bool FindHash(uint64_t hash) const = 0;
+
+  /// Insert an element into the set represented by the Bloom filter bitset.
+  /// @param hash the hash of the value to insert into the Bloom filter.
+ virtual void InsertHash(uint64_t hash) = 0;
+
+  /// Write this Bloom filter to an output stream. A serialized Bloom filter
+  /// structure includes the bitset length, hash strategy, algorithm, and the
+  /// bitset itself.
+  ///
+  /// @param sink the output stream to write to
+ virtual void WriteTo(ArrowOutputStream* sink) const = 0;
+
+  /// Get the number of bytes in the bitset.
+ virtual uint32_t GetBitsetSize() const = 0;
+
+  /// Compute hash for a 32-bit value by using its plain encoding result.
+ ///
+ /// @param value the value to hash.
+ /// @return hash result.
+ virtual uint64_t Hash(int32_t value) const = 0;
+
+  /// Compute hash for a 64-bit value by using its plain encoding result.
+ ///
+ /// @param value the value to hash.
+ /// @return hash result.
+ virtual uint64_t Hash(int64_t value) const = 0;
+
+ /// Compute hash for float value by using its plain encoding result.
+ ///
+ /// @param value the value to hash.
+ /// @return hash result.
+ virtual uint64_t Hash(float value) const = 0;
+
+ /// Compute hash for double value by using its plain encoding result.
+ ///
+ /// @param value the value to hash.
+ /// @return hash result.
+ virtual uint64_t Hash(double value) const = 0;
+
+ /// Compute hash for Int96 value by using its plain encoding result.
+ ///
+ /// @param value the value to hash.
+ /// @return hash result.
+ virtual uint64_t Hash(const Int96* value) const = 0;
+
+ /// Compute hash for ByteArray value by using its plain encoding result.
+ ///
+ /// @param value the value to hash.
+ /// @return hash result.
+ virtual uint64_t Hash(const ByteArray* value) const = 0;
+
+ /// Compute hash for fixed byte array value by using its plain encoding result.
+ ///
+ /// @param value the value address.
+ /// @param len the value length.
+ /// @return hash result.
+ virtual uint64_t Hash(const FLBA* value, uint32_t len) const = 0;
+
+ virtual ~BloomFilter() {}
+
+ protected:
+ // Hash strategy available for Bloom filter.
+ enum class HashStrategy : uint32_t { MURMUR3_X64_128 = 0 };
+
+ // Bloom filter algorithm.
+ enum class Algorithm : uint32_t { BLOCK = 0 };
+};
+
+// The BlockSplitBloomFilter is implemented using the block-based Bloom filters
+// from Putze et al.'s "Cache-, Hash- and Space-Efficient Bloom Filters". The
+// basic idea is to hash each item to a tiny Bloom filter whose size fits in a
+// single cache line or smaller.
+//
+// This implementation sets 8 bits in each tiny Bloom filter. Each tiny Bloom
+// filter is 32 bytes, to take advantage of 32-byte SIMD instructions.
+class PARQUET_EXPORT BlockSplitBloomFilter : public BloomFilter {
+ public:
+ /// The constructor of BlockSplitBloomFilter. It uses murmur3_x64_128 as hash function.
+ BlockSplitBloomFilter();
+
+  /// Initialize the BlockSplitBloomFilter. num_bytes should be within
+  /// [kMinimumBloomFilterBytes, kMaximumBloomFilterBytes]; out-of-range values
+  /// are clamped to those bounds, and the result is rounded up to a power
+  /// of 2.
+  ///
+  /// @param num_bytes The number of bytes used to store the Bloom filter bitset.
+ void Init(uint32_t num_bytes);
+
+ /// Initialize the BlockSplitBloomFilter. It copies the bitset as underlying
+ /// bitset because the given bitset may not satisfy the 32-byte alignment requirement
+ /// which may lead to segfault when performing SIMD instructions. It is the caller's
+ /// responsibility to free the bitset passed in. This is used when reconstructing
+ /// a Bloom filter from a parquet file.
+ ///
+ /// @param bitset The given bitset to initialize the Bloom filter.
+ /// @param num_bytes The number of bytes of given bitset.
+ void Init(const uint8_t* bitset, uint32_t num_bytes);
+
+  // Minimum Bloom filter size; it is set to 32 bytes to fit a single tiny
+  // Bloom filter.
+ static constexpr uint32_t kMinimumBloomFilterBytes = 32;
+
+ /// Calculate optimal size according to the number of distinct values and false
+ /// positive probability.
+ ///
+ /// @param ndv The number of distinct values.
+ /// @param fpp The false positive probability.
+  /// @return a bit count between kMinimumBloomFilterBytes << 3 and
+  /// kMaximumBloomFilterBytes << 3; the return value is always a power of 2.
+ static uint32_t OptimalNumOfBits(uint32_t ndv, double fpp) {
+ DCHECK(fpp > 0.0 && fpp < 1.0);
+ const double m = -8.0 * ndv / log(1 - pow(fpp, 1.0 / 8));
+ uint32_t num_bits;
+
+ // Handle overflow.
+ if (m < 0 || m > kMaximumBloomFilterBytes << 3) {
+ num_bits = static_cast<uint32_t>(kMaximumBloomFilterBytes << 3);
+ } else {
+ num_bits = static_cast<uint32_t>(m);
+ }
+
+    // Clamp to the lower bound.
+ if (num_bits < kMinimumBloomFilterBytes << 3) {
+ num_bits = kMinimumBloomFilterBytes << 3;
+ }
+
+    // Round up to the next power of 2 if num_bits is not a power of 2.
+ if ((num_bits & (num_bits - 1)) != 0) {
+ num_bits = static_cast<uint32_t>(::arrow::BitUtil::NextPower2(num_bits));
+ }
+
+    // Clamp to the upper bound.
+ if (num_bits > kMaximumBloomFilterBytes << 3) {
+ num_bits = kMaximumBloomFilterBytes << 3;
+ }
+
+ return num_bits;
+ }
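+
+  // Example (illustrative): fpp = 0.01 gives roughly 9.7 bits per distinct
+  // value before rounding, so ndv = 1,000,000 needs about 9.7M bits, which is
+  // then rounded up to the next power of 2 (16,777,216 bits, i.e. 2 MB).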
+
+ bool FindHash(uint64_t hash) const override;
+ void InsertHash(uint64_t hash) override;
+ void WriteTo(ArrowOutputStream* sink) const override;
+ uint32_t GetBitsetSize() const override { return num_bytes_; }
+
+ uint64_t Hash(int64_t value) const override { return hasher_->Hash(value); }
+ uint64_t Hash(float value) const override { return hasher_->Hash(value); }
+ uint64_t Hash(double value) const override { return hasher_->Hash(value); }
+ uint64_t Hash(const Int96* value) const override { return hasher_->Hash(value); }
+ uint64_t Hash(const ByteArray* value) const override { return hasher_->Hash(value); }
+ uint64_t Hash(int32_t value) const override { return hasher_->Hash(value); }
+ uint64_t Hash(const FLBA* value, uint32_t len) const override {
+ return hasher_->Hash(value, len);
+ }
+
+  /// Deserialize the Bloom filter from an input stream. It is used when
+  /// reconstructing a Bloom filter from a parquet file.
+ ///
+ /// @param input_stream The input stream from which to construct the Bloom filter
+ /// @return The BlockSplitBloomFilter.
+ static BlockSplitBloomFilter Deserialize(ArrowInputStream* input_stream);
+
+ private:
+ // Bytes in a tiny Bloom filter block.
+ static constexpr int kBytesPerFilterBlock = 32;
+
+ // The number of bits to be set in each tiny Bloom filter
+ static constexpr int kBitsSetPerBlock = 8;
+
+ // A mask structure used to set bits in each tiny Bloom filter.
+ struct BlockMask {
+ uint32_t item[kBitsSetPerBlock];
+ };
+
+ // The block-based algorithm needs eight odd SALT values to calculate eight indexes
+ // of bit to set, one bit in each 32-bit word.
+ static constexpr uint32_t SALT[kBitsSetPerBlock] = {
+ 0x47b6137bU, 0x44974d91U, 0x8824ad5bU, 0xa2b7289dU,
+ 0x705495c7U, 0x2df1424bU, 0x9efc4947U, 0x5c6bfb31U};
+
+ /// Set bits in mask array according to input key.
+ /// @param key the value to calculate mask values.
+ /// @param mask the mask array is used to set inside a block
+ void SetMask(uint32_t key, BlockMask& mask) const;
+
+ // Memory pool to allocate aligned buffer for bitset
+ ::arrow::MemoryPool* pool_;
+
+ // The underlying buffer of bitset.
+ std::shared_ptr<Buffer> data_;
+
+ // The number of bytes of Bloom filter bitset.
+ uint32_t num_bytes_;
+
+ // Hash strategy used in this Bloom filter.
+ HashStrategy hash_strategy_;
+
+ // Algorithm used in this Bloom filter.
+ Algorithm algorithm_;
+
+ // The hash pointer points to actual hash class used.
+ std::unique_ptr<Hasher> hasher_;
+};
+
+} // namespace parquet
diff --git a/contrib/libs/apache/arrow/cpp/src/parquet/column_page.h b/contrib/libs/apache/arrow/cpp/src/parquet/column_page.h
index 242f16b2e67..2fab77ed01a 100644
--- a/contrib/libs/apache/arrow/cpp/src/parquet/column_page.h
+++ b/contrib/libs/apache/arrow/cpp/src/parquet/column_page.h
@@ -1,160 +1,160 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-// This module defines an abstract interface for iterating through pages in a
-// Parquet column chunk within a row group. It could be extended in the future
-// to iterate through all data pages in all chunks in a file.
-
-#pragma once
-
-#include <cstdint>
-#include <memory>
-#include <string>
-
-#include "parquet/statistics.h"
-#include "parquet/types.h"
-
-namespace parquet {
-
-// TODO: Parallel processing is not yet safe because of memory-ownership
-// semantics (the PageReader may or may not own the memory referenced by a
-// page)
-//
-// TODO(wesm): In the future Parquet implementations may store the crc code
-// in format::PageHeader. parquet-mr currently does not, so we also skip it
-// here, both on the read and write path
-class Page {
- public:
- Page(const std::shared_ptr<Buffer>& buffer, PageType::type type)
- : buffer_(buffer), type_(type) {}
-
- PageType::type type() const { return type_; }
-
- std::shared_ptr<Buffer> buffer() const { return buffer_; }
-
- // @returns: a pointer to the page's data
- const uint8_t* data() const { return buffer_->data(); }
-
- // @returns: the total size in bytes of the page's data buffer
- int32_t size() const { return static_cast<int32_t>(buffer_->size()); }
-
- private:
- std::shared_ptr<Buffer> buffer_;
- PageType::type type_;
-};
-
-/// \brief Base type for DataPageV1 and DataPageV2 including common attributes
-class DataPage : public Page {
- public:
- int32_t num_values() const { return num_values_; }
- Encoding::type encoding() const { return encoding_; }
- int64_t uncompressed_size() const { return uncompressed_size_; }
- const EncodedStatistics& statistics() const { return statistics_; }
-
- virtual ~DataPage() = default;
-
- protected:
- DataPage(PageType::type type, const std::shared_ptr<Buffer>& buffer, int32_t num_values,
- Encoding::type encoding, int64_t uncompressed_size,
- const EncodedStatistics& statistics = EncodedStatistics())
- : Page(buffer, type),
- num_values_(num_values),
- encoding_(encoding),
- uncompressed_size_(uncompressed_size),
- statistics_(statistics) {}
-
- int32_t num_values_;
- Encoding::type encoding_;
- int64_t uncompressed_size_;
- EncodedStatistics statistics_;
-};
-
-class DataPageV1 : public DataPage {
- public:
- DataPageV1(const std::shared_ptr<Buffer>& buffer, int32_t num_values,
- Encoding::type encoding, Encoding::type definition_level_encoding,
- Encoding::type repetition_level_encoding, int64_t uncompressed_size,
- const EncodedStatistics& statistics = EncodedStatistics())
- : DataPage(PageType::DATA_PAGE, buffer, num_values, encoding, uncompressed_size,
- statistics),
- definition_level_encoding_(definition_level_encoding),
- repetition_level_encoding_(repetition_level_encoding) {}
-
- Encoding::type repetition_level_encoding() const { return repetition_level_encoding_; }
-
- Encoding::type definition_level_encoding() const { return definition_level_encoding_; }
-
- private:
- Encoding::type definition_level_encoding_;
- Encoding::type repetition_level_encoding_;
-};
-
-class DataPageV2 : public DataPage {
- public:
- DataPageV2(const std::shared_ptr<Buffer>& buffer, int32_t num_values, int32_t num_nulls,
- int32_t num_rows, Encoding::type encoding,
- int32_t definition_levels_byte_length, int32_t repetition_levels_byte_length,
- int64_t uncompressed_size, bool is_compressed = false,
- const EncodedStatistics& statistics = EncodedStatistics())
- : DataPage(PageType::DATA_PAGE_V2, buffer, num_values, encoding, uncompressed_size,
- statistics),
- num_nulls_(num_nulls),
- num_rows_(num_rows),
- definition_levels_byte_length_(definition_levels_byte_length),
- repetition_levels_byte_length_(repetition_levels_byte_length),
- is_compressed_(is_compressed) {}
-
- int32_t num_nulls() const { return num_nulls_; }
-
- int32_t num_rows() const { return num_rows_; }
-
- int32_t definition_levels_byte_length() const { return definition_levels_byte_length_; }
-
- int32_t repetition_levels_byte_length() const { return repetition_levels_byte_length_; }
-
- bool is_compressed() const { return is_compressed_; }
-
- private:
- int32_t num_nulls_;
- int32_t num_rows_;
- int32_t definition_levels_byte_length_;
- int32_t repetition_levels_byte_length_;
- bool is_compressed_;
-};
-
-class DictionaryPage : public Page {
- public:
- DictionaryPage(const std::shared_ptr<Buffer>& buffer, int32_t num_values,
- Encoding::type encoding, bool is_sorted = false)
- : Page(buffer, PageType::DICTIONARY_PAGE),
- num_values_(num_values),
- encoding_(encoding),
- is_sorted_(is_sorted) {}
-
- int32_t num_values() const { return num_values_; }
-
- Encoding::type encoding() const { return encoding_; }
-
- bool is_sorted() const { return is_sorted_; }
-
- private:
- int32_t num_values_;
- Encoding::type encoding_;
- bool is_sorted_;
-};
-
-} // namespace parquet
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+// This module defines an abstract interface for iterating through pages in a
+// Parquet column chunk within a row group. It could be extended in the future
+// to iterate through all data pages in all chunks in a file.
+
+#pragma once
+
+#include <cstdint>
+#include <memory>
+#include <string>
+
+#include "parquet/statistics.h"
+#include "parquet/types.h"
+
+namespace parquet {
+
+// TODO: Parallel processing is not yet safe because of memory-ownership
+// semantics (the PageReader may or may not own the memory referenced by a
+// page)
+//
+// TODO(wesm): In the future Parquet implementations may store the crc code
+// in format::PageHeader. parquet-mr currently does not, so we also skip it
+// here, both on the read and write path
+class Page {
+ public:
+ Page(const std::shared_ptr<Buffer>& buffer, PageType::type type)
+ : buffer_(buffer), type_(type) {}
+
+ PageType::type type() const { return type_; }
+
+ std::shared_ptr<Buffer> buffer() const { return buffer_; }
+
+ // @returns: a pointer to the page's data
+ const uint8_t* data() const { return buffer_->data(); }
+
+ // @returns: the total size in bytes of the page's data buffer
+ int32_t size() const { return static_cast<int32_t>(buffer_->size()); }
+
+ private:
+ std::shared_ptr<Buffer> buffer_;
+ PageType::type type_;
+};
+
+/// \brief Base type for DataPageV1 and DataPageV2 including common attributes
+class DataPage : public Page {
+ public:
+ int32_t num_values() const { return num_values_; }
+ Encoding::type encoding() const { return encoding_; }
+ int64_t uncompressed_size() const { return uncompressed_size_; }
+ const EncodedStatistics& statistics() const { return statistics_; }
+
+ virtual ~DataPage() = default;
+
+ protected:
+ DataPage(PageType::type type, const std::shared_ptr<Buffer>& buffer, int32_t num_values,
+ Encoding::type encoding, int64_t uncompressed_size,
+ const EncodedStatistics& statistics = EncodedStatistics())
+ : Page(buffer, type),
+ num_values_(num_values),
+ encoding_(encoding),
+ uncompressed_size_(uncompressed_size),
+ statistics_(statistics) {}
+
+ int32_t num_values_;
+ Encoding::type encoding_;
+ int64_t uncompressed_size_;
+ EncodedStatistics statistics_;
+};
+
+class DataPageV1 : public DataPage {
+ public:
+ DataPageV1(const std::shared_ptr<Buffer>& buffer, int32_t num_values,
+ Encoding::type encoding, Encoding::type definition_level_encoding,
+ Encoding::type repetition_level_encoding, int64_t uncompressed_size,
+ const EncodedStatistics& statistics = EncodedStatistics())
+ : DataPage(PageType::DATA_PAGE, buffer, num_values, encoding, uncompressed_size,
+ statistics),
+ definition_level_encoding_(definition_level_encoding),
+ repetition_level_encoding_(repetition_level_encoding) {}
+
+ Encoding::type repetition_level_encoding() const { return repetition_level_encoding_; }
+
+ Encoding::type definition_level_encoding() const { return definition_level_encoding_; }
+
+ private:
+ Encoding::type definition_level_encoding_;
+ Encoding::type repetition_level_encoding_;
+};
+
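+// Note: in the V2 data page format, repetition and definition levels are
+// always RLE-encoded and stored uncompressed at the front of the page
+// buffer; the is_compressed flag refers only to the values section that
+// follows the levels.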
+class DataPageV2 : public DataPage {
+ public:
+ DataPageV2(const std::shared_ptr<Buffer>& buffer, int32_t num_values, int32_t num_nulls,
+ int32_t num_rows, Encoding::type encoding,
+ int32_t definition_levels_byte_length, int32_t repetition_levels_byte_length,
+ int64_t uncompressed_size, bool is_compressed = false,
+ const EncodedStatistics& statistics = EncodedStatistics())
+ : DataPage(PageType::DATA_PAGE_V2, buffer, num_values, encoding, uncompressed_size,
+ statistics),
+ num_nulls_(num_nulls),
+ num_rows_(num_rows),
+ definition_levels_byte_length_(definition_levels_byte_length),
+ repetition_levels_byte_length_(repetition_levels_byte_length),
+ is_compressed_(is_compressed) {}
+
+ int32_t num_nulls() const { return num_nulls_; }
+
+ int32_t num_rows() const { return num_rows_; }
+
+ int32_t definition_levels_byte_length() const { return definition_levels_byte_length_; }
+
+ int32_t repetition_levels_byte_length() const { return repetition_levels_byte_length_; }
+
+ bool is_compressed() const { return is_compressed_; }
+
+ private:
+ int32_t num_nulls_;
+ int32_t num_rows_;
+ int32_t definition_levels_byte_length_;
+ int32_t repetition_levels_byte_length_;
+ bool is_compressed_;
+};
+
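+// A dictionary page holds the encoded dictionary values for a
+// dictionary-encoded column chunk. A column chunk may contain at most one
+// dictionary page, and it must precede the data pages that reference it.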
+class DictionaryPage : public Page {
+ public:
+ DictionaryPage(const std::shared_ptr<Buffer>& buffer, int32_t num_values,
+ Encoding::type encoding, bool is_sorted = false)
+ : Page(buffer, PageType::DICTIONARY_PAGE),
+ num_values_(num_values),
+ encoding_(encoding),
+ is_sorted_(is_sorted) {}
+
+ int32_t num_values() const { return num_values_; }
+
+ Encoding::type encoding() const { return encoding_; }
+
+ bool is_sorted() const { return is_sorted_; }
+
+ private:
+ int32_t num_values_;
+ Encoding::type encoding_;
+ bool is_sorted_;
+};
+
+} // namespace parquet
diff --git a/contrib/libs/apache/arrow/cpp/src/parquet/column_reader.cc b/contrib/libs/apache/arrow/cpp/src/parquet/column_reader.cc
index 713205e98dd..047d99fed9a 100644
--- a/contrib/libs/apache/arrow/cpp/src/parquet/column_reader.cc
+++ b/contrib/libs/apache/arrow/cpp/src/parquet/column_reader.cc
@@ -1,1802 +1,1802 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-#include "parquet/column_reader.h"
-
-#include <algorithm>
-#include <cstdint>
-#include <cstring>
-#include <exception>
-#include <iostream>
-#include <memory>
-#include <string>
-#include <unordered_map>
-#include <utility>
-#include <vector>
-
-#include "arrow/array.h"
-#include "arrow/array/builder_binary.h"
-#include "arrow/array/builder_dict.h"
-#include "arrow/array/builder_primitive.h"
-#include "arrow/chunked_array.h"
-#include "arrow/type.h"
-#include "arrow/util/bit_stream_utils.h"
-#include "arrow/util/bit_util.h"
-#include "arrow/util/checked_cast.h"
-#include "arrow/util/compression.h"
-#include "arrow/util/int_util_internal.h"
-#include "arrow/util/logging.h"
-#include "arrow/util/rle_encoding.h"
-#include "parquet/column_page.h"
-#include "parquet/encoding.h"
-#include "parquet/encryption/encryption_internal.h"
-#include "parquet/encryption/internal_file_decryptor.h"
-#include "parquet/level_comparison.h"
-#include "parquet/level_conversion.h"
-#include "parquet/properties.h"
-#include "parquet/statistics.h"
-#include "parquet/thrift_internal.h" // IWYU pragma: keep
-// Required after "arrow/util/int_util_internal.h" (for OPTIONAL)
-#include "parquet/windows_compatibility.h"
-
-using arrow::MemoryPool;
-using arrow::internal::AddWithOverflow;
-using arrow::internal::checked_cast;
-using arrow::internal::MultiplyWithOverflow;
-
-namespace BitUtil = arrow::BitUtil;
-
-namespace parquet {
-namespace {
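-// Returns true if values may need to be read "spaced", i.e. with gaps left
-// in the output buffer for null entries, based on the column's schema path.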
-inline bool HasSpacedValues(const ColumnDescriptor* descr) {
- if (descr->max_repetition_level() > 0) {
- // repeated+flat case
- return !descr->schema_node()->is_required();
- } else {
- // non-repeated+nested case
- // Find if a node forces nulls in the lowest level along the hierarchy
- const schema::Node* node = descr->schema_node().get();
- while (node) {
- if (node->is_optional()) {
- return true;
- }
- node = node->parent();
- }
- return false;
- }
-}
-} // namespace
-
-LevelDecoder::LevelDecoder() : num_values_remaining_(0) {}
-
-LevelDecoder::~LevelDecoder() {}
-
-int LevelDecoder::SetData(Encoding::type encoding, int16_t max_level,
- int num_buffered_values, const uint8_t* data,
- int32_t data_size) {
- max_level_ = max_level;
- int32_t num_bytes = 0;
- encoding_ = encoding;
- num_values_remaining_ = num_buffered_values;
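- // Levels lie in the range [0, max_level], so bit_width_ is the number
- // of bits needed to represent max_level.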
- bit_width_ = BitUtil::Log2(max_level + 1);
- switch (encoding) {
- case Encoding::RLE: {
- if (data_size < 4) {
- throw ParquetException("Received invalid levels (corrupt data page?)");
- }
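- // RLE-encoded levels are preceded by a 4-byte length prefix giving the
- // size of the encoded levels in bytes.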
- num_bytes = ::arrow::util::SafeLoadAs<int32_t>(data);
- if (num_bytes < 0 || num_bytes > data_size - 4) {
- throw ParquetException("Received invalid number of bytes (corrupt data page?)");
- }
- const uint8_t* decoder_data = data + 4;
- if (!rle_decoder_) {
- rle_decoder_.reset(
- new ::arrow::util::RleDecoder(decoder_data, num_bytes, bit_width_));
- } else {
- rle_decoder_->Reset(decoder_data, num_bytes, bit_width_);
- }
- return 4 + num_bytes;
- }
- case Encoding::BIT_PACKED: {
- int num_bits = 0;
- if (MultiplyWithOverflow(num_buffered_values, bit_width_, &num_bits)) {
- throw ParquetException(
- "Number of buffered values too large (corrupt data page?)");
- }
- num_bytes = static_cast<int32_t>(BitUtil::BytesForBits(num_bits));
- if (num_bytes < 0 || num_bytes > data_size - 4) {
- throw ParquetException("Received invalid number of bytes (corrupt data page?)");
- }
- if (!bit_packed_decoder_) {
- bit_packed_decoder_.reset(new ::arrow::BitUtil::BitReader(data, num_bytes));
- } else {
- bit_packed_decoder_->Reset(data, num_bytes);
- }
- return num_bytes;
- }
- default:
- throw ParquetException("Unknown encoding type for levels.");
- }
- return -1;
-}
-
-void LevelDecoder::SetDataV2(int32_t num_bytes, int16_t max_level,
- int num_buffered_values, const uint8_t* data) {
- max_level_ = max_level;
- // Repetition and definition levels always use RLE encoding
- // in the DataPageV2 format.
- if (num_bytes < 0) {
- throw ParquetException("Invalid page header (corrupt data page?)");
- }
- encoding_ = Encoding::RLE;
- num_values_remaining_ = num_buffered_values;
- bit_width_ = BitUtil::Log2(max_level + 1);
-
- if (!rle_decoder_) {
- rle_decoder_.reset(new ::arrow::util::RleDecoder(data, num_bytes, bit_width_));
- } else {
- rle_decoder_->Reset(data, num_bytes, bit_width_);
- }
-}
-
-int LevelDecoder::Decode(int batch_size, int16_t* levels) {
- int num_decoded = 0;
-
- int num_values = std::min(num_values_remaining_, batch_size);
- if (encoding_ == Encoding::RLE) {
- num_decoded = rle_decoder_->GetBatch(levels, num_values);
- } else {
- num_decoded = bit_packed_decoder_->GetBatch(bit_width_, levels, num_values);
- }
- if (num_decoded > 0) {
- internal::MinMax min_max = internal::FindMinMax(levels, num_decoded);
- if (ARROW_PREDICT_FALSE(min_max.min < 0 || min_max.max > max_level_)) {
- std::stringstream ss;
- ss << "Malformed levels. min: " << min_max.min << " max: " << min_max.max
- << " out of range. Max Level: " << max_level_;
- throw ParquetException(ss.str());
- }
- }
- num_values_remaining_ -= num_decoded;
- return num_decoded;
-}
-
-ReaderProperties default_reader_properties() {
- static ReaderProperties default_reader_properties;
- return default_reader_properties;
-}
-
-namespace {
-
-// Extracts encoded statistics from V1 and V2 data page headers
-template <typename H>
-EncodedStatistics ExtractStatsFromHeader(const H& header) {
- EncodedStatistics page_statistics;
- if (!header.__isset.statistics) {
- return page_statistics;
- }
- const format::Statistics& stats = header.statistics;
- if (stats.__isset.max) {
- page_statistics.set_max(stats.max);
- }
- if (stats.__isset.min) {
- page_statistics.set_min(stats.min);
- }
- if (stats.__isset.null_count) {
- page_statistics.set_null_count(stats.null_count);
- }
- if (stats.__isset.distinct_count) {
- page_statistics.set_distinct_count(stats.distinct_count);
- }
- return page_statistics;
-}
-
-// ----------------------------------------------------------------------
-// SerializedPageReader deserializes Thrift metadata and pages that have been
-// assembled in a serialized stream for storage in a Parquet file
-
-// This subclass delimits pages appearing in a serialized stream, each preceded
-// by a serialized Thrift format::PageHeader that indicates the page type and
-// the page metadata.
-class SerializedPageReader : public PageReader {
- public:
- SerializedPageReader(std::shared_ptr<ArrowInputStream> stream, int64_t total_num_rows,
- Compression::type codec, ::arrow::MemoryPool* pool,
- const CryptoContext* crypto_ctx)
- : stream_(std::move(stream)),
- decompression_buffer_(AllocateBuffer(pool, 0)),
- page_ordinal_(0),
- seen_num_rows_(0),
- total_num_rows_(total_num_rows),
- decryption_buffer_(AllocateBuffer(pool, 0)) {
- if (crypto_ctx != nullptr) {
- crypto_ctx_ = *crypto_ctx;
- InitDecryption();
- }
- max_page_header_size_ = kDefaultMaxPageHeaderSize;
- decompressor_ = GetCodec(codec);
- }
-
- // Implement the PageReader interface
- std::shared_ptr<Page> NextPage() override;
-
- void set_max_page_header_size(uint32_t size) override { max_page_header_size_ = size; }
-
- private:
- void UpdateDecryption(const std::shared_ptr<Decryptor>& decryptor, int8_t module_type,
- const std::string& page_aad);
-
- void InitDecryption();
-
- std::shared_ptr<Buffer> DecompressIfNeeded(std::shared_ptr<Buffer> page_buffer,
- int compressed_len, int uncompressed_len,
- int levels_byte_len = 0);
-
- std::shared_ptr<ArrowInputStream> stream_;
-
- format::PageHeader current_page_header_;
- std::shared_ptr<Page> current_page_;
-
- // Compression codec to use.
- std::unique_ptr<::arrow::util::Codec> decompressor_;
- std::shared_ptr<ResizableBuffer> decompression_buffer_;
-
- // The fields below are used for calculation of AAD (additional authenticated data)
- // suffix which is part of the Parquet Modular Encryption.
- // The AAD suffix for a parquet module is built internally by
- // concatenating several parts, including the row group ordinal,
- // column ordinal and page ordinal.
- // Please refer to the encryption specification for more details:
- // https://github.com/apache/parquet-format/blob/encryption/Encryption.md#44-additional-authenticated-data
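- //
- // Roughly, the AAD for a data page is composed as
- //   file_aad || module_type || row_group_ordinal || column_ordinal || page_ordinal
- // so moving between the pages of one column only requires updating the
- // trailing page ordinal.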
-
- // The ordinal fields in the context below are used for AAD suffix calculation.
- CryptoContext crypto_ctx_;
- int16_t page_ordinal_; // page ordinal does not count the dictionary page
-
- // Maximum allowed page header size
- uint32_t max_page_header_size_;
-
- // Number of rows read in data pages so far
- int64_t seen_num_rows_;
-
- // Number of rows in all the data pages
- int64_t total_num_rows_;
-
- // data_page_aad_ and data_page_header_aad_ contain the AAD for the data pages
- // and data page headers of a single column, respectively.
- // When calculating the AAD for different pages within a column, only the
- // page ordinal portion of the AAD is updated.
- std::string data_page_aad_;
- std::string data_page_header_aad_;
- // Encryption
- std::shared_ptr<ResizableBuffer> decryption_buffer_;
-};
-
-void SerializedPageReader::InitDecryption() {
- // Prepare the AAD for quick update later.
- if (crypto_ctx_.data_decryptor != nullptr) {
- DCHECK(!crypto_ctx_.data_decryptor->file_aad().empty());
- data_page_aad_ = encryption::CreateModuleAad(
- crypto_ctx_.data_decryptor->file_aad(), encryption::kDataPage,
- crypto_ctx_.row_group_ordinal, crypto_ctx_.column_ordinal, kNonPageOrdinal);
- }
- if (crypto_ctx_.meta_decryptor != nullptr) {
- DCHECK(!crypto_ctx_.meta_decryptor->file_aad().empty());
- data_page_header_aad_ = encryption::CreateModuleAad(
- crypto_ctx_.meta_decryptor->file_aad(), encryption::kDataPageHeader,
- crypto_ctx_.row_group_ordinal, crypto_ctx_.column_ordinal, kNonPageOrdinal);
- }
-}
-
-void SerializedPageReader::UpdateDecryption(const std::shared_ptr<Decryptor>& decryptor,
- int8_t module_type,
- const std::string& page_aad) {
- DCHECK(decryptor != nullptr);
- if (crypto_ctx_.start_decrypt_with_dictionary_page) {
- std::string aad = encryption::CreateModuleAad(
- decryptor->file_aad(), module_type, crypto_ctx_.row_group_ordinal,
- crypto_ctx_.column_ordinal, kNonPageOrdinal);
- decryptor->UpdateAad(aad);
- } else {
- encryption::QuickUpdatePageAad(page_aad, page_ordinal_);
- decryptor->UpdateAad(page_aad);
- }
-}
-
-std::shared_ptr<Page> SerializedPageReader::NextPage() {
- // Loop here because there may be unhandled page types that we skip over
- // until we find a page type that we know how to handle
-
- while (seen_num_rows_ < total_num_rows_) {
- uint32_t header_size = 0;
- uint32_t allowed_page_size = kDefaultPageHeaderSize;
-
- // Page headers can be very large because of page statistics.
- // We progressively try larger buffer sizes until we hit the
- // maximum allowed header size.
- while (true) {
- PARQUET_ASSIGN_OR_THROW(auto view, stream_->Peek(allowed_page_size));
- if (view.size() == 0) {
- return std::shared_ptr<Page>(nullptr);
- }
-
- // header_size is passed in as the number of available bytes and is
- // updated by DeserializeThriftMsg to the actual serialized header size
- header_size = static_cast<uint32_t>(view.size());
- try {
- if (crypto_ctx_.meta_decryptor != nullptr) {
- UpdateDecryption(crypto_ctx_.meta_decryptor, encryption::kDictionaryPageHeader,
- data_page_header_aad_);
- }
- DeserializeThriftMsg(reinterpret_cast<const uint8_t*>(view.data()), &header_size,
- &current_page_header_, crypto_ctx_.meta_decryptor);
- break;
- } catch (std::exception& e) {
- // Failed to deserialize. Double the allowed page header size and try again
- std::stringstream ss;
- ss << e.what();
- allowed_page_size *= 2;
- if (allowed_page_size > max_page_header_size_) {
- ss << "Deserializing page header failed.\n";
- throw ParquetException(ss.str());
- }
- }
- }
- // Advance the stream offset
- PARQUET_THROW_NOT_OK(stream_->Advance(header_size));
-
- int compressed_len = current_page_header_.compressed_page_size;
- int uncompressed_len = current_page_header_.uncompressed_page_size;
- if (compressed_len < 0 || uncompressed_len < 0) {
- throw ParquetException("Invalid page header");
- }
-
- if (crypto_ctx_.data_decryptor != nullptr) {
- UpdateDecryption(crypto_ctx_.data_decryptor, encryption::kDictionaryPage,
- data_page_aad_);
- }
-
- // Read the compressed data page.
- PARQUET_ASSIGN_OR_THROW(auto page_buffer, stream_->Read(compressed_len));
- if (page_buffer->size() != compressed_len) {
- std::stringstream ss;
- ss << "Page was smaller (" << page_buffer->size() << ") than expected ("
- << compressed_len << ")";
- ParquetException::EofException(ss.str());
- }
-
- // Decrypt it if we need to
- if (crypto_ctx_.data_decryptor != nullptr) {
- PARQUET_THROW_NOT_OK(decryption_buffer_->Resize(
- compressed_len - crypto_ctx_.data_decryptor->CiphertextSizeDelta(), false));
- compressed_len = crypto_ctx_.data_decryptor->Decrypt(
- page_buffer->data(), compressed_len, decryption_buffer_->mutable_data());
-
- page_buffer = decryption_buffer_;
- }
-
- const PageType::type page_type = LoadEnumSafe(&current_page_header_.type);
-
- if (page_type == PageType::DICTIONARY_PAGE) {
- crypto_ctx_.start_decrypt_with_dictionary_page = false;
- const format::DictionaryPageHeader& dict_header =
- current_page_header_.dictionary_page_header;
-
- bool is_sorted = dict_header.__isset.is_sorted ? dict_header.is_sorted : false;
- if (dict_header.num_values < 0) {
- throw ParquetException("Invalid page header (negative number of values)");
- }
-
- // Uncompress if needed
- page_buffer =
- DecompressIfNeeded(std::move(page_buffer), compressed_len, uncompressed_len);
-
- return std::make_shared<DictionaryPage>(page_buffer, dict_header.num_values,
- LoadEnumSafe(&dict_header.encoding),
- is_sorted);
- } else if (page_type == PageType::DATA_PAGE) {
- ++page_ordinal_;
- const format::DataPageHeader& header = current_page_header_.data_page_header;
-
- if (header.num_values < 0) {
- throw ParquetException("Invalid page header (negative number of values)");
- }
- EncodedStatistics page_statistics = ExtractStatsFromHeader(header);
- seen_num_rows_ += header.num_values;
-
- // Uncompress if needed
- page_buffer =
- DecompressIfNeeded(std::move(page_buffer), compressed_len, uncompressed_len);
-
- return std::make_shared<DataPageV1>(page_buffer, header.num_values,
- LoadEnumSafe(&header.encoding),
- LoadEnumSafe(&header.definition_level_encoding),
- LoadEnumSafe(&header.repetition_level_encoding),
- uncompressed_len, page_statistics);
- } else if (page_type == PageType::DATA_PAGE_V2) {
- ++page_ordinal_;
- const format::DataPageHeaderV2& header = current_page_header_.data_page_header_v2;
-
- if (header.num_values < 0) {
- throw ParquetException("Invalid page header (negative number of values)");
- }
- if (header.definition_levels_byte_length < 0 ||
- header.repetition_levels_byte_length < 0) {
- throw ParquetException("Invalid page header (negative levels byte length)");
- }
- bool is_compressed = header.__isset.is_compressed ? header.is_compressed : false;
- EncodedStatistics page_statistics = ExtractStatsFromHeader(header);
- seen_num_rows_ += header.num_values;
-
- // Uncompress if needed
- int levels_byte_len;
- if (AddWithOverflow(header.definition_levels_byte_length,
- header.repetition_levels_byte_length, &levels_byte_len)) {
- throw ParquetException("Levels size too large (corrupt file?)");
- }
- // DecompressIfNeeded doesn't take `is_compressed` into account as
- // it's page type-agnostic.
- if (is_compressed) {
- page_buffer = DecompressIfNeeded(std::move(page_buffer), compressed_len,
- uncompressed_len, levels_byte_len);
- }
-
- return std::make_shared<DataPageV2>(
- page_buffer, header.num_values, header.num_nulls, header.num_rows,
- LoadEnumSafe(&header.encoding), header.definition_levels_byte_length,
- header.repetition_levels_byte_length, uncompressed_len, is_compressed,
- page_statistics);
- } else {
- // We don't know what this page type is. We're allowed to skip non-data
- // pages.
- continue;
- }
- }
- return std::shared_ptr<Page>(nullptr);
-}
-
-std::shared_ptr<Buffer> SerializedPageReader::DecompressIfNeeded(
- std::shared_ptr<Buffer> page_buffer, int compressed_len, int uncompressed_len,
- int levels_byte_len) {
- if (decompressor_ == nullptr) {
- return page_buffer;
- }
- if (compressed_len < levels_byte_len || uncompressed_len < levels_byte_len) {
- throw ParquetException("Invalid page header");
- }
-
- // Grow the uncompressed buffer if we need to.
- if (uncompressed_len > static_cast<int>(decompression_buffer_->size())) {
- PARQUET_THROW_NOT_OK(decompression_buffer_->Resize(uncompressed_len, false));
- }
-
- if (levels_byte_len > 0) {
- // First copy the levels as-is
- uint8_t* decompressed = decompression_buffer_->mutable_data();
- memcpy(decompressed, page_buffer->data(), levels_byte_len);
- }
-
- // Decompress the values
- PARQUET_THROW_NOT_OK(decompressor_->Decompress(
- compressed_len - levels_byte_len, page_buffer->data() + levels_byte_len,
- uncompressed_len - levels_byte_len,
- decompression_buffer_->mutable_data() + levels_byte_len));
-
- return decompression_buffer_;
-}
-
-} // namespace
-
-std::unique_ptr<PageReader> PageReader::Open(std::shared_ptr<ArrowInputStream> stream,
- int64_t total_num_rows,
- Compression::type codec,
- ::arrow::MemoryPool* pool,
- const CryptoContext* ctx) {
- return std::unique_ptr<PageReader>(
- new SerializedPageReader(std::move(stream), total_num_rows, codec, pool, ctx));
-}
-
-namespace {
-
-// ----------------------------------------------------------------------
-// Impl base class for TypedColumnReader and RecordReader
-
-// PLAIN_DICTIONARY is deprecated but was formerly used as a dictionary index
-// encoding.
-static bool IsDictionaryIndexEncoding(const Encoding::type& e) {
- return e == Encoding::RLE_DICTIONARY || e == Encoding::PLAIN_DICTIONARY;
-}
-
-template <typename DType>
-class ColumnReaderImplBase {
- public:
- using T = typename DType::c_type;
-
- ColumnReaderImplBase(const ColumnDescriptor* descr, ::arrow::MemoryPool* pool)
- : descr_(descr),
- max_def_level_(descr->max_definition_level()),
- max_rep_level_(descr->max_repetition_level()),
- num_buffered_values_(0),
- num_decoded_values_(0),
- pool_(pool),
- current_decoder_(nullptr),
- current_encoding_(Encoding::UNKNOWN) {}
-
- virtual ~ColumnReaderImplBase() = default;
-
- protected:
- // Read up to batch_size values from the current data page into the
- // pre-allocated memory T*
- //
- // @returns: the number of values read into the out buffer
- int64_t ReadValues(int64_t batch_size, T* out) {
- int64_t num_decoded = current_decoder_->Decode(out, static_cast<int>(batch_size));
- return num_decoded;
- }
-
- // Read up to batch_size values from the current data page into the
- // pre-allocated memory T*, leaving spaces for null entries according
- // to the def_levels.
- //
- // @returns: the number of values read into the out buffer
- int64_t ReadValuesSpaced(int64_t batch_size, T* out, int64_t null_count,
- uint8_t* valid_bits, int64_t valid_bits_offset) {
- return current_decoder_->DecodeSpaced(out, static_cast<int>(batch_size),
- static_cast<int>(null_count), valid_bits,
- valid_bits_offset);
- }
-
- // Read multiple definition levels into preallocated memory
- //
- // Returns the number of decoded definition levels
- int64_t ReadDefinitionLevels(int64_t batch_size, int16_t* levels) {
- if (max_def_level_ == 0) {
- return 0;
- }
- return definition_level_decoder_.Decode(static_cast<int>(batch_size), levels);
- }
-
- bool HasNextInternal() {
- // Either there is no data page available yet, or the data page has been
- // exhausted
- if (num_buffered_values_ == 0 || num_decoded_values_ == num_buffered_values_) {
- if (!ReadNewPage() || num_buffered_values_ == 0) {
- return false;
- }
- }
- return true;
- }
-
- // Read multiple repetition levels into preallocated memory
- // Returns the number of decoded repetition levels
- int64_t ReadRepetitionLevels(int64_t batch_size, int16_t* levels) {
- if (max_rep_level_ == 0) {
- return 0;
- }
- return repetition_level_decoder_.Decode(static_cast<int>(batch_size), levels);
- }
-
- // Advance to the next data page
- bool ReadNewPage() {
- // Loop until we find the next data page.
- while (true) {
- current_page_ = pager_->NextPage();
- if (!current_page_) {
- // EOS
- return false;
- }
-
- if (current_page_->type() == PageType::DICTIONARY_PAGE) {
- ConfigureDictionary(static_cast<const DictionaryPage*>(current_page_.get()));
- continue;
- } else if (current_page_->type() == PageType::DATA_PAGE) {
- const auto page = std::static_pointer_cast<DataPageV1>(current_page_);
- const int64_t levels_byte_size = InitializeLevelDecoders(
- *page, page->repetition_level_encoding(), page->definition_level_encoding());
- InitializeDataDecoder(*page, levels_byte_size);
- return true;
- } else if (current_page_->type() == PageType::DATA_PAGE_V2) {
- const auto page = std::static_pointer_cast<DataPageV2>(current_page_);
- int64_t levels_byte_size = InitializeLevelDecodersV2(*page);
- InitializeDataDecoder(*page, levels_byte_size);
- return true;
- } else {
- // We don't know what this page type is. We're allowed to skip non-data
- // pages.
- continue;
- }
- }
- return true;
- }
-
- void ConfigureDictionary(const DictionaryPage* page) {
- int encoding = static_cast<int>(page->encoding());
- if (page->encoding() == Encoding::PLAIN_DICTIONARY ||
- page->encoding() == Encoding::PLAIN) {
- encoding = static_cast<int>(Encoding::RLE_DICTIONARY);
- }
-
- auto it = decoders_.find(encoding);
- if (it != decoders_.end()) {
- throw ParquetException("Column cannot have more than one dictionary.");
- }
-
- if (page->encoding() == Encoding::PLAIN_DICTIONARY ||
- page->encoding() == Encoding::PLAIN) {
- auto dictionary = MakeTypedDecoder<DType>(Encoding::PLAIN, descr_);
- dictionary->SetData(page->num_values(), page->data(), page->size());
-
- // The dictionary is fully decoded during DictionaryDecoder::Init, so the
- // DictionaryPage buffer is no longer required after this step
- //
- // TODO(wesm): investigate whether this all-or-nothing decoding of the
- // dictionary makes sense and whether performance can be improved
-
- std::unique_ptr<DictDecoder<DType>> decoder = MakeDictDecoder<DType>(descr_, pool_);
- decoder->SetDict(dictionary.get());
- decoders_[encoding] =
- std::unique_ptr<DecoderType>(dynamic_cast<DecoderType*>(decoder.release()));
- } else {
- ParquetException::NYI("only plain dictionary encoding has been implemented");
- }
-
- new_dictionary_ = true;
- current_decoder_ = decoders_[encoding].get();
- DCHECK(current_decoder_);
- }
-
- // Initialize repetition and definition level decoders on the next data page.
- //
- // If the data page includes repetition and definition levels, we
- // initialize the level decoders and return the number of encoded level bytes.
- // The caller uses the return value to locate the start of the encoded values.
- int64_t InitializeLevelDecoders(const DataPage& page,
- Encoding::type repetition_level_encoding,
- Encoding::type definition_level_encoding) {
- // Read a data page.
- num_buffered_values_ = page.num_values();
-
- // Have not decoded any values from the data page yet
- num_decoded_values_ = 0;
-
- const uint8_t* buffer = page.data();
- int32_t levels_byte_size = 0;
- int32_t max_size = page.size();
-
- // Data page layout: repetition levels - definition levels - encoded values.
- // Levels are encoded as RLE or bit-packed.
- // Init repetition levels
- if (max_rep_level_ > 0) {
- int32_t rep_levels_bytes = repetition_level_decoder_.SetData(
- repetition_level_encoding, max_rep_level_,
- static_cast<int>(num_buffered_values_), buffer, max_size);
- buffer += rep_levels_bytes;
- levels_byte_size += rep_levels_bytes;
- max_size -= rep_levels_bytes;
- }
- // TODO: figure out a way to set max_def_level_ to 0
- // if the initial value is invalid
-
- // Init definition levels
- if (max_def_level_ > 0) {
- int32_t def_levels_bytes = definition_level_decoder_.SetData(
- definition_level_encoding, max_def_level_,
- static_cast<int>(num_buffered_values_), buffer, max_size);
- levels_byte_size += def_levels_bytes;
- max_size -= def_levels_bytes;
- }
-
- return levels_byte_size;
- }
-
- int64_t InitializeLevelDecodersV2(const DataPageV2& page) {
- // Read a data page.
- num_buffered_values_ = page.num_values();
-
- // Have not decoded any values from the data page yet
- num_decoded_values_ = 0;
- const uint8_t* buffer = page.data();
-
- const int64_t total_levels_length =
- static_cast<int64_t>(page.repetition_levels_byte_length()) +
- page.definition_levels_byte_length();
-
- if (total_levels_length > page.size()) {
- throw ParquetException("Data page too small for levels (corrupt header?)");
- }
-
- if (max_rep_level_ > 0) {
- repetition_level_decoder_.SetDataV2(page.repetition_levels_byte_length(),
- max_rep_level_,
- static_cast<int>(num_buffered_values_), buffer);
- buffer += page.repetition_levels_byte_length();
- }
-
- if (max_def_level_ > 0) {
- definition_level_decoder_.SetDataV2(page.definition_levels_byte_length(),
- max_def_level_,
- static_cast<int>(num_buffered_values_), buffer);
- }
-
- return total_levels_length;
- }
-
- // Get a decoder object for this page or create a new decoder if this is the
- // first page with this encoding.
- void InitializeDataDecoder(const DataPage& page, int64_t levels_byte_size) {
- const uint8_t* buffer = page.data() + levels_byte_size;
- const int64_t data_size = page.size() - levels_byte_size;
-
- if (data_size < 0) {
- throw ParquetException("Page smaller than size of encoded levels");
- }
-
- Encoding::type encoding = page.encoding();
-
- if (IsDictionaryIndexEncoding(encoding)) {
- encoding = Encoding::RLE_DICTIONARY;
- }
-
- auto it = decoders_.find(static_cast<int>(encoding));
- if (it != decoders_.end()) {
- DCHECK(it->second.get() != nullptr);
- if (encoding == Encoding::RLE_DICTIONARY) {
- DCHECK(current_decoder_->encoding() == Encoding::RLE_DICTIONARY);
- }
- current_decoder_ = it->second.get();
- } else {
- switch (encoding) {
- case Encoding::PLAIN: {
- auto decoder = MakeTypedDecoder<DType>(Encoding::PLAIN, descr_);
- current_decoder_ = decoder.get();
- decoders_[static_cast<int>(encoding)] = std::move(decoder);
- break;
- }
- case Encoding::BYTE_STREAM_SPLIT: {
- auto decoder = MakeTypedDecoder<DType>(Encoding::BYTE_STREAM_SPLIT, descr_);
- current_decoder_ = decoder.get();
- decoders_[static_cast<int>(encoding)] = std::move(decoder);
- break;
- }
- case Encoding::RLE_DICTIONARY:
- throw ParquetException("Dictionary page must be before data page.");
-
- case Encoding::DELTA_BINARY_PACKED:
- case Encoding::DELTA_LENGTH_BYTE_ARRAY:
- case Encoding::DELTA_BYTE_ARRAY:
- ParquetException::NYI("Unsupported encoding");
-
- default:
- throw ParquetException("Unknown encoding type.");
- }
- }
- current_encoding_ = encoding;
- current_decoder_->SetData(static_cast<int>(num_buffered_values_), buffer,
- static_cast<int>(data_size));
- }
-
- const ColumnDescriptor* descr_;
- const int16_t max_def_level_;
- const int16_t max_rep_level_;
-
- std::unique_ptr<PageReader> pager_;
- std::shared_ptr<Page> current_page_;
-
- // Not set if the full schema for this field has no optional or repeated elements
- LevelDecoder definition_level_decoder_;
-
- // Not set for flat schemas.
- LevelDecoder repetition_level_decoder_;
-
- // The total number of values stored in the data page. This is the maximum of
- // the number of encoded definition levels and the number of encoded values.
- // For non-repeated, required columns, this is equal to the number of encoded
- // values. For repeated or optional columns, there may be fewer data values
- // than levels, and this tells you how many encoded levels there are in that
- // case.
- int64_t num_buffered_values_;
-
- // The number of values from the current data page that have been decoded
- // into memory
- int64_t num_decoded_values_;
-
- ::arrow::MemoryPool* pool_;
-
- using DecoderType = TypedDecoder<DType>;
- DecoderType* current_decoder_;
- Encoding::type current_encoding_;
-
- /// Flag to signal when a new dictionary has been set, for the benefit of
- /// DictionaryRecordReader
- bool new_dictionary_;
-
- // The exposed encoding
- ExposedEncoding exposed_encoding_ = ExposedEncoding::NO_ENCODING;
-
- // Map of encoding type to the respective decoder object. For example, a
- // column chunk's data pages may include both dictionary-encoded and
- // plain-encoded data.
- std::unordered_map<int, std::unique_ptr<DecoderType>> decoders_;
-
- void ConsumeBufferedValues(int64_t num_values) { num_decoded_values_ += num_values; }
-};
-
-// ----------------------------------------------------------------------
-// TypedColumnReader implementations
-
-template <typename DType>
-class TypedColumnReaderImpl : public TypedColumnReader<DType>,
- public ColumnReaderImplBase<DType> {
- public:
- using T = typename DType::c_type;
-
- TypedColumnReaderImpl(const ColumnDescriptor* descr, std::unique_ptr<PageReader> pager,
- ::arrow::MemoryPool* pool)
- : ColumnReaderImplBase<DType>(descr, pool) {
- this->pager_ = std::move(pager);
- }
-
- bool HasNext() override { return this->HasNextInternal(); }
-
- int64_t ReadBatch(int64_t batch_size, int16_t* def_levels, int16_t* rep_levels,
- T* values, int64_t* values_read) override;
-
- int64_t ReadBatchSpaced(int64_t batch_size, int16_t* def_levels, int16_t* rep_levels,
- T* values, uint8_t* valid_bits, int64_t valid_bits_offset,
- int64_t* levels_read, int64_t* values_read,
- int64_t* null_count) override;
-
- int64_t Skip(int64_t num_rows_to_skip) override;
-
- Type::type type() const override { return this->descr_->physical_type(); }
-
- const ColumnDescriptor* descr() const override { return this->descr_; }
-
- ExposedEncoding GetExposedEncoding() override { return this->exposed_encoding_; }
-
- int64_t ReadBatchWithDictionary(int64_t batch_size, int16_t* def_levels,
- int16_t* rep_levels, int32_t* indices,
- int64_t* indices_read, const T** dict,
- int32_t* dict_len) override;
-
- protected:
- void SetExposedEncoding(ExposedEncoding encoding) override {
- this->exposed_encoding_ = encoding;
- }
-
- private:
- // Read dictionary indices. Similar to ReadValues but decode data to dictionary indices.
- // This function is called only by ReadBatchWithDictionary().
- int64_t ReadDictionaryIndices(int64_t indices_to_read, int32_t* indices) {
- auto decoder = dynamic_cast<DictDecoder<DType>*>(this->current_decoder_);
- return decoder->DecodeIndices(static_cast<int>(indices_to_read), indices);
- }
-
- // Get dictionary. The dictionary should have been set by SetDict(). The dictionary is
- // owned by the internal decoder and is destroyed when the reader is destroyed. This
- // function is called only by ReadBatchWithDictionary() after dictionary is configured.
- void GetDictionary(const T** dictionary, int32_t* dictionary_length) {
- auto decoder = dynamic_cast<DictDecoder<DType>*>(this->current_decoder_);
- decoder->GetDictionary(dictionary, dictionary_length);
- }
-
- // Read definition and repetition levels. Also return the number of definition levels
- // and number of values to read. This function is called before reading values.
- void ReadLevels(int64_t batch_size, int16_t* def_levels, int16_t* rep_levels,
- int64_t* num_def_levels, int64_t* values_to_read) {
- batch_size =
- std::min(batch_size, this->num_buffered_values_ - this->num_decoded_values_);
-
- // If the field is required and non-repeated, there are no definition levels
- if (this->max_def_level_ > 0 && def_levels != nullptr) {
- *num_def_levels = this->ReadDefinitionLevels(batch_size, def_levels);
- // TODO(wesm): this tallying of values-to-decode can be performed with better
- // cache-efficiency if fused with the level decoding.
- for (int64_t i = 0; i < *num_def_levels; ++i) {
- if (def_levels[i] == this->max_def_level_) {
- ++(*values_to_read);
- }
- }
- } else {
- // Required field, read all values
- *values_to_read = batch_size;
- }
-
- // Not present for non-repeated fields
- if (this->max_rep_level_ > 0 && rep_levels != nullptr) {
- int64_t num_rep_levels = this->ReadRepetitionLevels(batch_size, rep_levels);
- if (def_levels != nullptr && *num_def_levels != num_rep_levels) {
- throw ParquetException("Number of decoded rep / def levels did not match");
- }
- }
- }
-};
-
-template <typename DType>
-int64_t TypedColumnReaderImpl<DType>::ReadBatchWithDictionary(
- int64_t batch_size, int16_t* def_levels, int16_t* rep_levels, int32_t* indices,
- int64_t* indices_read, const T** dict, int32_t* dict_len) {
- bool has_dict_output = dict != nullptr && dict_len != nullptr;
- // Similar logic as ReadValues to get pages.
- if (!HasNext()) {
- *indices_read = 0;
- if (has_dict_output) {
- *dict = nullptr;
- *dict_len = 0;
- }
- return 0;
- }
-
- // Verify the current data page is dictionary encoded.
- if (this->current_encoding_ != Encoding::RLE_DICTIONARY) {
- std::stringstream ss;
- ss << "Data page is not dictionary encoded. Encoding: "
- << EncodingToString(this->current_encoding_);
- throw ParquetException(ss.str());
- }
-
- // Get dictionary pointer and length.
- if (has_dict_output) {
- GetDictionary(dict, dict_len);
- }
-
- // Similar logic as ReadValues to get def levels and rep levels.
- int64_t num_def_levels = 0;
- int64_t indices_to_read = 0;
- ReadLevels(batch_size, def_levels, rep_levels, &num_def_levels, &indices_to_read);
-
- // Read dictionary indices.
- *indices_read = ReadDictionaryIndices(indices_to_read, indices);
- int64_t total_indices = std::max(num_def_levels, *indices_read);
- this->ConsumeBufferedValues(total_indices);
-
- return total_indices;
-}
-
-template <typename DType>
-int64_t TypedColumnReaderImpl<DType>::ReadBatch(int64_t batch_size, int16_t* def_levels,
- int16_t* rep_levels, T* values,
- int64_t* values_read) {
- // HasNext invokes ReadNewPage
- if (!HasNext()) {
- *values_read = 0;
- return 0;
- }
-
- // TODO(wesm): keep reading data pages until batch_size is reached, or the
- // row group is finished
- int64_t num_def_levels = 0;
- int64_t values_to_read = 0;
- ReadLevels(batch_size, def_levels, rep_levels, &num_def_levels, &values_to_read);
-
- *values_read = this->ReadValues(values_to_read, values);
- int64_t total_values = std::max(num_def_levels, *values_read);
- this->ConsumeBufferedValues(total_values);
-
- return total_values;
-}
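-
-// A minimal usage sketch (not part of the original source): assuming `reader`
-// was obtained from ColumnReader::Make and cast to TypedColumnReader<Int64Type>*,
-// a column chunk can be drained as follows:
-//
-//   std::vector<int16_t> def_levels(1024), rep_levels(1024);
-//   std::vector<int64_t> values(1024);
-//   while (reader->HasNext()) {
-//     int64_t values_read = 0;
-//     reader->ReadBatch(1024, def_levels.data(), rep_levels.data(),
-//                       values.data(), &values_read);
-//     // ... consume values_read values ...
-//   }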
-
-template <typename DType>
-int64_t TypedColumnReaderImpl<DType>::ReadBatchSpaced(
- int64_t batch_size, int16_t* def_levels, int16_t* rep_levels, T* values,
- uint8_t* valid_bits, int64_t valid_bits_offset, int64_t* levels_read,
- int64_t* values_read, int64_t* null_count_out) {
- // HasNext invokes ReadNewPage
- if (!HasNext()) {
- *levels_read = 0;
- *values_read = 0;
- *null_count_out = 0;
- return 0;
- }
-
- int64_t total_values;
- // TODO(wesm): keep reading data pages until batch_size is reached, or the
- // row group is finished
- batch_size =
- std::min(batch_size, this->num_buffered_values_ - this->num_decoded_values_);
-
- // If the field is required and non-repeated, there are no definition levels
- if (this->max_def_level_ > 0) {
- int64_t num_def_levels = this->ReadDefinitionLevels(batch_size, def_levels);
-
- // Not present for non-repeated fields
- if (this->max_rep_level_ > 0) {
- int64_t num_rep_levels = this->ReadRepetitionLevels(batch_size, rep_levels);
- if (num_def_levels != num_rep_levels) {
- throw ParquetException("Number of decoded rep / def levels did not match");
- }
- }
-
- const bool has_spaced_values = HasSpacedValues(this->descr_);
- int64_t null_count = 0;
- if (!has_spaced_values) {
- int values_to_read = 0;
- for (int64_t i = 0; i < num_def_levels; ++i) {
- if (def_levels[i] == this->max_def_level_) {
- ++values_to_read;
- }
- }
- total_values = this->ReadValues(values_to_read, values);
- ::arrow::BitUtil::SetBitsTo(valid_bits, valid_bits_offset,
- /*length=*/total_values,
- /*bits_are_set=*/true);
- *values_read = total_values;
- } else {
- internal::LevelInfo info;
- info.repeated_ancestor_def_level = this->max_def_level_ - 1;
- info.def_level = this->max_def_level_;
- info.rep_level = this->max_rep_level_;
- internal::ValidityBitmapInputOutput validity_io;
- validity_io.values_read_upper_bound = num_def_levels;
- validity_io.valid_bits = valid_bits;
- validity_io.valid_bits_offset = valid_bits_offset;
- validity_io.null_count = null_count;
- validity_io.values_read = *values_read;
-
- internal::DefLevelsToBitmap(def_levels, num_def_levels, info, &validity_io);
- null_count = validity_io.null_count;
- *values_read = validity_io.values_read;
-
- total_values =
- this->ReadValuesSpaced(*values_read, values, static_cast<int>(null_count),
- valid_bits, valid_bits_offset);
- }
- *levels_read = num_def_levels;
- *null_count_out = null_count;
-
- } else {
- // Required field, read all values
- total_values = this->ReadValues(batch_size, values);
- ::arrow::BitUtil::SetBitsTo(valid_bits, valid_bits_offset,
- /*length=*/total_values,
- /*bits_are_set=*/true);
- *null_count_out = 0;
- *values_read = total_values;
- *levels_read = total_values;
- }
-
- this->ConsumeBufferedValues(*levels_read);
- return total_values;
-}
-
-template <typename DType>
-int64_t TypedColumnReaderImpl<DType>::Skip(int64_t num_rows_to_skip) {
- int64_t rows_to_skip = num_rows_to_skip;
- while (HasNext() && rows_to_skip > 0) {
- // If the number of rows to skip is more than the number of undecoded values,
- // skip the page.
- if (rows_to_skip > (this->num_buffered_values_ - this->num_decoded_values_)) {
- rows_to_skip -= this->num_buffered_values_ - this->num_decoded_values_;
- this->num_decoded_values_ = this->num_buffered_values_;
- } else {
- // We need to read this page
- // Jump to the right offset in the page
- int64_t batch_size = 1024; // ReadBatch with a smaller memory footprint
- int64_t values_read = 0;
-
- // This will be enough scratch space to accommodate 16-bit levels or any
- // value type
- std::shared_ptr<ResizableBuffer> scratch = AllocateBuffer(
- this->pool_, batch_size * type_traits<DType::type_num>::value_byte_size);
-
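- // The same scratch buffer is reused below for def levels, rep levels
- // and values, since the decoded data is immediately discarded.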
- do {
- batch_size = std::min(batch_size, rows_to_skip);
- values_read =
- ReadBatch(static_cast<int>(batch_size),
- reinterpret_cast<int16_t*>(scratch->mutable_data()),
- reinterpret_cast<int16_t*>(scratch->mutable_data()),
- reinterpret_cast<T*>(scratch->mutable_data()), &values_read);
- rows_to_skip -= values_read;
- } while (values_read > 0 && rows_to_skip > 0);
- }
- }
- return num_rows_to_skip - rows_to_skip;
-}
-
-} // namespace
-
-// ----------------------------------------------------------------------
-// Dynamic column reader constructor
-
-std::shared_ptr<ColumnReader> ColumnReader::Make(const ColumnDescriptor* descr,
- std::unique_ptr<PageReader> pager,
- MemoryPool* pool) {
- switch (descr->physical_type()) {
- case Type::BOOLEAN:
- return std::make_shared<TypedColumnReaderImpl<BooleanType>>(descr, std::move(pager),
- pool);
- case Type::INT32:
- return std::make_shared<TypedColumnReaderImpl<Int32Type>>(descr, std::move(pager),
- pool);
- case Type::INT64:
- return std::make_shared<TypedColumnReaderImpl<Int64Type>>(descr, std::move(pager),
- pool);
- case Type::INT96:
- return std::make_shared<TypedColumnReaderImpl<Int96Type>>(descr, std::move(pager),
- pool);
- case Type::FLOAT:
- return std::make_shared<TypedColumnReaderImpl<FloatType>>(descr, std::move(pager),
- pool);
- case Type::DOUBLE:
- return std::make_shared<TypedColumnReaderImpl<DoubleType>>(descr, std::move(pager),
- pool);
- case Type::BYTE_ARRAY:
- return std::make_shared<TypedColumnReaderImpl<ByteArrayType>>(
- descr, std::move(pager), pool);
- case Type::FIXED_LEN_BYTE_ARRAY:
- return std::make_shared<TypedColumnReaderImpl<FLBAType>>(descr, std::move(pager),
- pool);
- default:
- ParquetException::NYI("type reader not implemented");
- }
- // Unreachable code, but suppress compiler warning
- return std::shared_ptr<ColumnReader>(nullptr);
-}
-
-// ----------------------------------------------------------------------
-// RecordReader
-
-namespace internal {
-namespace {
-
-// The minimum number of repetition/definition levels to decode at a time, for
-// better vectorized performance when doing many smaller record reads
-constexpr int64_t kMinLevelBatchSize = 1024;
-
-template <typename DType>
-class TypedRecordReader : public ColumnReaderImplBase<DType>,
- virtual public RecordReader {
- public:
- using T = typename DType::c_type;
- using BASE = ColumnReaderImplBase<DType>;
- TypedRecordReader(const ColumnDescriptor* descr, LevelInfo leaf_info, MemoryPool* pool)
- : BASE(descr, pool) {
- leaf_info_ = leaf_info;
- nullable_values_ = leaf_info.HasNullableValues();
- at_record_start_ = true;
- records_read_ = 0;
- values_written_ = 0;
- values_capacity_ = 0;
- null_count_ = 0;
- levels_written_ = 0;
- levels_position_ = 0;
- levels_capacity_ = 0;
- uses_values_ = !(descr->physical_type() == Type::BYTE_ARRAY);
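- // BYTE_ARRAY values are accumulated directly in binary builders rather
- // than in the values_ buffer (see the note in ReserveValues).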
-
- if (uses_values_) {
- values_ = AllocateBuffer(pool);
- }
- valid_bits_ = AllocateBuffer(pool);
- def_levels_ = AllocateBuffer(pool);
- rep_levels_ = AllocateBuffer(pool);
- Reset();
- }
-
- int64_t available_values_current_page() const {
- return this->num_buffered_values_ - this->num_decoded_values_;
- }
-
- // Compute the values capacity in bytes for the given number of elements
- int64_t bytes_for_values(int64_t nitems) const {
- int64_t type_size = GetTypeByteSize(this->descr_->physical_type());
- int64_t bytes_for_values = -1;
- if (MultiplyWithOverflow(nitems, type_size, &bytes_for_values)) {
- throw ParquetException("Total size of items too large");
- }
- return bytes_for_values;
- }
-
- int64_t ReadRecords(int64_t num_records) override {
- // Delimit records, then read values at the end
- int64_t records_read = 0;
-
- if (levels_position_ < levels_written_) {
- records_read += ReadRecordData(num_records);
- }
-
- int64_t level_batch_size = std::max(kMinLevelBatchSize, num_records);
-
- // Keep reading until we reach the desired number of records; if that
- // count is reached in the middle of a record, continue until the end
- // of the current record
- while (!at_record_start_ || records_read < num_records) {
- // Is there more data to read in this row group?
- if (!this->HasNextInternal()) {
- if (!at_record_start_) {
- // We ended the row group while inside a record that we haven't seen
- // the end of yet. So increment the record count for the last record in
- // the row group
- ++records_read;
- at_record_start_ = true;
- }
- break;
- }
-
- // We perform multiple batch reads until we either exhaust the row group
- // or observe the desired number of records
- int64_t batch_size = std::min(level_batch_size, available_values_current_page());
-
- // No more data in column
- if (batch_size == 0) {
- break;
- }
-
- if (this->max_def_level_ > 0) {
- ReserveLevels(batch_size);
-
- int16_t* def_levels = this->def_levels() + levels_written_;
- int16_t* rep_levels = this->rep_levels() + levels_written_;
-
- // Not present for non-repeated fields
- int64_t levels_read = 0;
- if (this->max_rep_level_ > 0) {
- levels_read = this->ReadDefinitionLevels(batch_size, def_levels);
- if (this->ReadRepetitionLevels(batch_size, rep_levels) != levels_read) {
- throw ParquetException("Number of decoded rep / def levels did not match");
- }
- } else if (this->max_def_level_ > 0) {
- levels_read = this->ReadDefinitionLevels(batch_size, def_levels);
- }
-
- // Exhausted column chunk
- if (levels_read == 0) {
- break;
- }
-
- levels_written_ += levels_read;
- records_read += ReadRecordData(num_records - records_read);
- } else {
- // No repetition or definition levels
- batch_size = std::min(num_records - records_read, batch_size);
- records_read += ReadRecordData(batch_size);
- }
- }
-
- return records_read;
- }
-
- // We may appear to have exhausted the column chunk when in fact we are
- // still in the middle of processing the last batch
- bool has_values_to_process() const { return levels_position_ < levels_written_; }
-
- std::shared_ptr<ResizableBuffer> ReleaseValues() override {
- if (uses_values_) {
- auto result = values_;
- PARQUET_THROW_NOT_OK(result->Resize(bytes_for_values(values_written_), true));
- values_ = AllocateBuffer(this->pool_);
- values_capacity_ = 0;
- return result;
- } else {
- return nullptr;
- }
- }
-
- std::shared_ptr<ResizableBuffer> ReleaseIsValid() override {
- if (leaf_info_.HasNullableValues()) {
- auto result = valid_bits_;
- PARQUET_THROW_NOT_OK(result->Resize(BitUtil::BytesForBits(values_written_), true));
- valid_bits_ = AllocateBuffer(this->pool_);
- return result;
- } else {
- return nullptr;
- }
- }
-
- // Process written repetition/definition levels to reach the end of
- // records. Process no more levels than necessary to delimit the indicated
- // number of logical records. Updates internal state of RecordReader
- //
- // \return Number of records delimited
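- //
- // For example, with max_rep_level_ == 1, the repetition levels
- //   0 1 1 0 1 0
- // delimit three records: a new record starts at each rep_level == 0.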
- int64_t DelimitRecords(int64_t num_records, int64_t* values_seen) {
- int64_t values_to_read = 0;
- int64_t records_read = 0;
-
- const int16_t* def_levels = this->def_levels() + levels_position_;
- const int16_t* rep_levels = this->rep_levels() + levels_position_;
-
- DCHECK_GT(this->max_rep_level_, 0);
-
- // Count logical records and number of values to read
- while (levels_position_ < levels_written_) {
- const int16_t rep_level = *rep_levels++;
- if (rep_level == 0) {
- // If at_record_start_ is true, we are seeing the start of a record
- // for the second time, such as after repeated calls to
- // DelimitRecords. In this case we must continue until we find
- // another record start or exhausting the ColumnChunk
- if (!at_record_start_) {
- // We've reached the end of a record; increment the record count.
- ++records_read;
- if (records_read == num_records) {
- // We've found the number of records we were looking for. Set
- // at_record_start_ to true and break
- at_record_start_ = true;
- break;
- }
- }
- }
- // We have decided to consume the level at this position; therefore we
- // must advance until we find another record boundary
- at_record_start_ = false;
-
- const int16_t def_level = *def_levels++;
- if (def_level == this->max_def_level_) {
- ++values_to_read;
- }
- ++levels_position_;
- }
- *values_seen = values_to_read;
- return records_read;
- }
-
- void Reserve(int64_t capacity) override {
- ReserveLevels(capacity);
- ReserveValues(capacity);
- }
-
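- // Computes a new capacity of at least size + extra_size, rounded up to
- // the next power of two, with overflow checks along the way.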
- int64_t UpdateCapacity(int64_t capacity, int64_t size, int64_t extra_size) {
- if (extra_size < 0) {
- throw ParquetException("Negative size (corrupt file?)");
- }
- int64_t target_size = -1;
- if (AddWithOverflow(size, extra_size, &target_size)) {
- throw ParquetException("Allocation size too large (corrupt file?)");
- }
- if (target_size >= (1LL << 62)) {
- throw ParquetException("Allocation size too large (corrupt file?)");
- }
- if (capacity >= target_size) {
- return capacity;
- }
- return BitUtil::NextPower2(target_size);
- }
-
- void ReserveLevels(int64_t extra_levels) {
- if (this->max_def_level_ > 0) {
- const int64_t new_levels_capacity =
- UpdateCapacity(levels_capacity_, levels_written_, extra_levels);
- if (new_levels_capacity > levels_capacity_) {
- constexpr auto kItemSize = static_cast<int64_t>(sizeof(int16_t));
- int64_t capacity_in_bytes = -1;
- if (MultiplyWithOverflow(new_levels_capacity, kItemSize, &capacity_in_bytes)) {
- throw ParquetException("Allocation size too large (corrupt file?)");
- }
- PARQUET_THROW_NOT_OK(def_levels_->Resize(capacity_in_bytes, false));
- if (this->max_rep_level_ > 0) {
- PARQUET_THROW_NOT_OK(rep_levels_->Resize(capacity_in_bytes, false));
- }
- levels_capacity_ = new_levels_capacity;
- }
- }
- }
-
- void ReserveValues(int64_t extra_values) {
- const int64_t new_values_capacity =
- UpdateCapacity(values_capacity_, values_written_, extra_values);
- if (new_values_capacity > values_capacity_) {
- // XXX(wesm): A hack to avoid memory allocation when reading directly
- // into builder classes
- if (uses_values_) {
- PARQUET_THROW_NOT_OK(
- values_->Resize(bytes_for_values(new_values_capacity), false));
- }
- values_capacity_ = new_values_capacity;
- }
- if (leaf_info_.HasNullableValues()) {
- int64_t valid_bytes_new = BitUtil::BytesForBits(values_capacity_);
- if (valid_bits_->size() < valid_bytes_new) {
- int64_t valid_bytes_old = BitUtil::BytesForBits(values_written_);
- PARQUET_THROW_NOT_OK(valid_bits_->Resize(valid_bytes_new, false));
-
- // Avoid valgrind warnings
- memset(valid_bits_->mutable_data() + valid_bytes_old, 0,
- valid_bytes_new - valid_bytes_old);
- }
- }
- }
-
- void Reset() override {
- ResetValues();
-
- if (levels_written_ > 0) {
- const int64_t levels_remaining = levels_written_ - levels_position_;
- // Shift remaining levels to beginning of buffer and trim to only the number
- // of decoded levels remaining
- int16_t* def_data = def_levels();
- int16_t* rep_data = rep_levels();
-
- std::copy(def_data + levels_position_, def_data + levels_written_, def_data);
- PARQUET_THROW_NOT_OK(
- def_levels_->Resize(levels_remaining * sizeof(int16_t), false));
-
- if (this->max_rep_level_ > 0) {
- std::copy(rep_data + levels_position_, rep_data + levels_written_, rep_data);
- PARQUET_THROW_NOT_OK(
- rep_levels_->Resize(levels_remaining * sizeof(int16_t), false));
- }
-
- levels_written_ -= levels_position_;
- levels_position_ = 0;
- levels_capacity_ = levels_remaining;
- }
-
- records_read_ = 0;
-
- // Call Finish on the binary builders to reset them
- }
-
- void SetPageReader(std::unique_ptr<PageReader> reader) override {
- at_record_start_ = true;
- this->pager_ = std::move(reader);
- ResetDecoders();
- }
-
- bool HasMoreData() const override { return this->pager_ != nullptr; }
-
- // Dictionary decoders must be reset when advancing row groups
- void ResetDecoders() { this->decoders_.clear(); }
-
- virtual void ReadValuesSpaced(int64_t values_with_nulls, int64_t null_count) {
- uint8_t* valid_bits = valid_bits_->mutable_data();
- const int64_t valid_bits_offset = values_written_;
-
- int64_t num_decoded = this->current_decoder_->DecodeSpaced(
- ValuesHead<T>(), static_cast<int>(values_with_nulls),
- static_cast<int>(null_count), valid_bits, valid_bits_offset);
- DCHECK_EQ(num_decoded, values_with_nulls);
- }
-
- virtual void ReadValuesDense(int64_t values_to_read) {
- int64_t num_decoded =
- this->current_decoder_->Decode(ValuesHead<T>(), static_cast<int>(values_to_read));
- DCHECK_EQ(num_decoded, values_to_read);
- }
-
- // Return number of logical records read
- int64_t ReadRecordData(int64_t num_records) {
- // Conservative upper bound
- const int64_t possible_num_values =
- std::max(num_records, levels_written_ - levels_position_);
- ReserveValues(possible_num_values);
-
- const int64_t start_levels_position = levels_position_;
-
- int64_t values_to_read = 0;
- int64_t records_read = 0;
- if (this->max_rep_level_ > 0) {
- records_read = DelimitRecords(num_records, &values_to_read);
- } else if (this->max_def_level_ > 0) {
-      // No repetition levels, skip delimiting logic. Each level represents a
-      // null or non-null entry
- records_read = std::min(levels_written_ - levels_position_, num_records);
-
- // This is advanced by DelimitRecords, which we skipped
- levels_position_ += records_read;
- } else {
- records_read = values_to_read = num_records;
- }
-
- int64_t null_count = 0;
- if (leaf_info_.HasNullableValues()) {
- ValidityBitmapInputOutput validity_io;
- validity_io.values_read_upper_bound = levels_position_ - start_levels_position;
- validity_io.valid_bits = valid_bits_->mutable_data();
- validity_io.valid_bits_offset = values_written_;
-
- DefLevelsToBitmap(def_levels() + start_levels_position,
- levels_position_ - start_levels_position, leaf_info_,
- &validity_io);
- values_to_read = validity_io.values_read - validity_io.null_count;
- null_count = validity_io.null_count;
- DCHECK_GE(values_to_read, 0);
- ReadValuesSpaced(validity_io.values_read, null_count);
- } else {
- DCHECK_GE(values_to_read, 0);
- ReadValuesDense(values_to_read);
- }
- if (this->leaf_info_.def_level > 0) {
- // Optional, repeated, or some mix thereof
- this->ConsumeBufferedValues(levels_position_ - start_levels_position);
- } else {
- // Flat, non-repeated
- this->ConsumeBufferedValues(values_to_read);
- }
- // Total values, including null spaces, if any
- values_written_ += values_to_read + null_count;
- null_count_ += null_count;
-
- return records_read;
- }
-
- void DebugPrintState() override {
- const int16_t* def_levels = this->def_levels();
- const int16_t* rep_levels = this->rep_levels();
- const int64_t total_levels_read = levels_position_;
-
- const T* vals = reinterpret_cast<const T*>(this->values());
-
- std::cout << "def levels: ";
- for (int64_t i = 0; i < total_levels_read; ++i) {
- std::cout << def_levels[i] << " ";
- }
- std::cout << std::endl;
-
- std::cout << "rep levels: ";
- for (int64_t i = 0; i < total_levels_read; ++i) {
- std::cout << rep_levels[i] << " ";
- }
- std::cout << std::endl;
-
- std::cout << "values: ";
- for (int64_t i = 0; i < this->values_written(); ++i) {
- std::cout << vals[i] << " ";
- }
- std::cout << std::endl;
- }
-
- void ResetValues() {
- if (values_written_ > 0) {
- // Resize to 0, but do not shrink to fit
- if (uses_values_) {
- PARQUET_THROW_NOT_OK(values_->Resize(0, false));
- }
- PARQUET_THROW_NOT_OK(valid_bits_->Resize(0, false));
- values_written_ = 0;
- values_capacity_ = 0;
- null_count_ = 0;
- }
- }
-
- protected:
- template <typename T>
- T* ValuesHead() {
- return reinterpret_cast<T*>(values_->mutable_data()) + values_written_;
- }
- LevelInfo leaf_info_;
-};
-
-class FLBARecordReader : public TypedRecordReader<FLBAType>,
- virtual public BinaryRecordReader {
- public:
- FLBARecordReader(const ColumnDescriptor* descr, LevelInfo leaf_info,
- ::arrow::MemoryPool* pool)
- : TypedRecordReader<FLBAType>(descr, leaf_info, pool), builder_(nullptr) {
- DCHECK_EQ(descr_->physical_type(), Type::FIXED_LEN_BYTE_ARRAY);
- int byte_width = descr_->type_length();
- std::shared_ptr<::arrow::DataType> type = ::arrow::fixed_size_binary(byte_width);
- builder_.reset(new ::arrow::FixedSizeBinaryBuilder(type, this->pool_));
- }
-
- ::arrow::ArrayVector GetBuilderChunks() override {
- std::shared_ptr<::arrow::Array> chunk;
- PARQUET_THROW_NOT_OK(builder_->Finish(&chunk));
- return ::arrow::ArrayVector({chunk});
- }
-
- void ReadValuesDense(int64_t values_to_read) override {
- auto values = ValuesHead<FLBA>();
- int64_t num_decoded =
- this->current_decoder_->Decode(values, static_cast<int>(values_to_read));
- DCHECK_EQ(num_decoded, values_to_read);
-
- for (int64_t i = 0; i < num_decoded; i++) {
- PARQUET_THROW_NOT_OK(builder_->Append(values[i].ptr));
- }
- ResetValues();
- }
-
- void ReadValuesSpaced(int64_t values_to_read, int64_t null_count) override {
- uint8_t* valid_bits = valid_bits_->mutable_data();
- const int64_t valid_bits_offset = values_written_;
- auto values = ValuesHead<FLBA>();
-
- int64_t num_decoded = this->current_decoder_->DecodeSpaced(
- values, static_cast<int>(values_to_read), static_cast<int>(null_count),
- valid_bits, valid_bits_offset);
- DCHECK_EQ(num_decoded, values_to_read);
-
- for (int64_t i = 0; i < num_decoded; i++) {
- if (::arrow::BitUtil::GetBit(valid_bits, valid_bits_offset + i)) {
- PARQUET_THROW_NOT_OK(builder_->Append(values[i].ptr));
- } else {
- PARQUET_THROW_NOT_OK(builder_->AppendNull());
- }
- }
- ResetValues();
- }
-
- private:
- std::unique_ptr<::arrow::FixedSizeBinaryBuilder> builder_;
-};
-
-class ByteArrayChunkedRecordReader : public TypedRecordReader<ByteArrayType>,
- virtual public BinaryRecordReader {
- public:
- ByteArrayChunkedRecordReader(const ColumnDescriptor* descr, LevelInfo leaf_info,
- ::arrow::MemoryPool* pool)
- : TypedRecordReader<ByteArrayType>(descr, leaf_info, pool) {
- DCHECK_EQ(descr_->physical_type(), Type::BYTE_ARRAY);
- accumulator_.builder.reset(new ::arrow::BinaryBuilder(pool));
- }
-
- ::arrow::ArrayVector GetBuilderChunks() override {
- ::arrow::ArrayVector result = accumulator_.chunks;
- if (result.size() == 0 || accumulator_.builder->length() > 0) {
- std::shared_ptr<::arrow::Array> last_chunk;
- PARQUET_THROW_NOT_OK(accumulator_.builder->Finish(&last_chunk));
- result.push_back(std::move(last_chunk));
- }
- accumulator_.chunks = {};
- return result;
- }
-
- void ReadValuesDense(int64_t values_to_read) override {
- int64_t num_decoded = this->current_decoder_->DecodeArrowNonNull(
- static_cast<int>(values_to_read), &accumulator_);
- DCHECK_EQ(num_decoded, values_to_read);
- ResetValues();
- }
-
- void ReadValuesSpaced(int64_t values_to_read, int64_t null_count) override {
- int64_t num_decoded = this->current_decoder_->DecodeArrow(
- static_cast<int>(values_to_read), static_cast<int>(null_count),
- valid_bits_->mutable_data(), values_written_, &accumulator_);
- DCHECK_EQ(num_decoded, values_to_read - null_count);
- ResetValues();
- }
-
- private:
- // Helper data structure for accumulating builder chunks
- typename EncodingTraits<ByteArrayType>::Accumulator accumulator_;
-};
-
-class ByteArrayDictionaryRecordReader : public TypedRecordReader<ByteArrayType>,
- virtual public DictionaryRecordReader {
- public:
- ByteArrayDictionaryRecordReader(const ColumnDescriptor* descr, LevelInfo leaf_info,
- ::arrow::MemoryPool* pool)
- : TypedRecordReader<ByteArrayType>(descr, leaf_info, pool), builder_(pool) {
- this->read_dictionary_ = true;
- }
-
- std::shared_ptr<::arrow::ChunkedArray> GetResult() override {
- FlushBuilder();
- std::vector<std::shared_ptr<::arrow::Array>> result;
- std::swap(result, result_chunks_);
- return std::make_shared<::arrow::ChunkedArray>(std::move(result), builder_.type());
- }
-
- void FlushBuilder() {
- if (builder_.length() > 0) {
- std::shared_ptr<::arrow::Array> chunk;
- PARQUET_THROW_NOT_OK(builder_.Finish(&chunk));
- result_chunks_.emplace_back(std::move(chunk));
-
- // Also clears the dictionary memo table
- builder_.Reset();
- }
- }
-
- void MaybeWriteNewDictionary() {
- if (this->new_dictionary_) {
- /// If there is a new dictionary, we may need to flush the builder, then
- /// insert the new dictionary values
- FlushBuilder();
- builder_.ResetFull();
- auto decoder = dynamic_cast<BinaryDictDecoder*>(this->current_decoder_);
- decoder->InsertDictionary(&builder_);
- this->new_dictionary_ = false;
- }
- }
-
- void ReadValuesDense(int64_t values_to_read) override {
- int64_t num_decoded = 0;
- if (current_encoding_ == Encoding::RLE_DICTIONARY) {
- MaybeWriteNewDictionary();
- auto decoder = dynamic_cast<BinaryDictDecoder*>(this->current_decoder_);
- num_decoded = decoder->DecodeIndices(static_cast<int>(values_to_read), &builder_);
- } else {
- num_decoded = this->current_decoder_->DecodeArrowNonNull(
- static_cast<int>(values_to_read), &builder_);
-
- /// Flush values since they have been copied into the builder
- ResetValues();
- }
- DCHECK_EQ(num_decoded, values_to_read);
- }
-
- void ReadValuesSpaced(int64_t values_to_read, int64_t null_count) override {
- int64_t num_decoded = 0;
- if (current_encoding_ == Encoding::RLE_DICTIONARY) {
- MaybeWriteNewDictionary();
- auto decoder = dynamic_cast<BinaryDictDecoder*>(this->current_decoder_);
- num_decoded = decoder->DecodeIndicesSpaced(
- static_cast<int>(values_to_read), static_cast<int>(null_count),
- valid_bits_->mutable_data(), values_written_, &builder_);
- } else {
- num_decoded = this->current_decoder_->DecodeArrow(
- static_cast<int>(values_to_read), static_cast<int>(null_count),
- valid_bits_->mutable_data(), values_written_, &builder_);
-
- /// Flush values since they have been copied into the builder
- ResetValues();
- }
- DCHECK_EQ(num_decoded, values_to_read - null_count);
- }
-
- private:
- using BinaryDictDecoder = DictDecoder<ByteArrayType>;
-
- ::arrow::BinaryDictionary32Builder builder_;
- std::vector<std::shared_ptr<::arrow::Array>> result_chunks_;
-};
-
-// TODO(wesm): Implement these to some satisfaction
-template <>
-void TypedRecordReader<Int96Type>::DebugPrintState() {}
-
-template <>
-void TypedRecordReader<ByteArrayType>::DebugPrintState() {}
-
-template <>
-void TypedRecordReader<FLBAType>::DebugPrintState() {}
-
-std::shared_ptr<RecordReader> MakeByteArrayRecordReader(const ColumnDescriptor* descr,
- LevelInfo leaf_info,
- ::arrow::MemoryPool* pool,
- bool read_dictionary) {
- if (read_dictionary) {
- return std::make_shared<ByteArrayDictionaryRecordReader>(descr, leaf_info, pool);
- } else {
- return std::make_shared<ByteArrayChunkedRecordReader>(descr, leaf_info, pool);
- }
-}
-
-} // namespace
-
-std::shared_ptr<RecordReader> RecordReader::Make(const ColumnDescriptor* descr,
- LevelInfo leaf_info, MemoryPool* pool,
- const bool read_dictionary) {
- switch (descr->physical_type()) {
- case Type::BOOLEAN:
- return std::make_shared<TypedRecordReader<BooleanType>>(descr, leaf_info, pool);
- case Type::INT32:
- return std::make_shared<TypedRecordReader<Int32Type>>(descr, leaf_info, pool);
- case Type::INT64:
- return std::make_shared<TypedRecordReader<Int64Type>>(descr, leaf_info, pool);
- case Type::INT96:
- return std::make_shared<TypedRecordReader<Int96Type>>(descr, leaf_info, pool);
- case Type::FLOAT:
- return std::make_shared<TypedRecordReader<FloatType>>(descr, leaf_info, pool);
- case Type::DOUBLE:
- return std::make_shared<TypedRecordReader<DoubleType>>(descr, leaf_info, pool);
- case Type::BYTE_ARRAY:
- return MakeByteArrayRecordReader(descr, leaf_info, pool, read_dictionary);
- case Type::FIXED_LEN_BYTE_ARRAY:
- return std::make_shared<FLBARecordReader>(descr, leaf_info, pool);
- default: {
- // PARQUET-1481: This can occur if the file is corrupt
- std::stringstream ss;
- ss << "Invalid physical column type: " << static_cast<int>(descr->physical_type());
- throw ParquetException(ss.str());
- }
- }
- // Unreachable code, but suppress compiler warning
- return nullptr;
-}
-
-} // namespace internal
-} // namespace parquet
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "parquet/column_reader.h"
+
+#include <algorithm>
+#include <cstdint>
+#include <cstring>
+#include <exception>
+#include <iostream>
+#include <memory>
+#include <string>
+#include <unordered_map>
+#include <utility>
+#include <vector>
+
+#include "arrow/array.h"
+#include "arrow/array/builder_binary.h"
+#include "arrow/array/builder_dict.h"
+#include "arrow/array/builder_primitive.h"
+#include "arrow/chunked_array.h"
+#include "arrow/type.h"
+#include "arrow/util/bit_stream_utils.h"
+#include "arrow/util/bit_util.h"
+#include "arrow/util/checked_cast.h"
+#include "arrow/util/compression.h"
+#include "arrow/util/int_util_internal.h"
+#include "arrow/util/logging.h"
+#include "arrow/util/rle_encoding.h"
+#include "parquet/column_page.h"
+#include "parquet/encoding.h"
+#include "parquet/encryption/encryption_internal.h"
+#include "parquet/encryption/internal_file_decryptor.h"
+#include "parquet/level_comparison.h"
+#include "parquet/level_conversion.h"
+#include "parquet/properties.h"
+#include "parquet/statistics.h"
+#include "parquet/thrift_internal.h" // IWYU pragma: keep
+// Required after "arrow/util/int_util_internal.h" (for OPTIONAL)
+#include "parquet/windows_compatibility.h"
+
+using arrow::MemoryPool;
+using arrow::internal::AddWithOverflow;
+using arrow::internal::checked_cast;
+using arrow::internal::MultiplyWithOverflow;
+
+namespace BitUtil = arrow::BitUtil;
+
+namespace parquet {
+namespace {
+inline bool HasSpacedValues(const ColumnDescriptor* descr) {
+ if (descr->max_repetition_level() > 0) {
+ // repeated+flat case
+ return !descr->schema_node()->is_required();
+ } else {
+ // non-repeated+nested case
+ // Find if a node forces nulls in the lowest level along the hierarchy
+ const schema::Node* node = descr->schema_node().get();
+ while (node) {
+ if (node->is_optional()) {
+ return true;
+ }
+ node = node->parent();
+ }
+ return false;
+ }
+}
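+
+// Illustrative sketch (not compiled): how HasSpacedValues behaves for two
+// simple schemas. The node and descriptor values below are hypothetical.
+#if 0
+// Flat required column: no nulls are possible, so reads are dense.
+auto required = schema::PrimitiveNode::Make("a", Repetition::REQUIRED, Type::INT32);
+ColumnDescriptor dense(required, /*max_definition_level=*/0,
+                       /*max_repetition_level=*/0);
+// HasSpacedValues(&dense) -> false
+
+// Flat optional column: definition levels may leave null slots in the output.
+auto optional = schema::PrimitiveNode::Make("b", Repetition::OPTIONAL, Type::INT32);
+ColumnDescriptor spaced(optional, /*max_definition_level=*/1,
+                        /*max_repetition_level=*/0);
+// HasSpacedValues(&spaced) -> true
+#endif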
+} // namespace
+
+LevelDecoder::LevelDecoder() : num_values_remaining_(0) {}
+
+LevelDecoder::~LevelDecoder() {}
+
+int LevelDecoder::SetData(Encoding::type encoding, int16_t max_level,
+ int num_buffered_values, const uint8_t* data,
+ int32_t data_size) {
+ max_level_ = max_level;
+ int32_t num_bytes = 0;
+ encoding_ = encoding;
+ num_values_remaining_ = num_buffered_values;
+ bit_width_ = BitUtil::Log2(max_level + 1);
+ switch (encoding) {
+ case Encoding::RLE: {
+ if (data_size < 4) {
+ throw ParquetException("Received invalid levels (corrupt data page?)");
+ }
+ num_bytes = ::arrow::util::SafeLoadAs<int32_t>(data);
+ if (num_bytes < 0 || num_bytes > data_size - 4) {
+ throw ParquetException("Received invalid number of bytes (corrupt data page?)");
+ }
+ const uint8_t* decoder_data = data + 4;
+ if (!rle_decoder_) {
+ rle_decoder_.reset(
+ new ::arrow::util::RleDecoder(decoder_data, num_bytes, bit_width_));
+ } else {
+ rle_decoder_->Reset(decoder_data, num_bytes, bit_width_);
+ }
+ return 4 + num_bytes;
+ }
+ case Encoding::BIT_PACKED: {
+ int num_bits = 0;
+ if (MultiplyWithOverflow(num_buffered_values, bit_width_, &num_bits)) {
+ throw ParquetException(
+ "Number of buffered values too large (corrupt data page?)");
+ }
+ num_bytes = static_cast<int32_t>(BitUtil::BytesForBits(num_bits));
+ if (num_bytes < 0 || num_bytes > data_size - 4) {
+ throw ParquetException("Received invalid number of bytes (corrupt data page?)");
+ }
+ if (!bit_packed_decoder_) {
+ bit_packed_decoder_.reset(new ::arrow::BitUtil::BitReader(data, num_bytes));
+ } else {
+ bit_packed_decoder_->Reset(data, num_bytes);
+ }
+ return num_bytes;
+ }
+ default:
+ throw ParquetException("Unknown encoding type for levels.");
+ }
+ return -1;
+}
+
+void LevelDecoder::SetDataV2(int32_t num_bytes, int16_t max_level,
+ int num_buffered_values, const uint8_t* data) {
+ max_level_ = max_level;
+  // Repetition and definition levels always use RLE encoding
+  // in the DataPageV2 format.
+ if (num_bytes < 0) {
+ throw ParquetException("Invalid page header (corrupt data page?)");
+ }
+ encoding_ = Encoding::RLE;
+ num_values_remaining_ = num_buffered_values;
+ bit_width_ = BitUtil::Log2(max_level + 1);
+
+ if (!rle_decoder_) {
+ rle_decoder_.reset(new ::arrow::util::RleDecoder(data, num_bytes, bit_width_));
+ } else {
+ rle_decoder_->Reset(data, num_bytes, bit_width_);
+ }
+}
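+
+// Note (illustrative): in contrast with SetData above, DataPageV1 prefixes the
+// RLE-encoded level run with a 4-byte little-endian byte count, while
+// DataPageV2 records the levels' byte length in the page header, so SetDataV2
+// receives num_bytes directly and reads no prefix.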
+
+int LevelDecoder::Decode(int batch_size, int16_t* levels) {
+ int num_decoded = 0;
+
+ int num_values = std::min(num_values_remaining_, batch_size);
+ if (encoding_ == Encoding::RLE) {
+ num_decoded = rle_decoder_->GetBatch(levels, num_values);
+ } else {
+ num_decoded = bit_packed_decoder_->GetBatch(bit_width_, levels, num_values);
+ }
+ if (num_decoded > 0) {
+ internal::MinMax min_max = internal::FindMinMax(levels, num_decoded);
+ if (ARROW_PREDICT_FALSE(min_max.min < 0 || min_max.max > max_level_)) {
+ std::stringstream ss;
+ ss << "Malformed levels. min: " << min_max.min << " max: " << min_max.max
+ << " out of range. Max Level: " << max_level_;
+ throw ParquetException(ss.str());
+ }
+ }
+ num_values_remaining_ -= num_decoded;
+ return num_decoded;
+}
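+
+// Worked example (illustrative): for max_level = 3, bit_width_ =
+// BitUtil::Log2(3 + 1) = 2, so each level occupies two bits in BIT_PACKED
+// data; any decoded level outside [0, 3] triggers the corrupt-page error
+// above.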
+
+ReaderProperties default_reader_properties() {
+ static ReaderProperties default_reader_properties;
+ return default_reader_properties;
+}
+
+namespace {
+
+// Extracts encoded statistics from V1 and V2 data page headers
+template <typename H>
+EncodedStatistics ExtractStatsFromHeader(const H& header) {
+ EncodedStatistics page_statistics;
+ if (!header.__isset.statistics) {
+ return page_statistics;
+ }
+ const format::Statistics& stats = header.statistics;
+ if (stats.__isset.max) {
+ page_statistics.set_max(stats.max);
+ }
+ if (stats.__isset.min) {
+ page_statistics.set_min(stats.min);
+ }
+ if (stats.__isset.null_count) {
+ page_statistics.set_null_count(stats.null_count);
+ }
+ if (stats.__isset.distinct_count) {
+ page_statistics.set_distinct_count(stats.distinct_count);
+ }
+ return page_statistics;
+}
+
+// ----------------------------------------------------------------------
+// SerializedPageReader deserializes Thrift metadata and pages that have been
+// assembled in a serialized stream for storage in a Parquet file
+
+// This subclass delimits pages appearing in a serialized stream, each preceded
+// by a serialized Thrift format::PageHeader indicating the type of each page
+// and the page metadata.
+class SerializedPageReader : public PageReader {
+ public:
+ SerializedPageReader(std::shared_ptr<ArrowInputStream> stream, int64_t total_num_rows,
+ Compression::type codec, ::arrow::MemoryPool* pool,
+ const CryptoContext* crypto_ctx)
+ : stream_(std::move(stream)),
+ decompression_buffer_(AllocateBuffer(pool, 0)),
+ page_ordinal_(0),
+ seen_num_rows_(0),
+ total_num_rows_(total_num_rows),
+ decryption_buffer_(AllocateBuffer(pool, 0)) {
+ if (crypto_ctx != nullptr) {
+ crypto_ctx_ = *crypto_ctx;
+ InitDecryption();
+ }
+ max_page_header_size_ = kDefaultMaxPageHeaderSize;
+ decompressor_ = GetCodec(codec);
+ }
+
+ // Implement the PageReader interface
+ std::shared_ptr<Page> NextPage() override;
+
+ void set_max_page_header_size(uint32_t size) override { max_page_header_size_ = size; }
+
+ private:
+ void UpdateDecryption(const std::shared_ptr<Decryptor>& decryptor, int8_t module_type,
+ const std::string& page_aad);
+
+ void InitDecryption();
+
+ std::shared_ptr<Buffer> DecompressIfNeeded(std::shared_ptr<Buffer> page_buffer,
+ int compressed_len, int uncompressed_len,
+ int levels_byte_len = 0);
+
+ std::shared_ptr<ArrowInputStream> stream_;
+
+ format::PageHeader current_page_header_;
+ std::shared_ptr<Page> current_page_;
+
+ // Compression codec to use.
+ std::unique_ptr<::arrow::util::Codec> decompressor_;
+ std::shared_ptr<ResizableBuffer> decompression_buffer_;
+
+  // The fields below are used to calculate the AAD (additional authenticated data)
+  // suffix, which is part of Parquet Modular Encryption.
+  // The AAD suffix for a Parquet module is built internally by
+  // concatenating different parts, including the row group ordinal,
+  // column ordinal and page ordinal.
+  // Please refer to the encryption specification for more details:
+  // https://github.com/apache/parquet-format/blob/encryption/Encryption.md#44-additional-authenticated-data
+
+ // The ordinal fields in the context below are used for AAD suffix calculation.
+ CryptoContext crypto_ctx_;
+ int16_t page_ordinal_; // page ordinal does not count the dictionary page
+
+  // Maximum allowed page header size
+ uint32_t max_page_header_size_;
+
+ // Number of rows read in data pages so far
+ int64_t seen_num_rows_;
+
+ // Number of rows in all the data pages
+ int64_t total_num_rows_;
+
+  // data_page_aad_ and data_page_header_aad_ contain the AAD for the data pages
+  // and data page headers of a single column, respectively.
+  // When calculating the AAD for different pages within a single column, only
+  // the page ordinal part of the AAD is updated.
+ std::string data_page_aad_;
+ std::string data_page_header_aad_;
+ // Encryption
+ std::shared_ptr<ResizableBuffer> decryption_buffer_;
+};
+
+void SerializedPageReader::InitDecryption() {
+ // Prepare the AAD for quick update later.
+ if (crypto_ctx_.data_decryptor != nullptr) {
+ DCHECK(!crypto_ctx_.data_decryptor->file_aad().empty());
+ data_page_aad_ = encryption::CreateModuleAad(
+ crypto_ctx_.data_decryptor->file_aad(), encryption::kDataPage,
+ crypto_ctx_.row_group_ordinal, crypto_ctx_.column_ordinal, kNonPageOrdinal);
+ }
+ if (crypto_ctx_.meta_decryptor != nullptr) {
+ DCHECK(!crypto_ctx_.meta_decryptor->file_aad().empty());
+ data_page_header_aad_ = encryption::CreateModuleAad(
+ crypto_ctx_.meta_decryptor->file_aad(), encryption::kDataPageHeader,
+ crypto_ctx_.row_group_ordinal, crypto_ctx_.column_ordinal, kNonPageOrdinal);
+ }
+}
+
+void SerializedPageReader::UpdateDecryption(const std::shared_ptr<Decryptor>& decryptor,
+ int8_t module_type,
+ const std::string& page_aad) {
+ DCHECK(decryptor != nullptr);
+ if (crypto_ctx_.start_decrypt_with_dictionary_page) {
+ std::string aad = encryption::CreateModuleAad(
+ decryptor->file_aad(), module_type, crypto_ctx_.row_group_ordinal,
+ crypto_ctx_.column_ordinal, kNonPageOrdinal);
+ decryptor->UpdateAad(aad);
+ } else {
+ encryption::QuickUpdatePageAad(page_aad, page_ordinal_);
+ decryptor->UpdateAad(page_aad);
+ }
+}
+
+std::shared_ptr<Page> SerializedPageReader::NextPage() {
+  // Loop here because there may be unhandled page types that we skip until
+  // finding a page that we know how to handle
+
+ while (seen_num_rows_ < total_num_rows_) {
+ uint32_t header_size = 0;
+ uint32_t allowed_page_size = kDefaultPageHeaderSize;
+
+    // Page headers can be very large because of page statistics.
+    // We try to deserialize progressively larger buffers until we reach
+    // the maximum allowed header size.
+ while (true) {
+ PARQUET_ASSIGN_OR_THROW(auto view, stream_->Peek(allowed_page_size));
+ if (view.size() == 0) {
+ return std::shared_ptr<Page>(nullptr);
+ }
+
+      // This is used as an input by DeserializeThriftMsg, which then sets it
+      // to the actual header size
+ header_size = static_cast<uint32_t>(view.size());
+ try {
+ if (crypto_ctx_.meta_decryptor != nullptr) {
+ UpdateDecryption(crypto_ctx_.meta_decryptor, encryption::kDictionaryPageHeader,
+ data_page_header_aad_);
+ }
+ DeserializeThriftMsg(reinterpret_cast<const uint8_t*>(view.data()), &header_size,
+ &current_page_header_, crypto_ctx_.meta_decryptor);
+ break;
+ } catch (std::exception& e) {
+ // Failed to deserialize. Double the allowed page header size and try again
+ std::stringstream ss;
+ ss << e.what();
+ allowed_page_size *= 2;
+ if (allowed_page_size > max_page_header_size_) {
+ ss << "Deserializing page header failed.\n";
+ throw ParquetException(ss.str());
+ }
+ }
+ }
+ // Advance the stream offset
+ PARQUET_THROW_NOT_OK(stream_->Advance(header_size));
+
+ int compressed_len = current_page_header_.compressed_page_size;
+ int uncompressed_len = current_page_header_.uncompressed_page_size;
+ if (compressed_len < 0 || uncompressed_len < 0) {
+ throw ParquetException("Invalid page header");
+ }
+
+ if (crypto_ctx_.data_decryptor != nullptr) {
+ UpdateDecryption(crypto_ctx_.data_decryptor, encryption::kDictionaryPage,
+ data_page_aad_);
+ }
+
+ // Read the compressed data page.
+ PARQUET_ASSIGN_OR_THROW(auto page_buffer, stream_->Read(compressed_len));
+ if (page_buffer->size() != compressed_len) {
+ std::stringstream ss;
+ ss << "Page was smaller (" << page_buffer->size() << ") than expected ("
+ << compressed_len << ")";
+ ParquetException::EofException(ss.str());
+ }
+
+ // Decrypt it if we need to
+ if (crypto_ctx_.data_decryptor != nullptr) {
+ PARQUET_THROW_NOT_OK(decryption_buffer_->Resize(
+ compressed_len - crypto_ctx_.data_decryptor->CiphertextSizeDelta(), false));
+ compressed_len = crypto_ctx_.data_decryptor->Decrypt(
+ page_buffer->data(), compressed_len, decryption_buffer_->mutable_data());
+
+ page_buffer = decryption_buffer_;
+ }
+
+ const PageType::type page_type = LoadEnumSafe(&current_page_header_.type);
+
+ if (page_type == PageType::DICTIONARY_PAGE) {
+ crypto_ctx_.start_decrypt_with_dictionary_page = false;
+ const format::DictionaryPageHeader& dict_header =
+ current_page_header_.dictionary_page_header;
+
+ bool is_sorted = dict_header.__isset.is_sorted ? dict_header.is_sorted : false;
+ if (dict_header.num_values < 0) {
+ throw ParquetException("Invalid page header (negative number of values)");
+ }
+
+ // Uncompress if needed
+ page_buffer =
+ DecompressIfNeeded(std::move(page_buffer), compressed_len, uncompressed_len);
+
+ return std::make_shared<DictionaryPage>(page_buffer, dict_header.num_values,
+ LoadEnumSafe(&dict_header.encoding),
+ is_sorted);
+ } else if (page_type == PageType::DATA_PAGE) {
+ ++page_ordinal_;
+ const format::DataPageHeader& header = current_page_header_.data_page_header;
+
+ if (header.num_values < 0) {
+ throw ParquetException("Invalid page header (negative number of values)");
+ }
+ EncodedStatistics page_statistics = ExtractStatsFromHeader(header);
+ seen_num_rows_ += header.num_values;
+
+ // Uncompress if needed
+ page_buffer =
+ DecompressIfNeeded(std::move(page_buffer), compressed_len, uncompressed_len);
+
+ return std::make_shared<DataPageV1>(page_buffer, header.num_values,
+ LoadEnumSafe(&header.encoding),
+ LoadEnumSafe(&header.definition_level_encoding),
+ LoadEnumSafe(&header.repetition_level_encoding),
+ uncompressed_len, page_statistics);
+ } else if (page_type == PageType::DATA_PAGE_V2) {
+ ++page_ordinal_;
+ const format::DataPageHeaderV2& header = current_page_header_.data_page_header_v2;
+
+ if (header.num_values < 0) {
+ throw ParquetException("Invalid page header (negative number of values)");
+ }
+ if (header.definition_levels_byte_length < 0 ||
+ header.repetition_levels_byte_length < 0) {
+ throw ParquetException("Invalid page header (negative levels byte length)");
+ }
+ bool is_compressed = header.__isset.is_compressed ? header.is_compressed : false;
+ EncodedStatistics page_statistics = ExtractStatsFromHeader(header);
+ seen_num_rows_ += header.num_values;
+
+ // Uncompress if needed
+ int levels_byte_len;
+ if (AddWithOverflow(header.definition_levels_byte_length,
+ header.repetition_levels_byte_length, &levels_byte_len)) {
+ throw ParquetException("Levels size too large (corrupt file?)");
+ }
+ // DecompressIfNeeded doesn't take `is_compressed` into account as
+ // it's page type-agnostic.
+ if (is_compressed) {
+ page_buffer = DecompressIfNeeded(std::move(page_buffer), compressed_len,
+ uncompressed_len, levels_byte_len);
+ }
+
+ return std::make_shared<DataPageV2>(
+ page_buffer, header.num_values, header.num_nulls, header.num_rows,
+ LoadEnumSafe(&header.encoding), header.definition_levels_byte_length,
+ header.repetition_levels_byte_length, uncompressed_len, is_compressed,
+ page_statistics);
+ } else {
+ // We don't know what this page type is. We're allowed to skip non-data
+ // pages.
+ continue;
+ }
+ }
+ return std::shared_ptr<Page>(nullptr);
+}
+
+std::shared_ptr<Buffer> SerializedPageReader::DecompressIfNeeded(
+ std::shared_ptr<Buffer> page_buffer, int compressed_len, int uncompressed_len,
+ int levels_byte_len) {
+ if (decompressor_ == nullptr) {
+ return page_buffer;
+ }
+ if (compressed_len < levels_byte_len || uncompressed_len < levels_byte_len) {
+ throw ParquetException("Invalid page header");
+ }
+
+ // Grow the uncompressed buffer if we need to.
+ if (uncompressed_len > static_cast<int>(decompression_buffer_->size())) {
+ PARQUET_THROW_NOT_OK(decompression_buffer_->Resize(uncompressed_len, false));
+ }
+
+ if (levels_byte_len > 0) {
+ // First copy the levels as-is
+ uint8_t* decompressed = decompression_buffer_->mutable_data();
+ memcpy(decompressed, page_buffer->data(), levels_byte_len);
+ }
+
+ // Decompress the values
+ PARQUET_THROW_NOT_OK(decompressor_->Decompress(
+ compressed_len - levels_byte_len, page_buffer->data() + levels_byte_len,
+ uncompressed_len - levels_byte_len,
+ decompression_buffer_->mutable_data() + levels_byte_len));
+
+ return decompression_buffer_;
+}
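+
+// Worked example (illustrative): for a DataPageV2 with uncompressed_len = 1000
+// and levels_byte_len = 100, the first 100 bytes (repetition + definition
+// levels) are copied verbatim since V2 stores levels uncompressed, and only
+// the remaining compressed bytes are decompressed into the last 900 bytes of
+// the output buffer.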
+
+} // namespace
+
+std::unique_ptr<PageReader> PageReader::Open(std::shared_ptr<ArrowInputStream> stream,
+ int64_t total_num_rows,
+ Compression::type codec,
+ ::arrow::MemoryPool* pool,
+ const CryptoContext* ctx) {
+ return std::unique_ptr<PageReader>(
+ new SerializedPageReader(std::move(stream), total_num_rows, codec, pool, ctx));
+}
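+
+// Minimal usage sketch (illustrative, not compiled; `stream` and `num_rows`
+// are hypothetical and would come from a column chunk's byte range and
+// metadata):
+#if 0
+std::unique_ptr<PageReader> pager = PageReader::Open(
+    stream, num_rows, Compression::SNAPPY, ::arrow::default_memory_pool());
+while (std::shared_ptr<Page> page = pager->NextPage()) {
+  // A dictionary page, if any, precedes the data pages of the column chunk.
+}
+#endif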
+
+namespace {
+
+// ----------------------------------------------------------------------
+// Impl base class for TypedColumnReader and RecordReader
+
+// PLAIN_DICTIONARY is deprecated, but was formerly used as a dictionary
+// index encoding.
+static bool IsDictionaryIndexEncoding(const Encoding::type& e) {
+ return e == Encoding::RLE_DICTIONARY || e == Encoding::PLAIN_DICTIONARY;
+}
+
+template <typename DType>
+class ColumnReaderImplBase {
+ public:
+ using T = typename DType::c_type;
+
+ ColumnReaderImplBase(const ColumnDescriptor* descr, ::arrow::MemoryPool* pool)
+ : descr_(descr),
+ max_def_level_(descr->max_definition_level()),
+ max_rep_level_(descr->max_repetition_level()),
+ num_buffered_values_(0),
+ num_decoded_values_(0),
+ pool_(pool),
+ current_decoder_(nullptr),
+ current_encoding_(Encoding::UNKNOWN) {}
+
+ virtual ~ColumnReaderImplBase() = default;
+
+ protected:
+ // Read up to batch_size values from the current data page into the
+ // pre-allocated memory T*
+ //
+ // @returns: the number of values read into the out buffer
+ int64_t ReadValues(int64_t batch_size, T* out) {
+ int64_t num_decoded = current_decoder_->Decode(out, static_cast<int>(batch_size));
+ return num_decoded;
+ }
+
+ // Read up to batch_size values from the current data page into the
+ // pre-allocated memory T*, leaving spaces for null entries according
+ // to the def_levels.
+ //
+ // @returns: the number of values read into the out buffer
+ int64_t ReadValuesSpaced(int64_t batch_size, T* out, int64_t null_count,
+ uint8_t* valid_bits, int64_t valid_bits_offset) {
+ return current_decoder_->DecodeSpaced(out, static_cast<int>(batch_size),
+ static_cast<int>(null_count), valid_bits,
+ valid_bits_offset);
+ }
+
+ // Read multiple definition levels into preallocated memory
+ //
+ // Returns the number of decoded definition levels
+ int64_t ReadDefinitionLevels(int64_t batch_size, int16_t* levels) {
+ if (max_def_level_ == 0) {
+ return 0;
+ }
+ return definition_level_decoder_.Decode(static_cast<int>(batch_size), levels);
+ }
+
+ bool HasNextInternal() {
+ // Either there is no data page available yet, or the data page has been
+ // exhausted
+ if (num_buffered_values_ == 0 || num_decoded_values_ == num_buffered_values_) {
+ if (!ReadNewPage() || num_buffered_values_ == 0) {
+ return false;
+ }
+ }
+ return true;
+ }
+
+ // Read multiple repetition levels into preallocated memory
+ // Returns the number of decoded repetition levels
+ int64_t ReadRepetitionLevels(int64_t batch_size, int16_t* levels) {
+ if (max_rep_level_ == 0) {
+ return 0;
+ }
+ return repetition_level_decoder_.Decode(static_cast<int>(batch_size), levels);
+ }
+
+ // Advance to the next data page
+ bool ReadNewPage() {
+ // Loop until we find the next data page.
+ while (true) {
+ current_page_ = pager_->NextPage();
+ if (!current_page_) {
+ // EOS
+ return false;
+ }
+
+ if (current_page_->type() == PageType::DICTIONARY_PAGE) {
+ ConfigureDictionary(static_cast<const DictionaryPage*>(current_page_.get()));
+ continue;
+ } else if (current_page_->type() == PageType::DATA_PAGE) {
+ const auto page = std::static_pointer_cast<DataPageV1>(current_page_);
+ const int64_t levels_byte_size = InitializeLevelDecoders(
+ *page, page->repetition_level_encoding(), page->definition_level_encoding());
+ InitializeDataDecoder(*page, levels_byte_size);
+ return true;
+ } else if (current_page_->type() == PageType::DATA_PAGE_V2) {
+ const auto page = std::static_pointer_cast<DataPageV2>(current_page_);
+ int64_t levels_byte_size = InitializeLevelDecodersV2(*page);
+ InitializeDataDecoder(*page, levels_byte_size);
+ return true;
+ } else {
+ // We don't know what this page type is. We're allowed to skip non-data
+ // pages.
+ continue;
+ }
+ }
+ return true;
+ }
+
+ void ConfigureDictionary(const DictionaryPage* page) {
+ int encoding = static_cast<int>(page->encoding());
+ if (page->encoding() == Encoding::PLAIN_DICTIONARY ||
+ page->encoding() == Encoding::PLAIN) {
+ encoding = static_cast<int>(Encoding::RLE_DICTIONARY);
+ }
+
+ auto it = decoders_.find(encoding);
+ if (it != decoders_.end()) {
+ throw ParquetException("Column cannot have more than one dictionary.");
+ }
+
+ if (page->encoding() == Encoding::PLAIN_DICTIONARY ||
+ page->encoding() == Encoding::PLAIN) {
+ auto dictionary = MakeTypedDecoder<DType>(Encoding::PLAIN, descr_);
+ dictionary->SetData(page->num_values(), page->data(), page->size());
+
+ // The dictionary is fully decoded during DictionaryDecoder::Init, so the
+ // DictionaryPage buffer is no longer required after this step
+ //
+ // TODO(wesm): investigate whether this all-or-nothing decoding of the
+ // dictionary makes sense and whether performance can be improved
+
+ std::unique_ptr<DictDecoder<DType>> decoder = MakeDictDecoder<DType>(descr_, pool_);
+ decoder->SetDict(dictionary.get());
+ decoders_[encoding] =
+ std::unique_ptr<DecoderType>(dynamic_cast<DecoderType*>(decoder.release()));
+ } else {
+ ParquetException::NYI("only plain dictionary encoding has been implemented");
+ }
+
+ new_dictionary_ = true;
+ current_decoder_ = decoders_[encoding].get();
+ DCHECK(current_decoder_);
+ }
+
+ // Initialize repetition and definition level decoders on the next data page.
+
+ // If the data page includes repetition and definition levels, we
+ // initialize the level decoders and return the number of encoded level bytes.
+ // The return value helps determine the number of bytes in the encoded data.
+ int64_t InitializeLevelDecoders(const DataPage& page,
+ Encoding::type repetition_level_encoding,
+ Encoding::type definition_level_encoding) {
+ // Read a data page.
+ num_buffered_values_ = page.num_values();
+
+ // Have not decoded any values from the data page yet
+ num_decoded_values_ = 0;
+
+ const uint8_t* buffer = page.data();
+ int32_t levels_byte_size = 0;
+ int32_t max_size = page.size();
+
+    // Data page layout: repetition levels - definition levels - encoded values.
+    // Levels are encoded as RLE or bit-packed.
+ // Init repetition levels
+ if (max_rep_level_ > 0) {
+ int32_t rep_levels_bytes = repetition_level_decoder_.SetData(
+ repetition_level_encoding, max_rep_level_,
+ static_cast<int>(num_buffered_values_), buffer, max_size);
+ buffer += rep_levels_bytes;
+ levels_byte_size += rep_levels_bytes;
+ max_size -= rep_levels_bytes;
+ }
+    // TODO: figure out a way to set max_def_level_ to 0
+    // if the initial value is invalid
+
+ // Init definition levels
+ if (max_def_level_ > 0) {
+ int32_t def_levels_bytes = definition_level_decoder_.SetData(
+ definition_level_encoding, max_def_level_,
+ static_cast<int>(num_buffered_values_), buffer, max_size);
+ levels_byte_size += def_levels_bytes;
+ max_size -= def_levels_bytes;
+ }
+
+ return levels_byte_size;
+ }
+
+ int64_t InitializeLevelDecodersV2(const DataPageV2& page) {
+ // Read a data page.
+ num_buffered_values_ = page.num_values();
+
+ // Have not decoded any values from the data page yet
+ num_decoded_values_ = 0;
+ const uint8_t* buffer = page.data();
+
+ const int64_t total_levels_length =
+ static_cast<int64_t>(page.repetition_levels_byte_length()) +
+ page.definition_levels_byte_length();
+
+ if (total_levels_length > page.size()) {
+ throw ParquetException("Data page too small for levels (corrupt header?)");
+ }
+
+ if (max_rep_level_ > 0) {
+ repetition_level_decoder_.SetDataV2(page.repetition_levels_byte_length(),
+ max_rep_level_,
+ static_cast<int>(num_buffered_values_), buffer);
+ buffer += page.repetition_levels_byte_length();
+ }
+
+ if (max_def_level_ > 0) {
+ definition_level_decoder_.SetDataV2(page.definition_levels_byte_length(),
+ max_def_level_,
+ static_cast<int>(num_buffered_values_), buffer);
+ }
+
+ return total_levels_length;
+ }
+
+ // Get a decoder object for this page or create a new decoder if this is the
+ // first page with this encoding.
+ void InitializeDataDecoder(const DataPage& page, int64_t levels_byte_size) {
+ const uint8_t* buffer = page.data() + levels_byte_size;
+ const int64_t data_size = page.size() - levels_byte_size;
+
+ if (data_size < 0) {
+ throw ParquetException("Page smaller than size of encoded levels");
+ }
+
+ Encoding::type encoding = page.encoding();
+
+ if (IsDictionaryIndexEncoding(encoding)) {
+ encoding = Encoding::RLE_DICTIONARY;
+ }
+
+ auto it = decoders_.find(static_cast<int>(encoding));
+ if (it != decoders_.end()) {
+ DCHECK(it->second.get() != nullptr);
+ if (encoding == Encoding::RLE_DICTIONARY) {
+ DCHECK(current_decoder_->encoding() == Encoding::RLE_DICTIONARY);
+ }
+ current_decoder_ = it->second.get();
+ } else {
+ switch (encoding) {
+ case Encoding::PLAIN: {
+ auto decoder = MakeTypedDecoder<DType>(Encoding::PLAIN, descr_);
+ current_decoder_ = decoder.get();
+ decoders_[static_cast<int>(encoding)] = std::move(decoder);
+ break;
+ }
+ case Encoding::BYTE_STREAM_SPLIT: {
+ auto decoder = MakeTypedDecoder<DType>(Encoding::BYTE_STREAM_SPLIT, descr_);
+ current_decoder_ = decoder.get();
+ decoders_[static_cast<int>(encoding)] = std::move(decoder);
+ break;
+ }
+ case Encoding::RLE_DICTIONARY:
+ throw ParquetException("Dictionary page must be before data page.");
+
+ case Encoding::DELTA_BINARY_PACKED:
+ case Encoding::DELTA_LENGTH_BYTE_ARRAY:
+ case Encoding::DELTA_BYTE_ARRAY:
+ ParquetException::NYI("Unsupported encoding");
+
+ default:
+ throw ParquetException("Unknown encoding type.");
+ }
+ }
+ current_encoding_ = encoding;
+ current_decoder_->SetData(static_cast<int>(num_buffered_values_), buffer,
+ static_cast<int>(data_size));
+ }
+
+ const ColumnDescriptor* descr_;
+ const int16_t max_def_level_;
+ const int16_t max_rep_level_;
+
+ std::unique_ptr<PageReader> pager_;
+ std::shared_ptr<Page> current_page_;
+
+  // Not set if the full schema for this field has no optional or repeated elements
+ LevelDecoder definition_level_decoder_;
+
+ // Not set for flat schemas.
+ LevelDecoder repetition_level_decoder_;
+
+ // The total number of values stored in the data page. This is the maximum of
+ // the number of encoded definition levels or encoded values. For
+ // non-repeated, required columns, this is equal to the number of encoded
+ // values. For repeated or optional values, there may be fewer data values
+ // than levels, and this tells you how many encoded levels there are in that
+ // case.
+ int64_t num_buffered_values_;
+
+ // The number of values from the current data page that have been decoded
+ // into memory
+ int64_t num_decoded_values_;
+
+ ::arrow::MemoryPool* pool_;
+
+ using DecoderType = TypedDecoder<DType>;
+ DecoderType* current_decoder_;
+ Encoding::type current_encoding_;
+
+ /// Flag to signal when a new dictionary has been set, for the benefit of
+ /// DictionaryRecordReader
+ bool new_dictionary_;
+
+ // The exposed encoding
+ ExposedEncoding exposed_encoding_ = ExposedEncoding::NO_ENCODING;
+
+ // Map of encoding type to the respective decoder object. For example, a
+ // column chunk's data pages may include both dictionary-encoded and
+ // plain-encoded data.
+ std::unordered_map<int, std::unique_ptr<DecoderType>> decoders_;
+
+ void ConsumeBufferedValues(int64_t num_values) { num_decoded_values_ += num_values; }
+};
+
+// ----------------------------------------------------------------------
+// TypedColumnReader implementations
+
+template <typename DType>
+class TypedColumnReaderImpl : public TypedColumnReader<DType>,
+ public ColumnReaderImplBase<DType> {
+ public:
+ using T = typename DType::c_type;
+
+ TypedColumnReaderImpl(const ColumnDescriptor* descr, std::unique_ptr<PageReader> pager,
+ ::arrow::MemoryPool* pool)
+ : ColumnReaderImplBase<DType>(descr, pool) {
+ this->pager_ = std::move(pager);
+ }
+
+ bool HasNext() override { return this->HasNextInternal(); }
+
+ int64_t ReadBatch(int64_t batch_size, int16_t* def_levels, int16_t* rep_levels,
+ T* values, int64_t* values_read) override;
+
+ int64_t ReadBatchSpaced(int64_t batch_size, int16_t* def_levels, int16_t* rep_levels,
+ T* values, uint8_t* valid_bits, int64_t valid_bits_offset,
+ int64_t* levels_read, int64_t* values_read,
+ int64_t* null_count) override;
+
+ int64_t Skip(int64_t num_rows_to_skip) override;
+
+ Type::type type() const override { return this->descr_->physical_type(); }
+
+ const ColumnDescriptor* descr() const override { return this->descr_; }
+
+  ExposedEncoding GetExposedEncoding() override { return this->exposed_encoding_; }
+
+ int64_t ReadBatchWithDictionary(int64_t batch_size, int16_t* def_levels,
+ int16_t* rep_levels, int32_t* indices,
+ int64_t* indices_read, const T** dict,
+ int32_t* dict_len) override;
+
+ protected:
+ void SetExposedEncoding(ExposedEncoding encoding) override {
+ this->exposed_encoding_ = encoding;
+ }
+
+ private:
+  // Read dictionary indices. Similar to ReadValues, but decodes data to
+  // dictionary indices instead of values.
+  // This function is called only by ReadBatchWithDictionary().
+ int64_t ReadDictionaryIndices(int64_t indices_to_read, int32_t* indices) {
+ auto decoder = dynamic_cast<DictDecoder<DType>*>(this->current_decoder_);
+ return decoder->DecodeIndices(static_cast<int>(indices_to_read), indices);
+ }
+
+  // Get the dictionary. The dictionary should have been set by SetDict(). It is
+  // owned by the internal decoder and is destroyed when the reader is destroyed.
+  // This function is called only by ReadBatchWithDictionary() after the
+  // dictionary has been configured.
+ void GetDictionary(const T** dictionary, int32_t* dictionary_length) {
+ auto decoder = dynamic_cast<DictDecoder<DType>*>(this->current_decoder_);
+ decoder->GetDictionary(dictionary, dictionary_length);
+ }
+
+ // Read definition and repetition levels. Also return the number of definition levels
+ // and number of values to read. This function is called before reading values.
+ void ReadLevels(int64_t batch_size, int16_t* def_levels, int16_t* rep_levels,
+ int64_t* num_def_levels, int64_t* values_to_read) {
+ batch_size =
+ std::min(batch_size, this->num_buffered_values_ - this->num_decoded_values_);
+
+ // If the field is required and non-repeated, there are no definition levels
+ if (this->max_def_level_ > 0 && def_levels != nullptr) {
+ *num_def_levels = this->ReadDefinitionLevels(batch_size, def_levels);
+ // TODO(wesm): this tallying of values-to-decode can be performed with better
+ // cache-efficiency if fused with the level decoding.
+ for (int64_t i = 0; i < *num_def_levels; ++i) {
+ if (def_levels[i] == this->max_def_level_) {
+ ++(*values_to_read);
+ }
+ }
+ } else {
+ // Required field, read all values
+ *values_to_read = batch_size;
+ }
+
+ // Not present for non-repeated fields
+ if (this->max_rep_level_ > 0 && rep_levels != nullptr) {
+ int64_t num_rep_levels = this->ReadRepetitionLevels(batch_size, rep_levels);
+ if (def_levels != nullptr && *num_def_levels != num_rep_levels) {
+ throw ParquetException("Number of decoded rep / def levels did not match");
+ }
+ }
+ }
+};
+
+template <typename DType>
+int64_t TypedColumnReaderImpl<DType>::ReadBatchWithDictionary(
+ int64_t batch_size, int16_t* def_levels, int16_t* rep_levels, int32_t* indices,
+ int64_t* indices_read, const T** dict, int32_t* dict_len) {
+ bool has_dict_output = dict != nullptr && dict_len != nullptr;
+  // Use similar logic to ReadValues to get pages.
+ if (!HasNext()) {
+ *indices_read = 0;
+ if (has_dict_output) {
+ *dict = nullptr;
+ *dict_len = 0;
+ }
+ return 0;
+ }
+
+ // Verify the current data page is dictionary encoded.
+ if (this->current_encoding_ != Encoding::RLE_DICTIONARY) {
+ std::stringstream ss;
+ ss << "Data page is not dictionary encoded. Encoding: "
+ << EncodingToString(this->current_encoding_);
+ throw ParquetException(ss.str());
+ }
+
+ // Get dictionary pointer and length.
+ if (has_dict_output) {
+ GetDictionary(dict, dict_len);
+ }
+
+  // Use similar logic to ReadValues to get def levels and rep levels.
+ int64_t num_def_levels = 0;
+ int64_t indices_to_read = 0;
+ ReadLevels(batch_size, def_levels, rep_levels, &num_def_levels, &indices_to_read);
+
+ // Read dictionary indices.
+ *indices_read = ReadDictionaryIndices(indices_to_read, indices);
+ int64_t total_indices = std::max(num_def_levels, *indices_read);
+ this->ConsumeBufferedValues(total_indices);
+
+ return total_indices;
+}
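+
+// Usage sketch (illustrative, not compiled): reading dictionary indices
+// instead of decoded values for an INT32 column. All buffer names are
+// hypothetical, and the current page must be dictionary-encoded or the call
+// throws.
+#if 0
+std::vector<int32_t> indices(batch_size);
+std::vector<int16_t> def_levels(batch_size), rep_levels(batch_size);
+int64_t indices_read = 0;
+const int32_t* dict = nullptr;
+int32_t dict_len = 0;
+reader->ReadBatchWithDictionary(batch_size, def_levels.data(), rep_levels.data(),
+                                indices.data(), &indices_read, &dict, &dict_len);
+// Each index satisfies 0 <= indices[i] < dict_len; dict[indices[i]] is the value.
+#endif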
+
+template <typename DType>
+int64_t TypedColumnReaderImpl<DType>::ReadBatch(int64_t batch_size, int16_t* def_levels,
+ int16_t* rep_levels, T* values,
+ int64_t* values_read) {
+ // HasNext invokes ReadNewPage
+ if (!HasNext()) {
+ *values_read = 0;
+ return 0;
+ }
+
+ // TODO(wesm): keep reading data pages until batch_size is reached, or the
+ // row group is finished
+ int64_t num_def_levels = 0;
+ int64_t values_to_read = 0;
+ ReadLevels(batch_size, def_levels, rep_levels, &num_def_levels, &values_to_read);
+
+ *values_read = this->ReadValues(values_to_read, values);
+ int64_t total_values = std::max(num_def_levels, *values_read);
+ this->ConsumeBufferedValues(total_values);
+
+ return total_values;
+}
+
+template <typename DType>
+int64_t TypedColumnReaderImpl<DType>::ReadBatchSpaced(
+ int64_t batch_size, int16_t* def_levels, int16_t* rep_levels, T* values,
+ uint8_t* valid_bits, int64_t valid_bits_offset, int64_t* levels_read,
+ int64_t* values_read, int64_t* null_count_out) {
+ // HasNext invokes ReadNewPage
+ if (!HasNext()) {
+ *levels_read = 0;
+ *values_read = 0;
+ *null_count_out = 0;
+ return 0;
+ }
+
+ int64_t total_values;
+ // TODO(wesm): keep reading data pages until batch_size is reached, or the
+ // row group is finished
+ batch_size =
+ std::min(batch_size, this->num_buffered_values_ - this->num_decoded_values_);
+
+ // If the field is required and non-repeated, there are no definition levels
+ if (this->max_def_level_ > 0) {
+ int64_t num_def_levels = this->ReadDefinitionLevels(batch_size, def_levels);
+
+ // Not present for non-repeated fields
+ if (this->max_rep_level_ > 0) {
+ int64_t num_rep_levels = this->ReadRepetitionLevels(batch_size, rep_levels);
+ if (num_def_levels != num_rep_levels) {
+ throw ParquetException("Number of decoded rep / def levels did not match");
+ }
+ }
+
+ const bool has_spaced_values = HasSpacedValues(this->descr_);
+ int64_t null_count = 0;
+ if (!has_spaced_values) {
+ int values_to_read = 0;
+ for (int64_t i = 0; i < num_def_levels; ++i) {
+ if (def_levels[i] == this->max_def_level_) {
+ ++values_to_read;
+ }
+ }
+ total_values = this->ReadValues(values_to_read, values);
+ ::arrow::BitUtil::SetBitsTo(valid_bits, valid_bits_offset,
+ /*length=*/total_values,
+ /*bits_are_set=*/true);
+ *values_read = total_values;
+ } else {
+ internal::LevelInfo info;
+ info.repeated_ancestor_def_level = this->max_def_level_ - 1;
+ info.def_level = this->max_def_level_;
+ info.rep_level = this->max_rep_level_;
+ internal::ValidityBitmapInputOutput validity_io;
+ validity_io.values_read_upper_bound = num_def_levels;
+ validity_io.valid_bits = valid_bits;
+ validity_io.valid_bits_offset = valid_bits_offset;
+ validity_io.null_count = null_count;
+ validity_io.values_read = *values_read;
+
+ internal::DefLevelsToBitmap(def_levels, num_def_levels, info, &validity_io);
+ null_count = validity_io.null_count;
+ *values_read = validity_io.values_read;
+
+ total_values =
+ this->ReadValuesSpaced(*values_read, values, static_cast<int>(null_count),
+ valid_bits, valid_bits_offset);
+ }
+ *levels_read = num_def_levels;
+ *null_count_out = null_count;
+
+ } else {
+ // Required field, read all values
+ total_values = this->ReadValues(batch_size, values);
+ ::arrow::BitUtil::SetBitsTo(valid_bits, valid_bits_offset,
+ /*length=*/total_values,
+ /*bits_are_set=*/true);
+ *null_count_out = 0;
+ *values_read = total_values;
+ *levels_read = total_values;
+ }
+
+ this->ConsumeBufferedValues(*levels_read);
+ return total_values;
+}
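+
+// Worked example (illustrative): for a flat optional column with
+// max_def_level_ = 1 and def_levels = {1, 0, 1}, ReadBatchSpaced writes the
+// two present values into slots 0 and 2, leaves slot 1 undefined, sets
+// valid_bits to 0b101 and *null_count_out to 1, and returns 3 (the number of
+// value slots including nulls), whereas ReadBatch would pack the two values
+// densely.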
+
+template <typename DType>
+int64_t TypedColumnReaderImpl<DType>::Skip(int64_t num_rows_to_skip) {
+ int64_t rows_to_skip = num_rows_to_skip;
+ while (HasNext() && rows_to_skip > 0) {
+ // If the number of rows to skip is more than the number of undecoded values, skip the
+ // Page.
+ if (rows_to_skip > (this->num_buffered_values_ - this->num_decoded_values_)) {
+ rows_to_skip -= this->num_buffered_values_ - this->num_decoded_values_;
+ this->num_decoded_values_ = this->num_buffered_values_;
+ } else {
+ // We need to read this Page
+ // Jump to the right offset in the Page
+ int64_t batch_size = 1024; // ReadBatch with a smaller memory footprint
+ int64_t values_read = 0;
+
+ // This will be enough scratch space to accommodate 16-bit levels or any
+ // value type
+ std::shared_ptr<ResizableBuffer> scratch = AllocateBuffer(
+ this->pool_, batch_size * type_traits<DType::type_num>::value_byte_size);
+
+ do {
+ batch_size = std::min(batch_size, rows_to_skip);
+ values_read =
+ ReadBatch(static_cast<int>(batch_size),
+ reinterpret_cast<int16_t*>(scratch->mutable_data()),
+ reinterpret_cast<int16_t*>(scratch->mutable_data()),
+ reinterpret_cast<T*>(scratch->mutable_data()), &values_read);
+ rows_to_skip -= values_read;
+ } while (values_read > 0 && rows_to_skip > 0);
+ }
+ }
+ return num_rows_to_skip - rows_to_skip;
+}
+
+} // namespace
+
+// ----------------------------------------------------------------------
+// Dynamic column reader constructor
+
+std::shared_ptr<ColumnReader> ColumnReader::Make(const ColumnDescriptor* descr,
+ std::unique_ptr<PageReader> pager,
+ MemoryPool* pool) {
+ switch (descr->physical_type()) {
+ case Type::BOOLEAN:
+ return std::make_shared<TypedColumnReaderImpl<BooleanType>>(descr, std::move(pager),
+ pool);
+ case Type::INT32:
+ return std::make_shared<TypedColumnReaderImpl<Int32Type>>(descr, std::move(pager),
+ pool);
+ case Type::INT64:
+ return std::make_shared<TypedColumnReaderImpl<Int64Type>>(descr, std::move(pager),
+ pool);
+ case Type::INT96:
+ return std::make_shared<TypedColumnReaderImpl<Int96Type>>(descr, std::move(pager),
+ pool);
+ case Type::FLOAT:
+ return std::make_shared<TypedColumnReaderImpl<FloatType>>(descr, std::move(pager),
+ pool);
+ case Type::DOUBLE:
+ return std::make_shared<TypedColumnReaderImpl<DoubleType>>(descr, std::move(pager),
+ pool);
+ case Type::BYTE_ARRAY:
+ return std::make_shared<TypedColumnReaderImpl<ByteArrayType>>(
+ descr, std::move(pager), pool);
+ case Type::FIXED_LEN_BYTE_ARRAY:
+ return std::make_shared<TypedColumnReaderImpl<FLBAType>>(descr, std::move(pager),
+ pool);
+ default:
+ ParquetException::NYI("type reader not implemented");
+ }
+ // Unreachable code, but suppress compiler warning
+ return std::shared_ptr<ColumnReader>(nullptr);
+}
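+
+// End-to-end sketch (illustrative, not compiled; `descr` and `pager` are
+// hypothetical and would come from the file's SchemaDescriptor and
+// PageReader::Open):
+#if 0
+std::shared_ptr<ColumnReader> col =
+    ColumnReader::Make(descr, std::move(pager), ::arrow::default_memory_pool());
+auto* reader = static_cast<Int64Reader*>(col.get());
+std::vector<int64_t> values(1024);
+std::vector<int16_t> defs(1024), reps(1024);
+int64_t values_read = 0;
+while (reader->HasNext()) {
+  reader->ReadBatch(values.size(), defs.data(), reps.data(), values.data(),
+                    &values_read);
+}
+#endif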
+
+// ----------------------------------------------------------------------
+// RecordReader
+
+namespace internal {
+namespace {
+
+// The minimum number of repetition/definition levels to decode at a time, for
+// better vectorized performance when doing many smaller record reads
+constexpr int64_t kMinLevelBatchSize = 1024;
+
+template <typename DType>
+class TypedRecordReader : public ColumnReaderImplBase<DType>,
+ virtual public RecordReader {
+ public:
+ using T = typename DType::c_type;
+ using BASE = ColumnReaderImplBase<DType>;
+ TypedRecordReader(const ColumnDescriptor* descr, LevelInfo leaf_info, MemoryPool* pool)
+ : BASE(descr, pool) {
+ leaf_info_ = leaf_info;
+ nullable_values_ = leaf_info.HasNullableValues();
+ at_record_start_ = true;
+ records_read_ = 0;
+ values_written_ = 0;
+ values_capacity_ = 0;
+ null_count_ = 0;
+ levels_written_ = 0;
+ levels_position_ = 0;
+ levels_capacity_ = 0;
+ uses_values_ = !(descr->physical_type() == Type::BYTE_ARRAY);
+
+ if (uses_values_) {
+ values_ = AllocateBuffer(pool);
+ }
+ valid_bits_ = AllocateBuffer(pool);
+ def_levels_ = AllocateBuffer(pool);
+ rep_levels_ = AllocateBuffer(pool);
+ Reset();
+ }
+
+ int64_t available_values_current_page() const {
+ return this->num_buffered_values_ - this->num_decoded_values_;
+ }
+
+ // Compute the values capacity in bytes for the given number of elements
+ int64_t bytes_for_values(int64_t nitems) const {
+ int64_t type_size = GetTypeByteSize(this->descr_->physical_type());
+ int64_t bytes_for_values = -1;
+ if (MultiplyWithOverflow(nitems, type_size, &bytes_for_values)) {
+ throw ParquetException("Total size of items too large");
+ }
+ return bytes_for_values;
+ }
+
+ int64_t ReadRecords(int64_t num_records) override {
+ // Delimit records, then read values at the end
+ int64_t records_read = 0;
+
+ if (levels_position_ < levels_written_) {
+ records_read += ReadRecordData(num_records);
+ }
+
+ int64_t level_batch_size = std::max(kMinLevelBatchSize, num_records);
+
+    // Keep reading until we reach the desired number of records; if we are in
+    // the middle of a record when that happens, continue until the end of the
+    // current record
+ while (!at_record_start_ || records_read < num_records) {
+ // Is there more data to read in this row group?
+ if (!this->HasNextInternal()) {
+ if (!at_record_start_) {
+ // We ended the row group while inside a record that we haven't seen
+ // the end of yet. So increment the record count for the last record in
+ // the row group
+ ++records_read;
+ at_record_start_ = true;
+ }
+ break;
+ }
+
+ /// We perform multiple batch reads until we either exhaust the row group
+ /// or observe the desired number of records
+ int64_t batch_size = std::min(level_batch_size, available_values_current_page());
+
+ // No more data in column
+ if (batch_size == 0) {
+ break;
+ }
+
+ if (this->max_def_level_ > 0) {
+ ReserveLevels(batch_size);
+
+ int16_t* def_levels = this->def_levels() + levels_written_;
+ int16_t* rep_levels = this->rep_levels() + levels_written_;
+
+ // Not present for non-repeated fields
+ int64_t levels_read = 0;
+ if (this->max_rep_level_ > 0) {
+ levels_read = this->ReadDefinitionLevels(batch_size, def_levels);
+ if (this->ReadRepetitionLevels(batch_size, rep_levels) != levels_read) {
+ throw ParquetException("Number of decoded rep / def levels did not match");
+ }
+ } else if (this->max_def_level_ > 0) {
+ levels_read = this->ReadDefinitionLevels(batch_size, def_levels);
+ }
+
+ // Exhausted column chunk
+ if (levels_read == 0) {
+ break;
+ }
+
+ levels_written_ += levels_read;
+ records_read += ReadRecordData(num_records - records_read);
+ } else {
+ // No repetition or definition levels
+ batch_size = std::min(num_records - records_read, batch_size);
+ records_read += ReadRecordData(batch_size);
+ }
+ }
+
+ return records_read;
+ }
+
+  // We may appear to have exhausted a column chunk when in fact we are still
+  // in the middle of processing the last batch
+ bool has_values_to_process() const { return levels_position_ < levels_written_; }
+
+ std::shared_ptr<ResizableBuffer> ReleaseValues() override {
+ if (uses_values_) {
+ auto result = values_;
+ PARQUET_THROW_NOT_OK(result->Resize(bytes_for_values(values_written_), true));
+ values_ = AllocateBuffer(this->pool_);
+ values_capacity_ = 0;
+ return result;
+ } else {
+ return nullptr;
+ }
+ }
+
+ std::shared_ptr<ResizableBuffer> ReleaseIsValid() override {
+ if (leaf_info_.HasNullableValues()) {
+ auto result = valid_bits_;
+ PARQUET_THROW_NOT_OK(result->Resize(BitUtil::BytesForBits(values_written_), true));
+ valid_bits_ = AllocateBuffer(this->pool_);
+ return result;
+ } else {
+ return nullptr;
+ }
+ }
+
+ // Process written repetition/definition levels to reach the end of
+ // records. Process no more levels than necessary to delimit the indicated
+ // number of logical records. Updates internal state of RecordReader
+ //
+ // \return Number of records delimited
+ int64_t DelimitRecords(int64_t num_records, int64_t* values_seen) {
+ int64_t values_to_read = 0;
+ int64_t records_read = 0;
+
+ const int16_t* def_levels = this->def_levels() + levels_position_;
+ const int16_t* rep_levels = this->rep_levels() + levels_position_;
+
+ DCHECK_GT(this->max_rep_level_, 0);
+
+ // Count logical records and number of values to read
+ while (levels_position_ < levels_written_) {
+ const int16_t rep_level = *rep_levels++;
+ if (rep_level == 0) {
+ // If at_record_start_ is true, we are seeing the start of a record
+ // for the second time, such as after repeated calls to
+ // DelimitRecords. In this case we must continue until we find
+ // another record start or exhaust the ColumnChunk
+ if (!at_record_start_) {
+ // We've reached the end of a record; increment the record count.
+ ++records_read;
+ if (records_read == num_records) {
+ // We've found the number of records we were looking for. Set
+ // at_record_start_ to true and break
+ at_record_start_ = true;
+ break;
+ }
+ }
+ }
+ // We have decided to consume the level at this position; therefore we
+ // must advance until we find another record boundary
+ at_record_start_ = false;
+
+ const int16_t def_level = *def_levels++;
+ if (def_level == this->max_def_level_) {
+ ++values_to_read;
+ }
+ ++levels_position_;
+ }
+ *values_seen = values_to_read;
+ return records_read;
+ }
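+
+ // Worked example (an illustrative sketch, not from the upstream source):
+ // given buffered rep_levels {0, 1, 1, 0, 1} where every def_level equals
+ // max_def_level_, the two zeros open records. DelimitRecords(2, &seen)
+ // consumes all five levels but returns 1: the first record is counted when
+ // the second zero is observed, while the still-open second record is only
+ // counted later (by ReadRecords) once the row group ends; *seen is set to 5.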
+
+ void Reserve(int64_t capacity) override {
+ ReserveLevels(capacity);
+ ReserveValues(capacity);
+ }
+
+ int64_t UpdateCapacity(int64_t capacity, int64_t size, int64_t extra_size) {
+ if (extra_size < 0) {
+ throw ParquetException("Negative size (corrupt file?)");
+ }
+ int64_t target_size = -1;
+ if (AddWithOverflow(size, extra_size, &target_size)) {
+ throw ParquetException("Allocation size too large (corrupt file?)");
+ }
+ if (target_size >= (1LL << 62)) {
+ throw ParquetException("Allocation size too large (corrupt file?)");
+ }
+ if (capacity >= target_size) {
+ return capacity;
+ }
+ return BitUtil::NextPower2(target_size);
+ }
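+
+ // Illustrative arithmetic (not from the upstream source): with
+ // capacity=1024, size=1000 and extra_size=100, target_size is 1100, which
+ // exceeds the current capacity, so BitUtil::NextPower2(1100) = 2048 is
+ // returned. Growing to the next power of two keeps repeated reserves
+ // amortized O(1).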
+
+ void ReserveLevels(int64_t extra_levels) {
+ if (this->max_def_level_ > 0) {
+ const int64_t new_levels_capacity =
+ UpdateCapacity(levels_capacity_, levels_written_, extra_levels);
+ if (new_levels_capacity > levels_capacity_) {
+ constexpr auto kItemSize = static_cast<int64_t>(sizeof(int16_t));
+ int64_t capacity_in_bytes = -1;
+ if (MultiplyWithOverflow(new_levels_capacity, kItemSize, &capacity_in_bytes)) {
+ throw ParquetException("Allocation size too large (corrupt file?)");
+ }
+ PARQUET_THROW_NOT_OK(def_levels_->Resize(capacity_in_bytes, false));
+ if (this->max_rep_level_ > 0) {
+ PARQUET_THROW_NOT_OK(rep_levels_->Resize(capacity_in_bytes, false));
+ }
+ levels_capacity_ = new_levels_capacity;
+ }
+ }
+ }
+
+ void ReserveValues(int64_t extra_values) {
+ const int64_t new_values_capacity =
+ UpdateCapacity(values_capacity_, values_written_, extra_values);
+ if (new_values_capacity > values_capacity_) {
+ // XXX(wesm): A hack to avoid memory allocation when reading directly
+ // into builder classes
+ if (uses_values_) {
+ PARQUET_THROW_NOT_OK(
+ values_->Resize(bytes_for_values(new_values_capacity), false));
+ }
+ values_capacity_ = new_values_capacity;
+ }
+ if (leaf_info_.HasNullableValues()) {
+ int64_t valid_bytes_new = BitUtil::BytesForBits(values_capacity_);
+ if (valid_bits_->size() < valid_bytes_new) {
+ int64_t valid_bytes_old = BitUtil::BytesForBits(values_written_);
+ PARQUET_THROW_NOT_OK(valid_bits_->Resize(valid_bytes_new, false));
+
+ // Avoid valgrind warnings
+ memset(valid_bits_->mutable_data() + valid_bytes_old, 0,
+ valid_bytes_new - valid_bytes_old);
+ }
+ }
+ }
+
+ void Reset() override {
+ ResetValues();
+
+ if (levels_written_ > 0) {
+ const int64_t levels_remaining = levels_written_ - levels_position_;
+ // Shift remaining levels to beginning of buffer and trim to only the number
+ // of decoded levels remaining
+ int16_t* def_data = def_levels();
+ int16_t* rep_data = rep_levels();
+
+ std::copy(def_data + levels_position_, def_data + levels_written_, def_data);
+ PARQUET_THROW_NOT_OK(
+ def_levels_->Resize(levels_remaining * sizeof(int16_t), false));
+
+ if (this->max_rep_level_ > 0) {
+ std::copy(rep_data + levels_position_, rep_data + levels_written_, rep_data);
+ PARQUET_THROW_NOT_OK(
+ rep_levels_->Resize(levels_remaining * sizeof(int16_t), false));
+ }
+
+ levels_written_ -= levels_position_;
+ levels_position_ = 0;
+ levels_capacity_ = levels_remaining;
+ }
+
+ records_read_ = 0;
+
+ // Binary builders in subclasses are reset via Finish() when their chunks are retrieved
+ }
+
+ void SetPageReader(std::unique_ptr<PageReader> reader) override {
+ at_record_start_ = true;
+ this->pager_ = std::move(reader);
+ ResetDecoders();
+ }
+
+ bool HasMoreData() const override { return this->pager_ != nullptr; }
+
+ // Dictionary decoders must be reset when advancing row groups
+ void ResetDecoders() { this->decoders_.clear(); }
+
+ virtual void ReadValuesSpaced(int64_t values_with_nulls, int64_t null_count) {
+ uint8_t* valid_bits = valid_bits_->mutable_data();
+ const int64_t valid_bits_offset = values_written_;
+
+ int64_t num_decoded = this->current_decoder_->DecodeSpaced(
+ ValuesHead<T>(), static_cast<int>(values_with_nulls),
+ static_cast<int>(null_count), valid_bits, valid_bits_offset);
+ DCHECK_EQ(num_decoded, values_with_nulls);
+ }
+
+ virtual void ReadValuesDense(int64_t values_to_read) {
+ int64_t num_decoded =
+ this->current_decoder_->Decode(ValuesHead<T>(), static_cast<int>(values_to_read));
+ DCHECK_EQ(num_decoded, values_to_read);
+ }
+
+ // Return number of logical records read
+ int64_t ReadRecordData(int64_t num_records) {
+ // Conservative upper bound
+ const int64_t possible_num_values =
+ std::max(num_records, levels_written_ - levels_position_);
+ ReserveValues(possible_num_values);
+
+ const int64_t start_levels_position = levels_position_;
+
+ int64_t values_to_read = 0;
+ int64_t records_read = 0;
+ if (this->max_rep_level_ > 0) {
+ records_read = DelimitRecords(num_records, &values_to_read);
+ } else if (this->max_def_level_ > 0) {
+ // No repetition levels, skip delimiting logic. Each level represents a
+ // null or not null entry
+ records_read = std::min(levels_written_ - levels_position_, num_records);
+
+ // This is advanced by DelimitRecords, which we skipped
+ levels_position_ += records_read;
+ } else {
+ records_read = values_to_read = num_records;
+ }
+
+ int64_t null_count = 0;
+ if (leaf_info_.HasNullableValues()) {
+ ValidityBitmapInputOutput validity_io;
+ validity_io.values_read_upper_bound = levels_position_ - start_levels_position;
+ validity_io.valid_bits = valid_bits_->mutable_data();
+ validity_io.valid_bits_offset = values_written_;
+
+ DefLevelsToBitmap(def_levels() + start_levels_position,
+ levels_position_ - start_levels_position, leaf_info_,
+ &validity_io);
+ values_to_read = validity_io.values_read - validity_io.null_count;
+ null_count = validity_io.null_count;
+ DCHECK_GE(values_to_read, 0);
+ ReadValuesSpaced(validity_io.values_read, null_count);
+ } else {
+ DCHECK_GE(values_to_read, 0);
+ ReadValuesDense(values_to_read);
+ }
+ if (this->leaf_info_.def_level > 0) {
+ // Optional, repeated, or some mix thereof
+ this->ConsumeBufferedValues(levels_position_ - start_levels_position);
+ } else {
+ // Flat, non-repeated
+ this->ConsumeBufferedValues(values_to_read);
+ }
+ // Total values, including null spaces, if any
+ values_written_ += values_to_read + null_count;
+ null_count_ += null_count;
+
+ return records_read;
+ }
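+
+ // Worked example of the nullable branch above (illustrative only): for a
+ // flat optional leaf with max_def_level_ == 1, buffered def_levels
+ // {1, 0, 1} yield validity_io.values_read = 3 and null_count = 1, so
+ // values_to_read becomes 2 and ReadValuesSpaced(3, 1) decodes two non-null
+ // values, leaving one null slot in between.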
+
+ void DebugPrintState() override {
+ const int16_t* def_levels = this->def_levels();
+ const int16_t* rep_levels = this->rep_levels();
+ const int64_t total_levels_read = levels_position_;
+
+ const T* vals = reinterpret_cast<const T*>(this->values());
+
+ std::cout << "def levels: ";
+ for (int64_t i = 0; i < total_levels_read; ++i) {
+ std::cout << def_levels[i] << " ";
+ }
+ std::cout << std::endl;
+
+ std::cout << "rep levels: ";
+ for (int64_t i = 0; i < total_levels_read; ++i) {
+ std::cout << rep_levels[i] << " ";
+ }
+ std::cout << std::endl;
+
+ std::cout << "values: ";
+ for (int64_t i = 0; i < this->values_written(); ++i) {
+ std::cout << vals[i] << " ";
+ }
+ std::cout << std::endl;
+ }
+
+ void ResetValues() {
+ if (values_written_ > 0) {
+ // Resize to 0, but do not shrink to fit
+ if (uses_values_) {
+ PARQUET_THROW_NOT_OK(values_->Resize(0, false));
+ }
+ PARQUET_THROW_NOT_OK(valid_bits_->Resize(0, false));
+ values_written_ = 0;
+ values_capacity_ = 0;
+ null_count_ = 0;
+ }
+ }
+
+ protected:
+ template <typename T>
+ T* ValuesHead() {
+ return reinterpret_cast<T*>(values_->mutable_data()) + values_written_;
+ }
+ LevelInfo leaf_info_;
+};
+
+class FLBARecordReader : public TypedRecordReader<FLBAType>,
+ virtual public BinaryRecordReader {
+ public:
+ FLBARecordReader(const ColumnDescriptor* descr, LevelInfo leaf_info,
+ ::arrow::MemoryPool* pool)
+ : TypedRecordReader<FLBAType>(descr, leaf_info, pool), builder_(nullptr) {
+ DCHECK_EQ(descr_->physical_type(), Type::FIXED_LEN_BYTE_ARRAY);
+ int byte_width = descr_->type_length();
+ std::shared_ptr<::arrow::DataType> type = ::arrow::fixed_size_binary(byte_width);
+ builder_.reset(new ::arrow::FixedSizeBinaryBuilder(type, this->pool_));
+ }
+
+ ::arrow::ArrayVector GetBuilderChunks() override {
+ std::shared_ptr<::arrow::Array> chunk;
+ PARQUET_THROW_NOT_OK(builder_->Finish(&chunk));
+ return ::arrow::ArrayVector({chunk});
+ }
+
+ void ReadValuesDense(int64_t values_to_read) override {
+ auto values = ValuesHead<FLBA>();
+ int64_t num_decoded =
+ this->current_decoder_->Decode(values, static_cast<int>(values_to_read));
+ DCHECK_EQ(num_decoded, values_to_read);
+
+ for (int64_t i = 0; i < num_decoded; i++) {
+ PARQUET_THROW_NOT_OK(builder_->Append(values[i].ptr));
+ }
+ ResetValues();
+ }
+
+ void ReadValuesSpaced(int64_t values_to_read, int64_t null_count) override {
+ uint8_t* valid_bits = valid_bits_->mutable_data();
+ const int64_t valid_bits_offset = values_written_;
+ auto values = ValuesHead<FLBA>();
+
+ int64_t num_decoded = this->current_decoder_->DecodeSpaced(
+ values, static_cast<int>(values_to_read), static_cast<int>(null_count),
+ valid_bits, valid_bits_offset);
+ DCHECK_EQ(num_decoded, values_to_read);
+
+ for (int64_t i = 0; i < num_decoded; i++) {
+ if (::arrow::BitUtil::GetBit(valid_bits, valid_bits_offset + i)) {
+ PARQUET_THROW_NOT_OK(builder_->Append(values[i].ptr));
+ } else {
+ PARQUET_THROW_NOT_OK(builder_->AppendNull());
+ }
+ }
+ ResetValues();
+ }
+
+ private:
+ std::unique_ptr<::arrow::FixedSizeBinaryBuilder> builder_;
+};
+
+class ByteArrayChunkedRecordReader : public TypedRecordReader<ByteArrayType>,
+ virtual public BinaryRecordReader {
+ public:
+ ByteArrayChunkedRecordReader(const ColumnDescriptor* descr, LevelInfo leaf_info,
+ ::arrow::MemoryPool* pool)
+ : TypedRecordReader<ByteArrayType>(descr, leaf_info, pool) {
+ DCHECK_EQ(descr_->physical_type(), Type::BYTE_ARRAY);
+ accumulator_.builder.reset(new ::arrow::BinaryBuilder(pool));
+ }
+
+ ::arrow::ArrayVector GetBuilderChunks() override {
+ ::arrow::ArrayVector result = accumulator_.chunks;
+ if (result.size() == 0 || accumulator_.builder->length() > 0) {
+ std::shared_ptr<::arrow::Array> last_chunk;
+ PARQUET_THROW_NOT_OK(accumulator_.builder->Finish(&last_chunk));
+ result.push_back(std::move(last_chunk));
+ }
+ accumulator_.chunks = {};
+ return result;
+ }
+
+ void ReadValuesDense(int64_t values_to_read) override {
+ int64_t num_decoded = this->current_decoder_->DecodeArrowNonNull(
+ static_cast<int>(values_to_read), &accumulator_);
+ DCHECK_EQ(num_decoded, values_to_read);
+ ResetValues();
+ }
+
+ void ReadValuesSpaced(int64_t values_to_read, int64_t null_count) override {
+ int64_t num_decoded = this->current_decoder_->DecodeArrow(
+ static_cast<int>(values_to_read), static_cast<int>(null_count),
+ valid_bits_->mutable_data(), values_written_, &accumulator_);
+ DCHECK_EQ(num_decoded, values_to_read - null_count);
+ ResetValues();
+ }
+
+ private:
+ // Helper data structure for accumulating builder chunks
+ typename EncodingTraits<ByteArrayType>::Accumulator accumulator_;
+};
+
+class ByteArrayDictionaryRecordReader : public TypedRecordReader<ByteArrayType>,
+ virtual public DictionaryRecordReader {
+ public:
+ ByteArrayDictionaryRecordReader(const ColumnDescriptor* descr, LevelInfo leaf_info,
+ ::arrow::MemoryPool* pool)
+ : TypedRecordReader<ByteArrayType>(descr, leaf_info, pool), builder_(pool) {
+ this->read_dictionary_ = true;
+ }
+
+ std::shared_ptr<::arrow::ChunkedArray> GetResult() override {
+ FlushBuilder();
+ std::vector<std::shared_ptr<::arrow::Array>> result;
+ std::swap(result, result_chunks_);
+ return std::make_shared<::arrow::ChunkedArray>(std::move(result), builder_.type());
+ }
+
+ void FlushBuilder() {
+ if (builder_.length() > 0) {
+ std::shared_ptr<::arrow::Array> chunk;
+ PARQUET_THROW_NOT_OK(builder_.Finish(&chunk));
+ result_chunks_.emplace_back(std::move(chunk));
+
+ // Also clears the dictionary memo table
+ builder_.Reset();
+ }
+ }
+
+ void MaybeWriteNewDictionary() {
+ if (this->new_dictionary_) {
+ /// If there is a new dictionary, we may need to flush the builder, then
+ /// insert the new dictionary values
+ FlushBuilder();
+ builder_.ResetFull();
+ auto decoder = dynamic_cast<BinaryDictDecoder*>(this->current_decoder_);
+ decoder->InsertDictionary(&builder_);
+ this->new_dictionary_ = false;
+ }
+ }
+
+ void ReadValuesDense(int64_t values_to_read) override {
+ int64_t num_decoded = 0;
+ if (current_encoding_ == Encoding::RLE_DICTIONARY) {
+ MaybeWriteNewDictionary();
+ auto decoder = dynamic_cast<BinaryDictDecoder*>(this->current_decoder_);
+ num_decoded = decoder->DecodeIndices(static_cast<int>(values_to_read), &builder_);
+ } else {
+ num_decoded = this->current_decoder_->DecodeArrowNonNull(
+ static_cast<int>(values_to_read), &builder_);
+
+ /// Flush values since they have been copied into the builder
+ ResetValues();
+ }
+ DCHECK_EQ(num_decoded, values_to_read);
+ }
+
+ void ReadValuesSpaced(int64_t values_to_read, int64_t null_count) override {
+ int64_t num_decoded = 0;
+ if (current_encoding_ == Encoding::RLE_DICTIONARY) {
+ MaybeWriteNewDictionary();
+ auto decoder = dynamic_cast<BinaryDictDecoder*>(this->current_decoder_);
+ num_decoded = decoder->DecodeIndicesSpaced(
+ static_cast<int>(values_to_read), static_cast<int>(null_count),
+ valid_bits_->mutable_data(), values_written_, &builder_);
+ } else {
+ num_decoded = this->current_decoder_->DecodeArrow(
+ static_cast<int>(values_to_read), static_cast<int>(null_count),
+ valid_bits_->mutable_data(), values_written_, &builder_);
+
+ /// Flush values since they have been copied into the builder
+ ResetValues();
+ }
+ DCHECK_EQ(num_decoded, values_to_read - null_count);
+ }
+
+ private:
+ using BinaryDictDecoder = DictDecoder<ByteArrayType>;
+
+ ::arrow::BinaryDictionary32Builder builder_;
+ std::vector<std::shared_ptr<::arrow::Array>> result_chunks_;
+};
+
+// TODO(wesm): Implement these to some satisfaction
+template <>
+void TypedRecordReader<Int96Type>::DebugPrintState() {}
+
+template <>
+void TypedRecordReader<ByteArrayType>::DebugPrintState() {}
+
+template <>
+void TypedRecordReader<FLBAType>::DebugPrintState() {}
+
+std::shared_ptr<RecordReader> MakeByteArrayRecordReader(const ColumnDescriptor* descr,
+ LevelInfo leaf_info,
+ ::arrow::MemoryPool* pool,
+ bool read_dictionary) {
+ if (read_dictionary) {
+ return std::make_shared<ByteArrayDictionaryRecordReader>(descr, leaf_info, pool);
+ } else {
+ return std::make_shared<ByteArrayChunkedRecordReader>(descr, leaf_info, pool);
+ }
+}
+
+} // namespace
+
+std::shared_ptr<RecordReader> RecordReader::Make(const ColumnDescriptor* descr,
+ LevelInfo leaf_info, MemoryPool* pool,
+ const bool read_dictionary) {
+ switch (descr->physical_type()) {
+ case Type::BOOLEAN:
+ return std::make_shared<TypedRecordReader<BooleanType>>(descr, leaf_info, pool);
+ case Type::INT32:
+ return std::make_shared<TypedRecordReader<Int32Type>>(descr, leaf_info, pool);
+ case Type::INT64:
+ return std::make_shared<TypedRecordReader<Int64Type>>(descr, leaf_info, pool);
+ case Type::INT96:
+ return std::make_shared<TypedRecordReader<Int96Type>>(descr, leaf_info, pool);
+ case Type::FLOAT:
+ return std::make_shared<TypedRecordReader<FloatType>>(descr, leaf_info, pool);
+ case Type::DOUBLE:
+ return std::make_shared<TypedRecordReader<DoubleType>>(descr, leaf_info, pool);
+ case Type::BYTE_ARRAY:
+ return MakeByteArrayRecordReader(descr, leaf_info, pool, read_dictionary);
+ case Type::FIXED_LEN_BYTE_ARRAY:
+ return std::make_shared<FLBARecordReader>(descr, leaf_info, pool);
+ default: {
+ // PARQUET-1481: This can occur if the file is corrupt
+ std::stringstream ss;
+ ss << "Invalid physical column type: " << static_cast<int>(descr->physical_type());
+ throw ParquetException(ss.str());
+ }
+ }
+ // Unreachable code, but suppress compiler warning
+ return nullptr;
+}
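+
+// A hedged caller-side sketch (page_reader is assumed to come from
+// RowGroupReader::GetColumnPageReader; none of these names are defined here):
+//
+//   auto reader = RecordReader::Make(descr, leaf_info, pool);
+//   reader->SetPageReader(std::move(page_reader));
+//   while (reader->HasMoreData()) {
+//     reader->ReadRecords(/*num_records=*/1024);
+//   }
+//   std::shared_ptr<ResizableBuffer> values = reader->ReleaseValues();
+//   std::shared_ptr<ResizableBuffer> valid_bits = reader->ReleaseIsValid();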
+
+} // namespace internal
+} // namespace parquet
diff --git a/contrib/libs/apache/arrow/cpp/src/parquet/column_reader.h b/contrib/libs/apache/arrow/cpp/src/parquet/column_reader.h
index 7f51cff2e97..8c48e4d7843 100644
--- a/contrib/libs/apache/arrow/cpp/src/parquet/column_reader.h
+++ b/contrib/libs/apache/arrow/cpp/src/parquet/column_reader.h
@@ -1,376 +1,376 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-#pragma once
-
-#include <cstdint>
-#include <memory>
-#include <utility>
-#include <vector>
-
-#include "parquet/exception.h"
-#include "parquet/level_conversion.h"
-#include "parquet/platform.h"
-#include "parquet/schema.h"
-#include "parquet/types.h"
-
-namespace arrow {
-
-class Array;
-class ChunkedArray;
-
-namespace BitUtil {
-class BitReader;
-} // namespace BitUtil
-
-namespace util {
-class RleDecoder;
-} // namespace util
-
-} // namespace arrow
-
-namespace parquet {
-
-class Decryptor;
-class Page;
-
-// 16 MB is the default maximum page header size
-static constexpr uint32_t kDefaultMaxPageHeaderSize = 16 * 1024 * 1024;
-
-// 16 KB is the default expected page header size
-static constexpr uint32_t kDefaultPageHeaderSize = 16 * 1024;
-
-class PARQUET_EXPORT LevelDecoder {
- public:
- LevelDecoder();
- ~LevelDecoder();
-
- // Initialize the LevelDecoder state with new data
- // and return the number of bytes consumed
- int SetData(Encoding::type encoding, int16_t max_level, int num_buffered_values,
- const uint8_t* data, int32_t data_size);
-
- void SetDataV2(int32_t num_bytes, int16_t max_level, int num_buffered_values,
- const uint8_t* data);
-
- // Decodes a batch of levels into an array and returns the number of levels decoded
- int Decode(int batch_size, int16_t* levels);
-
- private:
- int bit_width_;
- int num_values_remaining_;
- Encoding::type encoding_;
- std::unique_ptr<::arrow::util::RleDecoder> rle_decoder_;
- std::unique_ptr<::arrow::BitUtil::BitReader> bit_packed_decoder_;
- int16_t max_level_;
-};
-
-struct CryptoContext {
- CryptoContext(bool start_with_dictionary_page, int16_t rg_ordinal, int16_t col_ordinal,
- std::shared_ptr<Decryptor> meta, std::shared_ptr<Decryptor> data)
- : start_decrypt_with_dictionary_page(start_with_dictionary_page),
- row_group_ordinal(rg_ordinal),
- column_ordinal(col_ordinal),
- meta_decryptor(std::move(meta)),
- data_decryptor(std::move(data)) {}
- CryptoContext() {}
-
- bool start_decrypt_with_dictionary_page = false;
- int16_t row_group_ordinal = -1;
- int16_t column_ordinal = -1;
- std::shared_ptr<Decryptor> meta_decryptor;
- std::shared_ptr<Decryptor> data_decryptor;
-};
-
-// Abstract page iterator interface. This way, we can feed column pages to the
-// ColumnReader through whatever mechanism we choose
-class PARQUET_EXPORT PageReader {
- public:
- virtual ~PageReader() = default;
-
- static std::unique_ptr<PageReader> Open(
- std::shared_ptr<ArrowInputStream> stream, int64_t total_num_rows,
- Compression::type codec, ::arrow::MemoryPool* pool = ::arrow::default_memory_pool(),
- const CryptoContext* ctx = NULLPTR);
-
- // @returns: shared_ptr<Page>(nullptr) on EOS, std::shared_ptr<Page>
- // containing new Page otherwise
- virtual std::shared_ptr<Page> NextPage() = 0;
-
- virtual void set_max_page_header_size(uint32_t size) = 0;
-};
-
-class PARQUET_EXPORT ColumnReader {
- public:
- virtual ~ColumnReader() = default;
-
- static std::shared_ptr<ColumnReader> Make(
- const ColumnDescriptor* descr, std::unique_ptr<PageReader> pager,
- ::arrow::MemoryPool* pool = ::arrow::default_memory_pool());
-
- // Returns true if there are still values in this column.
- virtual bool HasNext() = 0;
-
- virtual Type::type type() const = 0;
-
- virtual const ColumnDescriptor* descr() const = 0;
-
- // Get the encoding that can be exposed by this reader. If it returns
- // dictionary encoding, then ReadBatchWithDictionary can be used to read data.
- //
- // \note API EXPERIMENTAL
- virtual ExposedEncoding GetExposedEncoding() = 0;
-
- protected:
- friend class RowGroupReader;
- // Set the encoding that can be exposed by this reader.
- //
- // \note API EXPERIMENTAL
- virtual void SetExposedEncoding(ExposedEncoding encoding) = 0;
-};
-
-// API to read values from a single column. This is the main client-facing API.
-template <typename DType>
-class TypedColumnReader : public ColumnReader {
- public:
- typedef typename DType::c_type T;
-
- // Read a batch of repetition levels, definition levels, and values from the
- // column.
- //
- // Since null values are not stored in the values, the number of values read
- // may be less than the number of repetition and definition levels. With
- // nested data this is almost certainly true.
- //
- // Set def_levels or rep_levels to nullptr if you want to skip reading them.
- // This is only safe if you know through some other source that there are no
- // undefined values.
- //
- // To fully exhaust a row group, you must read batches until the number of
- // values read reaches the number of stored values according to the metadata.
- //
- // This API is the same for both V1 and V2 of the DataPage
- //
- // @returns: actual number of levels read (see values_read for number of values read)
- virtual int64_t ReadBatch(int64_t batch_size, int16_t* def_levels, int16_t* rep_levels,
- T* values, int64_t* values_read) = 0;
-
- /// Read a batch of repetition levels, definition levels, and values from the
- /// column and leave spaces for null entries on the lowest level in the values
- /// buffer.
- ///
- /// In comparison to ReadBatch, the length of the repetition and definition
- /// levels is the same as the number of values read when
- /// max_definition_level == 1.
- /// In the case of max_definition_level > 1, the repetition and definition
- /// levels are larger than the values but the values include the null entries
- /// with definition_level == (max_definition_level - 1).
- ///
- /// To fully exhaust a row group, you must read batches until the number of
- /// values read reaches the number of stored values according to the metadata.
- ///
- /// @param batch_size the number of levels to read
- /// @param[out] def_levels The Parquet definition levels, output has
- /// the length levels_read.
- /// @param[out] rep_levels The Parquet repetition levels, output has
- /// the length levels_read.
- /// @param[out] values The values in the lowest nested level including
- /// spacing for nulls on the lowest levels; output has the length
- /// values_read.
- /// @param[out] valid_bits Memory allocated for a bitmap that indicates if
- /// the row is null or on the maximum definition level. For performance
- /// reasons the underlying buffer should be able to store 1 bit more than
- /// required. If this requires an additional byte, this byte is only read
- /// but never written to.
- /// @param valid_bits_offset The offset in bits of the valid_bits where the
- /// first relevant bit resides.
- /// @param[out] levels_read The number of repetition/definition levels that were read.
- /// @param[out] values_read The number of values read, this includes all
- /// non-null entries as well as all null-entries on the lowest level
- /// (i.e. definition_level == max_definition_level - 1)
- /// @param[out] null_count The number of nulls on the lowest levels.
- /// (i.e. (values_read - null_count) is total number of non-null entries)
- ///
- /// \deprecated Since 4.0.0
- ARROW_DEPRECATED("Doesn't handle nesting correctly and is unused outside of unit tests.")
- virtual int64_t ReadBatchSpaced(int64_t batch_size, int16_t* def_levels,
- int16_t* rep_levels, T* values, uint8_t* valid_bits,
- int64_t valid_bits_offset, int64_t* levels_read,
- int64_t* values_read, int64_t* null_count) = 0;
-
- // Skip reading levels
- // Returns the number of levels skipped
- virtual int64_t Skip(int64_t num_rows_to_skip) = 0;
-
- // Read a batch of repetition levels, definition levels, and indices from the
- // column, and read the dictionary if a dictionary page is encountered while
- // reading pages. This API is similar to ReadBatch(), with the added ability
- // to read the dictionary and indices. It is only valid to call this method
- // when the reader can expose dictionary encoding (i.e., the reader's
- // GetExposedEncoding() returns DICTIONARY).
- //
- // The dictionary is read along with the data page. When there's no data page,
- // the dictionary won't be returned.
- //
- // @param batch_size The batch size to read
- // @param[out] def_levels The Parquet definition levels.
- // @param[out] rep_levels The Parquet repetition levels.
- // @param[out] indices The dictionary indices.
- // @param[out] indices_read The number of indices read.
- // @param[out] dict The pointer to dictionary values. It will return nullptr if
- // there's no data page. Each column chunk only has one dictionary page. The dictionary
- // is owned by the reader, so the caller is responsible for copying the dictionary
- // values before the reader gets destroyed.
- // @param[out] dict_len The dictionary length. It will return 0 if there's no data
- // page.
- // @returns: actual number of levels read (see indices_read for number of
- // indices read)
- //
- // \note API EXPERIMENTAL
- virtual int64_t ReadBatchWithDictionary(int64_t batch_size, int16_t* def_levels,
- int16_t* rep_levels, int32_t* indices,
- int64_t* indices_read, const T** dict,
- int32_t* dict_len) = 0;
-};
-
-namespace internal {
-
-/// \brief Stateful column reader that delimits semantic records for both flat
-/// and nested columns
-///
-/// \note API EXPERIMENTAL
-/// \since 1.3.0
-class RecordReader {
- public:
- static std::shared_ptr<RecordReader> Make(
- const ColumnDescriptor* descr, LevelInfo leaf_info,
- ::arrow::MemoryPool* pool = ::arrow::default_memory_pool(),
- const bool read_dictionary = false);
-
- virtual ~RecordReader() = default;
-
- /// \brief Attempt to read indicated number of records from column chunk
- /// \return number of records read
- virtual int64_t ReadRecords(int64_t num_records) = 0;
-
- /// \brief Pre-allocate space for data. Results in better flat read performance
- virtual void Reserve(int64_t num_values) = 0;
-
- /// \brief Clear consumed values and repetition/definition levels as the
- /// result of calling ReadRecords
- virtual void Reset() = 0;
-
- /// \brief Transfer filled values buffer to caller. A new one will be
- /// allocated in subsequent ReadRecords calls
- virtual std::shared_ptr<ResizableBuffer> ReleaseValues() = 0;
-
- /// \brief Transfer filled validity bitmap buffer to caller. A new one will
- /// be allocated in subsequent ReadRecords calls
- virtual std::shared_ptr<ResizableBuffer> ReleaseIsValid() = 0;
-
- /// \brief Return true if the record reader has more internal data yet to
- /// process
- virtual bool HasMoreData() const = 0;
-
- /// \brief Advance record reader to the next row group
- /// \param[in] reader obtained from RowGroupReader::GetColumnPageReader
- virtual void SetPageReader(std::unique_ptr<PageReader> reader) = 0;
-
- virtual void DebugPrintState() = 0;
-
- /// \brief Decoded definition levels
- int16_t* def_levels() const {
- return reinterpret_cast<int16_t*>(def_levels_->mutable_data());
- }
-
- /// \brief Decoded repetition levels
- int16_t* rep_levels() const {
- return reinterpret_cast<int16_t*>(rep_levels_->mutable_data());
- }
-
- /// \brief Decoded values, including nulls, if any
- uint8_t* values() const { return values_->mutable_data(); }
-
- /// \brief Number of values written including nulls (if any)
- int64_t values_written() const { return values_written_; }
-
- /// \brief Number of definition / repetition levels (from those that have
- /// been decoded) that have been consumed inside the reader.
- int64_t levels_position() const { return levels_position_; }
-
- /// \brief Number of definition / repetition levels that have been written
- /// internally in the reader
- int64_t levels_written() const { return levels_written_; }
-
- /// \brief Number of nulls in the leaf
- int64_t null_count() const { return null_count_; }
-
- /// \brief True if the leaf values are nullable
- bool nullable_values() const { return nullable_values_; }
-
- /// \brief True if reading directly as Arrow dictionary-encoded
- bool read_dictionary() const { return read_dictionary_; }
-
- protected:
- bool nullable_values_;
-
- bool at_record_start_;
- int64_t records_read_;
-
- int64_t values_written_;
- int64_t values_capacity_;
- int64_t null_count_;
-
- int64_t levels_written_;
- int64_t levels_position_;
- int64_t levels_capacity_;
-
- std::shared_ptr<::arrow::ResizableBuffer> values_;
- // In the case of false, don't allocate the values buffer (when we directly read into
- // builder classes).
- bool uses_values_;
-
- std::shared_ptr<::arrow::ResizableBuffer> valid_bits_;
- std::shared_ptr<::arrow::ResizableBuffer> def_levels_;
- std::shared_ptr<::arrow::ResizableBuffer> rep_levels_;
-
- bool read_dictionary_ = false;
-};
-
-class BinaryRecordReader : virtual public RecordReader {
- public:
- virtual std::vector<std::shared_ptr<::arrow::Array>> GetBuilderChunks() = 0;
-};
-
-/// \brief Read records directly to dictionary-encoded Arrow form (int32
-/// indices). Only valid for BYTE_ARRAY columns
-class DictionaryRecordReader : virtual public RecordReader {
- public:
- virtual std::shared_ptr<::arrow::ChunkedArray> GetResult() = 0;
-};
-
-} // namespace internal
-
-using BoolReader = TypedColumnReader<BooleanType>;
-using Int32Reader = TypedColumnReader<Int32Type>;
-using Int64Reader = TypedColumnReader<Int64Type>;
-using Int96Reader = TypedColumnReader<Int96Type>;
-using FloatReader = TypedColumnReader<FloatType>;
-using DoubleReader = TypedColumnReader<DoubleType>;
-using ByteArrayReader = TypedColumnReader<ByteArrayType>;
-using FixedLenByteArrayReader = TypedColumnReader<FLBAType>;
-
-} // namespace parquet
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+
+#include <cstdint>
+#include <memory>
+#include <utility>
+#include <vector>
+
+#include "parquet/exception.h"
+#include "parquet/level_conversion.h"
+#include "parquet/platform.h"
+#include "parquet/schema.h"
+#include "parquet/types.h"
+
+namespace arrow {
+
+class Array;
+class ChunkedArray;
+
+namespace BitUtil {
+class BitReader;
+} // namespace BitUtil
+
+namespace util {
+class RleDecoder;
+} // namespace util
+
+} // namespace arrow
+
+namespace parquet {
+
+class Decryptor;
+class Page;
+
+// 16 MB is the default maximum page header size
+static constexpr uint32_t kDefaultMaxPageHeaderSize = 16 * 1024 * 1024;
+
+// 16 KB is the default expected page header size
+static constexpr uint32_t kDefaultPageHeaderSize = 16 * 1024;
+
+class PARQUET_EXPORT LevelDecoder {
+ public:
+ LevelDecoder();
+ ~LevelDecoder();
+
+ // Initialize the LevelDecoder state with new data
+ // and return the number of bytes consumed
+ int SetData(Encoding::type encoding, int16_t max_level, int num_buffered_values,
+ const uint8_t* data, int32_t data_size);
+
+ void SetDataV2(int32_t num_bytes, int16_t max_level, int num_buffered_values,
+ const uint8_t* data);
+
+ // Decodes a batch of levels into an array and returns the number of levels decoded
+ int Decode(int batch_size, int16_t* levels);
+
+ private:
+ int bit_width_;
+ int num_values_remaining_;
+ Encoding::type encoding_;
+ std::unique_ptr<::arrow::util::RleDecoder> rle_decoder_;
+ std::unique_ptr<::arrow::BitUtil::BitReader> bit_packed_decoder_;
+ int16_t max_level_;
+};
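+
+// Illustrative decode loop (a sketch; page_data and the level counts are
+// assumed to come from a data page header):
+//
+//   LevelDecoder decoder;
+//   decoder.SetData(Encoding::RLE, max_def_level, num_buffered_values,
+//                   page_data, page_data_size);
+//   std::vector<int16_t> levels(num_buffered_values);
+//   int decoded = decoder.Decode(static_cast<int>(levels.size()), levels.data());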
+
+struct CryptoContext {
+ CryptoContext(bool start_with_dictionary_page, int16_t rg_ordinal, int16_t col_ordinal,
+ std::shared_ptr<Decryptor> meta, std::shared_ptr<Decryptor> data)
+ : start_decrypt_with_dictionary_page(start_with_dictionary_page),
+ row_group_ordinal(rg_ordinal),
+ column_ordinal(col_ordinal),
+ meta_decryptor(std::move(meta)),
+ data_decryptor(std::move(data)) {}
+ CryptoContext() {}
+
+ bool start_decrypt_with_dictionary_page = false;
+ int16_t row_group_ordinal = -1;
+ int16_t column_ordinal = -1;
+ std::shared_ptr<Decryptor> meta_decryptor;
+ std::shared_ptr<Decryptor> data_decryptor;
+};
+
+// Abstract page iterator interface. This way, we can feed column pages to the
+// ColumnReader through whatever mechanism we choose
+class PARQUET_EXPORT PageReader {
+ public:
+ virtual ~PageReader() = default;
+
+ static std::unique_ptr<PageReader> Open(
+ std::shared_ptr<ArrowInputStream> stream, int64_t total_num_rows,
+ Compression::type codec, ::arrow::MemoryPool* pool = ::arrow::default_memory_pool(),
+ const CryptoContext* ctx = NULLPTR);
+
+ // @returns: shared_ptr<Page>(nullptr) on EOS, std::shared_ptr<Page>
+ // containing new Page otherwise
+ virtual std::shared_ptr<Page> NextPage() = 0;
+
+ virtual void set_max_page_header_size(uint32_t size) = 0;
+};
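+
+// Typical iteration pattern (illustrative only):
+//
+//   while (std::shared_ptr<Page> page = pager->NextPage()) {
+//     // ... process the page; NextPage() returns nullptr at end of stream
+//   }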
+
+class PARQUET_EXPORT ColumnReader {
+ public:
+ virtual ~ColumnReader() = default;
+
+ static std::shared_ptr<ColumnReader> Make(
+ const ColumnDescriptor* descr, std::unique_ptr<PageReader> pager,
+ ::arrow::MemoryPool* pool = ::arrow::default_memory_pool());
+
+ // Returns true if there are still values in this column.
+ virtual bool HasNext() = 0;
+
+ virtual Type::type type() const = 0;
+
+ virtual const ColumnDescriptor* descr() const = 0;
+
+ // Get the encoding that can be exposed by this reader. If it returns
+ // dictionary encoding, then ReadBatchWithDictionary can be used to read data.
+ //
+ // \note API EXPERIMENTAL
+ virtual ExposedEncoding GetExposedEncoding() = 0;
+
+ protected:
+ friend class RowGroupReader;
+ // Set the encoding that can be exposed by this reader.
+ //
+ // \note API EXPERIMENTAL
+ virtual void SetExposedEncoding(ExposedEncoding encoding) = 0;
+};
+
+// API to read values from a single column. This is the main client-facing API.
+template <typename DType>
+class TypedColumnReader : public ColumnReader {
+ public:
+ typedef typename DType::c_type T;
+
+ // Read a batch of repetition levels, definition levels, and values from the
+ // column.
+ //
+ // Since null values are not stored in the values, the number of values read
+ // may be less than the number of repetition and definition levels. With
+ // nested data this is almost certainly true.
+ //
+ // Set def_levels or rep_levels to nullptr if you want to skip reading them.
+ // This is only safe if you know through some other source that there are no
+ // undefined values.
+ //
+ // To fully exhaust a row group, you must read batches until the number of
+ // values read reaches the number of stored values according to the metadata.
+ //
+ // This API is the same for both V1 and V2 of the DataPage
+ //
+ // @returns: actual number of levels read (see values_read for number of values read)
+ virtual int64_t ReadBatch(int64_t batch_size, int16_t* def_levels, int16_t* rep_levels,
+ T* values, int64_t* values_read) = 0;
+
+ /// Read a batch of repetition levels, definition levels, and values from the
+ /// column and leave spaces for null entries on the lowest level in the values
+ /// buffer.
+ ///
+ /// In comparison to ReadBatch, the length of the repetition and definition
+ /// levels is the same as the number of values read when
+ /// max_definition_level == 1.
+ /// In the case of max_definition_level > 1, the repetition and definition
+ /// levels are larger than the values but the values include the null entries
+ /// with definition_level == (max_definition_level - 1).
+ ///
+ /// To fully exhaust a row group, you must read batches until the number of
+ /// values read reaches the number of stored values according to the metadata.
+ ///
+ /// @param batch_size the number of levels to read
+ /// @param[out] def_levels The Parquet definition levels, output has
+ /// the length levels_read.
+ /// @param[out] rep_levels The Parquet repetition levels, output has
+ /// the length levels_read.
+ /// @param[out] values The values in the lowest nested level including
+ /// spacing for nulls on the lowest levels; output has the length
+ /// values_read.
+ /// @param[out] valid_bits Memory allocated for a bitmap that indicates if
+ /// the row is null or on the maximum definition level. For performance
+ /// reasons the underlying buffer should be able to store 1 bit more than
+ /// required. If this requires an additional byte, this byte is only read
+ /// but never written to.
+ /// @param valid_bits_offset The offset in bits of the valid_bits where the
+ /// first relevant bit resides.
+ /// @param[out] levels_read The number of repetition/definition levels that were read.
+ /// @param[out] values_read The number of values read, this includes all
+ /// non-null entries as well as all null-entries on the lowest level
+ /// (i.e. definition_level == max_definition_level - 1)
+ /// @param[out] null_count The number of nulls on the lowest levels.
+ /// (i.e. (values_read - null_count) is total number of non-null entries)
+ ///
+ /// \deprecated Since 4.0.0
+ ARROW_DEPRECATED("Doesn't handle nesting correctly and is unused outside of unit tests.")
+ virtual int64_t ReadBatchSpaced(int64_t batch_size, int16_t* def_levels,
+ int16_t* rep_levels, T* values, uint8_t* valid_bits,
+ int64_t valid_bits_offset, int64_t* levels_read,
+ int64_t* values_read, int64_t* null_count) = 0;
+
+ // Skip reading levels
+ // Returns the number of levels skipped
+ virtual int64_t Skip(int64_t num_rows_to_skip) = 0;
+
+ // Read a batch of repetition levels, definition levels, and indices from the
+ // column, and read the dictionary if a dictionary page is encountered while
+ // reading pages. This API is similar to ReadBatch(), with the added ability
+ // to read the dictionary and indices. It is only valid to call this method
+ // when the reader can expose dictionary encoding (i.e., the reader's
+ // GetExposedEncoding() returns DICTIONARY).
+ //
+ // The dictionary is read along with the data page. When there's no data page,
+ // the dictionary won't be returned.
+ //
+ // @param batch_size The batch size to read
+ // @param[out] def_levels The Parquet definition levels.
+ // @param[out] rep_levels The Parquet repetition levels.
+ // @param[out] indices The dictionary indices.
+ // @param[out] indices_read The number of indices read.
+ // @param[out] dict The pointer to dictionary values. It will return nullptr if
+ // there's no data page. Each column chunk only has one dictionary page. The dictionary
+ // is owned by the reader, so the caller is responsible for copying the dictionary
+ // values before the reader gets destroyed.
+ // @param[out] dict_len The dictionary length. It will return 0 if there's no data
+ // page.
+ // @returns: actual number of levels read (see indices_read for number of
+ // indices read)
+ //
+ // \note API EXPERIMENTAL
+ virtual int64_t ReadBatchWithDictionary(int64_t batch_size, int16_t* def_levels,
+ int16_t* rep_levels, int32_t* indices,
+ int64_t* indices_read, const T** dict,
+ int32_t* dict_len) = 0;
+};
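+
+// A hedged example of the ReadBatch loop described above (creation of
+// column_reader and row-group wiring are assumed to happen elsewhere):
+//
+//   auto typed = std::static_pointer_cast<Int64Reader>(column_reader);
+//   std::vector<int16_t> def_levels(1024), rep_levels(1024);
+//   std::vector<int64_t> values(1024);
+//   while (typed->HasNext()) {
+//     int64_t values_read = 0;
+//     typed->ReadBatch(1024, def_levels.data(), rep_levels.data(),
+//                      values.data(), &values_read);
+//     // values[0, values_read) now holds the non-null values
+//   }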
+
+namespace internal {
+
+/// \brief Stateful column reader that delimits semantic records for both flat
+/// and nested columns
+///
+/// \note API EXPERIMENTAL
+/// \since 1.3.0
+class RecordReader {
+ public:
+ static std::shared_ptr<RecordReader> Make(
+ const ColumnDescriptor* descr, LevelInfo leaf_info,
+ ::arrow::MemoryPool* pool = ::arrow::default_memory_pool(),
+ const bool read_dictionary = false);
+
+ virtual ~RecordReader() = default;
+
+ /// \brief Attempt to read indicated number of records from column chunk
+ /// \return number of records read
+ virtual int64_t ReadRecords(int64_t num_records) = 0;
+
+ /// \brief Pre-allocate space for data. Results in better flat read performance
+ virtual void Reserve(int64_t num_values) = 0;
+
+ /// \brief Clear consumed values and repetition/definition levels as the
+ /// result of calling ReadRecords
+ virtual void Reset() = 0;
+
+ /// \brief Transfer filled values buffer to caller. A new one will be
+ /// allocated in subsequent ReadRecords calls
+ virtual std::shared_ptr<ResizableBuffer> ReleaseValues() = 0;
+
+ /// \brief Transfer filled validity bitmap buffer to caller. A new one will
+ /// be allocated in subsequent ReadRecords calls
+ virtual std::shared_ptr<ResizableBuffer> ReleaseIsValid() = 0;
+
+ /// \brief Return true if the record reader has more internal data yet to
+ /// process
+ virtual bool HasMoreData() const = 0;
+
+ /// \brief Advance record reader to the next row group
+ /// \param[in] reader obtained from RowGroupReader::GetColumnPageReader
+ virtual void SetPageReader(std::unique_ptr<PageReader> reader) = 0;
+
+ virtual void DebugPrintState() = 0;
+
+ /// \brief Decoded definition levels
+ int16_t* def_levels() const {
+ return reinterpret_cast<int16_t*>(def_levels_->mutable_data());
+ }
+
+ /// \brief Decoded repetition levels
+ int16_t* rep_levels() const {
+ return reinterpret_cast<int16_t*>(rep_levels_->mutable_data());
+ }
+
+ /// \brief Decoded values, including nulls, if any
+ uint8_t* values() const { return values_->mutable_data(); }
+
+ /// \brief Number of values written including nulls (if any)
+ int64_t values_written() const { return values_written_; }
+
+ /// \brief Number of definition / repetition levels (from those that have
+ /// been decoded) that have been consumed inside the reader.
+ int64_t levels_position() const { return levels_position_; }
+
+ /// \brief Number of definition / repetition levels that have been written
+ /// internally in the reader
+ int64_t levels_written() const { return levels_written_; }
+
+ /// \brief Number of nulls in the leaf
+ int64_t null_count() const { return null_count_; }
+
+ /// \brief True if the leaf values are nullable
+ bool nullable_values() const { return nullable_values_; }
+
+ /// \brief True if reading directly as Arrow dictionary-encoded
+ bool read_dictionary() const { return read_dictionary_; }
+
+ protected:
+ bool nullable_values_;
+
+ bool at_record_start_;
+ int64_t records_read_;
+
+ int64_t values_written_;
+ int64_t values_capacity_;
+ int64_t null_count_;
+
+ int64_t levels_written_;
+ int64_t levels_position_;
+ int64_t levels_capacity_;
+
+ std::shared_ptr<::arrow::ResizableBuffer> values_;
+ // In the case of false, don't allocate the values buffer (when we directly read into
+ // builder classes).
+ bool uses_values_;
+
+ std::shared_ptr<::arrow::ResizableBuffer> valid_bits_;
+ std::shared_ptr<::arrow::ResizableBuffer> def_levels_;
+ std::shared_ptr<::arrow::ResizableBuffer> rep_levels_;
+
+ bool read_dictionary_ = false;
+};
+
+class BinaryRecordReader : virtual public RecordReader {
+ public:
+ virtual std::vector<std::shared_ptr<::arrow::Array>> GetBuilderChunks() = 0;
+};
+
+/// \brief Read records directly to dictionary-encoded Arrow form (int32
+/// indices). Only valid for BYTE_ARRAY columns
+class DictionaryRecordReader : virtual public RecordReader {
+ public:
+ virtual std::shared_ptr<::arrow::ChunkedArray> GetResult() = 0;
+};
+
+} // namespace internal
+
+using BoolReader = TypedColumnReader<BooleanType>;
+using Int32Reader = TypedColumnReader<Int32Type>;
+using Int64Reader = TypedColumnReader<Int64Type>;
+using Int96Reader = TypedColumnReader<Int96Type>;
+using FloatReader = TypedColumnReader<FloatType>;
+using DoubleReader = TypedColumnReader<DoubleType>;
+using ByteArrayReader = TypedColumnReader<ByteArrayType>;
+using FixedLenByteArrayReader = TypedColumnReader<FLBAType>;
+
+} // namespace parquet
diff --git a/contrib/libs/apache/arrow/cpp/src/parquet/column_scanner.cc b/contrib/libs/apache/arrow/cpp/src/parquet/column_scanner.cc
index 0ef83568e3e..9ab1663ccd7 100644
--- a/contrib/libs/apache/arrow/cpp/src/parquet/column_scanner.cc
+++ b/contrib/libs/apache/arrow/cpp/src/parquet/column_scanner.cc
@@ -1,91 +1,91 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-#include "parquet/column_scanner.h"
-
-#include <cstdint>
-#include <memory>
-
-#include "parquet/column_reader.h"
-
-using arrow::MemoryPool;
-
-namespace parquet {
-
-std::shared_ptr<Scanner> Scanner::Make(std::shared_ptr<ColumnReader> col_reader,
- int64_t batch_size, MemoryPool* pool) {
- switch (col_reader->type()) {
- case Type::BOOLEAN:
- return std::make_shared<BoolScanner>(std::move(col_reader), batch_size, pool);
- case Type::INT32:
- return std::make_shared<Int32Scanner>(std::move(col_reader), batch_size, pool);
- case Type::INT64:
- return std::make_shared<Int64Scanner>(std::move(col_reader), batch_size, pool);
- case Type::INT96:
- return std::make_shared<Int96Scanner>(std::move(col_reader), batch_size, pool);
- case Type::FLOAT:
- return std::make_shared<FloatScanner>(std::move(col_reader), batch_size, pool);
- case Type::DOUBLE:
- return std::make_shared<DoubleScanner>(std::move(col_reader), batch_size, pool);
- case Type::BYTE_ARRAY:
- return std::make_shared<ByteArrayScanner>(std::move(col_reader), batch_size, pool);
- case Type::FIXED_LEN_BYTE_ARRAY:
- return std::make_shared<FixedLenByteArrayScanner>(std::move(col_reader), batch_size,
- pool);
- default:
- ParquetException::NYI("type reader not implemented");
- }
- // Unreachable code, but suppress compiler warning
- return std::shared_ptr<Scanner>(nullptr);
-}
-
-int64_t ScanAllValues(int32_t batch_size, int16_t* def_levels, int16_t* rep_levels,
- uint8_t* values, int64_t* values_buffered,
- parquet::ColumnReader* reader) {
- switch (reader->type()) {
- case parquet::Type::BOOLEAN:
- return ScanAll<parquet::BoolReader>(batch_size, def_levels, rep_levels, values,
- values_buffered, reader);
- case parquet::Type::INT32:
- return ScanAll<parquet::Int32Reader>(batch_size, def_levels, rep_levels, values,
- values_buffered, reader);
- case parquet::Type::INT64:
- return ScanAll<parquet::Int64Reader>(batch_size, def_levels, rep_levels, values,
- values_buffered, reader);
- case parquet::Type::INT96:
- return ScanAll<parquet::Int96Reader>(batch_size, def_levels, rep_levels, values,
- values_buffered, reader);
- case parquet::Type::FLOAT:
- return ScanAll<parquet::FloatReader>(batch_size, def_levels, rep_levels, values,
- values_buffered, reader);
- case parquet::Type::DOUBLE:
- return ScanAll<parquet::DoubleReader>(batch_size, def_levels, rep_levels, values,
- values_buffered, reader);
- case parquet::Type::BYTE_ARRAY:
- return ScanAll<parquet::ByteArrayReader>(batch_size, def_levels, rep_levels, values,
- values_buffered, reader);
- case parquet::Type::FIXED_LEN_BYTE_ARRAY:
- return ScanAll<parquet::FixedLenByteArrayReader>(batch_size, def_levels, rep_levels,
- values, values_buffered, reader);
- default:
- parquet::ParquetException::NYI("type reader not implemented");
- }
- // Unreachable code, but suppress compiler warning
- return 0;
-}
-
-} // namespace parquet
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "parquet/column_scanner.h"
+
+#include <cstdint>
+#include <memory>
+
+#include "parquet/column_reader.h"
+
+using arrow::MemoryPool;
+
+namespace parquet {
+
+std::shared_ptr<Scanner> Scanner::Make(std::shared_ptr<ColumnReader> col_reader,
+ int64_t batch_size, MemoryPool* pool) {
+ switch (col_reader->type()) {
+ case Type::BOOLEAN:
+ return std::make_shared<BoolScanner>(std::move(col_reader), batch_size, pool);
+ case Type::INT32:
+ return std::make_shared<Int32Scanner>(std::move(col_reader), batch_size, pool);
+ case Type::INT64:
+ return std::make_shared<Int64Scanner>(std::move(col_reader), batch_size, pool);
+ case Type::INT96:
+ return std::make_shared<Int96Scanner>(std::move(col_reader), batch_size, pool);
+ case Type::FLOAT:
+ return std::make_shared<FloatScanner>(std::move(col_reader), batch_size, pool);
+ case Type::DOUBLE:
+ return std::make_shared<DoubleScanner>(std::move(col_reader), batch_size, pool);
+ case Type::BYTE_ARRAY:
+ return std::make_shared<ByteArrayScanner>(std::move(col_reader), batch_size, pool);
+ case Type::FIXED_LEN_BYTE_ARRAY:
+ return std::make_shared<FixedLenByteArrayScanner>(std::move(col_reader), batch_size,
+ pool);
+ default:
+ ParquetException::NYI("type reader not implemented");
+ }
+ // Unreachable code, but suppress compiler warning
+ return std::shared_ptr<Scanner>(nullptr);
+}
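+
+// Caller-side sketch (illustrative; row_group_reader is assumed to come from
+// a ParquetFileReader):
+//
+//   std::shared_ptr<Scanner> scanner =
+//       Scanner::Make(row_group_reader->Column(0), /*batch_size=*/128);
+//   while (scanner->HasNext()) {
+//     scanner->PrintNext(std::cout, /*width=*/17);
+//   }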
+
+int64_t ScanAllValues(int32_t batch_size, int16_t* def_levels, int16_t* rep_levels,
+ uint8_t* values, int64_t* values_buffered,
+ parquet::ColumnReader* reader) {
+ switch (reader->type()) {
+ case parquet::Type::BOOLEAN:
+ return ScanAll<parquet::BoolReader>(batch_size, def_levels, rep_levels, values,
+ values_buffered, reader);
+ case parquet::Type::INT32:
+ return ScanAll<parquet::Int32Reader>(batch_size, def_levels, rep_levels, values,
+ values_buffered, reader);
+ case parquet::Type::INT64:
+ return ScanAll<parquet::Int64Reader>(batch_size, def_levels, rep_levels, values,
+ values_buffered, reader);
+ case parquet::Type::INT96:
+ return ScanAll<parquet::Int96Reader>(batch_size, def_levels, rep_levels, values,
+ values_buffered, reader);
+ case parquet::Type::FLOAT:
+ return ScanAll<parquet::FloatReader>(batch_size, def_levels, rep_levels, values,
+ values_buffered, reader);
+ case parquet::Type::DOUBLE:
+ return ScanAll<parquet::DoubleReader>(batch_size, def_levels, rep_levels, values,
+ values_buffered, reader);
+ case parquet::Type::BYTE_ARRAY:
+ return ScanAll<parquet::ByteArrayReader>(batch_size, def_levels, rep_levels, values,
+ values_buffered, reader);
+ case parquet::Type::FIXED_LEN_BYTE_ARRAY:
+ return ScanAll<parquet::FixedLenByteArrayReader>(batch_size, def_levels, rep_levels,
+ values, values_buffered, reader);
+ default:
+ parquet::ParquetException::NYI("type reader not implemented");
+ }
+ // Unreachable code, but suppress compiler warning
+ return 0;
+}
+
+} // namespace parquet
diff --git a/contrib/libs/apache/arrow/cpp/src/parquet/column_scanner.h b/contrib/libs/apache/arrow/cpp/src/parquet/column_scanner.h
index 61d08841409..d53435f03cd 100644
--- a/contrib/libs/apache/arrow/cpp/src/parquet/column_scanner.h
+++ b/contrib/libs/apache/arrow/cpp/src/parquet/column_scanner.h
@@ -1,262 +1,262 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-#pragma once
-
-#include <stdio.h>
-
-#include <cstdint>
-#include <memory>
-#include <ostream>
-#include <string>
-#include <utility>
-#include <vector>
-
-#include "parquet/column_reader.h"
-#include "parquet/exception.h"
-#include "parquet/platform.h"
-#include "parquet/schema.h"
-#include "parquet/types.h"
-
-namespace parquet {
-
-static constexpr int64_t DEFAULT_SCANNER_BATCH_SIZE = 128;
-
-class PARQUET_EXPORT Scanner {
- public:
- explicit Scanner(std::shared_ptr<ColumnReader> reader,
- int64_t batch_size = DEFAULT_SCANNER_BATCH_SIZE,
- ::arrow::MemoryPool* pool = ::arrow::default_memory_pool())
- : batch_size_(batch_size),
- level_offset_(0),
- levels_buffered_(0),
- value_buffer_(AllocateBuffer(pool)),
- value_offset_(0),
- values_buffered_(0),
- reader_(std::move(reader)) {
- def_levels_.resize(descr()->max_definition_level() > 0 ? batch_size_ : 0);
- rep_levels_.resize(descr()->max_repetition_level() > 0 ? batch_size_ : 0);
- }
-
- virtual ~Scanner() {}
-
- static std::shared_ptr<Scanner> Make(
- std::shared_ptr<ColumnReader> col_reader,
- int64_t batch_size = DEFAULT_SCANNER_BATCH_SIZE,
- ::arrow::MemoryPool* pool = ::arrow::default_memory_pool());
-
- virtual void PrintNext(std::ostream& out, int width, bool with_levels = false) = 0;
-
- bool HasNext() { return level_offset_ < levels_buffered_ || reader_->HasNext(); }
-
- const ColumnDescriptor* descr() const { return reader_->descr(); }
-
- int64_t batch_size() const { return batch_size_; }
-
- void SetBatchSize(int64_t batch_size) { batch_size_ = batch_size; }
-
- protected:
- int64_t batch_size_;
-
- std::vector<int16_t> def_levels_;
- std::vector<int16_t> rep_levels_;
- int level_offset_;
- int levels_buffered_;
-
- std::shared_ptr<ResizableBuffer> value_buffer_;
- int value_offset_;
- int64_t values_buffered_;
- std::shared_ptr<ColumnReader> reader_;
-};
-
-template <typename DType>
-class PARQUET_TEMPLATE_CLASS_EXPORT TypedScanner : public Scanner {
- public:
- typedef typename DType::c_type T;
-
- explicit TypedScanner(std::shared_ptr<ColumnReader> reader,
- int64_t batch_size = DEFAULT_SCANNER_BATCH_SIZE,
- ::arrow::MemoryPool* pool = ::arrow::default_memory_pool())
- : Scanner(std::move(reader), batch_size, pool) {
- typed_reader_ = static_cast<TypedColumnReader<DType>*>(reader_.get());
- int value_byte_size = type_traits<DType::type_num>::value_byte_size;
- PARQUET_THROW_NOT_OK(value_buffer_->Resize(batch_size_ * value_byte_size));
- values_ = reinterpret_cast<T*>(value_buffer_->mutable_data());
- }
-
- virtual ~TypedScanner() {}
-
- bool NextLevels(int16_t* def_level, int16_t* rep_level) {
- if (level_offset_ == levels_buffered_) {
- levels_buffered_ = static_cast<int>(
- typed_reader_->ReadBatch(static_cast<int>(batch_size_), def_levels_.data(),
- rep_levels_.data(), values_, &values_buffered_));
-
- value_offset_ = 0;
- level_offset_ = 0;
- if (!levels_buffered_) {
- return false;
- }
- }
- *def_level = descr()->max_definition_level() > 0 ? def_levels_[level_offset_] : 0;
- *rep_level = descr()->max_repetition_level() > 0 ? rep_levels_[level_offset_] : 0;
- level_offset_++;
- return true;
- }
-
- bool Next(T* val, int16_t* def_level, int16_t* rep_level, bool* is_null) {
- if (level_offset_ == levels_buffered_) {
- if (!HasNext()) {
- // Out of data pages
- return false;
- }
- }
-
- NextLevels(def_level, rep_level);
- *is_null = *def_level < descr()->max_definition_level();
-
- if (*is_null) {
- return true;
- }
-
- if (value_offset_ == values_buffered_) {
- throw ParquetException("Value was non-null, but has not been buffered");
- }
- *val = values_[value_offset_++];
- return true;
- }
-
- // Returns true if there is a next value
- bool NextValue(T* val, bool* is_null) {
- if (level_offset_ == levels_buffered_) {
- if (!HasNext()) {
- // Out of data pages
- return false;
- }
- }
-
- // Out of values
- int16_t def_level = -1;
- int16_t rep_level = -1;
- NextLevels(&def_level, &rep_level);
- *is_null = def_level < descr()->max_definition_level();
-
- if (*is_null) {
- return true;
- }
-
- if (value_offset_ == values_buffered_) {
- throw ParquetException("Value was non-null, but has not been buffered");
- }
- *val = values_[value_offset_++];
- return true;
- }
-
- virtual void PrintNext(std::ostream& out, int width, bool with_levels = false) {
- T val{};
- int16_t def_level = -1;
- int16_t rep_level = -1;
- bool is_null = false;
- char buffer[80];
-
- if (!Next(&val, &def_level, &rep_level, &is_null)) {
- throw ParquetException("No more values buffered");
- }
-
- if (with_levels) {
- out << " D:" << def_level << " R:" << rep_level << " ";
- if (!is_null) {
- out << "V:";
- }
- }
-
- if (is_null) {
- std::string null_fmt = format_fwf<ByteArrayType>(width);
- snprintf(buffer, sizeof(buffer), null_fmt.c_str(), "NULL");
- } else {
- FormatValue(&val, buffer, sizeof(buffer), width);
- }
- out << buffer;
- }
-
- private:
- // The ownership of this object is expressed through the reader_ variable in the base class.
- TypedColumnReader<DType>* typed_reader_;
-
- inline void FormatValue(void* val, char* buffer, int bufsize, int width);
-
- T* values_;
-};
-
-template <typename DType>
-inline void TypedScanner<DType>::FormatValue(void* val, char* buffer, int bufsize,
- int width) {
- std::string fmt = format_fwf<DType>(width);
- snprintf(buffer, bufsize, fmt.c_str(), *reinterpret_cast<T*>(val));
-}
-
-template <>
-inline void TypedScanner<Int96Type>::FormatValue(void* val, char* buffer, int bufsize,
- int width) {
- std::string fmt = format_fwf<Int96Type>(width);
- std::string result = Int96ToString(*reinterpret_cast<Int96*>(val));
- snprintf(buffer, bufsize, fmt.c_str(), result.c_str());
-}
-
-template <>
-inline void TypedScanner<ByteArrayType>::FormatValue(void* val, char* buffer, int bufsize,
- int width) {
- std::string fmt = format_fwf<ByteArrayType>(width);
- std::string result = ByteArrayToString(*reinterpret_cast<ByteArray*>(val));
- snprintf(buffer, bufsize, fmt.c_str(), result.c_str());
-}
-
-template <>
-inline void TypedScanner<FLBAType>::FormatValue(void* val, char* buffer, int bufsize,
- int width) {
- std::string fmt = format_fwf<FLBAType>(width);
- std::string result = FixedLenByteArrayToString(
- *reinterpret_cast<FixedLenByteArray*>(val), descr()->type_length());
- snprintf(buffer, bufsize, fmt.c_str(), result.c_str());
-}
-
-typedef TypedScanner<BooleanType> BoolScanner;
-typedef TypedScanner<Int32Type> Int32Scanner;
-typedef TypedScanner<Int64Type> Int64Scanner;
-typedef TypedScanner<Int96Type> Int96Scanner;
-typedef TypedScanner<FloatType> FloatScanner;
-typedef TypedScanner<DoubleType> DoubleScanner;
-typedef TypedScanner<ByteArrayType> ByteArrayScanner;
-typedef TypedScanner<FLBAType> FixedLenByteArrayScanner;
-
-template <typename RType>
-int64_t ScanAll(int32_t batch_size, int16_t* def_levels, int16_t* rep_levels,
- uint8_t* values, int64_t* values_buffered,
- parquet::ColumnReader* reader) {
- typedef typename RType::T Type;
- auto typed_reader = static_cast<RType*>(reader);
- auto vals = reinterpret_cast<Type*>(&values[0]);
- return typed_reader->ReadBatch(batch_size, def_levels, rep_levels, vals,
- values_buffered);
-}
-
-int64_t PARQUET_EXPORT ScanAllValues(int32_t batch_size, int16_t* def_levels,
- int16_t* rep_levels, uint8_t* values,
- int64_t* values_buffered,
- parquet::ColumnReader* reader);
-
-} // namespace parquet
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+
+#include <stdio.h>
+
+#include <cstdint>
+#include <memory>
+#include <ostream>
+#include <string>
+#include <utility>
+#include <vector>
+
+#include "parquet/column_reader.h"
+#include "parquet/exception.h"
+#include "parquet/platform.h"
+#include "parquet/schema.h"
+#include "parquet/types.h"
+
+namespace parquet {
+
+static constexpr int64_t DEFAULT_SCANNER_BATCH_SIZE = 128;
+
+class PARQUET_EXPORT Scanner {
+ public:
+ explicit Scanner(std::shared_ptr<ColumnReader> reader,
+ int64_t batch_size = DEFAULT_SCANNER_BATCH_SIZE,
+ ::arrow::MemoryPool* pool = ::arrow::default_memory_pool())
+ : batch_size_(batch_size),
+ level_offset_(0),
+ levels_buffered_(0),
+ value_buffer_(AllocateBuffer(pool)),
+ value_offset_(0),
+ values_buffered_(0),
+ reader_(std::move(reader)) {
+ def_levels_.resize(descr()->max_definition_level() > 0 ? batch_size_ : 0);
+ rep_levels_.resize(descr()->max_repetition_level() > 0 ? batch_size_ : 0);
+ }
+
+ virtual ~Scanner() {}
+
+ static std::shared_ptr<Scanner> Make(
+ std::shared_ptr<ColumnReader> col_reader,
+ int64_t batch_size = DEFAULT_SCANNER_BATCH_SIZE,
+ ::arrow::MemoryPool* pool = ::arrow::default_memory_pool());
+
+ virtual void PrintNext(std::ostream& out, int width, bool with_levels = false) = 0;
+
+ bool HasNext() { return level_offset_ < levels_buffered_ || reader_->HasNext(); }
+
+ const ColumnDescriptor* descr() const { return reader_->descr(); }
+
+ int64_t batch_size() const { return batch_size_; }
+
+ void SetBatchSize(int64_t batch_size) { batch_size_ = batch_size; }
+
+ protected:
+ int64_t batch_size_;
+
+ std::vector<int16_t> def_levels_;
+ std::vector<int16_t> rep_levels_;
+ int level_offset_;
+ int levels_buffered_;
+
+ std::shared_ptr<ResizableBuffer> value_buffer_;
+ int value_offset_;
+ int64_t values_buffered_;
+ std::shared_ptr<ColumnReader> reader_;
+};
+
+template <typename DType>
+class PARQUET_TEMPLATE_CLASS_EXPORT TypedScanner : public Scanner {
+ public:
+ typedef typename DType::c_type T;
+
+ explicit TypedScanner(std::shared_ptr<ColumnReader> reader,
+ int64_t batch_size = DEFAULT_SCANNER_BATCH_SIZE,
+ ::arrow::MemoryPool* pool = ::arrow::default_memory_pool())
+ : Scanner(std::move(reader), batch_size, pool) {
+ typed_reader_ = static_cast<TypedColumnReader<DType>*>(reader_.get());
+ int value_byte_size = type_traits<DType::type_num>::value_byte_size;
+ PARQUET_THROW_NOT_OK(value_buffer_->Resize(batch_size_ * value_byte_size));
+ values_ = reinterpret_cast<T*>(value_buffer_->mutable_data());
+ }
+
+ virtual ~TypedScanner() {}
+
+ bool NextLevels(int16_t* def_level, int16_t* rep_level) {
+ if (level_offset_ == levels_buffered_) {
+ levels_buffered_ = static_cast<int>(
+ typed_reader_->ReadBatch(static_cast<int>(batch_size_), def_levels_.data(),
+ rep_levels_.data(), values_, &values_buffered_));
+
+ value_offset_ = 0;
+ level_offset_ = 0;
+ if (!levels_buffered_) {
+ return false;
+ }
+ }
+ *def_level = descr()->max_definition_level() > 0 ? def_levels_[level_offset_] : 0;
+ *rep_level = descr()->max_repetition_level() > 0 ? rep_levels_[level_offset_] : 0;
+ level_offset_++;
+ return true;
+ }
+
+ bool Next(T* val, int16_t* def_level, int16_t* rep_level, bool* is_null) {
+ if (level_offset_ == levels_buffered_) {
+ if (!HasNext()) {
+ // Out of data pages
+ return false;
+ }
+ }
+
+ NextLevels(def_level, rep_level);
+ *is_null = *def_level < descr()->max_definition_level();
+
+ if (*is_null) {
+ return true;
+ }
+
+ if (value_offset_ == values_buffered_) {
+ throw ParquetException("Value was non-null, but has not been buffered");
+ }
+ *val = values_[value_offset_++];
+ return true;
+ }
+
+ // Returns true if there is a next value
+ bool NextValue(T* val, bool* is_null) {
+ if (level_offset_ == levels_buffered_) {
+ if (!HasNext()) {
+ // Out of data pages
+ return false;
+ }
+ }
+
+ // Out of values
+ int16_t def_level = -1;
+ int16_t rep_level = -1;
+ NextLevels(&def_level, &rep_level);
+ *is_null = def_level < descr()->max_definition_level();
+
+ if (*is_null) {
+ return true;
+ }
+
+ if (value_offset_ == values_buffered_) {
+ throw ParquetException("Value was non-null, but has not been buffered");
+ }
+ *val = values_[value_offset_++];
+ return true;
+ }
+
+ virtual void PrintNext(std::ostream& out, int width, bool with_levels = false) {
+ T val{};
+ int16_t def_level = -1;
+ int16_t rep_level = -1;
+ bool is_null = false;
+ char buffer[80];
+
+ if (!Next(&val, &def_level, &rep_level, &is_null)) {
+ throw ParquetException("No more values buffered");
+ }
+
+ if (with_levels) {
+ out << " D:" << def_level << " R:" << rep_level << " ";
+ if (!is_null) {
+ out << "V:";
+ }
+ }
+
+ if (is_null) {
+ std::string null_fmt = format_fwf<ByteArrayType>(width);
+ snprintf(buffer, sizeof(buffer), null_fmt.c_str(), "NULL");
+ } else {
+ FormatValue(&val, buffer, sizeof(buffer), width);
+ }
+ out << buffer;
+ }
+
+ private:
+ // The ownership of this object is expressed through the reader_ variable in the base class.
+ TypedColumnReader<DType>* typed_reader_;
+
+ inline void FormatValue(void* val, char* buffer, int bufsize, int width);
+
+ T* values_;
+};
+
+template <typename DType>
+inline void TypedScanner<DType>::FormatValue(void* val, char* buffer, int bufsize,
+ int width) {
+ std::string fmt = format_fwf<DType>(width);
+ snprintf(buffer, bufsize, fmt.c_str(), *reinterpret_cast<T*>(val));
+}
+
+template <>
+inline void TypedScanner<Int96Type>::FormatValue(void* val, char* buffer, int bufsize,
+ int width) {
+ std::string fmt = format_fwf<Int96Type>(width);
+ std::string result = Int96ToString(*reinterpret_cast<Int96*>(val));
+ snprintf(buffer, bufsize, fmt.c_str(), result.c_str());
+}
+
+template <>
+inline void TypedScanner<ByteArrayType>::FormatValue(void* val, char* buffer, int bufsize,
+ int width) {
+ std::string fmt = format_fwf<ByteArrayType>(width);
+ std::string result = ByteArrayToString(*reinterpret_cast<ByteArray*>(val));
+ snprintf(buffer, bufsize, fmt.c_str(), result.c_str());
+}
+
+template <>
+inline void TypedScanner<FLBAType>::FormatValue(void* val, char* buffer, int bufsize,
+ int width) {
+ std::string fmt = format_fwf<FLBAType>(width);
+ std::string result = FixedLenByteArrayToString(
+ *reinterpret_cast<FixedLenByteArray*>(val), descr()->type_length());
+ snprintf(buffer, bufsize, fmt.c_str(), result.c_str());
+}
+
+typedef TypedScanner<BooleanType> BoolScanner;
+typedef TypedScanner<Int32Type> Int32Scanner;
+typedef TypedScanner<Int64Type> Int64Scanner;
+typedef TypedScanner<Int96Type> Int96Scanner;
+typedef TypedScanner<FloatType> FloatScanner;
+typedef TypedScanner<DoubleType> DoubleScanner;
+typedef TypedScanner<ByteArrayType> ByteArrayScanner;
+typedef TypedScanner<FLBAType> FixedLenByteArrayScanner;
+
+template <typename RType>
+int64_t ScanAll(int32_t batch_size, int16_t* def_levels, int16_t* rep_levels,
+ uint8_t* values, int64_t* values_buffered,
+ parquet::ColumnReader* reader) {
+ typedef typename RType::T Type;
+ auto typed_reader = static_cast<RType*>(reader);
+ auto vals = reinterpret_cast<Type*>(&values[0]);
+ return typed_reader->ReadBatch(batch_size, def_levels, rep_levels, vals,
+ values_buffered);
+}
+
+int64_t PARQUET_EXPORT ScanAllValues(int32_t batch_size, int16_t* def_levels,
+ int16_t* rep_levels, uint8_t* values,
+ int64_t* values_buffered,
+ parquet::ColumnReader* reader);
+
+} // namespace parquet
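
For orientation, a minimal caller-side sketch of the Scanner API declared above (an annotation, not part of this diff): it assumes a readable file "example.parquet" with at least one row group and one column, and relies only on the public parquet-cpp entry points (ParquetFileReader::OpenFile, RowGroupReader::Column) plus Scanner::Make/HasNext/PrintNext from this header.

#include <iostream>
#include <memory>

#include "parquet/column_scanner.h"
#include "parquet/file_reader.h"

int main() {
  // Open the file and grab the first column of the first row group.
  std::unique_ptr<parquet::ParquetFileReader> reader =
      parquet::ParquetFileReader::OpenFile("example.parquet");
  std::shared_ptr<parquet::ColumnReader> column = reader->RowGroup(0)->Column(0);

  // Scanner::Make instantiates the TypedScanner matching the column's
  // physical type (Int32Scanner, ByteArrayScanner, ...).
  std::shared_ptr<parquet::Scanner> scanner = parquet::Scanner::Make(column);

  // Print every value, together with its definition/repetition levels.
  while (scanner->HasNext()) {
    scanner->PrintNext(std::cout, /*width=*/16, /*with_levels=*/true);
    std::cout << '\n';
  }
  return 0;
}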
diff --git a/contrib/libs/apache/arrow/cpp/src/parquet/column_writer.cc b/contrib/libs/apache/arrow/cpp/src/parquet/column_writer.cc
index 75df6f0c683..446fe25e644 100644
--- a/contrib/libs/apache/arrow/cpp/src/parquet/column_writer.cc
+++ b/contrib/libs/apache/arrow/cpp/src/parquet/column_writer.cc
@@ -1,2067 +1,2067 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-#include "parquet/column_writer.h"
-
-#include <algorithm>
-#include <cstdint>
-#include <cstring>
-#include <map>
-#include <memory>
-#include <string>
-#include <utility>
-#include <vector>
-
-#include "arrow/array.h"
-#include "arrow/buffer_builder.h"
-#include "arrow/compute/api.h"
-#include "arrow/io/memory.h"
-#include "arrow/status.h"
-#include "arrow/type.h"
-#include "arrow/type_traits.h"
-#include "arrow/util/bit_stream_utils.h"
-#include "arrow/util/bit_util.h"
-#include "arrow/util/bitmap_ops.h"
-#include "arrow/util/checked_cast.h"
-#include "arrow/util/compression.h"
-#include "arrow/util/endian.h"
-#include "arrow/util/logging.h"
-#include "arrow/util/rle_encoding.h"
-#include "arrow/visitor_inline.h"
-#include "parquet/column_page.h"
-#include "parquet/encoding.h"
-#include "parquet/encryption/encryption_internal.h"
-#include "parquet/encryption/internal_file_encryptor.h"
-#include "parquet/level_conversion.h"
-#include "parquet/metadata.h"
-#include "parquet/platform.h"
-#include "parquet/properties.h"
-#include "parquet/schema.h"
-#include "parquet/statistics.h"
-#include "parquet/thrift_internal.h"
-#include "parquet/types.h"
-
-using arrow::Array;
-using arrow::ArrayData;
-using arrow::Datum;
-using arrow::Result;
-using arrow::Status;
-using arrow::BitUtil::BitWriter;
-using arrow::internal::checked_cast;
-using arrow::internal::checked_pointer_cast;
-using arrow::util::RleEncoder;
-
-namespace BitUtil = arrow::BitUtil;
-
-namespace parquet {
-
-namespace {
-
-// Visitor that extracts the value buffer from a FlatArray at a given offset.
-struct ValueBufferSlicer {
- template <typename T>
- ::arrow::enable_if_base_binary<typename T::TypeClass, Status> Visit(const T& array) {
- auto data = array.data();
- buffer_ =
- SliceBuffer(data->buffers[1], data->offset * sizeof(typename T::offset_type),
- data->length * sizeof(typename T::offset_type));
- return Status::OK();
- }
-
- template <typename T>
- ::arrow::enable_if_fixed_size_binary<typename T::TypeClass, Status> Visit(
- const T& array) {
- auto data = array.data();
- buffer_ = SliceBuffer(data->buffers[1], data->offset * array.byte_width(),
- data->length * array.byte_width());
- return Status::OK();
- }
-
- template <typename T>
- ::arrow::enable_if_t<::arrow::has_c_type<typename T::TypeClass>::value &&
- !std::is_same<BooleanType, typename T::TypeClass>::value,
- Status>
- Visit(const T& array) {
- auto data = array.data();
- buffer_ = SliceBuffer(
- data->buffers[1],
- ::arrow::TypeTraits<typename T::TypeClass>::bytes_required(data->offset),
- ::arrow::TypeTraits<typename T::TypeClass>::bytes_required(data->length));
- return Status::OK();
- }
-
- Status Visit(const ::arrow::BooleanArray& array) {
- auto data = array.data();
- if (BitUtil::IsMultipleOf8(data->offset)) {
- buffer_ = SliceBuffer(data->buffers[1], BitUtil::BytesForBits(data->offset),
- BitUtil::BytesForBits(data->length));
- return Status::OK();
- }
- PARQUET_ASSIGN_OR_THROW(buffer_,
- ::arrow::internal::CopyBitmap(pool_, data->buffers[1]->data(),
- data->offset, data->length));
- return Status::OK();
- }
-#define NOT_IMPLEMENTED_VISIT(ArrowTypePrefix) \
- Status Visit(const ::arrow::ArrowTypePrefix##Array& array) { \
- return Status::NotImplemented("Slicing not implemented for " #ArrowTypePrefix); \
- }
-
- NOT_IMPLEMENTED_VISIT(Null);
- NOT_IMPLEMENTED_VISIT(Union);
- NOT_IMPLEMENTED_VISIT(List);
- NOT_IMPLEMENTED_VISIT(LargeList);
- NOT_IMPLEMENTED_VISIT(Struct);
- NOT_IMPLEMENTED_VISIT(FixedSizeList);
- NOT_IMPLEMENTED_VISIT(Dictionary);
- NOT_IMPLEMENTED_VISIT(Extension);
-
-#undef NOT_IMPLEMENTED_VISIT
-
- MemoryPool* pool_;
- std::shared_ptr<Buffer> buffer_;
-};
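// Illustrative arithmetic (annotation, not upstream code): for an
// ::arrow::Int32Array with offset 3 and length 5, the c-type visitor above
// computes bytes_required(3) = 12 and bytes_required(5) = 20, i.e. it slices
// 20 bytes starting at byte 12 of the values buffer, avoiding any copy.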
-
-internal::LevelInfo ComputeLevelInfo(const ColumnDescriptor* descr) {
- internal::LevelInfo level_info;
- level_info.def_level = descr->max_definition_level();
- level_info.rep_level = descr->max_repetition_level();
-
- int16_t min_spaced_def_level = descr->max_definition_level();
- const ::parquet::schema::Node* node = descr->schema_node().get();
- while (node != nullptr && !node->is_repeated()) {
- if (node->is_optional()) {
- min_spaced_def_level--;
- }
- node = node->parent();
- }
- level_info.repeated_ancestor_def_level = min_spaced_def_level;
- return level_info;
-}
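// Worked example (annotation, not upstream code): for the schema
//   optional group a { repeated group b { optional int32 c } }
// column c has max_definition_level = 3. The loop above walks upward from c,
// decrementing once for the optional leaf and stopping at the repeated node b,
// so repeated_ancestor_def_level = 2: any def_level >= 2 means a (possibly
// null) slot exists for the leaf value.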
-
-template <class T>
-inline const T* AddIfNotNull(const T* base, int64_t offset) {
- if (base != nullptr) {
- return base + offset;
- }
- return nullptr;
-}
-
-} // namespace
-
-LevelEncoder::LevelEncoder() {}
-LevelEncoder::~LevelEncoder() {}
-
-void LevelEncoder::Init(Encoding::type encoding, int16_t max_level,
- int num_buffered_values, uint8_t* data, int data_size) {
- bit_width_ = BitUtil::Log2(max_level + 1);
- encoding_ = encoding;
- switch (encoding) {
- case Encoding::RLE: {
- rle_encoder_.reset(new RleEncoder(data, data_size, bit_width_));
- break;
- }
- case Encoding::BIT_PACKED: {
- int num_bytes =
- static_cast<int>(BitUtil::BytesForBits(num_buffered_values * bit_width_));
- bit_packed_encoder_.reset(new BitWriter(data, num_bytes));
- break;
- }
- default:
- throw ParquetException("Unknown encoding type for levels.");
- }
-}
-
-int LevelEncoder::MaxBufferSize(Encoding::type encoding, int16_t max_level,
- int num_buffered_values) {
- int bit_width = BitUtil::Log2(max_level + 1);
- int num_bytes = 0;
- switch (encoding) {
- case Encoding::RLE: {
- // TODO: Due to the way we currently check whether the buffer is full enough,
- // we need to keep MinBufferSize as headroom.
- num_bytes = RleEncoder::MaxBufferSize(bit_width, num_buffered_values) +
- RleEncoder::MinBufferSize(bit_width);
- break;
- }
- case Encoding::BIT_PACKED: {
- num_bytes =
- static_cast<int>(BitUtil::BytesForBits(num_buffered_values * bit_width));
- break;
- }
- default:
- throw ParquetException("Unknown encoding type for levels.");
- }
- return num_bytes;
-}
-
-int LevelEncoder::Encode(int batch_size, const int16_t* levels) {
- int num_encoded = 0;
- if (!rle_encoder_ && !bit_packed_encoder_) {
- throw ParquetException("Level encoders are not initialized.");
- }
-
- if (encoding_ == Encoding::RLE) {
- for (int i = 0; i < batch_size; ++i) {
- if (!rle_encoder_->Put(*(levels + i))) {
- break;
- }
- ++num_encoded;
- }
- rle_encoder_->Flush();
- rle_length_ = rle_encoder_->len();
- } else {
- for (int i = 0; i < batch_size; ++i) {
- if (!bit_packed_encoder_->PutValue(*(levels + i), bit_width_)) {
- break;
- }
- ++num_encoded;
- }
- bit_packed_encoder_->Flush();
- }
- return num_encoded;
-}
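// Worked example (annotation, not upstream code): with max_level = 1 the bit
// width is Log2(2) = 1, so a batch of 1000 definition levels that are all 1
// collapses to a single RLE run: one varint run header plus one payload byte,
// far below the MaxBufferSize() bound computed above.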
-
-// ----------------------------------------------------------------------
-// PageWriter implementation
-
-// This subclass delimits pages appearing in a serialized stream, each preceded
-// by a serialized Thrift format::PageHeader indicating the type of each page
-// and the page metadata.
-class SerializedPageWriter : public PageWriter {
- public:
- SerializedPageWriter(std::shared_ptr<ArrowOutputStream> sink, Compression::type codec,
- int compression_level, ColumnChunkMetaDataBuilder* metadata,
- int16_t row_group_ordinal, int16_t column_chunk_ordinal,
- MemoryPool* pool = ::arrow::default_memory_pool(),
- std::shared_ptr<Encryptor> meta_encryptor = nullptr,
- std::shared_ptr<Encryptor> data_encryptor = nullptr)
- : sink_(std::move(sink)),
- metadata_(metadata),
- pool_(pool),
- num_values_(0),
- dictionary_page_offset_(0),
- data_page_offset_(0),
- total_uncompressed_size_(0),
- total_compressed_size_(0),
- page_ordinal_(0),
- row_group_ordinal_(row_group_ordinal),
- column_ordinal_(column_chunk_ordinal),
- meta_encryptor_(std::move(meta_encryptor)),
- data_encryptor_(std::move(data_encryptor)),
- encryption_buffer_(AllocateBuffer(pool, 0)) {
- if (data_encryptor_ != nullptr || meta_encryptor_ != nullptr) {
- InitEncryption();
- }
- compressor_ = GetCodec(codec, compression_level);
- thrift_serializer_.reset(new ThriftSerializer);
- }
-
- int64_t WriteDictionaryPage(const DictionaryPage& page) override {
- int64_t uncompressed_size = page.size();
- std::shared_ptr<Buffer> compressed_data;
- if (has_compressor()) {
- auto buffer = std::static_pointer_cast<ResizableBuffer>(
- AllocateBuffer(pool_, uncompressed_size));
- Compress(*(page.buffer().get()), buffer.get());
- compressed_data = std::static_pointer_cast<Buffer>(buffer);
- } else {
- compressed_data = page.buffer();
- }
-
- format::DictionaryPageHeader dict_page_header;
- dict_page_header.__set_num_values(page.num_values());
- dict_page_header.__set_encoding(ToThrift(page.encoding()));
- dict_page_header.__set_is_sorted(page.is_sorted());
-
- const uint8_t* output_data_buffer = compressed_data->data();
- int32_t output_data_len = static_cast<int32_t>(compressed_data->size());
-
- if (data_encryptor_.get()) {
- UpdateEncryption(encryption::kDictionaryPage);
- PARQUET_THROW_NOT_OK(encryption_buffer_->Resize(
- data_encryptor_->CiphertextSizeDelta() + output_data_len, false));
- output_data_len = data_encryptor_->Encrypt(compressed_data->data(), output_data_len,
- encryption_buffer_->mutable_data());
- output_data_buffer = encryption_buffer_->data();
- }
-
- format::PageHeader page_header;
- page_header.__set_type(format::PageType::DICTIONARY_PAGE);
- page_header.__set_uncompressed_page_size(static_cast<int32_t>(uncompressed_size));
- page_header.__set_compressed_page_size(static_cast<int32_t>(output_data_len));
- page_header.__set_dictionary_page_header(dict_page_header);
- // TODO(PARQUET-594) crc checksum
-
- PARQUET_ASSIGN_OR_THROW(int64_t start_pos, sink_->Tell());
- if (dictionary_page_offset_ == 0) {
- dictionary_page_offset_ = start_pos;
- }
-
- if (meta_encryptor_) {
- UpdateEncryption(encryption::kDictionaryPageHeader);
- }
- const int64_t header_size =
- thrift_serializer_->Serialize(&page_header, sink_.get(), meta_encryptor_);
-
- PARQUET_THROW_NOT_OK(sink_->Write(output_data_buffer, output_data_len));
-
- total_uncompressed_size_ += uncompressed_size + header_size;
- total_compressed_size_ += output_data_len + header_size;
- ++dict_encoding_stats_[page.encoding()];
- return uncompressed_size + header_size;
- }
-
- void Close(bool has_dictionary, bool fallback) override {
- if (meta_encryptor_ != nullptr) {
- UpdateEncryption(encryption::kColumnMetaData);
- }
- // index_page_offset = -1 since index pages are not supported
- metadata_->Finish(num_values_, dictionary_page_offset_, -1, data_page_offset_,
- total_compressed_size_, total_uncompressed_size_, has_dictionary,
- fallback, dict_encoding_stats_, data_encoding_stats_,
- meta_encryptor_);
- // Write metadata at end of column chunk
- metadata_->WriteTo(sink_.get());
- }
-
- /**
- * Compress a buffer.
- */
- void Compress(const Buffer& src_buffer, ResizableBuffer* dest_buffer) override {
- DCHECK(compressor_ != nullptr);
-
- // Compress the data
- int64_t max_compressed_size =
- compressor_->MaxCompressedLen(src_buffer.size(), src_buffer.data());
-
- // Pass shrink_to_fit = false to Resize: the underlying buffer only keeps
- // growing, and resizing to a smaller size does not reallocate.
- PARQUET_THROW_NOT_OK(dest_buffer->Resize(max_compressed_size, false));
-
- PARQUET_ASSIGN_OR_THROW(
- int64_t compressed_size,
- compressor_->Compress(src_buffer.size(), src_buffer.data(), max_compressed_size,
- dest_buffer->mutable_data()));
- PARQUET_THROW_NOT_OK(dest_buffer->Resize(compressed_size, false));
- }
-
- int64_t WriteDataPage(const DataPage& page) override {
- const int64_t uncompressed_size = page.uncompressed_size();
- std::shared_ptr<Buffer> compressed_data = page.buffer();
- const uint8_t* output_data_buffer = compressed_data->data();
- int32_t output_data_len = static_cast<int32_t>(compressed_data->size());
-
- if (data_encryptor_.get()) {
- PARQUET_THROW_NOT_OK(encryption_buffer_->Resize(
- data_encryptor_->CiphertextSizeDelta() + output_data_len, false));
- UpdateEncryption(encryption::kDataPage);
- output_data_len = data_encryptor_->Encrypt(compressed_data->data(), output_data_len,
- encryption_buffer_->mutable_data());
- output_data_buffer = encryption_buffer_->data();
- }
-
- format::PageHeader page_header;
- page_header.__set_uncompressed_page_size(static_cast<int32_t>(uncompressed_size));
- page_header.__set_compressed_page_size(static_cast<int32_t>(output_data_len));
- // TODO(PARQUET-594) crc checksum
-
- if (page.type() == PageType::DATA_PAGE) {
- const DataPageV1& v1_page = checked_cast<const DataPageV1&>(page);
- SetDataPageHeader(page_header, v1_page);
- } else if (page.type() == PageType::DATA_PAGE_V2) {
- const DataPageV2& v2_page = checked_cast<const DataPageV2&>(page);
- SetDataPageV2Header(page_header, v2_page);
- } else {
- throw ParquetException("Unexpected page type");
- }
-
- PARQUET_ASSIGN_OR_THROW(int64_t start_pos, sink_->Tell());
- if (page_ordinal_ == 0) {
- data_page_offset_ = start_pos;
- }
-
- if (meta_encryptor_) {
- UpdateEncryption(encryption::kDataPageHeader);
- }
- const int64_t header_size =
- thrift_serializer_->Serialize(&page_header, sink_.get(), meta_encryptor_);
- PARQUET_THROW_NOT_OK(sink_->Write(output_data_buffer, output_data_len));
-
- total_uncompressed_size_ += uncompressed_size + header_size;
- total_compressed_size_ += output_data_len + header_size;
- num_values_ += page.num_values();
- ++data_encoding_stats_[page.encoding()];
- ++page_ordinal_;
- return uncompressed_size + header_size;
- }
-
- void SetDataPageHeader(format::PageHeader& page_header, const DataPageV1& page) {
- format::DataPageHeader data_page_header;
- data_page_header.__set_num_values(page.num_values());
- data_page_header.__set_encoding(ToThrift(page.encoding()));
- data_page_header.__set_definition_level_encoding(
- ToThrift(page.definition_level_encoding()));
- data_page_header.__set_repetition_level_encoding(
- ToThrift(page.repetition_level_encoding()));
- data_page_header.__set_statistics(ToThrift(page.statistics()));
-
- page_header.__set_type(format::PageType::DATA_PAGE);
- page_header.__set_data_page_header(data_page_header);
- }
-
- void SetDataPageV2Header(format::PageHeader& page_header, const DataPageV2& page) {
- format::DataPageHeaderV2 data_page_header;
- data_page_header.__set_num_values(page.num_values());
- data_page_header.__set_num_nulls(page.num_nulls());
- data_page_header.__set_num_rows(page.num_rows());
- data_page_header.__set_encoding(ToThrift(page.encoding()));
-
- data_page_header.__set_definition_levels_byte_length(
- page.definition_levels_byte_length());
- data_page_header.__set_repetition_levels_byte_length(
- page.repetition_levels_byte_length());
-
- data_page_header.__set_is_compressed(page.is_compressed());
- data_page_header.__set_statistics(ToThrift(page.statistics()));
-
- page_header.__set_type(format::PageType::DATA_PAGE_V2);
- page_header.__set_data_page_header_v2(data_page_header);
- }
-
- bool has_compressor() override { return (compressor_ != nullptr); }
-
- int64_t num_values() { return num_values_; }
-
- int64_t dictionary_page_offset() { return dictionary_page_offset_; }
-
- int64_t data_page_offset() { return data_page_offset_; }
-
- int64_t total_compressed_size() { return total_compressed_size_; }
-
- int64_t total_uncompressed_size() { return total_uncompressed_size_; }
-
- private:
- // To allow UpdateEncryption on Close
- friend class BufferedPageWriter;
-
- void InitEncryption() {
- // Prepare the AAD for quick update later.
- if (data_encryptor_ != nullptr) {
- data_page_aad_ = encryption::CreateModuleAad(
- data_encryptor_->file_aad(), encryption::kDataPage, row_group_ordinal_,
- column_ordinal_, kNonPageOrdinal);
- }
- if (meta_encryptor_ != nullptr) {
- data_page_header_aad_ = encryption::CreateModuleAad(
- meta_encryptor_->file_aad(), encryption::kDataPageHeader, row_group_ordinal_,
- column_ordinal_, kNonPageOrdinal);
- }
- }
-
- void UpdateEncryption(int8_t module_type) {
- switch (module_type) {
- case encryption::kColumnMetaData: {
- meta_encryptor_->UpdateAad(encryption::CreateModuleAad(
- meta_encryptor_->file_aad(), module_type, row_group_ordinal_, column_ordinal_,
- kNonPageOrdinal));
- break;
- }
- case encryption::kDataPage: {
- encryption::QuickUpdatePageAad(data_page_aad_, page_ordinal_);
- data_encryptor_->UpdateAad(data_page_aad_);
- break;
- }
- case encryption::kDataPageHeader: {
- encryption::QuickUpdatePageAad(data_page_header_aad_, page_ordinal_);
- meta_encryptor_->UpdateAad(data_page_header_aad_);
- break;
- }
- case encryption::kDictionaryPageHeader: {
- meta_encryptor_->UpdateAad(encryption::CreateModuleAad(
- meta_encryptor_->file_aad(), module_type, row_group_ordinal_, column_ordinal_,
- kNonPageOrdinal));
- break;
- }
- case encryption::kDictionaryPage: {
- data_encryptor_->UpdateAad(encryption::CreateModuleAad(
- data_encryptor_->file_aad(), module_type, row_group_ordinal_, column_ordinal_,
- kNonPageOrdinal));
- break;
- }
- default:
- throw ParquetException("Unknown module type in UpdateEncryption");
- }
- }
-
- std::shared_ptr<ArrowOutputStream> sink_;
- ColumnChunkMetaDataBuilder* metadata_;
- MemoryPool* pool_;
- int64_t num_values_;
- int64_t dictionary_page_offset_;
- int64_t data_page_offset_;
- int64_t total_uncompressed_size_;
- int64_t total_compressed_size_;
- int16_t page_ordinal_;
- int16_t row_group_ordinal_;
- int16_t column_ordinal_;
-
- std::unique_ptr<ThriftSerializer> thrift_serializer_;
-
- // Compression codec to use.
- std::unique_ptr<::arrow::util::Codec> compressor_;
-
- std::string data_page_aad_;
- std::string data_page_header_aad_;
-
- std::shared_ptr<Encryptor> meta_encryptor_;
- std::shared_ptr<Encryptor> data_encryptor_;
-
- std::shared_ptr<ResizableBuffer> encryption_buffer_;
-
- std::map<Encoding::type, int32_t> dict_encoding_stats_;
- std::map<Encoding::type, int32_t> data_encoding_stats_;
-};
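// Resulting on-disk layout (annotation, not upstream code): each Write*Page
// call above appends one header/body pair to the sink, so a column chunk is
// serialized as
//
//   [Thrift PageHeader][dictionary page]   (at most one, first if present)
//   [Thrift PageHeader][data page 0]
//   [Thrift PageHeader][data page 1] ...
//   [Thrift ColumnMetaData]                (appended by Close())
//
// with dictionary_page_offset_ / data_page_offset_ recording the absolute
// sink positions of the first header of each kind.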
-
-// This implementation of the PageWriter writes to the final sink on Close().
-class BufferedPageWriter : public PageWriter {
- public:
- BufferedPageWriter(std::shared_ptr<ArrowOutputStream> sink, Compression::type codec,
- int compression_level, ColumnChunkMetaDataBuilder* metadata,
- int16_t row_group_ordinal, int16_t current_column_ordinal,
- MemoryPool* pool = ::arrow::default_memory_pool(),
- std::shared_ptr<Encryptor> meta_encryptor = nullptr,
- std::shared_ptr<Encryptor> data_encryptor = nullptr)
- : final_sink_(std::move(sink)), metadata_(metadata), has_dictionary_pages_(false) {
- in_memory_sink_ = CreateOutputStream(pool);
- pager_ = std::unique_ptr<SerializedPageWriter>(
- new SerializedPageWriter(in_memory_sink_, codec, compression_level, metadata,
- row_group_ordinal, current_column_ordinal, pool,
- std::move(meta_encryptor), std::move(data_encryptor)));
- }
-
- int64_t WriteDictionaryPage(const DictionaryPage& page) override {
- has_dictionary_pages_ = true;
- return pager_->WriteDictionaryPage(page);
- }
-
- void Close(bool has_dictionary, bool fallback) override {
- if (pager_->meta_encryptor_ != nullptr) {
- pager_->UpdateEncryption(encryption::kColumnMetaData);
- }
- // index_page_offset = -1 since index pages are not supported
- PARQUET_ASSIGN_OR_THROW(int64_t final_position, final_sink_->Tell());
- // dictionary page offset should be 0 iff there are no dictionary pages
- auto dictionary_page_offset =
- has_dictionary_pages_ ? pager_->dictionary_page_offset() + final_position : 0;
- metadata_->Finish(pager_->num_values(), dictionary_page_offset, -1,
- pager_->data_page_offset() + final_position,
- pager_->total_compressed_size(), pager_->total_uncompressed_size(),
- has_dictionary, fallback, pager_->dict_encoding_stats_,
- pager_->data_encoding_stats_, pager_->meta_encryptor_);
-
- // Write metadata at end of column chunk
- metadata_->WriteTo(in_memory_sink_.get());
-
- // flush everything to the serialized sink
- PARQUET_ASSIGN_OR_THROW(auto buffer, in_memory_sink_->Finish());
- PARQUET_THROW_NOT_OK(final_sink_->Write(buffer));
- }
-
- int64_t WriteDataPage(const DataPage& page) override {
- return pager_->WriteDataPage(page);
- }
-
- void Compress(const Buffer& src_buffer, ResizableBuffer* dest_buffer) override {
- pager_->Compress(src_buffer, dest_buffer);
- }
-
- bool has_compressor() override { return pager_->has_compressor(); }
-
- private:
- std::shared_ptr<ArrowOutputStream> final_sink_;
- ColumnChunkMetaDataBuilder* metadata_;
- std::shared_ptr<::arrow::io::BufferOutputStream> in_memory_sink_;
- std::unique_ptr<SerializedPageWriter> pager_;
- bool has_dictionary_pages_;
-};
-
-std::unique_ptr<PageWriter> PageWriter::Open(
- std::shared_ptr<ArrowOutputStream> sink, Compression::type codec,
- int compression_level, ColumnChunkMetaDataBuilder* metadata,
- int16_t row_group_ordinal, int16_t column_chunk_ordinal, MemoryPool* pool,
- bool buffered_row_group, std::shared_ptr<Encryptor> meta_encryptor,
- std::shared_ptr<Encryptor> data_encryptor) {
- if (buffered_row_group) {
- return std::unique_ptr<PageWriter>(
- new BufferedPageWriter(std::move(sink), codec, compression_level, metadata,
- row_group_ordinal, column_chunk_ordinal, pool,
- std::move(meta_encryptor), std::move(data_encryptor)));
- } else {
- return std::unique_ptr<PageWriter>(
- new SerializedPageWriter(std::move(sink), codec, compression_level, metadata,
- row_group_ordinal, column_chunk_ordinal, pool,
- std::move(meta_encryptor), std::move(data_encryptor)));
- }
-}
-
-// ----------------------------------------------------------------------
-// ColumnWriter
-
-const std::shared_ptr<WriterProperties>& default_writer_properties() {
- static std::shared_ptr<WriterProperties> default_writer_properties =
- WriterProperties::Builder().build();
- return default_writer_properties;
-}
-
-class ColumnWriterImpl {
- public:
- ColumnWriterImpl(ColumnChunkMetaDataBuilder* metadata,
- std::unique_ptr<PageWriter> pager, const bool use_dictionary,
- Encoding::type encoding, const WriterProperties* properties)
- : metadata_(metadata),
- descr_(metadata->descr()),
- level_info_(ComputeLevelInfo(metadata->descr())),
- pager_(std::move(pager)),
- has_dictionary_(use_dictionary),
- encoding_(encoding),
- properties_(properties),
- allocator_(properties->memory_pool()),
- num_buffered_values_(0),
- num_buffered_encoded_values_(0),
- rows_written_(0),
- total_bytes_written_(0),
- total_compressed_bytes_(0),
- closed_(false),
- fallback_(false),
- definition_levels_sink_(allocator_),
- repetition_levels_sink_(allocator_) {
- definition_levels_rle_ =
- std::static_pointer_cast<ResizableBuffer>(AllocateBuffer(allocator_, 0));
- repetition_levels_rle_ =
- std::static_pointer_cast<ResizableBuffer>(AllocateBuffer(allocator_, 0));
- uncompressed_data_ =
- std::static_pointer_cast<ResizableBuffer>(AllocateBuffer(allocator_, 0));
-
- if (pager_->has_compressor()) {
- compressor_temp_buffer_ =
- std::static_pointer_cast<ResizableBuffer>(AllocateBuffer(allocator_, 0));
- }
- }
-
- virtual ~ColumnWriterImpl() = default;
-
- int64_t Close();
-
- protected:
- virtual std::shared_ptr<Buffer> GetValuesBuffer() = 0;
-
- // Serializes Dictionary Page if enabled
- virtual void WriteDictionaryPage() = 0;
-
- // Plain-encoded statistics of the current page
- virtual EncodedStatistics GetPageStatistics() = 0;
-
- // Plain-encoded statistics of the whole chunk
- virtual EncodedStatistics GetChunkStatistics() = 0;
-
- // Merges page statistics into chunk statistics, then resets the values
- virtual void ResetPageStatistics() = 0;
-
- // Adds Data Pages to an in-memory buffer in dictionary encoding mode
- // Serializes the Data Pages in other encoding modes
- void AddDataPage();
-
- void BuildDataPageV1(int64_t definition_levels_rle_size,
- int64_t repetition_levels_rle_size, int64_t uncompressed_size,
- const std::shared_ptr<Buffer>& values);
- void BuildDataPageV2(int64_t definition_levels_rle_size,
- int64_t repetition_levels_rle_size, int64_t uncompressed_size,
- const std::shared_ptr<Buffer>& values);
-
- // Serializes Data Pages
- void WriteDataPage(const DataPage& page) {
- total_bytes_written_ += pager_->WriteDataPage(page);
- }
-
- // Write multiple definition levels
- void WriteDefinitionLevels(int64_t num_levels, const int16_t* levels) {
- DCHECK(!closed_);
- PARQUET_THROW_NOT_OK(
- definition_levels_sink_.Append(levels, sizeof(int16_t) * num_levels));
- }
-
- // Write multiple repetition levels
- void WriteRepetitionLevels(int64_t num_levels, const int16_t* levels) {
- DCHECK(!closed_);
- PARQUET_THROW_NOT_OK(
- repetition_levels_sink_.Append(levels, sizeof(int16_t) * num_levels));
- }
-
- // RLE encode the src_buffer into dest_buffer and return the encoded size
- int64_t RleEncodeLevels(const void* src_buffer, ResizableBuffer* dest_buffer,
- int16_t max_level, bool include_length_prefix = true);
-
- // Serialize the buffered Data Pages
- void FlushBufferedDataPages();
-
- ColumnChunkMetaDataBuilder* metadata_;
- const ColumnDescriptor* descr_;
- // scratch buffer if validity bits need to be recalculated.
- std::shared_ptr<ResizableBuffer> bits_buffer_;
- const internal::LevelInfo level_info_;
-
- std::unique_ptr<PageWriter> pager_;
-
- bool has_dictionary_;
- Encoding::type encoding_;
- const WriterProperties* properties_;
-
- LevelEncoder level_encoder_;
-
- MemoryPool* allocator_;
-
- // The total number of values stored in the data page. This is the maximum of
- // the number of encoded definition levels or encoded values. For
- // non-repeated, required columns, this is equal to the number of encoded
- // values. For repeated or optional values, there may be fewer data values
- // than levels, and this tells you how many encoded levels there are in that
- // case.
- int64_t num_buffered_values_;
-
- // The total number of stored values. For repeated or optional values, this
- // number may be lower than num_buffered_values_.
- int64_t num_buffered_encoded_values_;
-
- // Total number of rows written with this ColumnWriter
- int rows_written_;
-
- // Records the total number of uncompressed bytes written by the serializer
- int64_t total_bytes_written_;
-
- // Records the current number of compressed bytes in a column
- int64_t total_compressed_bytes_;
-
- // Flag to check if the Writer has been closed
- bool closed_;
-
- // Flag to infer if dictionary encoding has fallen back to PLAIN
- bool fallback_;
-
- ::arrow::BufferBuilder definition_levels_sink_;
- ::arrow::BufferBuilder repetition_levels_sink_;
-
- std::shared_ptr<ResizableBuffer> definition_levels_rle_;
- std::shared_ptr<ResizableBuffer> repetition_levels_rle_;
-
- std::shared_ptr<ResizableBuffer> uncompressed_data_;
- std::shared_ptr<ResizableBuffer> compressor_temp_buffer_;
-
- std::vector<std::unique_ptr<DataPage>> data_pages_;
-
- private:
- void InitSinks() {
- definition_levels_sink_.Rewind(0);
- repetition_levels_sink_.Rewind(0);
- }
-
- // Concatenate the encoded levels and values into one buffer
- void ConcatenateBuffers(int64_t definition_levels_rle_size,
- int64_t repetition_levels_rle_size,
- const std::shared_ptr<Buffer>& values, uint8_t* combined) {
- memcpy(combined, repetition_levels_rle_->data(), repetition_levels_rle_size);
- combined += repetition_levels_rle_size;
- memcpy(combined, definition_levels_rle_->data(), definition_levels_rle_size);
- combined += definition_levels_rle_size;
- memcpy(combined, values->data(), values->size());
- }
-};
-
-// return the size of the encoded buffer
-int64_t ColumnWriterImpl::RleEncodeLevels(const void* src_buffer,
- ResizableBuffer* dest_buffer, int16_t max_level,
- bool include_length_prefix) {
- // V1 DataPage includes the length of the RLE level as a prefix.
- int32_t prefix_size = include_length_prefix ? sizeof(int32_t) : 0;
-
- // TODO: This only works due to some RLE specifics
- int64_t rle_size = LevelEncoder::MaxBufferSize(Encoding::RLE, max_level,
- static_cast<int>(num_buffered_values_)) +
- prefix_size;
-
- // Pass shrink_to_fit = false to Resize: the underlying buffer only keeps
- // growing, and resizing to a smaller size does not reallocate.
- PARQUET_THROW_NOT_OK(dest_buffer->Resize(rle_size, false));
-
- level_encoder_.Init(Encoding::RLE, max_level, static_cast<int>(num_buffered_values_),
- dest_buffer->mutable_data() + prefix_size,
- static_cast<int>(dest_buffer->size() - prefix_size));
- int encoded = level_encoder_.Encode(static_cast<int>(num_buffered_values_),
- reinterpret_cast<const int16_t*>(src_buffer));
- DCHECK_EQ(encoded, num_buffered_values_);
-
- if (include_length_prefix) {
- reinterpret_cast<int32_t*>(dest_buffer->mutable_data())[0] = level_encoder_.len();
- }
-
- return level_encoder_.len() + prefix_size;
-}
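// Buffer layout produced above (annotation, not upstream code):
//
//   V1 data page levels: [int32 byte length][RLE/bit-packed hybrid runs]
//   V2 data page levels: [RLE/bit-packed hybrid runs]
//
// V2 can drop the length prefix because the page header itself carries
// definition_levels_byte_length / repetition_levels_byte_length (see
// SetDataPageV2Header above).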
-
-void ColumnWriterImpl::AddDataPage() {
- int64_t definition_levels_rle_size = 0;
- int64_t repetition_levels_rle_size = 0;
-
- std::shared_ptr<Buffer> values = GetValuesBuffer();
- bool is_v1_data_page = properties_->data_page_version() == ParquetDataPageVersion::V1;
-
- if (descr_->max_definition_level() > 0) {
- definition_levels_rle_size = RleEncodeLevels(
- definition_levels_sink_.data(), definition_levels_rle_.get(),
- descr_->max_definition_level(), /*include_length_prefix=*/is_v1_data_page);
- }
-
- if (descr_->max_repetition_level() > 0) {
- repetition_levels_rle_size = RleEncodeLevels(
- repetition_levels_sink_.data(), repetition_levels_rle_.get(),
- descr_->max_repetition_level(), /*include_length_prefix=*/is_v1_data_page);
- }
-
- int64_t uncompressed_size =
- definition_levels_rle_size + repetition_levels_rle_size + values->size();
-
- if (is_v1_data_page) {
- BuildDataPageV1(definition_levels_rle_size, repetition_levels_rle_size,
- uncompressed_size, values);
- } else {
- BuildDataPageV2(definition_levels_rle_size, repetition_levels_rle_size,
- uncompressed_size, values);
- }
-
- // Re-initialize the sinks for next Page.
- InitSinks();
- num_buffered_values_ = 0;
- num_buffered_encoded_values_ = 0;
-}
-
-void ColumnWriterImpl::BuildDataPageV1(int64_t definition_levels_rle_size,
- int64_t repetition_levels_rle_size,
- int64_t uncompressed_size,
- const std::shared_ptr<Buffer>& values) {
- // Pass shrink_to_fit = false to Resize: the underlying buffer only keeps
- // growing, and resizing to a smaller size does not reallocate.
- PARQUET_THROW_NOT_OK(uncompressed_data_->Resize(uncompressed_size, false));
- ConcatenateBuffers(definition_levels_rle_size, repetition_levels_rle_size, values,
- uncompressed_data_->mutable_data());
-
- EncodedStatistics page_stats = GetPageStatistics();
- page_stats.ApplyStatSizeLimits(properties_->max_statistics_size(descr_->path()));
- page_stats.set_is_signed(SortOrder::SIGNED == descr_->sort_order());
- ResetPageStatistics();
-
- std::shared_ptr<Buffer> compressed_data;
- if (pager_->has_compressor()) {
- pager_->Compress(*(uncompressed_data_.get()), compressor_temp_buffer_.get());
- compressed_data = compressor_temp_buffer_;
- } else {
- compressed_data = uncompressed_data_;
- }
-
- // Write the page to OutputStream eagerly if there is no dictionary or
- // if dictionary encoding has fallen back to PLAIN
- if (has_dictionary_ && !fallback_) { // Save pages until end of dictionary encoding
- PARQUET_ASSIGN_OR_THROW(
- auto compressed_data_copy,
- compressed_data->CopySlice(0, compressed_data->size(), allocator_));
- std::unique_ptr<DataPage> page_ptr(new DataPageV1(
- compressed_data_copy, static_cast<int32_t>(num_buffered_values_), encoding_,
- Encoding::RLE, Encoding::RLE, uncompressed_size, page_stats));
- total_compressed_bytes_ += page_ptr->size() + sizeof(format::PageHeader);
-
- data_pages_.push_back(std::move(page_ptr));
- } else { // Eagerly write pages
- DataPageV1 page(compressed_data, static_cast<int32_t>(num_buffered_values_),
- encoding_, Encoding::RLE, Encoding::RLE, uncompressed_size,
- page_stats);
- WriteDataPage(page);
- }
-}
-
-void ColumnWriterImpl::BuildDataPageV2(int64_t definition_levels_rle_size,
- int64_t repetition_levels_rle_size,
- int64_t uncompressed_size,
- const std::shared_ptr<Buffer>& values) {
- // Compress the values if needed. Repetition and definition levels are uncompressed in
- // V2.
- std::shared_ptr<Buffer> compressed_values;
- if (pager_->has_compressor()) {
- pager_->Compress(*values, compressor_temp_buffer_.get());
- compressed_values = compressor_temp_buffer_;
- } else {
- compressed_values = values;
- }
-
- // Concatenate uncompressed levels and the possibly compressed values
- int64_t combined_size =
- definition_levels_rle_size + repetition_levels_rle_size + compressed_values->size();
- std::shared_ptr<ResizableBuffer> combined = AllocateBuffer(allocator_, combined_size);
-
- ConcatenateBuffers(definition_levels_rle_size, repetition_levels_rle_size,
- compressed_values, combined->mutable_data());
-
- EncodedStatistics page_stats = GetPageStatistics();
- page_stats.ApplyStatSizeLimits(properties_->max_statistics_size(descr_->path()));
- page_stats.set_is_signed(SortOrder::SIGNED == descr_->sort_order());
- ResetPageStatistics();
-
- int32_t num_values = static_cast<int32_t>(num_buffered_values_);
- int32_t null_count = static_cast<int32_t>(page_stats.null_count);
- int32_t def_levels_byte_length = static_cast<int32_t>(definition_levels_rle_size);
- int32_t rep_levels_byte_length = static_cast<int32_t>(repetition_levels_rle_size);
-
- // Write the page to OutputStream eagerly if there is no dictionary or
- // if dictionary encoding has fallen back to PLAIN
- if (has_dictionary_ && !fallback_) { // Save pages until end of dictionary encoding
- PARQUET_ASSIGN_OR_THROW(auto data_copy,
- combined->CopySlice(0, combined->size(), allocator_));
- std::unique_ptr<DataPage> page_ptr(new DataPageV2(
- combined, num_values, null_count, num_values, encoding_, def_levels_byte_length,
- rep_levels_byte_length, uncompressed_size, pager_->has_compressor()));
- total_compressed_bytes_ += page_ptr->size() + sizeof(format::PageHeader);
- data_pages_.push_back(std::move(page_ptr));
- } else {
- DataPageV2 page(combined, num_values, null_count, num_values, encoding_,
- def_levels_byte_length, rep_levels_byte_length, uncompressed_size,
- pager_->has_compressor());
- WriteDataPage(page);
- }
-}
-
-int64_t ColumnWriterImpl::Close() {
- if (!closed_) {
- closed_ = true;
- if (has_dictionary_ && !fallback_) {
- WriteDictionaryPage();
- }
-
- FlushBufferedDataPages();
-
- EncodedStatistics chunk_statistics = GetChunkStatistics();
- chunk_statistics.ApplyStatSizeLimits(
- properties_->max_statistics_size(descr_->path()));
- chunk_statistics.set_is_signed(SortOrder::SIGNED == descr_->sort_order());
-
- // Write stats only if the column has at least one row written
- if (rows_written_ > 0 && chunk_statistics.is_set()) {
- metadata_->SetStatistics(chunk_statistics);
- }
- pager_->Close(has_dictionary_, fallback_);
- }
-
- return total_bytes_written_;
-}
-
-void ColumnWriterImpl::FlushBufferedDataPages() {
- // Write all outstanding data to a new page
- if (num_buffered_values_ > 0) {
- AddDataPage();
- }
- for (const auto& page_ptr : data_pages_) {
- WriteDataPage(*page_ptr);
- }
- data_pages_.clear();
- total_compressed_bytes_ = 0;
-}
-
-// ----------------------------------------------------------------------
-// TypedColumnWriter
-
-template <typename Action>
-inline void DoInBatches(int64_t total, int64_t batch_size, Action&& action) {
- int64_t num_batches = static_cast<int>(total / batch_size);
- for (int round = 0; round < num_batches; round++) {
- action(round * batch_size, batch_size);
- }
- // Write the remaining values
- if (total % batch_size > 0) {
- action(num_batches * batch_size, total % batch_size);
- }
-}
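// Worked example (annotation, not upstream code): DoInBatches(1000, 300, fn)
// calls fn(0, 300), fn(300, 300) and fn(600, 300) for the three full batches,
// then fn(900, 100) for the remainder, visiting all 1000 values exactly once.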
-
-bool DictionaryDirectWriteSupported(const ::arrow::Array& array) {
- DCHECK_EQ(array.type_id(), ::arrow::Type::DICTIONARY);
- const ::arrow::DictionaryType& dict_type =
- static_cast<const ::arrow::DictionaryType&>(*array.type());
- return ::arrow::is_base_binary_like(dict_type.value_type()->id());
-}
-
-Status ConvertDictionaryToDense(const ::arrow::Array& array, MemoryPool* pool,
- std::shared_ptr<::arrow::Array>* out) {
- const ::arrow::DictionaryType& dict_type =
- static_cast<const ::arrow::DictionaryType&>(*array.type());
-
- ::arrow::compute::ExecContext ctx(pool);
- ARROW_ASSIGN_OR_RAISE(Datum cast_output,
- ::arrow::compute::Cast(array.data(), dict_type.value_type(),
- ::arrow::compute::CastOptions(), &ctx));
- *out = cast_output.make_array();
- return Status::OK();
-}
-
-static inline bool IsDictionaryEncoding(Encoding::type encoding) {
- return encoding == Encoding::PLAIN_DICTIONARY;
-}
-
-template <typename DType>
-class TypedColumnWriterImpl : public ColumnWriterImpl, public TypedColumnWriter<DType> {
- public:
- using T = typename DType::c_type;
-
- TypedColumnWriterImpl(ColumnChunkMetaDataBuilder* metadata,
- std::unique_ptr<PageWriter> pager, const bool use_dictionary,
- Encoding::type encoding, const WriterProperties* properties)
- : ColumnWriterImpl(metadata, std::move(pager), use_dictionary, encoding,
- properties) {
- current_encoder_ = MakeEncoder(DType::type_num, encoding, use_dictionary, descr_,
- properties->memory_pool());
-
- if (properties->statistics_enabled(descr_->path()) &&
- (SortOrder::UNKNOWN != descr_->sort_order())) {
- page_statistics_ = MakeStatistics<DType>(descr_, allocator_);
- chunk_statistics_ = MakeStatistics<DType>(descr_, allocator_);
- }
- }
-
- int64_t Close() override { return ColumnWriterImpl::Close(); }
-
- int64_t WriteBatch(int64_t num_values, const int16_t* def_levels,
- const int16_t* rep_levels, const T* values) override {
- // We check for DataPage limits only after the values have been inserted, so a
- // single large write could push the DataPage size well past the limit. The
- // purpose of this chunking is to bound that overshoot: even for very large
- // writes, it ensures that AddDataPage() is called at a reasonable page size
- // limit.
- int64_t value_offset = 0;
-
- auto WriteChunk = [&](int64_t offset, int64_t batch_size) {
- int64_t values_to_write = WriteLevels(batch_size, AddIfNotNull(def_levels, offset),
- AddIfNotNull(rep_levels, offset));
-
- // PARQUET-780
- if (values_to_write > 0) {
- DCHECK_NE(nullptr, values);
- }
- WriteValues(AddIfNotNull(values, value_offset), values_to_write,
- batch_size - values_to_write);
- CommitWriteAndCheckPageLimit(batch_size, values_to_write);
- value_offset += values_to_write;
-
- // Dictionary size checked separately from data page size since we
- // circumvent this check when writing ::arrow::DictionaryArray directly
- CheckDictionarySizeLimit();
- };
- DoInBatches(num_values, properties_->write_batch_size(), WriteChunk);
- return value_offset;
- }
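// Hypothetical caller-side sketch (annotation, not upstream code): writing an
// optional INT32 column through the public TypedColumnWriter API. `rg_writer`
// is an assumed parquet::RowGroupWriter*; def_level 1 marks a present value,
// 0 marks a null, and `values` holds only the two non-null entries.
void ExampleWriteOptionalInt32(parquet::RowGroupWriter* rg_writer) {
  std::vector<int16_t> def_levels = {1, 0, 1};
  std::vector<int32_t> values = {7, 9};
  auto* writer = static_cast<parquet::Int32Writer*>(rg_writer->NextColumn());
  writer->WriteBatch(static_cast<int64_t>(def_levels.size()), def_levels.data(),
                     /*rep_levels=*/nullptr, values.data());
}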
-
- void WriteBatchSpaced(int64_t num_values, const int16_t* def_levels,
- const int16_t* rep_levels, const uint8_t* valid_bits,
- int64_t valid_bits_offset, const T* values) override {
- // Like WriteBatch, but for spaced values
- int64_t value_offset = 0;
- auto WriteChunk = [&](int64_t offset, int64_t batch_size) {
- int64_t batch_num_values = 0;
- int64_t batch_num_spaced_values = 0;
- int64_t null_count;
- MaybeCalculateValidityBits(AddIfNotNull(def_levels, offset), batch_size,
- &batch_num_values, &batch_num_spaced_values,
- &null_count);
-
- WriteLevelsSpaced(batch_size, AddIfNotNull(def_levels, offset),
- AddIfNotNull(rep_levels, offset));
- if (bits_buffer_ != nullptr) {
- WriteValuesSpaced(AddIfNotNull(values, value_offset), batch_num_values,
- batch_num_spaced_values, bits_buffer_->data(), /*offset=*/0);
- } else {
- WriteValuesSpaced(AddIfNotNull(values, value_offset), batch_num_values,
- batch_num_spaced_values, valid_bits,
- valid_bits_offset + value_offset);
- }
- CommitWriteAndCheckPageLimit(batch_size, batch_num_spaced_values);
- value_offset += batch_num_spaced_values;
-
- // Dictionary size checked separately from data page size since we
- // circumvent this check when writing ::arrow::DictionaryArray directly
- CheckDictionarySizeLimit();
- };
- DoInBatches(num_values, properties_->write_batch_size(), WriteChunk);
- }
-
- Status WriteArrow(const int16_t* def_levels, const int16_t* rep_levels,
- int64_t num_levels, const ::arrow::Array& leaf_array,
- ArrowWriteContext* ctx, bool leaf_field_nullable) override {
- BEGIN_PARQUET_CATCH_EXCEPTIONS
- // Leaf nulls are canonical when there is only a single null element after a list
- // and it is at the leaf.
- bool single_nullable_element =
- (level_info_.def_level == level_info_.repeated_ancestor_def_level + 1) &&
- leaf_field_nullable;
- bool maybe_parent_nulls = level_info_.HasNullableValues() && !single_nullable_element;
- if (maybe_parent_nulls) {
- ARROW_ASSIGN_OR_RAISE(
- bits_buffer_,
- ::arrow::AllocateResizableBuffer(
- BitUtil::BytesForBits(properties_->write_batch_size()), ctx->memory_pool));
- bits_buffer_->ZeroPadding();
- }
-
- if (leaf_array.type()->id() == ::arrow::Type::DICTIONARY) {
- return WriteArrowDictionary(def_levels, rep_levels, num_levels, leaf_array, ctx,
- maybe_parent_nulls);
- } else {
- return WriteArrowDense(def_levels, rep_levels, num_levels, leaf_array, ctx,
- maybe_parent_nulls);
- }
- END_PARQUET_CATCH_EXCEPTIONS
- }
-
- int64_t EstimatedBufferedValueBytes() const override {
- return current_encoder_->EstimatedDataEncodedSize();
- }
-
- protected:
- std::shared_ptr<Buffer> GetValuesBuffer() override {
- return current_encoder_->FlushValues();
- }
-
- // Internal function to handle direct writing of ::arrow::DictionaryArray,
- // since the standard logic concerning dictionary size limits and fallback to
- // plain encoding is circumvented
- Status WriteArrowDictionary(const int16_t* def_levels, const int16_t* rep_levels,
- int64_t num_levels, const ::arrow::Array& array,
- ArrowWriteContext* context, bool maybe_parent_nulls);
-
- Status WriteArrowDense(const int16_t* def_levels, const int16_t* rep_levels,
- int64_t num_levels, const ::arrow::Array& array,
- ArrowWriteContext* context, bool maybe_parent_nulls);
-
- void WriteDictionaryPage() override {
- // We have to dynamic cast here because of TypedEncoder<Type> as
- // some compilers don't want to cast through virtual inheritance
- auto dict_encoder = dynamic_cast<DictEncoder<DType>*>(current_encoder_.get());
- DCHECK(dict_encoder);
- std::shared_ptr<ResizableBuffer> buffer =
- AllocateBuffer(properties_->memory_pool(), dict_encoder->dict_encoded_size());
- dict_encoder->WriteDict(buffer->mutable_data());
-
- DictionaryPage page(buffer, dict_encoder->num_entries(),
- properties_->dictionary_page_encoding());
- total_bytes_written_ += pager_->WriteDictionaryPage(page);
- }
-
- EncodedStatistics GetPageStatistics() override {
- EncodedStatistics result;
- if (page_statistics_) result = page_statistics_->Encode();
- return result;
- }
-
- EncodedStatistics GetChunkStatistics() override {
- EncodedStatistics result;
- if (chunk_statistics_) result = chunk_statistics_->Encode();
- return result;
- }
-
- void ResetPageStatistics() override {
- if (chunk_statistics_ != nullptr) {
- chunk_statistics_->Merge(*page_statistics_);
- page_statistics_->Reset();
- }
- }
-
- Type::type type() const override { return descr_->physical_type(); }
-
- const ColumnDescriptor* descr() const override { return descr_; }
-
- int64_t rows_written() const override { return rows_written_; }
-
- int64_t total_compressed_bytes() const override { return total_compressed_bytes_; }
-
- int64_t total_bytes_written() const override { return total_bytes_written_; }
-
- const WriterProperties* properties() override { return properties_; }
-
- private:
- using ValueEncoderType = typename EncodingTraits<DType>::Encoder;
- using TypedStats = TypedStatistics<DType>;
- std::unique_ptr<Encoder> current_encoder_;
- std::shared_ptr<TypedStats> page_statistics_;
- std::shared_ptr<TypedStats> chunk_statistics_;
-
-  // If writing a sequence of ::arrow::DictionaryArray to the writer, we keep the
-  // dictionary passed to DictEncoder<T>::PutDictionary so we can check
-  // subsequent array chunks to see whether materialization is required (in
-  // which case we fall back to the dense write path)
- std::shared_ptr<::arrow::Array> preserved_dictionary_;
-
- int64_t WriteLevels(int64_t num_values, const int16_t* def_levels,
- const int16_t* rep_levels) {
- int64_t values_to_write = 0;
- // If the field is required and non-repeated, there are no definition levels
- if (descr_->max_definition_level() > 0) {
- for (int64_t i = 0; i < num_values; ++i) {
- if (def_levels[i] == descr_->max_definition_level()) {
- ++values_to_write;
- }
- }
-
- WriteDefinitionLevels(num_values, def_levels);
- } else {
- // Required field, write all values
- values_to_write = num_values;
- }
-
- // Not present for non-repeated fields
- if (descr_->max_repetition_level() > 0) {
- // A row could include more than one value
- // Count the occasions where we start a new row
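-      // (e.g. rep_levels [0, 1, 1, 0, 1] contain two zeros and hence span two rows)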
- for (int64_t i = 0; i < num_values; ++i) {
- if (rep_levels[i] == 0) {
- rows_written_++;
- }
- }
-
- WriteRepetitionLevels(num_values, rep_levels);
- } else {
- // Each value is exactly one row
- rows_written_ += static_cast<int>(num_values);
- }
- return values_to_write;
- }
-
-  // This method will always update the three output parameters,
-  // out_values_to_write, out_spaced_values_to_write and null_count. Additionally
-  // it will update the validity bitmap if required (i.e. if at least one level
-  // of nullable structs directly precedes the leaf node).
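-  //
-  // Example (an optional leaf under a list, i.e. def_level == 3 and
-  // repeated_ancestor_def_level == 2): def levels [3, 2, 3] yield
-  // out_values_to_write == 2, out_spaced_values_to_write == 3 and
-  // null_count == 1; levels below 2 (empty or null lists) count for neither.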
- void MaybeCalculateValidityBits(const int16_t* def_levels, int64_t batch_size,
- int64_t* out_values_to_write,
- int64_t* out_spaced_values_to_write,
- int64_t* null_count) {
- if (bits_buffer_ == nullptr) {
- if (level_info_.def_level == 0) {
- // In this case def levels should be null and we only
- // need to output counts which will always be equal to
- // the batch size passed in (max def_level == 0 indicates
- // there cannot be repeated or null fields).
- DCHECK_EQ(def_levels, nullptr);
- *out_values_to_write = batch_size;
- *out_spaced_values_to_write = batch_size;
- *null_count = 0;
- } else {
- for (int x = 0; x < batch_size; x++) {
- *out_values_to_write += def_levels[x] == level_info_.def_level ? 1 : 0;
- *out_spaced_values_to_write +=
- def_levels[x] >= level_info_.repeated_ancestor_def_level ? 1 : 0;
- }
-      *null_count = *out_spaced_values_to_write - *out_values_to_write;
- }
- return;
- }
-    // Shrink-to-fit possibly causes another allocation, and would only be
-    // necessary on the last batch.
- int64_t new_bitmap_size = BitUtil::BytesForBits(batch_size);
- if (new_bitmap_size != bits_buffer_->size()) {
- PARQUET_THROW_NOT_OK(
- bits_buffer_->Resize(new_bitmap_size, /*shrink_to_fit=*/false));
- bits_buffer_->ZeroPadding();
- }
- internal::ValidityBitmapInputOutput io;
- io.valid_bits = bits_buffer_->mutable_data();
- io.values_read_upper_bound = batch_size;
- internal::DefLevelsToBitmap(def_levels, batch_size, level_info_, &io);
- *out_values_to_write = io.values_read - io.null_count;
- *out_spaced_values_to_write = io.values_read;
- *null_count = io.null_count;
- }
-
- Result<std::shared_ptr<Array>> MaybeReplaceValidity(std::shared_ptr<Array> array,
- int64_t new_null_count,
- ::arrow::MemoryPool* memory_pool) {
- if (bits_buffer_ == nullptr) {
- return array;
- }
- std::vector<std::shared_ptr<Buffer>> buffers = array->data()->buffers;
- if (buffers.empty()) {
- return array;
- }
- buffers[0] = bits_buffer_;
- // Should be a leaf array.
- DCHECK_GT(buffers.size(), 1);
- ValueBufferSlicer slicer{memory_pool, /*buffer=*/nullptr};
- if (array->data()->offset > 0) {
- RETURN_NOT_OK(::arrow::VisitArrayInline(*array, &slicer));
- buffers[1] = slicer.buffer_;
- }
- return ::arrow::MakeArray(std::make_shared<ArrayData>(
- array->type(), array->length(), std::move(buffers), new_null_count));
- }
-
- void WriteLevelsSpaced(int64_t num_levels, const int16_t* def_levels,
- const int16_t* rep_levels) {
- // If the field is required and non-repeated, there are no definition levels
- if (descr_->max_definition_level() > 0) {
- WriteDefinitionLevels(num_levels, def_levels);
- }
- // Not present for non-repeated fields
- if (descr_->max_repetition_level() > 0) {
- // A row could include more than one value
- // Count the occasions where we start a new row
- for (int64_t i = 0; i < num_levels; ++i) {
- if (rep_levels[i] == 0) {
- rows_written_++;
- }
- }
- WriteRepetitionLevels(num_levels, rep_levels);
- } else {
- // Each value is exactly one row
- rows_written_ += static_cast<int>(num_levels);
- }
- }
-
- void CommitWriteAndCheckPageLimit(int64_t num_levels, int64_t num_values) {
- num_buffered_values_ += num_levels;
- num_buffered_encoded_values_ += num_values;
-
- if (current_encoder_->EstimatedDataEncodedSize() >= properties_->data_pagesize()) {
- AddDataPage();
- }
- }
-
- void FallbackToPlainEncoding() {
- if (IsDictionaryEncoding(current_encoder_->encoding())) {
- WriteDictionaryPage();
- // Serialize the buffered Dictionary Indices
- FlushBufferedDataPages();
- fallback_ = true;
- // Only PLAIN encoding is supported for fallback in V1
- current_encoder_ = MakeEncoder(DType::type_num, Encoding::PLAIN, false, descr_,
- properties_->memory_pool());
- encoding_ = Encoding::PLAIN;
- }
- }
-
-  // Checks if the dictionary page size limit is reached.
-  // If it is, the dictionary page and the buffered data pages are serialized
-  // and the encoding switches to PLAIN.
-  //
-  // Only one dictionary page is ever written, so once we fall back to PLAIN
-  // there is no returning to dictionary encoding for this column chunk.
- void CheckDictionarySizeLimit() {
- if (!has_dictionary_ || fallback_) {
- // Either not using dictionary encoding, or we have already fallen back
- // to PLAIN encoding because the size threshold was reached
- return;
- }
-
-    // We have to dynamic_cast here because some compilers don't want to cast
-    // TypedEncoder<Type> through virtual inheritance
- auto dict_encoder = dynamic_cast<DictEncoder<DType>*>(current_encoder_.get());
- if (dict_encoder->dict_encoded_size() >= properties_->dictionary_pagesize_limit()) {
- FallbackToPlainEncoding();
- }
- }
-
- void WriteValues(const T* values, int64_t num_values, int64_t num_nulls) {
- dynamic_cast<ValueEncoderType*>(current_encoder_.get())
- ->Put(values, static_cast<int>(num_values));
- if (page_statistics_ != nullptr) {
- page_statistics_->Update(values, num_values, num_nulls);
- }
- }
-
- void WriteValuesSpaced(const T* values, int64_t num_values, int64_t num_spaced_values,
- const uint8_t* valid_bits, int64_t valid_bits_offset) {
- if (num_values != num_spaced_values) {
- dynamic_cast<ValueEncoderType*>(current_encoder_.get())
- ->PutSpaced(values, static_cast<int>(num_spaced_values), valid_bits,
- valid_bits_offset);
- } else {
- dynamic_cast<ValueEncoderType*>(current_encoder_.get())
- ->Put(values, static_cast<int>(num_values));
- }
- if (page_statistics_ != nullptr) {
- const int64_t num_nulls = num_spaced_values - num_values;
- page_statistics_->UpdateSpaced(values, valid_bits, valid_bits_offset, num_values,
- num_nulls);
- }
- }
-};
-
-template <typename DType>
-Status TypedColumnWriterImpl<DType>::WriteArrowDictionary(
- const int16_t* def_levels, const int16_t* rep_levels, int64_t num_levels,
- const ::arrow::Array& array, ArrowWriteContext* ctx, bool maybe_parent_nulls) {
- // If this is the first time writing a DictionaryArray, then there's
- // a few possible paths to take:
- //
- // - If dictionary encoding is not enabled, convert to densely
- // encoded and call WriteArrow
- // - Dictionary encoding enabled
- // - If this is the first time this is called, then we call
- // PutDictionary into the encoder and then PutIndices on each
- // chunk. We store the dictionary that was written in
- // preserved_dictionary_ so that subsequent calls to this method
- // can make sure the dictionary has not changed
- // - On subsequent calls, we have to check whether the dictionary
- // has changed. If it has, then we trigger the varying
- // dictionary path and materialize each chunk and then call
- // WriteArrow with that
- auto WriteDense = [&] {
- std::shared_ptr<::arrow::Array> dense_array;
- RETURN_NOT_OK(
- ConvertDictionaryToDense(array, properties_->memory_pool(), &dense_array));
- return WriteArrowDense(def_levels, rep_levels, num_levels, *dense_array, ctx,
- maybe_parent_nulls);
- };
-
- if (!IsDictionaryEncoding(current_encoder_->encoding()) ||
- !DictionaryDirectWriteSupported(array)) {
-    // No longer dictionary-encoding for whatever reason, maybe we never were
-    // or we decided to stop. Note that WriteArrow can be invoked multiple
-    // times with both dense and dictionary-encoded versions of the same data
-    // without a problem. Any dense data will be hashed to indices until the
-    // dictionary page limit is reached, at which point everything (dictionary
-    // and dense) will fall back to plain encoding
- return WriteDense();
- }
-
- auto dict_encoder = dynamic_cast<DictEncoder<DType>*>(current_encoder_.get());
- const auto& data = checked_cast<const ::arrow::DictionaryArray&>(array);
- std::shared_ptr<::arrow::Array> dictionary = data.dictionary();
- std::shared_ptr<::arrow::Array> indices = data.indices();
-
- int64_t value_offset = 0;
- auto WriteIndicesChunk = [&](int64_t offset, int64_t batch_size) {
- int64_t batch_num_values = 0;
- int64_t batch_num_spaced_values = 0;
- int64_t null_count = ::arrow::kUnknownNullCount;
-    // The validity bitmap is non-null for nullable values. At this point we
-    // can't determine whether the leaf array has the same nulls as any parents
-    // it might have had, so we recompute it from the def levels.
- MaybeCalculateValidityBits(AddIfNotNull(def_levels, offset), batch_size,
- &batch_num_values, &batch_num_spaced_values, &null_count);
- WriteLevelsSpaced(batch_size, AddIfNotNull(def_levels, offset),
- AddIfNotNull(rep_levels, offset));
- std::shared_ptr<Array> writeable_indices =
- indices->Slice(value_offset, batch_num_spaced_values);
- PARQUET_ASSIGN_OR_THROW(
- writeable_indices,
- MaybeReplaceValidity(writeable_indices, null_count, ctx->memory_pool));
- dict_encoder->PutIndices(*writeable_indices);
- CommitWriteAndCheckPageLimit(batch_size, batch_num_values);
- value_offset += batch_num_spaced_values;
- };
-
- // Handle seeing dictionary for the first time
- if (!preserved_dictionary_) {
- // It's a new dictionary. Call PutDictionary and keep track of it
- PARQUET_CATCH_NOT_OK(dict_encoder->PutDictionary(*dictionary));
-
-    // If there were duplicate values in the dictionary, the encoder's memo table
-    // will be out of sync with the indices in the Arrow array.
-    // The easiest solution for this uncommon case is to fall back to plain encoding.
- if (dict_encoder->num_entries() != dictionary->length()) {
- PARQUET_CATCH_NOT_OK(FallbackToPlainEncoding());
- return WriteDense();
- }
-
- // TODO(wesm): If some dictionary values are unobserved, then the
- // statistics will be inaccurate. Do we care enough to fix it?
- if (page_statistics_ != nullptr) {
- PARQUET_CATCH_NOT_OK(page_statistics_->Update(*dictionary));
- }
- preserved_dictionary_ = dictionary;
- } else if (!dictionary->Equals(*preserved_dictionary_)) {
- // Dictionary has changed
- PARQUET_CATCH_NOT_OK(FallbackToPlainEncoding());
- return WriteDense();
- }
-
- PARQUET_CATCH_NOT_OK(
- DoInBatches(num_levels, properties_->write_batch_size(), WriteIndicesChunk));
- return Status::OK();
-}
-
-// ----------------------------------------------------------------------
-// Direct Arrow write path
-
-template <typename ParquetType, typename ArrowType, typename Enable = void>
-struct SerializeFunctor {
- using ArrowCType = typename ArrowType::c_type;
- using ArrayType = typename ::arrow::TypeTraits<ArrowType>::ArrayType;
- using ParquetCType = typename ParquetType::c_type;
- Status Serialize(const ArrayType& array, ArrowWriteContext*, ParquetCType* out) {
- const ArrowCType* input = array.raw_values();
- if (array.null_count() > 0) {
- for (int i = 0; i < array.length(); i++) {
- out[i] = static_cast<ParquetCType>(input[i]);
- }
- } else {
- std::copy(input, input + array.length(), out);
- }
- return Status::OK();
- }
-};
-
-template <typename ParquetType, typename ArrowType>
-Status WriteArrowSerialize(const ::arrow::Array& array, int64_t num_levels,
- const int16_t* def_levels, const int16_t* rep_levels,
- ArrowWriteContext* ctx, TypedColumnWriter<ParquetType>* writer,
- bool maybe_parent_nulls) {
- using ParquetCType = typename ParquetType::c_type;
- using ArrayType = typename ::arrow::TypeTraits<ArrowType>::ArrayType;
-
- ParquetCType* buffer = nullptr;
- PARQUET_THROW_NOT_OK(ctx->GetScratchData<ParquetCType>(array.length(), &buffer));
-
- SerializeFunctor<ParquetType, ArrowType> functor;
- RETURN_NOT_OK(functor.Serialize(checked_cast<const ArrayType&>(array), ctx, buffer));
- bool no_nulls =
- writer->descr()->schema_node()->is_required() || (array.null_count() == 0);
- if (!maybe_parent_nulls && no_nulls) {
- PARQUET_CATCH_NOT_OK(writer->WriteBatch(num_levels, def_levels, rep_levels, buffer));
- } else {
- PARQUET_CATCH_NOT_OK(writer->WriteBatchSpaced(num_levels, def_levels, rep_levels,
- array.null_bitmap_data(),
- array.offset(), buffer));
- }
- return Status::OK();
-}
-
-template <typename ParquetType>
-Status WriteArrowZeroCopy(const ::arrow::Array& array, int64_t num_levels,
- const int16_t* def_levels, const int16_t* rep_levels,
- ArrowWriteContext* ctx, TypedColumnWriter<ParquetType>* writer,
- bool maybe_parent_nulls) {
- using T = typename ParquetType::c_type;
- const auto& data = static_cast<const ::arrow::PrimitiveArray&>(array);
- const T* values = nullptr;
- // The values buffer may be null if the array is empty (ARROW-2744)
- if (data.values() != nullptr) {
- values = reinterpret_cast<const T*>(data.values()->data()) + data.offset();
- } else {
- DCHECK_EQ(data.length(), 0);
- }
- bool no_nulls =
- writer->descr()->schema_node()->is_required() || (array.null_count() == 0);
-
- if (!maybe_parent_nulls && no_nulls) {
- PARQUET_CATCH_NOT_OK(writer->WriteBatch(num_levels, def_levels, rep_levels, values));
- } else {
- PARQUET_CATCH_NOT_OK(writer->WriteBatchSpaced(num_levels, def_levels, rep_levels,
- data.null_bitmap_data(), data.offset(),
- values));
- }
- return Status::OK();
-}
-
-#define WRITE_SERIALIZE_CASE(ArrowEnum, ArrowType, ParquetType) \
- case ::arrow::Type::ArrowEnum: \
- return WriteArrowSerialize<ParquetType, ::arrow::ArrowType>( \
- array, num_levels, def_levels, rep_levels, ctx, this, maybe_parent_nulls);
-
-#define WRITE_ZERO_COPY_CASE(ArrowEnum, ArrowType, ParquetType) \
- case ::arrow::Type::ArrowEnum: \
- return WriteArrowZeroCopy<ParquetType>(array, num_levels, def_levels, rep_levels, \
- ctx, this, maybe_parent_nulls);
-
-#define ARROW_UNSUPPORTED() \
- std::stringstream ss; \
- ss << "Arrow type " << array.type()->ToString() \
- << " cannot be written to Parquet type " << descr_->ToString(); \
- return Status::Invalid(ss.str());
-
-// ----------------------------------------------------------------------
-// Write Arrow to BooleanType
-
-template <>
-struct SerializeFunctor<BooleanType, ::arrow::BooleanType> {
- Status Serialize(const ::arrow::BooleanArray& data, ArrowWriteContext*, bool* out) {
- for (int i = 0; i < data.length(); i++) {
- *out++ = data.Value(i);
- }
- return Status::OK();
- }
-};
-
-template <>
-Status TypedColumnWriterImpl<BooleanType>::WriteArrowDense(
- const int16_t* def_levels, const int16_t* rep_levels, int64_t num_levels,
- const ::arrow::Array& array, ArrowWriteContext* ctx, bool maybe_parent_nulls) {
- if (array.type_id() != ::arrow::Type::BOOL) {
- ARROW_UNSUPPORTED();
- }
- return WriteArrowSerialize<BooleanType, ::arrow::BooleanType>(
- array, num_levels, def_levels, rep_levels, ctx, this, maybe_parent_nulls);
-}
-
-// ----------------------------------------------------------------------
-// Write Arrow types to INT32
-
-template <>
-struct SerializeFunctor<Int32Type, ::arrow::Date64Type> {
- Status Serialize(const ::arrow::Date64Array& array, ArrowWriteContext*, int32_t* out) {
- const int64_t* input = array.raw_values();
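-    // Date64 holds milliseconds since the UNIX epoch (86400000 ms per day),
-    // e.g. 1609459200000 ms (2021-01-01T00:00:00Z) / 86400000 == 18628 days.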
- for (int i = 0; i < array.length(); i++) {
- *out++ = static_cast<int32_t>(*input++ / 86400000);
- }
- return Status::OK();
- }
-};
-
-template <>
-struct SerializeFunctor<Int32Type, ::arrow::Time32Type> {
- Status Serialize(const ::arrow::Time32Array& array, ArrowWriteContext*, int32_t* out) {
- const int32_t* input = array.raw_values();
- const auto& type = static_cast<const ::arrow::Time32Type&>(*array.type());
- if (type.unit() == ::arrow::TimeUnit::SECOND) {
- for (int i = 0; i < array.length(); i++) {
- out[i] = input[i] * 1000;
- }
- } else {
- std::copy(input, input + array.length(), out);
- }
- return Status::OK();
- }
-};
-
-template <>
-Status TypedColumnWriterImpl<Int32Type>::WriteArrowDense(
- const int16_t* def_levels, const int16_t* rep_levels, int64_t num_levels,
- const ::arrow::Array& array, ArrowWriteContext* ctx, bool maybe_parent_nulls) {
- switch (array.type()->id()) {
- case ::arrow::Type::NA: {
- PARQUET_CATCH_NOT_OK(WriteBatch(num_levels, def_levels, rep_levels, nullptr));
- } break;
- WRITE_SERIALIZE_CASE(INT8, Int8Type, Int32Type)
- WRITE_SERIALIZE_CASE(UINT8, UInt8Type, Int32Type)
- WRITE_SERIALIZE_CASE(INT16, Int16Type, Int32Type)
- WRITE_SERIALIZE_CASE(UINT16, UInt16Type, Int32Type)
- WRITE_SERIALIZE_CASE(UINT32, UInt32Type, Int32Type)
- WRITE_ZERO_COPY_CASE(INT32, Int32Type, Int32Type)
- WRITE_ZERO_COPY_CASE(DATE32, Date32Type, Int32Type)
- WRITE_SERIALIZE_CASE(DATE64, Date64Type, Int32Type)
- WRITE_SERIALIZE_CASE(TIME32, Time32Type, Int32Type)
- default:
- ARROW_UNSUPPORTED()
- }
- return Status::OK();
-}
-
-// ----------------------------------------------------------------------
-// Write Arrow to Int64 and Int96
-
-#define INT96_CONVERT_LOOP(ConversionFunction) \
- for (int64_t i = 0; i < array.length(); i++) ConversionFunction(input[i], &out[i]);
-
-template <>
-struct SerializeFunctor<Int96Type, ::arrow::TimestampType> {
- Status Serialize(const ::arrow::TimestampArray& array, ArrowWriteContext*, Int96* out) {
- const int64_t* input = array.raw_values();
- const auto& type = static_cast<const ::arrow::TimestampType&>(*array.type());
- switch (type.unit()) {
- case ::arrow::TimeUnit::NANO:
- INT96_CONVERT_LOOP(internal::NanosecondsToImpalaTimestamp);
- break;
- case ::arrow::TimeUnit::MICRO:
- INT96_CONVERT_LOOP(internal::MicrosecondsToImpalaTimestamp);
- break;
- case ::arrow::TimeUnit::MILLI:
- INT96_CONVERT_LOOP(internal::MillisecondsToImpalaTimestamp);
- break;
- case ::arrow::TimeUnit::SECOND:
- INT96_CONVERT_LOOP(internal::SecondsToImpalaTimestamp);
- break;
- }
- return Status::OK();
- }
-};
-
-#define COERCE_DIVIDE -1
-#define COERCE_INVALID 0
-#define COERCE_MULTIPLY +1
-
-static std::pair<int, int64_t> kTimestampCoercionFactors[4][4] = {
- // from seconds ...
- {{COERCE_INVALID, 0}, // ... to seconds
- {COERCE_MULTIPLY, 1000}, // ... to millis
- {COERCE_MULTIPLY, 1000000}, // ... to micros
- {COERCE_MULTIPLY, INT64_C(1000000000)}}, // ... to nanos
- // from millis ...
- {{COERCE_INVALID, 0},
- {COERCE_MULTIPLY, 1},
- {COERCE_MULTIPLY, 1000},
- {COERCE_MULTIPLY, 1000000}},
- // from micros ...
- {{COERCE_INVALID, 0},
- {COERCE_DIVIDE, 1000},
- {COERCE_MULTIPLY, 1},
- {COERCE_MULTIPLY, 1000}},
- // from nanos ...
- {{COERCE_INVALID, 0},
- {COERCE_DIVIDE, 1000000},
- {COERCE_DIVIDE, 1000},
- {COERCE_MULTIPLY, 1}}};
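-// Example lookup: coercing micros to millis uses kTimestampCoercionFactors[2][1]
-// == {COERCE_DIVIDE, 1000}; a value such as 1234567 us does not divide evenly and
-// is rejected unless truncated timestamps are allowed (see DivideBy below).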
-
-template <>
-struct SerializeFunctor<Int64Type, ::arrow::TimestampType> {
- Status Serialize(const ::arrow::TimestampArray& array, ArrowWriteContext* ctx,
- int64_t* out) {
- const auto& source_type = static_cast<const ::arrow::TimestampType&>(*array.type());
- auto source_unit = source_type.unit();
- const int64_t* values = array.raw_values();
-
- ::arrow::TimeUnit::type target_unit = ctx->properties->coerce_timestamps_unit();
- auto target_type = ::arrow::timestamp(target_unit);
- bool truncation_allowed = ctx->properties->truncated_timestamps_allowed();
-
- auto DivideBy = [&](const int64_t factor) {
- for (int64_t i = 0; i < array.length(); i++) {
- if (!truncation_allowed && array.IsValid(i) && (values[i] % factor != 0)) {
- return Status::Invalid("Casting from ", source_type.ToString(), " to ",
- target_type->ToString(),
- " would lose data: ", values[i]);
- }
- out[i] = values[i] / factor;
- }
- return Status::OK();
- };
-
- auto MultiplyBy = [&](const int64_t factor) {
- for (int64_t i = 0; i < array.length(); i++) {
- out[i] = values[i] * factor;
- }
- return Status::OK();
- };
-
- const auto& coercion = kTimestampCoercionFactors[static_cast<int>(source_unit)]
- [static_cast<int>(target_unit)];
-
- // .first -> coercion operation; .second -> scale factor
- DCHECK_NE(coercion.first, COERCE_INVALID);
- return coercion.first == COERCE_DIVIDE ? DivideBy(coercion.second)
- : MultiplyBy(coercion.second);
- }
-};
-
-#undef COERCE_DIVIDE
-#undef COERCE_INVALID
-#undef COERCE_MULTIPLY
-
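-// Coercion policy, in order of precedence: (1) honor an explicit
-// coerce_timestamps(unit) request; (2) Parquet 1.0 cannot represent NANO, so
-// nanos are coerced to micros with truncation disallowed; (3) Parquet has no
-// SECOND unit, so seconds are always promoted to millis; (4) otherwise the
-// values are written zero-copy.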
-Status WriteTimestamps(const ::arrow::Array& values, int64_t num_levels,
- const int16_t* def_levels, const int16_t* rep_levels,
- ArrowWriteContext* ctx, TypedColumnWriter<Int64Type>* writer,
- bool maybe_parent_nulls) {
- const auto& source_type = static_cast<const ::arrow::TimestampType&>(*values.type());
-
- auto WriteCoerce = [&](const ArrowWriterProperties* properties) {
- ArrowWriteContext temp_ctx = *ctx;
- temp_ctx.properties = properties;
- return WriteArrowSerialize<Int64Type, ::arrow::TimestampType>(
- values, num_levels, def_levels, rep_levels, &temp_ctx, writer,
- maybe_parent_nulls);
- };
-
- if (ctx->properties->coerce_timestamps_enabled()) {
- // User explicitly requested coercion to specific unit
- if (source_type.unit() == ctx->properties->coerce_timestamps_unit()) {
- // No data conversion necessary
- return WriteArrowZeroCopy<Int64Type>(values, num_levels, def_levels, rep_levels,
- ctx, writer, maybe_parent_nulls);
- } else {
- return WriteCoerce(ctx->properties);
- }
- } else if (writer->properties()->version() == ParquetVersion::PARQUET_1_0 &&
- source_type.unit() == ::arrow::TimeUnit::NANO) {
- // Absent superseding user instructions, when writing Parquet version 1.0 files,
- // timestamps in nanoseconds are coerced to microseconds
- std::shared_ptr<ArrowWriterProperties> properties =
- (ArrowWriterProperties::Builder())
- .coerce_timestamps(::arrow::TimeUnit::MICRO)
- ->disallow_truncated_timestamps()
- ->build();
- return WriteCoerce(properties.get());
- } else if (source_type.unit() == ::arrow::TimeUnit::SECOND) {
- // Absent superseding user instructions, timestamps in seconds are coerced to
- // milliseconds
- std::shared_ptr<ArrowWriterProperties> properties =
- (ArrowWriterProperties::Builder())
- .coerce_timestamps(::arrow::TimeUnit::MILLI)
- ->build();
- return WriteCoerce(properties.get());
- } else {
- // No data conversion necessary
- return WriteArrowZeroCopy<Int64Type>(values, num_levels, def_levels, rep_levels, ctx,
- writer, maybe_parent_nulls);
- }
-}
-
-template <>
-Status TypedColumnWriterImpl<Int64Type>::WriteArrowDense(
- const int16_t* def_levels, const int16_t* rep_levels, int64_t num_levels,
- const ::arrow::Array& array, ArrowWriteContext* ctx, bool maybe_parent_nulls) {
- switch (array.type()->id()) {
- case ::arrow::Type::TIMESTAMP:
- return WriteTimestamps(array, num_levels, def_levels, rep_levels, ctx, this,
- maybe_parent_nulls);
- WRITE_ZERO_COPY_CASE(INT64, Int64Type, Int64Type)
- WRITE_SERIALIZE_CASE(UINT32, UInt32Type, Int64Type)
- WRITE_SERIALIZE_CASE(UINT64, UInt64Type, Int64Type)
- WRITE_ZERO_COPY_CASE(TIME64, Time64Type, Int64Type)
- default:
- ARROW_UNSUPPORTED();
- }
-}
-
-template <>
-Status TypedColumnWriterImpl<Int96Type>::WriteArrowDense(
- const int16_t* def_levels, const int16_t* rep_levels, int64_t num_levels,
- const ::arrow::Array& array, ArrowWriteContext* ctx, bool maybe_parent_nulls) {
- if (array.type_id() != ::arrow::Type::TIMESTAMP) {
- ARROW_UNSUPPORTED();
- }
- return WriteArrowSerialize<Int96Type, ::arrow::TimestampType>(
- array, num_levels, def_levels, rep_levels, ctx, this, maybe_parent_nulls);
-}
-
-// ----------------------------------------------------------------------
-// Floating point types
-
-template <>
-Status TypedColumnWriterImpl<FloatType>::WriteArrowDense(
- const int16_t* def_levels, const int16_t* rep_levels, int64_t num_levels,
- const ::arrow::Array& array, ArrowWriteContext* ctx, bool maybe_parent_nulls) {
- if (array.type_id() != ::arrow::Type::FLOAT) {
- ARROW_UNSUPPORTED();
- }
- return WriteArrowZeroCopy<FloatType>(array, num_levels, def_levels, rep_levels, ctx,
- this, maybe_parent_nulls);
-}
-
-template <>
-Status TypedColumnWriterImpl<DoubleType>::WriteArrowDense(
- const int16_t* def_levels, const int16_t* rep_levels, int64_t num_levels,
- const ::arrow::Array& array, ArrowWriteContext* ctx, bool maybe_parent_nulls) {
- if (array.type_id() != ::arrow::Type::DOUBLE) {
- ARROW_UNSUPPORTED();
- }
- return WriteArrowZeroCopy<DoubleType>(array, num_levels, def_levels, rep_levels, ctx,
- this, maybe_parent_nulls);
-}
-
-// ----------------------------------------------------------------------
-// Write Arrow to BYTE_ARRAY
-
-template <>
-Status TypedColumnWriterImpl<ByteArrayType>::WriteArrowDense(
- const int16_t* def_levels, const int16_t* rep_levels, int64_t num_levels,
- const ::arrow::Array& array, ArrowWriteContext* ctx, bool maybe_parent_nulls) {
- if (!::arrow::is_base_binary_like(array.type()->id())) {
- ARROW_UNSUPPORTED();
- }
-
- int64_t value_offset = 0;
- auto WriteChunk = [&](int64_t offset, int64_t batch_size) {
- int64_t batch_num_values = 0;
- int64_t batch_num_spaced_values = 0;
- int64_t null_count = 0;
-
- MaybeCalculateValidityBits(AddIfNotNull(def_levels, offset), batch_size,
- &batch_num_values, &batch_num_spaced_values, &null_count);
- WriteLevelsSpaced(batch_size, AddIfNotNull(def_levels, offset),
- AddIfNotNull(rep_levels, offset));
- std::shared_ptr<Array> data_slice =
- array.Slice(value_offset, batch_num_spaced_values);
- PARQUET_ASSIGN_OR_THROW(
- data_slice, MaybeReplaceValidity(data_slice, null_count, ctx->memory_pool));
-
- current_encoder_->Put(*data_slice);
- if (page_statistics_ != nullptr) {
- page_statistics_->Update(*data_slice);
- }
- CommitWriteAndCheckPageLimit(batch_size, batch_num_values);
- CheckDictionarySizeLimit();
- value_offset += batch_num_spaced_values;
- };
-
- PARQUET_CATCH_NOT_OK(
- DoInBatches(num_levels, properties_->write_batch_size(), WriteChunk));
- return Status::OK();
-}
-
-// ----------------------------------------------------------------------
-// Write Arrow to FIXED_LEN_BYTE_ARRAY
-
-template <typename ParquetType, typename ArrowType>
-struct SerializeFunctor<
- ParquetType, ArrowType,
- ::arrow::enable_if_t<::arrow::is_fixed_size_binary_type<ArrowType>::value &&
- !::arrow::is_decimal_type<ArrowType>::value>> {
- Status Serialize(const ::arrow::FixedSizeBinaryArray& array, ArrowWriteContext*,
- FLBA* out) {
- if (array.null_count() == 0) {
- // no nulls, just dump the data
- // todo(advancedxy): use a writeBatch to avoid this step
- for (int64_t i = 0; i < array.length(); i++) {
- out[i] = FixedLenByteArray(array.GetValue(i));
- }
- } else {
- for (int64_t i = 0; i < array.length(); i++) {
- if (array.IsValid(i)) {
- out[i] = FixedLenByteArray(array.GetValue(i));
- }
- }
- }
- return Status::OK();
- }
-};
-
-// ----------------------------------------------------------------------
-// Write Arrow to Decimal128
-
-// Requires a custom serializer because decimals in Parquet are stored in
-// big-endian format. Thus, a temporary local buffer is required.
-template <typename ParquetType, typename ArrowType>
-struct SerializeFunctor<ParquetType, ArrowType, ::arrow::enable_if_decimal<ArrowType>> {
- Status Serialize(const typename ::arrow::TypeTraits<ArrowType>::ArrayType& array,
- ArrowWriteContext* ctx, FLBA* out) {
- AllocateScratch(array, ctx);
- auto offset = Offset(array);
-
- if (array.null_count() == 0) {
- for (int64_t i = 0; i < array.length(); i++) {
- out[i] = FixDecimalEndianess<ArrowType::kByteWidth>(array.GetValue(i), offset);
- }
- } else {
- for (int64_t i = 0; i < array.length(); i++) {
- out[i] = array.IsValid(i) ? FixDecimalEndianess<ArrowType::kByteWidth>(
- array.GetValue(i), offset)
- : FixedLenByteArray();
- }
- }
-
- return Status::OK();
- }
-
-  // Parquet's Decimals are stored as fixed-length values whose length is
-  // proportional to the precision. Arrow's Decimals are always stored in 16/32
-  // bytes. Thus the internal FLBA pointer must be adjusted by the offset
-  // calculated here.
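-  // E.g. for a Decimal128 column with precision 10, Arrow stores 16 bytes but
-  // Parquet needs only DecimalSize(10) == 5, so the offset skips the 11
-  // most-significant (big-endian) padding bytes.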
- int32_t Offset(const Array& array) {
- auto decimal_type = checked_pointer_cast<::arrow::DecimalType>(array.type());
- return decimal_type->byte_width() -
- ::arrow::DecimalType::DecimalSize(decimal_type->precision());
- }
-
- void AllocateScratch(const typename ::arrow::TypeTraits<ArrowType>::ArrayType& array,
- ArrowWriteContext* ctx) {
- int64_t non_null_count = array.length() - array.null_count();
- int64_t size = non_null_count * ArrowType::kByteWidth;
- scratch_buffer = AllocateBuffer(ctx->memory_pool, size);
- scratch = reinterpret_cast<int64_t*>(scratch_buffer->mutable_data());
- }
-
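-  // Byte-swaps one value into the scratch buffer and returns an FLBA pointing
-  // at it. E.g. for byte_width == 16 on a little-endian host, the two 64-bit
-  // words {lo, hi} are emitted as {hi, lo}, i.e. in big-endian byte order.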
- template <int byte_width>
- FixedLenByteArray FixDecimalEndianess(const uint8_t* in, int64_t offset) {
- const auto* u64_in = reinterpret_cast<const int64_t*>(in);
- auto out = reinterpret_cast<const uint8_t*>(scratch) + offset;
- static_assert(byte_width == 16 || byte_width == 32,
- "only 16 and 32 byte Decimals supported");
- if (byte_width == 32) {
- *scratch++ = ::arrow::BitUtil::ToBigEndian(u64_in[3]);
- *scratch++ = ::arrow::BitUtil::ToBigEndian(u64_in[2]);
- *scratch++ = ::arrow::BitUtil::ToBigEndian(u64_in[1]);
- *scratch++ = ::arrow::BitUtil::ToBigEndian(u64_in[0]);
- } else {
- *scratch++ = ::arrow::BitUtil::ToBigEndian(u64_in[1]);
- *scratch++ = ::arrow::BitUtil::ToBigEndian(u64_in[0]);
- }
- return FixedLenByteArray(out);
- }
-
- std::shared_ptr<ResizableBuffer> scratch_buffer;
- int64_t* scratch;
-};
-
-template <>
-Status TypedColumnWriterImpl<FLBAType>::WriteArrowDense(
- const int16_t* def_levels, const int16_t* rep_levels, int64_t num_levels,
- const ::arrow::Array& array, ArrowWriteContext* ctx, bool maybe_parent_nulls) {
- switch (array.type()->id()) {
- WRITE_SERIALIZE_CASE(FIXED_SIZE_BINARY, FixedSizeBinaryType, FLBAType)
- WRITE_SERIALIZE_CASE(DECIMAL128, Decimal128Type, FLBAType)
- WRITE_SERIALIZE_CASE(DECIMAL256, Decimal256Type, FLBAType)
- default:
- break;
- }
- return Status::OK();
-}
-
-// ----------------------------------------------------------------------
-// Dynamic column writer constructor
-
-std::shared_ptr<ColumnWriter> ColumnWriter::Make(ColumnChunkMetaDataBuilder* metadata,
- std::unique_ptr<PageWriter> pager,
- const WriterProperties* properties) {
- const ColumnDescriptor* descr = metadata->descr();
- const bool use_dictionary = properties->dictionary_enabled(descr->path()) &&
- descr->physical_type() != Type::BOOLEAN;
- Encoding::type encoding = properties->encoding(descr->path());
- if (use_dictionary) {
- encoding = properties->dictionary_index_encoding();
- }
- switch (descr->physical_type()) {
- case Type::BOOLEAN:
- return std::make_shared<TypedColumnWriterImpl<BooleanType>>(
- metadata, std::move(pager), use_dictionary, encoding, properties);
- case Type::INT32:
- return std::make_shared<TypedColumnWriterImpl<Int32Type>>(
- metadata, std::move(pager), use_dictionary, encoding, properties);
- case Type::INT64:
- return std::make_shared<TypedColumnWriterImpl<Int64Type>>(
- metadata, std::move(pager), use_dictionary, encoding, properties);
- case Type::INT96:
- return std::make_shared<TypedColumnWriterImpl<Int96Type>>(
- metadata, std::move(pager), use_dictionary, encoding, properties);
- case Type::FLOAT:
- return std::make_shared<TypedColumnWriterImpl<FloatType>>(
- metadata, std::move(pager), use_dictionary, encoding, properties);
- case Type::DOUBLE:
- return std::make_shared<TypedColumnWriterImpl<DoubleType>>(
- metadata, std::move(pager), use_dictionary, encoding, properties);
- case Type::BYTE_ARRAY:
- return std::make_shared<TypedColumnWriterImpl<ByteArrayType>>(
- metadata, std::move(pager), use_dictionary, encoding, properties);
- case Type::FIXED_LEN_BYTE_ARRAY:
- return std::make_shared<TypedColumnWriterImpl<FLBAType>>(
- metadata, std::move(pager), use_dictionary, encoding, properties);
- default:
-      ParquetException::NYI("type writer not implemented");
- }
- // Unreachable code, but suppress compiler warning
- return std::shared_ptr<ColumnWriter>(nullptr);
-}
-
-} // namespace parquet
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "parquet/column_writer.h"
+
+#include <algorithm>
+#include <cstdint>
+#include <cstring>
+#include <map>
+#include <memory>
+#include <string>
+#include <utility>
+#include <vector>
+
+#include "arrow/array.h"
+#include "arrow/buffer_builder.h"
+#include "arrow/compute/api.h"
+#include "arrow/io/memory.h"
+#include "arrow/status.h"
+#include "arrow/type.h"
+#include "arrow/type_traits.h"
+#include "arrow/util/bit_stream_utils.h"
+#include "arrow/util/bit_util.h"
+#include "arrow/util/bitmap_ops.h"
+#include "arrow/util/checked_cast.h"
+#include "arrow/util/compression.h"
+#include "arrow/util/endian.h"
+#include "arrow/util/logging.h"
+#include "arrow/util/rle_encoding.h"
+#include "arrow/visitor_inline.h"
+#include "parquet/column_page.h"
+#include "parquet/encoding.h"
+#include "parquet/encryption/encryption_internal.h"
+#include "parquet/encryption/internal_file_encryptor.h"
+#include "parquet/level_conversion.h"
+#include "parquet/metadata.h"
+#include "parquet/platform.h"
+#include "parquet/properties.h"
+#include "parquet/schema.h"
+#include "parquet/statistics.h"
+#include "parquet/thrift_internal.h"
+#include "parquet/types.h"
+
+using arrow::Array;
+using arrow::ArrayData;
+using arrow::Datum;
+using arrow::Result;
+using arrow::Status;
+using arrow::BitUtil::BitWriter;
+using arrow::internal::checked_cast;
+using arrow::internal::checked_pointer_cast;
+using arrow::util::RleEncoder;
+
+namespace BitUtil = arrow::BitUtil;
+
+namespace parquet {
+
+namespace {
+
+// Visitor that extracts the value buffer from a FlatArray at a given offset.
+struct ValueBufferSlicer {
+ template <typename T>
+ ::arrow::enable_if_base_binary<typename T::TypeClass, Status> Visit(const T& array) {
+ auto data = array.data();
+ buffer_ =
+ SliceBuffer(data->buffers[1], data->offset * sizeof(typename T::offset_type),
+ data->length * sizeof(typename T::offset_type));
+ return Status::OK();
+ }
+
+ template <typename T>
+ ::arrow::enable_if_fixed_size_binary<typename T::TypeClass, Status> Visit(
+ const T& array) {
+ auto data = array.data();
+ buffer_ = SliceBuffer(data->buffers[1], data->offset * array.byte_width(),
+ data->length * array.byte_width());
+ return Status::OK();
+ }
+
+ template <typename T>
+ ::arrow::enable_if_t<::arrow::has_c_type<typename T::TypeClass>::value &&
+ !std::is_same<BooleanType, typename T::TypeClass>::value,
+ Status>
+ Visit(const T& array) {
+ auto data = array.data();
+ buffer_ = SliceBuffer(
+ data->buffers[1],
+ ::arrow::TypeTraits<typename T::TypeClass>::bytes_required(data->offset),
+ ::arrow::TypeTraits<typename T::TypeClass>::bytes_required(data->length));
+ return Status::OK();
+ }
+
+ Status Visit(const ::arrow::BooleanArray& array) {
+ auto data = array.data();
+ if (BitUtil::IsMultipleOf8(data->offset)) {
+ buffer_ = SliceBuffer(data->buffers[1], BitUtil::BytesForBits(data->offset),
+ BitUtil::BytesForBits(data->length));
+ return Status::OK();
+ }
+ PARQUET_ASSIGN_OR_THROW(buffer_,
+ ::arrow::internal::CopyBitmap(pool_, data->buffers[1]->data(),
+ data->offset, data->length));
+ return Status::OK();
+ }
+#define NOT_IMPLEMENTED_VISIT(ArrowTypePrefix) \
+ Status Visit(const ::arrow::ArrowTypePrefix##Array& array) { \
+ return Status::NotImplemented("Slicing not implemented for " #ArrowTypePrefix); \
+ }
+
+ NOT_IMPLEMENTED_VISIT(Null);
+ NOT_IMPLEMENTED_VISIT(Union);
+ NOT_IMPLEMENTED_VISIT(List);
+ NOT_IMPLEMENTED_VISIT(LargeList);
+ NOT_IMPLEMENTED_VISIT(Struct);
+ NOT_IMPLEMENTED_VISIT(FixedSizeList);
+ NOT_IMPLEMENTED_VISIT(Dictionary);
+ NOT_IMPLEMENTED_VISIT(Extension);
+
+#undef NOT_IMPLEMENTED_VISIT
+
+ MemoryPool* pool_;
+ std::shared_ptr<Buffer> buffer_;
+};
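+// Usage sketch: ::arrow::VisitArrayInline(array, &slicer) dispatches to the
+// matching Visit overload above and leaves the zero-copy slice (or, for
+// non-byte-aligned booleans, a copied bitmap) in slicer.buffer_.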
+
+internal::LevelInfo ComputeLevelInfo(const ColumnDescriptor* descr) {
+ internal::LevelInfo level_info;
+ level_info.def_level = descr->max_definition_level();
+ level_info.rep_level = descr->max_repetition_level();
+
+ int16_t min_spaced_def_level = descr->max_definition_level();
+ const ::parquet::schema::Node* node = descr->schema_node().get();
+ while (node != nullptr && !node->is_repeated()) {
+ if (node->is_optional()) {
+ min_spaced_def_level--;
+ }
+ node = node->parent();
+ }
+ level_info.repeated_ancestor_def_level = min_spaced_def_level;
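+  // E.g. for a top-level optional leaf: def_level == 1, rep_level == 0, and the
+  // walk above decrements once for the optional node, so
+  // repeated_ancestor_def_level ends up 0 (every def level marks a spaced slot).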
+ return level_info;
+}
+
+template <class T>
+inline const T* AddIfNotNull(const T* base, int64_t offset) {
+ if (base != nullptr) {
+ return base + offset;
+ }
+ return nullptr;
+}
+
+} // namespace
+
+LevelEncoder::LevelEncoder() {}
+LevelEncoder::~LevelEncoder() {}
+
+void LevelEncoder::Init(Encoding::type encoding, int16_t max_level,
+ int num_buffered_values, uint8_t* data, int data_size) {
+ bit_width_ = BitUtil::Log2(max_level + 1);
+ encoding_ = encoding;
+ switch (encoding) {
+ case Encoding::RLE: {
+ rle_encoder_.reset(new RleEncoder(data, data_size, bit_width_));
+ break;
+ }
+ case Encoding::BIT_PACKED: {
+ int num_bytes =
+ static_cast<int>(BitUtil::BytesForBits(num_buffered_values * bit_width_));
+ bit_packed_encoder_.reset(new BitWriter(data, num_bytes));
+ break;
+ }
+ default:
+ throw ParquetException("Unknown encoding type for levels.");
+ }
+}
+
+int LevelEncoder::MaxBufferSize(Encoding::type encoding, int16_t max_level,
+ int num_buffered_values) {
+ int bit_width = BitUtil::Log2(max_level + 1);
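+  // e.g. max_level == 3 needs bit_width == 2 to encode levels 0..3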
+ int num_bytes = 0;
+ switch (encoding) {
+ case Encoding::RLE: {
+      // TODO: Due to the way we currently check if the buffer is full enough,
+      // we need to have MinBufferSize as headroom.
+ num_bytes = RleEncoder::MaxBufferSize(bit_width, num_buffered_values) +
+ RleEncoder::MinBufferSize(bit_width);
+ break;
+ }
+ case Encoding::BIT_PACKED: {
+ num_bytes =
+ static_cast<int>(BitUtil::BytesForBits(num_buffered_values * bit_width));
+ break;
+ }
+ default:
+ throw ParquetException("Unknown encoding type for levels.");
+ }
+ return num_bytes;
+}
+
+int LevelEncoder::Encode(int batch_size, const int16_t* levels) {
+ int num_encoded = 0;
+ if (!rle_encoder_ && !bit_packed_encoder_) {
+ throw ParquetException("Level encoders are not initialized.");
+ }
+
+ if (encoding_ == Encoding::RLE) {
+ for (int i = 0; i < batch_size; ++i) {
+ if (!rle_encoder_->Put(*(levels + i))) {
+ break;
+ }
+ ++num_encoded;
+ }
+ rle_encoder_->Flush();
+ rle_length_ = rle_encoder_->len();
+ } else {
+ for (int i = 0; i < batch_size; ++i) {
+ if (!bit_packed_encoder_->PutValue(*(levels + i), bit_width_)) {
+ break;
+ }
+ ++num_encoded;
+ }
+ bit_packed_encoder_->Flush();
+ }
+ return num_encoded;
+}
+
+// ----------------------------------------------------------------------
+// PageWriter implementation
+
+// This subclass delimits pages appearing in a serialized stream, each preceded
+// by a serialized Thrift format::PageHeader indicating the type of each page
+// and the page metadata.
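+// The resulting stream is laid out as [PageHeader][page bytes][PageHeader]
+// [page bytes]..., with the dictionary page (if any) written first.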
+class SerializedPageWriter : public PageWriter {
+ public:
+ SerializedPageWriter(std::shared_ptr<ArrowOutputStream> sink, Compression::type codec,
+ int compression_level, ColumnChunkMetaDataBuilder* metadata,
+ int16_t row_group_ordinal, int16_t column_chunk_ordinal,
+ MemoryPool* pool = ::arrow::default_memory_pool(),
+ std::shared_ptr<Encryptor> meta_encryptor = nullptr,
+ std::shared_ptr<Encryptor> data_encryptor = nullptr)
+ : sink_(std::move(sink)),
+ metadata_(metadata),
+ pool_(pool),
+ num_values_(0),
+ dictionary_page_offset_(0),
+ data_page_offset_(0),
+ total_uncompressed_size_(0),
+ total_compressed_size_(0),
+ page_ordinal_(0),
+ row_group_ordinal_(row_group_ordinal),
+ column_ordinal_(column_chunk_ordinal),
+ meta_encryptor_(std::move(meta_encryptor)),
+ data_encryptor_(std::move(data_encryptor)),
+ encryption_buffer_(AllocateBuffer(pool, 0)) {
+ if (data_encryptor_ != nullptr || meta_encryptor_ != nullptr) {
+ InitEncryption();
+ }
+ compressor_ = GetCodec(codec, compression_level);
+ thrift_serializer_.reset(new ThriftSerializer);
+ }
+
+ int64_t WriteDictionaryPage(const DictionaryPage& page) override {
+ int64_t uncompressed_size = page.size();
+ std::shared_ptr<Buffer> compressed_data;
+ if (has_compressor()) {
+ auto buffer = std::static_pointer_cast<ResizableBuffer>(
+ AllocateBuffer(pool_, uncompressed_size));
+ Compress(*(page.buffer().get()), buffer.get());
+ compressed_data = std::static_pointer_cast<Buffer>(buffer);
+ } else {
+ compressed_data = page.buffer();
+ }
+
+ format::DictionaryPageHeader dict_page_header;
+ dict_page_header.__set_num_values(page.num_values());
+ dict_page_header.__set_encoding(ToThrift(page.encoding()));
+ dict_page_header.__set_is_sorted(page.is_sorted());
+
+ const uint8_t* output_data_buffer = compressed_data->data();
+ int32_t output_data_len = static_cast<int32_t>(compressed_data->size());
+
+ if (data_encryptor_.get()) {
+ UpdateEncryption(encryption::kDictionaryPage);
+ PARQUET_THROW_NOT_OK(encryption_buffer_->Resize(
+ data_encryptor_->CiphertextSizeDelta() + output_data_len, false));
+ output_data_len = data_encryptor_->Encrypt(compressed_data->data(), output_data_len,
+ encryption_buffer_->mutable_data());
+ output_data_buffer = encryption_buffer_->data();
+ }
+
+ format::PageHeader page_header;
+ page_header.__set_type(format::PageType::DICTIONARY_PAGE);
+ page_header.__set_uncompressed_page_size(static_cast<int32_t>(uncompressed_size));
+ page_header.__set_compressed_page_size(static_cast<int32_t>(output_data_len));
+ page_header.__set_dictionary_page_header(dict_page_header);
+ // TODO(PARQUET-594) crc checksum
+
+ PARQUET_ASSIGN_OR_THROW(int64_t start_pos, sink_->Tell());
+ if (dictionary_page_offset_ == 0) {
+ dictionary_page_offset_ = start_pos;
+ }
+
+ if (meta_encryptor_) {
+ UpdateEncryption(encryption::kDictionaryPageHeader);
+ }
+ const int64_t header_size =
+ thrift_serializer_->Serialize(&page_header, sink_.get(), meta_encryptor_);
+
+ PARQUET_THROW_NOT_OK(sink_->Write(output_data_buffer, output_data_len));
+
+ total_uncompressed_size_ += uncompressed_size + header_size;
+ total_compressed_size_ += output_data_len + header_size;
+ ++dict_encoding_stats_[page.encoding()];
+ return uncompressed_size + header_size;
+ }
+
+ void Close(bool has_dictionary, bool fallback) override {
+ if (meta_encryptor_ != nullptr) {
+ UpdateEncryption(encryption::kColumnMetaData);
+ }
+    // index_page_offset = -1 since index pages are not supported
+ metadata_->Finish(num_values_, dictionary_page_offset_, -1, data_page_offset_,
+ total_compressed_size_, total_uncompressed_size_, has_dictionary,
+ fallback, dict_encoding_stats_, data_encoding_stats_,
+ meta_encryptor_);
+ // Write metadata at end of column chunk
+ metadata_->WriteTo(sink_.get());
+ }
+
+  // Compress a buffer.
+ void Compress(const Buffer& src_buffer, ResizableBuffer* dest_buffer) override {
+ DCHECK(compressor_ != nullptr);
+
+ // Compress the data
+ int64_t max_compressed_size =
+ compressor_->MaxCompressedLen(src_buffer.size(), src_buffer.data());
+
+    // Pass shrink_to_fit = false: the underlying buffer only keeps growing,
+    // and resizing to a smaller size does not reallocate.
+ PARQUET_THROW_NOT_OK(dest_buffer->Resize(max_compressed_size, false));
+
+ PARQUET_ASSIGN_OR_THROW(
+ int64_t compressed_size,
+ compressor_->Compress(src_buffer.size(), src_buffer.data(), max_compressed_size,
+ dest_buffer->mutable_data()));
+ PARQUET_THROW_NOT_OK(dest_buffer->Resize(compressed_size, false));
+ }
+
+ int64_t WriteDataPage(const DataPage& page) override {
+ const int64_t uncompressed_size = page.uncompressed_size();
+ std::shared_ptr<Buffer> compressed_data = page.buffer();
+ const uint8_t* output_data_buffer = compressed_data->data();
+ int32_t output_data_len = static_cast<int32_t>(compressed_data->size());
+
+ if (data_encryptor_.get()) {
+ PARQUET_THROW_NOT_OK(encryption_buffer_->Resize(
+ data_encryptor_->CiphertextSizeDelta() + output_data_len, false));
+ UpdateEncryption(encryption::kDataPage);
+ output_data_len = data_encryptor_->Encrypt(compressed_data->data(), output_data_len,
+ encryption_buffer_->mutable_data());
+ output_data_buffer = encryption_buffer_->data();
+ }
+
+ format::PageHeader page_header;
+ page_header.__set_uncompressed_page_size(static_cast<int32_t>(uncompressed_size));
+ page_header.__set_compressed_page_size(static_cast<int32_t>(output_data_len));
+ // TODO(PARQUET-594) crc checksum
+
+ if (page.type() == PageType::DATA_PAGE) {
+ const DataPageV1& v1_page = checked_cast<const DataPageV1&>(page);
+ SetDataPageHeader(page_header, v1_page);
+ } else if (page.type() == PageType::DATA_PAGE_V2) {
+ const DataPageV2& v2_page = checked_cast<const DataPageV2&>(page);
+ SetDataPageV2Header(page_header, v2_page);
+ } else {
+ throw ParquetException("Unexpected page type");
+ }
+
+ PARQUET_ASSIGN_OR_THROW(int64_t start_pos, sink_->Tell());
+ if (page_ordinal_ == 0) {
+ data_page_offset_ = start_pos;
+ }
+
+ if (meta_encryptor_) {
+ UpdateEncryption(encryption::kDataPageHeader);
+ }
+ const int64_t header_size =
+ thrift_serializer_->Serialize(&page_header, sink_.get(), meta_encryptor_);
+ PARQUET_THROW_NOT_OK(sink_->Write(output_data_buffer, output_data_len));
+
+ total_uncompressed_size_ += uncompressed_size + header_size;
+ total_compressed_size_ += output_data_len + header_size;
+ num_values_ += page.num_values();
+ ++data_encoding_stats_[page.encoding()];
+ ++page_ordinal_;
+ return uncompressed_size + header_size;
+ }
+
+ void SetDataPageHeader(format::PageHeader& page_header, const DataPageV1& page) {
+ format::DataPageHeader data_page_header;
+ data_page_header.__set_num_values(page.num_values());
+ data_page_header.__set_encoding(ToThrift(page.encoding()));
+ data_page_header.__set_definition_level_encoding(
+ ToThrift(page.definition_level_encoding()));
+ data_page_header.__set_repetition_level_encoding(
+ ToThrift(page.repetition_level_encoding()));
+ data_page_header.__set_statistics(ToThrift(page.statistics()));
+
+ page_header.__set_type(format::PageType::DATA_PAGE);
+ page_header.__set_data_page_header(data_page_header);
+ }
+
+  void SetDataPageV2Header(format::PageHeader& page_header, const DataPageV2& page) {
+ format::DataPageHeaderV2 data_page_header;
+ data_page_header.__set_num_values(page.num_values());
+ data_page_header.__set_num_nulls(page.num_nulls());
+ data_page_header.__set_num_rows(page.num_rows());
+ data_page_header.__set_encoding(ToThrift(page.encoding()));
+
+ data_page_header.__set_definition_levels_byte_length(
+ page.definition_levels_byte_length());
+ data_page_header.__set_repetition_levels_byte_length(
+ page.repetition_levels_byte_length());
+
+ data_page_header.__set_is_compressed(page.is_compressed());
+ data_page_header.__set_statistics(ToThrift(page.statistics()));
+
+ page_header.__set_type(format::PageType::DATA_PAGE_V2);
+ page_header.__set_data_page_header_v2(data_page_header);
+ }
+
+ bool has_compressor() override { return (compressor_ != nullptr); }
+
+ int64_t num_values() { return num_values_; }
+
+ int64_t dictionary_page_offset() { return dictionary_page_offset_; }
+
+ int64_t data_page_offset() { return data_page_offset_; }
+
+ int64_t total_compressed_size() { return total_compressed_size_; }
+
+ int64_t total_uncompressed_size() { return total_uncompressed_size_; }
+
+ private:
+ // To allow UpdateEncryption on Close
+ friend class BufferedPageWriter;
+
+ void InitEncryption() {
+ // Prepare the AAD for quick update later.
+ if (data_encryptor_ != nullptr) {
+ data_page_aad_ = encryption::CreateModuleAad(
+ data_encryptor_->file_aad(), encryption::kDataPage, row_group_ordinal_,
+ column_ordinal_, kNonPageOrdinal);
+ }
+ if (meta_encryptor_ != nullptr) {
+ data_page_header_aad_ = encryption::CreateModuleAad(
+ meta_encryptor_->file_aad(), encryption::kDataPageHeader, row_group_ordinal_,
+ column_ordinal_, kNonPageOrdinal);
+ }
+ }
+
+ void UpdateEncryption(int8_t module_type) {
+ switch (module_type) {
+ case encryption::kColumnMetaData: {
+ meta_encryptor_->UpdateAad(encryption::CreateModuleAad(
+ meta_encryptor_->file_aad(), module_type, row_group_ordinal_, column_ordinal_,
+ kNonPageOrdinal));
+ break;
+ }
+ case encryption::kDataPage: {
+ encryption::QuickUpdatePageAad(data_page_aad_, page_ordinal_);
+ data_encryptor_->UpdateAad(data_page_aad_);
+ break;
+ }
+ case encryption::kDataPageHeader: {
+ encryption::QuickUpdatePageAad(data_page_header_aad_, page_ordinal_);
+ meta_encryptor_->UpdateAad(data_page_header_aad_);
+ break;
+ }
+ case encryption::kDictionaryPageHeader: {
+ meta_encryptor_->UpdateAad(encryption::CreateModuleAad(
+ meta_encryptor_->file_aad(), module_type, row_group_ordinal_, column_ordinal_,
+ kNonPageOrdinal));
+ break;
+ }
+ case encryption::kDictionaryPage: {
+ data_encryptor_->UpdateAad(encryption::CreateModuleAad(
+ data_encryptor_->file_aad(), module_type, row_group_ordinal_, column_ordinal_,
+ kNonPageOrdinal));
+ break;
+ }
+ default:
+ throw ParquetException("Unknown module type in UpdateEncryption");
+ }
+ }
+
+ std::shared_ptr<ArrowOutputStream> sink_;
+ ColumnChunkMetaDataBuilder* metadata_;
+ MemoryPool* pool_;
+ int64_t num_values_;
+ int64_t dictionary_page_offset_;
+ int64_t data_page_offset_;
+ int64_t total_uncompressed_size_;
+ int64_t total_compressed_size_;
+ int16_t page_ordinal_;
+ int16_t row_group_ordinal_;
+ int16_t column_ordinal_;
+
+ std::unique_ptr<ThriftSerializer> thrift_serializer_;
+
+ // Compression codec to use.
+ std::unique_ptr<::arrow::util::Codec> compressor_;
+
+ std::string data_page_aad_;
+ std::string data_page_header_aad_;
+
+ std::shared_ptr<Encryptor> meta_encryptor_;
+ std::shared_ptr<Encryptor> data_encryptor_;
+
+ std::shared_ptr<ResizableBuffer> encryption_buffer_;
+
+ std::map<Encoding::type, int32_t> dict_encoding_stats_;
+ std::map<Encoding::type, int32_t> data_encoding_stats_;
+};
+
+// This implementation of the PageWriter buffers pages in memory and writes
+// them to the final sink on Close().
+class BufferedPageWriter : public PageWriter {
+ public:
+ BufferedPageWriter(std::shared_ptr<ArrowOutputStream> sink, Compression::type codec,
+ int compression_level, ColumnChunkMetaDataBuilder* metadata,
+ int16_t row_group_ordinal, int16_t current_column_ordinal,
+ MemoryPool* pool = ::arrow::default_memory_pool(),
+ std::shared_ptr<Encryptor> meta_encryptor = nullptr,
+ std::shared_ptr<Encryptor> data_encryptor = nullptr)
+ : final_sink_(std::move(sink)), metadata_(metadata), has_dictionary_pages_(false) {
+ in_memory_sink_ = CreateOutputStream(pool);
+ pager_ = std::unique_ptr<SerializedPageWriter>(
+ new SerializedPageWriter(in_memory_sink_, codec, compression_level, metadata,
+ row_group_ordinal, current_column_ordinal, pool,
+ std::move(meta_encryptor), std::move(data_encryptor)));
+ }
+
+ int64_t WriteDictionaryPage(const DictionaryPage& page) override {
+ has_dictionary_pages_ = true;
+ return pager_->WriteDictionaryPage(page);
+ }
+
+ void Close(bool has_dictionary, bool fallback) override {
+ if (pager_->meta_encryptor_ != nullptr) {
+ pager_->UpdateEncryption(encryption::kColumnMetaData);
+ }
+    // index_page_offset = -1 since index pages are not supported
+ PARQUET_ASSIGN_OR_THROW(int64_t final_position, final_sink_->Tell());
+ // dictionary page offset should be 0 iff there are no dictionary pages
+ auto dictionary_page_offset =
+ has_dictionary_pages_ ? pager_->dictionary_page_offset() + final_position : 0;
+ metadata_->Finish(pager_->num_values(), dictionary_page_offset, -1,
+ pager_->data_page_offset() + final_position,
+ pager_->total_compressed_size(), pager_->total_uncompressed_size(),
+ has_dictionary, fallback, pager_->dict_encoding_stats_,
+ pager_->data_encoding_stats_, pager_->meta_encryptor_);
+
+ // Write metadata at end of column chunk
+ metadata_->WriteTo(in_memory_sink_.get());
+
+ // flush everything to the serialized sink
+ PARQUET_ASSIGN_OR_THROW(auto buffer, in_memory_sink_->Finish());
+ PARQUET_THROW_NOT_OK(final_sink_->Write(buffer));
+ }
+
+ int64_t WriteDataPage(const DataPage& page) override {
+ return pager_->WriteDataPage(page);
+ }
+
+ void Compress(const Buffer& src_buffer, ResizableBuffer* dest_buffer) override {
+ pager_->Compress(src_buffer, dest_buffer);
+ }
+
+ bool has_compressor() override { return pager_->has_compressor(); }
+
+ private:
+ std::shared_ptr<ArrowOutputStream> final_sink_;
+ ColumnChunkMetaDataBuilder* metadata_;
+ std::shared_ptr<::arrow::io::BufferOutputStream> in_memory_sink_;
+ std::unique_ptr<SerializedPageWriter> pager_;
+ bool has_dictionary_pages_;
+};
+
+std::unique_ptr<PageWriter> PageWriter::Open(
+ std::shared_ptr<ArrowOutputStream> sink, Compression::type codec,
+ int compression_level, ColumnChunkMetaDataBuilder* metadata,
+ int16_t row_group_ordinal, int16_t column_chunk_ordinal, MemoryPool* pool,
+ bool buffered_row_group, std::shared_ptr<Encryptor> meta_encryptor,
+ std::shared_ptr<Encryptor> data_encryptor) {
+ if (buffered_row_group) {
+ return std::unique_ptr<PageWriter>(
+ new BufferedPageWriter(std::move(sink), codec, compression_level, metadata,
+ row_group_ordinal, column_chunk_ordinal, pool,
+ std::move(meta_encryptor), std::move(data_encryptor)));
+ } else {
+ return std::unique_ptr<PageWriter>(
+ new SerializedPageWriter(std::move(sink), codec, compression_level, metadata,
+ row_group_ordinal, column_chunk_ordinal, pool,
+ std::move(meta_encryptor), std::move(data_encryptor)));
+ }
+}
+
+// ----------------------------------------------------------------------
+// ColumnWriter
+
+const std::shared_ptr<WriterProperties>& default_writer_properties() {
+ static std::shared_ptr<WriterProperties> default_writer_properties =
+ WriterProperties::Builder().build();
+ return default_writer_properties;
+}
+
+class ColumnWriterImpl {
+ public:
+ ColumnWriterImpl(ColumnChunkMetaDataBuilder* metadata,
+ std::unique_ptr<PageWriter> pager, const bool use_dictionary,
+ Encoding::type encoding, const WriterProperties* properties)
+ : metadata_(metadata),
+ descr_(metadata->descr()),
+ level_info_(ComputeLevelInfo(metadata->descr())),
+ pager_(std::move(pager)),
+ has_dictionary_(use_dictionary),
+ encoding_(encoding),
+ properties_(properties),
+ allocator_(properties->memory_pool()),
+ num_buffered_values_(0),
+ num_buffered_encoded_values_(0),
+ rows_written_(0),
+ total_bytes_written_(0),
+ total_compressed_bytes_(0),
+ closed_(false),
+ fallback_(false),
+ definition_levels_sink_(allocator_),
+ repetition_levels_sink_(allocator_) {
+ definition_levels_rle_ =
+ std::static_pointer_cast<ResizableBuffer>(AllocateBuffer(allocator_, 0));
+ repetition_levels_rle_ =
+ std::static_pointer_cast<ResizableBuffer>(AllocateBuffer(allocator_, 0));
+ uncompressed_data_ =
+ std::static_pointer_cast<ResizableBuffer>(AllocateBuffer(allocator_, 0));
+
+ if (pager_->has_compressor()) {
+ compressor_temp_buffer_ =
+ std::static_pointer_cast<ResizableBuffer>(AllocateBuffer(allocator_, 0));
+ }
+ }
+
+ virtual ~ColumnWriterImpl() = default;
+
+ int64_t Close();
+
+ protected:
+ virtual std::shared_ptr<Buffer> GetValuesBuffer() = 0;
+
+ // Serializes Dictionary Page if enabled
+ virtual void WriteDictionaryPage() = 0;
+
+ // Plain-encoded statistics of the current page
+ virtual EncodedStatistics GetPageStatistics() = 0;
+
+ // Plain-encoded statistics of the whole chunk
+ virtual EncodedStatistics GetChunkStatistics() = 0;
+
+ // Merges page statistics into chunk statistics, then resets the values
+ virtual void ResetPageStatistics() = 0;
+
+ // Adds Data Pages to an in-memory buffer in dictionary encoding mode;
+ // serializes the Data Pages immediately in other encoding modes
+ void AddDataPage();
+
+ void BuildDataPageV1(int64_t definition_levels_rle_size,
+ int64_t repetition_levels_rle_size, int64_t uncompressed_size,
+ const std::shared_ptr<Buffer>& values);
+ void BuildDataPageV2(int64_t definition_levels_rle_size,
+ int64_t repetition_levels_rle_size, int64_t uncompressed_size,
+ const std::shared_ptr<Buffer>& values);
+
+ // Serializes Data Pages
+ void WriteDataPage(const DataPage& page) {
+ total_bytes_written_ += pager_->WriteDataPage(page);
+ }
+
+ // Write multiple definition levels
+ void WriteDefinitionLevels(int64_t num_levels, const int16_t* levels) {
+ DCHECK(!closed_);
+ PARQUET_THROW_NOT_OK(
+ definition_levels_sink_.Append(levels, sizeof(int16_t) * num_levels));
+ }
+
+ // Write multiple repetition levels
+ void WriteRepetitionLevels(int64_t num_levels, const int16_t* levels) {
+ DCHECK(!closed_);
+ PARQUET_THROW_NOT_OK(
+ repetition_levels_sink_.Append(levels, sizeof(int16_t) * num_levels));
+ }
+
+ // RLE encode the src_buffer into dest_buffer and return the encoded size
+ int64_t RleEncodeLevels(const void* src_buffer, ResizableBuffer* dest_buffer,
+ int16_t max_level, bool include_length_prefix = true);
+
+ // Serialize the buffered Data Pages
+ void FlushBufferedDataPages();
+
+ ColumnChunkMetaDataBuilder* metadata_;
+ const ColumnDescriptor* descr_;
+ // Scratch buffer used if validity bits need to be recalculated.
+ std::shared_ptr<ResizableBuffer> bits_buffer_;
+ const internal::LevelInfo level_info_;
+
+ std::unique_ptr<PageWriter> pager_;
+
+ bool has_dictionary_;
+ Encoding::type encoding_;
+ const WriterProperties* properties_;
+
+ LevelEncoder level_encoder_;
+
+ MemoryPool* allocator_;
+
+ // The total number of values stored in the data page. This is the maximum of
+ // the number of encoded definition levels or encoded values. For
+ // non-repeated, required columns, this is equal to the number of encoded
+ // values. For repeated or optional values, there may be fewer data values
+ // than levels, and this tells you how many encoded levels there are in that
+ // case.
+ int64_t num_buffered_values_;
+
+ // The total number of stored values. For repeated or optional values, this
+ // number may be lower than num_buffered_values_.
+ int64_t num_buffered_encoded_values_;
+
+ // Total number of rows written with this ColumnWriter
+ int rows_written_;
+
+ // Records the total number of uncompressed bytes written by the serializer
+ int64_t total_bytes_written_;
+
+ // Records the current number of compressed bytes in a column
+ int64_t total_compressed_bytes_;
+
+ // Flag to check if the Writer has been closed
+ bool closed_;
+
+ // Flag to infer if dictionary encoding has fallen back to PLAIN
+ bool fallback_;
+
+ ::arrow::BufferBuilder definition_levels_sink_;
+ ::arrow::BufferBuilder repetition_levels_sink_;
+
+ std::shared_ptr<ResizableBuffer> definition_levels_rle_;
+ std::shared_ptr<ResizableBuffer> repetition_levels_rle_;
+
+ std::shared_ptr<ResizableBuffer> uncompressed_data_;
+ std::shared_ptr<ResizableBuffer> compressor_temp_buffer_;
+
+ std::vector<std::unique_ptr<DataPage>> data_pages_;
+
+ private:
+ void InitSinks() {
+ definition_levels_sink_.Rewind(0);
+ repetition_levels_sink_.Rewind(0);
+ }
+
+ // Concatenate the encoded levels and values into one buffer
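+ // The resulting page body layout is
+ // [repetition levels][definition levels][values].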
+ void ConcatenateBuffers(int64_t definition_levels_rle_size,
+ int64_t repetition_levels_rle_size,
+ const std::shared_ptr<Buffer>& values, uint8_t* combined) {
+ memcpy(combined, repetition_levels_rle_->data(), repetition_levels_rle_size);
+ combined += repetition_levels_rle_size;
+ memcpy(combined, definition_levels_rle_->data(), definition_levels_rle_size);
+ combined += definition_levels_rle_size;
+ memcpy(combined, values->data(), values->size());
+ }
+};
+
+// return the size of the encoded buffer
+int64_t ColumnWriterImpl::RleEncodeLevels(const void* src_buffer,
+ ResizableBuffer* dest_buffer, int16_t max_level,
+ bool include_length_prefix) {
+ // V1 DataPage includes the length of the RLE level as a prefix.
+ int32_t prefix_size = include_length_prefix ? sizeof(int32_t) : 0;
+
+ // TODO: This only works due to some RLE specifics
+ int64_t rle_size = LevelEncoder::MaxBufferSize(Encoding::RLE, max_level,
+ static_cast<int>(num_buffered_values_)) +
+ prefix_size;
+
+ // Use ::arrow::Buffer shrink_to_fit = false: the underlying buffer only keeps
+ // growing, and resizing to a smaller size does not reallocate.
+ PARQUET_THROW_NOT_OK(dest_buffer->Resize(rle_size, false));
+
+ level_encoder_.Init(Encoding::RLE, max_level, static_cast<int>(num_buffered_values_),
+ dest_buffer->mutable_data() + prefix_size,
+ static_cast<int>(dest_buffer->size() - prefix_size));
+ int encoded = level_encoder_.Encode(static_cast<int>(num_buffered_values_),
+ reinterpret_cast<const int16_t*>(src_buffer));
+ DCHECK_EQ(encoded, num_buffered_values_);
+
+ if (include_length_prefix) {
+ reinterpret_cast<int32_t*>(dest_buffer->mutable_data())[0] = level_encoder_.len();
+ }
+
+ return level_encoder_.len() + prefix_size;
+}
+
+void ColumnWriterImpl::AddDataPage() {
+ int64_t definition_levels_rle_size = 0;
+ int64_t repetition_levels_rle_size = 0;
+
+ std::shared_ptr<Buffer> values = GetValuesBuffer();
+ bool is_v1_data_page = properties_->data_page_version() == ParquetDataPageVersion::V1;
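+ // V1 pages prefix the RLE-encoded levels with their length and compress
+ // levels and values together; V2 pages omit the length prefix and leave the
+ // levels uncompressed (see BuildDataPageV2 below).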
+
+ if (descr_->max_definition_level() > 0) {
+ definition_levels_rle_size = RleEncodeLevels(
+ definition_levels_sink_.data(), definition_levels_rle_.get(),
+ descr_->max_definition_level(), /*include_length_prefix=*/is_v1_data_page);
+ }
+
+ if (descr_->max_repetition_level() > 0) {
+ repetition_levels_rle_size = RleEncodeLevels(
+ repetition_levels_sink_.data(), repetition_levels_rle_.get(),
+ descr_->max_repetition_level(), /*include_length_prefix=*/is_v1_data_page);
+ }
+
+ int64_t uncompressed_size =
+ definition_levels_rle_size + repetition_levels_rle_size + values->size();
+
+ if (is_v1_data_page) {
+ BuildDataPageV1(definition_levels_rle_size, repetition_levels_rle_size,
+ uncompressed_size, values);
+ } else {
+ BuildDataPageV2(definition_levels_rle_size, repetition_levels_rle_size,
+ uncompressed_size, values);
+ }
+
+ // Re-initialize the sinks for next Page.
+ InitSinks();
+ num_buffered_values_ = 0;
+ num_buffered_encoded_values_ = 0;
+}
+
+void ColumnWriterImpl::BuildDataPageV1(int64_t definition_levels_rle_size,
+ int64_t repetition_levels_rle_size,
+ int64_t uncompressed_size,
+ const std::shared_ptr<Buffer>& values) {
+ // Use ::arrow::Buffer shrink_to_fit = false: the underlying buffer only keeps
+ // growing, and resizing to a smaller size does not reallocate.
+ PARQUET_THROW_NOT_OK(uncompressed_data_->Resize(uncompressed_size, false));
+ ConcatenateBuffers(definition_levels_rle_size, repetition_levels_rle_size, values,
+ uncompressed_data_->mutable_data());
+
+ EncodedStatistics page_stats = GetPageStatistics();
+ page_stats.ApplyStatSizeLimits(properties_->max_statistics_size(descr_->path()));
+ page_stats.set_is_signed(SortOrder::SIGNED == descr_->sort_order());
+ ResetPageStatistics();
+
+ std::shared_ptr<Buffer> compressed_data;
+ if (pager_->has_compressor()) {
+ pager_->Compress(*(uncompressed_data_.get()), compressor_temp_buffer_.get());
+ compressed_data = compressor_temp_buffer_;
+ } else {
+ compressed_data = uncompressed_data_;
+ }
+
+ // Write the page to OutputStream eagerly if there is no dictionary or
+ // if dictionary encoding has fallen back to PLAIN
+ if (has_dictionary_ && !fallback_) { // Save pages until end of dictionary encoding
+ PARQUET_ASSIGN_OR_THROW(
+ auto compressed_data_copy,
+ compressed_data->CopySlice(0, compressed_data->size(), allocator_));
+ std::unique_ptr<DataPage> page_ptr(new DataPageV1(
+ compressed_data_copy, static_cast<int32_t>(num_buffered_values_), encoding_,
+ Encoding::RLE, Encoding::RLE, uncompressed_size, page_stats));
+ total_compressed_bytes_ += page_ptr->size() + sizeof(format::PageHeader);
+
+ data_pages_.push_back(std::move(page_ptr));
+ } else { // Eagerly write pages
+ DataPageV1 page(compressed_data, static_cast<int32_t>(num_buffered_values_),
+ encoding_, Encoding::RLE, Encoding::RLE, uncompressed_size,
+ page_stats);
+ WriteDataPage(page);
+ }
+}
+
+void ColumnWriterImpl::BuildDataPageV2(int64_t definition_levels_rle_size,
+ int64_t repetition_levels_rle_size,
+ int64_t uncompressed_size,
+ const std::shared_ptr<Buffer>& values) {
+ // Compress the values if needed. Repetition and definition levels are uncompressed in
+ // V2.
+ std::shared_ptr<Buffer> compressed_values;
+ if (pager_->has_compressor()) {
+ pager_->Compress(*values, compressor_temp_buffer_.get());
+ compressed_values = compressor_temp_buffer_;
+ } else {
+ compressed_values = values;
+ }
+
+ // Concatenate uncompressed levels and the possibly compressed values
+ int64_t combined_size =
+ definition_levels_rle_size + repetition_levels_rle_size + compressed_values->size();
+ std::shared_ptr<ResizableBuffer> combined = AllocateBuffer(allocator_, combined_size);
+
+ ConcatenateBuffers(definition_levels_rle_size, repetition_levels_rle_size,
+ compressed_values, combined->mutable_data());
+
+ EncodedStatistics page_stats = GetPageStatistics();
+ page_stats.ApplyStatSizeLimits(properties_->max_statistics_size(descr_->path()));
+ page_stats.set_is_signed(SortOrder::SIGNED == descr_->sort_order());
+ ResetPageStatistics();
+
+ int32_t num_values = static_cast<int32_t>(num_buffered_values_);
+ int32_t null_count = static_cast<int32_t>(page_stats.null_count);
+ int32_t def_levels_byte_length = static_cast<int32_t>(definition_levels_rle_size);
+ int32_t rep_levels_byte_length = static_cast<int32_t>(repetition_levels_rle_size);
+
+ // Write the page to OutputStream eagerly if there is no dictionary or
+ // if dictionary encoding has fallen back to PLAIN
+ if (has_dictionary_ && !fallback_) { // Save pages until end of dictionary encoding
+ PARQUET_ASSIGN_OR_THROW(auto data_copy,
+ combined->CopySlice(0, combined->size(), allocator_));
+ std::unique_ptr<DataPage> page_ptr(new DataPageV2(
+ data_copy, num_values, null_count, num_values, encoding_, def_levels_byte_length,
+ rep_levels_byte_length, uncompressed_size, pager_->has_compressor()));
+ total_compressed_bytes_ += page_ptr->size() + sizeof(format::PageHeader);
+ data_pages_.push_back(std::move(page_ptr));
+ } else {
+ DataPageV2 page(combined, num_values, null_count, num_values, encoding_,
+ def_levels_byte_length, rep_levels_byte_length, uncompressed_size,
+ pager_->has_compressor());
+ WriteDataPage(page);
+ }
+}
+
+int64_t ColumnWriterImpl::Close() {
+ if (!closed_) {
+ closed_ = true;
+ if (has_dictionary_ && !fallback_) {
+ WriteDictionaryPage();
+ }
+
+ FlushBufferedDataPages();
+
+ EncodedStatistics chunk_statistics = GetChunkStatistics();
+ chunk_statistics.ApplyStatSizeLimits(
+ properties_->max_statistics_size(descr_->path()));
+ chunk_statistics.set_is_signed(SortOrder::SIGNED == descr_->sort_order());
+
+ // Write stats only if the column has at least one row written
+ if (rows_written_ > 0 && chunk_statistics.is_set()) {
+ metadata_->SetStatistics(chunk_statistics);
+ }
+ pager_->Close(has_dictionary_, fallback_);
+ }
+
+ return total_bytes_written_;
+}
+
+void ColumnWriterImpl::FlushBufferedDataPages() {
+ // Write all outstanding data to a new page
+ if (num_buffered_values_ > 0) {
+ AddDataPage();
+ }
+ for (const auto& page_ptr : data_pages_) {
+ WriteDataPage(*page_ptr);
+ }
+ data_pages_.clear();
+ total_compressed_bytes_ = 0;
+}
+
+// ----------------------------------------------------------------------
+// TypedColumnWriter
+
+template <typename Action>
+inline void DoInBatches(int64_t total, int64_t batch_size, Action&& action) {
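+ // For example, total = 2500 with batch_size = 1000 invokes action(0, 1000),
+ // action(1000, 1000), then action(2000, 500) for the remainder.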
+ int64_t num_batches = total / batch_size;
+ for (int64_t round = 0; round < num_batches; round++) {
+ action(round * batch_size, batch_size);
+ }
+ // Write the remaining values
+ if (total % batch_size > 0) {
+ action(num_batches * batch_size, total % batch_size);
+ }
+}
+
+bool DictionaryDirectWriteSupported(const ::arrow::Array& array) {
+ DCHECK_EQ(array.type_id(), ::arrow::Type::DICTIONARY);
+ const ::arrow::DictionaryType& dict_type =
+ static_cast<const ::arrow::DictionaryType&>(*array.type());
+ return ::arrow::is_base_binary_like(dict_type.value_type()->id());
+}
+
+Status ConvertDictionaryToDense(const ::arrow::Array& array, MemoryPool* pool,
+ std::shared_ptr<::arrow::Array>* out) {
+ const ::arrow::DictionaryType& dict_type =
+ static_cast<const ::arrow::DictionaryType&>(*array.type());
+
+ ::arrow::compute::ExecContext ctx(pool);
+ ARROW_ASSIGN_OR_RAISE(Datum cast_output,
+ ::arrow::compute::Cast(array.data(), dict_type.value_type(),
+ ::arrow::compute::CastOptions(), &ctx));
+ *out = cast_output.make_array();
+ return Status::OK();
+}
+
+static inline bool IsDictionaryEncoding(Encoding::type encoding) {
+ return encoding == Encoding::PLAIN_DICTIONARY;
+}
+
+template <typename DType>
+class TypedColumnWriterImpl : public ColumnWriterImpl, public TypedColumnWriter<DType> {
+ public:
+ using T = typename DType::c_type;
+
+ TypedColumnWriterImpl(ColumnChunkMetaDataBuilder* metadata,
+ std::unique_ptr<PageWriter> pager, const bool use_dictionary,
+ Encoding::type encoding, const WriterProperties* properties)
+ : ColumnWriterImpl(metadata, std::move(pager), use_dictionary, encoding,
+ properties) {
+ current_encoder_ = MakeEncoder(DType::type_num, encoding, use_dictionary, descr_,
+ properties->memory_pool());
+
+ if (properties->statistics_enabled(descr_->path()) &&
+ (SortOrder::UNKNOWN != descr_->sort_order())) {
+ page_statistics_ = MakeStatistics<DType>(descr_, allocator_);
+ chunk_statistics_ = MakeStatistics<DType>(descr_, allocator_);
+ }
+ }
+
+ int64_t Close() override { return ColumnWriterImpl::Close(); }
+
+ int64_t WriteBatch(int64_t num_values, const int16_t* def_levels,
+ const int16_t* rep_levels, const T* values) override {
+ // We check for DataPage limits only after we have inserted the values. If a user
+ // writes a large number of values, the DataPage size can be much above the limit.
+ // The purpose of this chunking is to bound that overshoot. Even if a user writes
+ // a large number of values, the chunking ensures that AddDataPage() is called at
+ // a reasonable page size limit
+ int64_t value_offset = 0;
+
+ auto WriteChunk = [&](int64_t offset, int64_t batch_size) {
+ int64_t values_to_write = WriteLevels(batch_size, AddIfNotNull(def_levels, offset),
+ AddIfNotNull(rep_levels, offset));
+
+ // PARQUET-780
+ if (values_to_write > 0) {
+ DCHECK_NE(nullptr, values);
+ }
+ WriteValues(AddIfNotNull(values, value_offset), values_to_write,
+ batch_size - values_to_write);
+ CommitWriteAndCheckPageLimit(batch_size, values_to_write);
+ value_offset += values_to_write;
+
+ // Dictionary size checked separately from data page size since we
+ // circumvent this check when writing ::arrow::DictionaryArray directly
+ CheckDictionarySizeLimit();
+ };
+ DoInBatches(num_values, properties_->write_batch_size(), WriteChunk);
+ return value_offset;
+ }
+
+ void WriteBatchSpaced(int64_t num_values, const int16_t* def_levels,
+ const int16_t* rep_levels, const uint8_t* valid_bits,
+ int64_t valid_bits_offset, const T* values) override {
+ // Like WriteBatch, but for spaced values
+ int64_t value_offset = 0;
+ auto WriteChunk = [&](int64_t offset, int64_t batch_size) {
+ int64_t batch_num_values = 0;
+ int64_t batch_num_spaced_values = 0;
+ int64_t null_count;
+ MaybeCalculateValidityBits(AddIfNotNull(def_levels, offset), batch_size,
+ &batch_num_values, &batch_num_spaced_values,
+ &null_count);
+
+ WriteLevelsSpaced(batch_size, AddIfNotNull(def_levels, offset),
+ AddIfNotNull(rep_levels, offset));
+ if (bits_buffer_ != nullptr) {
+ WriteValuesSpaced(AddIfNotNull(values, value_offset), batch_num_values,
+ batch_num_spaced_values, bits_buffer_->data(), /*offset=*/0);
+ } else {
+ WriteValuesSpaced(AddIfNotNull(values, value_offset), batch_num_values,
+ batch_num_spaced_values, valid_bits,
+ valid_bits_offset + value_offset);
+ }
+ CommitWriteAndCheckPageLimit(batch_size, batch_num_spaced_values);
+ value_offset += batch_num_spaced_values;
+
+ // Dictionary size checked separately from data page size since we
+ // circumvent this check when writing ::arrow::DictionaryArray directly
+ CheckDictionarySizeLimit();
+ };
+ DoInBatches(num_values, properties_->write_batch_size(), WriteChunk);
+ }
+
+ Status WriteArrow(const int16_t* def_levels, const int16_t* rep_levels,
+ int64_t num_levels, const ::arrow::Array& leaf_array,
+ ArrowWriteContext* ctx, bool leaf_field_nullable) override {
+ BEGIN_PARQUET_CATCH_EXCEPTIONS
+ // Leaf nulls are canonical when there is only a single null element after a list
+ // and it is at the leaf.
+ bool single_nullable_element =
+ (level_info_.def_level == level_info_.repeated_ancestor_def_level + 1) &&
+ leaf_field_nullable;
+ bool maybe_parent_nulls = level_info_.HasNullableValues() && !single_nullable_element;
+ if (maybe_parent_nulls) {
+ ARROW_ASSIGN_OR_RAISE(
+ bits_buffer_,
+ ::arrow::AllocateResizableBuffer(
+ BitUtil::BytesForBits(properties_->write_batch_size()), ctx->memory_pool));
+ bits_buffer_->ZeroPadding();
+ }
+
+ if (leaf_array.type()->id() == ::arrow::Type::DICTIONARY) {
+ return WriteArrowDictionary(def_levels, rep_levels, num_levels, leaf_array, ctx,
+ maybe_parent_nulls);
+ } else {
+ return WriteArrowDense(def_levels, rep_levels, num_levels, leaf_array, ctx,
+ maybe_parent_nulls);
+ }
+ END_PARQUET_CATCH_EXCEPTIONS
+ }
+
+ int64_t EstimatedBufferedValueBytes() const override {
+ return current_encoder_->EstimatedDataEncodedSize();
+ }
+
+ protected:
+ std::shared_ptr<Buffer> GetValuesBuffer() override {
+ return current_encoder_->FlushValues();
+ }
+
+ // Internal function to handle direct writing of ::arrow::DictionaryArray,
+ // since the standard logic concerning dictionary size limits and fallback to
+ // plain encoding is circumvented
+ Status WriteArrowDictionary(const int16_t* def_levels, const int16_t* rep_levels,
+ int64_t num_levels, const ::arrow::Array& array,
+ ArrowWriteContext* context, bool maybe_parent_nulls);
+
+ Status WriteArrowDense(const int16_t* def_levels, const int16_t* rep_levels,
+ int64_t num_levels, const ::arrow::Array& array,
+ ArrowWriteContext* context, bool maybe_parent_nulls);
+
+ void WriteDictionaryPage() override {
+ // We have to dynamic_cast here because, with TypedEncoder<Type>, some
+ // compilers don't want to cast through virtual inheritance
+ auto dict_encoder = dynamic_cast<DictEncoder<DType>*>(current_encoder_.get());
+ DCHECK(dict_encoder);
+ std::shared_ptr<ResizableBuffer> buffer =
+ AllocateBuffer(properties_->memory_pool(), dict_encoder->dict_encoded_size());
+ dict_encoder->WriteDict(buffer->mutable_data());
+
+ DictionaryPage page(buffer, dict_encoder->num_entries(),
+ properties_->dictionary_page_encoding());
+ total_bytes_written_ += pager_->WriteDictionaryPage(page);
+ }
+
+ EncodedStatistics GetPageStatistics() override {
+ EncodedStatistics result;
+ if (page_statistics_) result = page_statistics_->Encode();
+ return result;
+ }
+
+ EncodedStatistics GetChunkStatistics() override {
+ EncodedStatistics result;
+ if (chunk_statistics_) result = chunk_statistics_->Encode();
+ return result;
+ }
+
+ void ResetPageStatistics() override {
+ if (chunk_statistics_ != nullptr) {
+ chunk_statistics_->Merge(*page_statistics_);
+ page_statistics_->Reset();
+ }
+ }
+
+ Type::type type() const override { return descr_->physical_type(); }
+
+ const ColumnDescriptor* descr() const override { return descr_; }
+
+ int64_t rows_written() const override { return rows_written_; }
+
+ int64_t total_compressed_bytes() const override { return total_compressed_bytes_; }
+
+ int64_t total_bytes_written() const override { return total_bytes_written_; }
+
+ const WriterProperties* properties() override { return properties_; }
+
+ private:
+ using ValueEncoderType = typename EncodingTraits<DType>::Encoder;
+ using TypedStats = TypedStatistics<DType>;
+ std::unique_ptr<Encoder> current_encoder_;
+ std::shared_ptr<TypedStats> page_statistics_;
+ std::shared_ptr<TypedStats> chunk_statistics_;
+
+ // If writing a sequence of ::arrow::DictionaryArray to the writer, we keep the
+ // dictionary passed to DictEncoder<T>::PutDictionary so we can check
+ // subsequent array chunks to see whether materialization is required (in
+ // which case we fall back to the dense write path)
+ std::shared_ptr<::arrow::Array> preserved_dictionary_;
+
+ int64_t WriteLevels(int64_t num_values, const int16_t* def_levels,
+ const int16_t* rep_levels) {
+ int64_t values_to_write = 0;
+ // If the field is required and non-repeated, there are no definition levels
+ if (descr_->max_definition_level() > 0) {
+ for (int64_t i = 0; i < num_values; ++i) {
+ if (def_levels[i] == descr_->max_definition_level()) {
+ ++values_to_write;
+ }
+ }
+
+ WriteDefinitionLevels(num_values, def_levels);
+ } else {
+ // Required field, write all values
+ values_to_write = num_values;
+ }
+
+ // Not present for non-repeated fields
+ if (descr_->max_repetition_level() > 0) {
+ // A row could include more than one value
+ // Count the occasions where we start a new row
+ for (int64_t i = 0; i < num_values; ++i) {
+ if (rep_levels[i] == 0) {
+ rows_written_++;
+ }
+ }
+
+ WriteRepetitionLevels(num_values, rep_levels);
+ } else {
+ // Each value is exactly one row
+ rows_written_ += static_cast<int>(num_values);
+ }
+ return values_to_write;
+ }
+
+ // This method will always update the three output parameters,
+ // out_values_to_write, out_spaced_values_to_write and null_count. Additionally
+ // it will update the validity bitmap if required (i.e. if at least one level
+ // of nullable structs directly precedes the leaf node).
+ void MaybeCalculateValidityBits(const int16_t* def_levels, int64_t batch_size,
+ int64_t* out_values_to_write,
+ int64_t* out_spaced_values_to_write,
+ int64_t* null_count) {
+ if (bits_buffer_ == nullptr) {
+ if (level_info_.def_level == 0) {
+ // In this case def levels should be null and we only
+ // need to output counts which will always be equal to
+ // the batch size passed in (max def_level == 0 indicates
+ // there cannot be repeated or null fields).
+ DCHECK_EQ(def_levels, nullptr);
+ *out_values_to_write = batch_size;
+ *out_spaced_values_to_write = batch_size;
+ *null_count = 0;
+ } else {
+ for (int x = 0; x < batch_size; x++) {
+ *out_values_to_write += def_levels[x] == level_info_.def_level ? 1 : 0;
+ *out_spaced_values_to_write +=
+ def_levels[x] >= level_info_.repeated_ancestor_def_level ? 1 : 0;
+ }
+ *null_count = *out_spaced_values_to_write - *out_values_to_write;
+ }
+ return;
+ }
+ // Shrink to fit possible causes another allocation, and would only be necessary
+ // on the last batch.
+ int64_t new_bitmap_size = BitUtil::BytesForBits(batch_size);
+ if (new_bitmap_size != bits_buffer_->size()) {
+ PARQUET_THROW_NOT_OK(
+ bits_buffer_->Resize(new_bitmap_size, /*shrink_to_fit=*/false));
+ bits_buffer_->ZeroPadding();
+ }
+ internal::ValidityBitmapInputOutput io;
+ io.valid_bits = bits_buffer_->mutable_data();
+ io.values_read_upper_bound = batch_size;
+ internal::DefLevelsToBitmap(def_levels, batch_size, level_info_, &io);
+ *out_values_to_write = io.values_read - io.null_count;
+ *out_spaced_values_to_write = io.values_read;
+ *null_count = io.null_count;
+ }
+
+ Result<std::shared_ptr<Array>> MaybeReplaceValidity(std::shared_ptr<Array> array,
+ int64_t new_null_count,
+ ::arrow::MemoryPool* memory_pool) {
+ if (bits_buffer_ == nullptr) {
+ return array;
+ }
+ std::vector<std::shared_ptr<Buffer>> buffers = array->data()->buffers;
+ if (buffers.empty()) {
+ return array;
+ }
+ buffers[0] = bits_buffer_;
+ // Should be a leaf array.
+ DCHECK_GT(buffers.size(), 1);
+ ValueBufferSlicer slicer{memory_pool, /*buffer=*/nullptr};
+ if (array->data()->offset > 0) {
+ RETURN_NOT_OK(::arrow::VisitArrayInline(*array, &slicer));
+ buffers[1] = slicer.buffer_;
+ }
+ return ::arrow::MakeArray(std::make_shared<ArrayData>(
+ array->type(), array->length(), std::move(buffers), new_null_count));
+ }
+
+ void WriteLevelsSpaced(int64_t num_levels, const int16_t* def_levels,
+ const int16_t* rep_levels) {
+ // If the field is required and non-repeated, there are no definition levels
+ if (descr_->max_definition_level() > 0) {
+ WriteDefinitionLevels(num_levels, def_levels);
+ }
+ // Not present for non-repeated fields
+ if (descr_->max_repetition_level() > 0) {
+ // A row could include more than one value
+ // Count the occasions where we start a new row
+ for (int64_t i = 0; i < num_levels; ++i) {
+ if (rep_levels[i] == 0) {
+ rows_written_++;
+ }
+ }
+ WriteRepetitionLevels(num_levels, rep_levels);
+ } else {
+ // Each value is exactly one row
+ rows_written_ += static_cast<int>(num_levels);
+ }
+ }
+
+ void CommitWriteAndCheckPageLimit(int64_t num_levels, int64_t num_values) {
+ num_buffered_values_ += num_levels;
+ num_buffered_encoded_values_ += num_values;
+
+ if (current_encoder_->EstimatedDataEncodedSize() >= properties_->data_pagesize()) {
+ AddDataPage();
+ }
+ }
+
+ void FallbackToPlainEncoding() {
+ if (IsDictionaryEncoding(current_encoder_->encoding())) {
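+ // The dictionary page must precede the buffered data pages in the chunk,
+ // since those pages hold dictionary indices.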
+ WriteDictionaryPage();
+ // Serialize the buffered Dictionary Indices
+ FlushBufferedDataPages();
+ fallback_ = true;
+ // Only PLAIN encoding is supported for fallback in V1
+ current_encoder_ = MakeEncoder(DType::type_num, Encoding::PLAIN, false, descr_,
+ properties_->memory_pool());
+ encoding_ = Encoding::PLAIN;
+ }
+ }
+
+ // Checks if the Dictionary Page size limit is reached.
+ // If the limit is reached, the Dictionary Page and the buffered Data Pages
+ // are serialized and the encoding switches to PLAIN.
+ //
+ // Only one Dictionary Page is written per column chunk.
+ void CheckDictionarySizeLimit() {
+ if (!has_dictionary_ || fallback_) {
+ // Either not using dictionary encoding, or we have already fallen back
+ // to PLAIN encoding because the size threshold was reached
+ return;
+ }
+
+ // We have to dynamic_cast here because, with TypedEncoder<Type>, some compilers
+ // don't want to cast through virtual inheritance
+ auto dict_encoder = dynamic_cast<DictEncoder<DType>*>(current_encoder_.get());
+ if (dict_encoder->dict_encoded_size() >= properties_->dictionary_pagesize_limit()) {
+ FallbackToPlainEncoding();
+ }
+ }
+
+ void WriteValues(const T* values, int64_t num_values, int64_t num_nulls) {
+ dynamic_cast<ValueEncoderType*>(current_encoder_.get())
+ ->Put(values, static_cast<int>(num_values));
+ if (page_statistics_ != nullptr) {
+ page_statistics_->Update(values, num_values, num_nulls);
+ }
+ }
+
+ void WriteValuesSpaced(const T* values, int64_t num_values, int64_t num_spaced_values,
+ const uint8_t* valid_bits, int64_t valid_bits_offset) {
+ if (num_values != num_spaced_values) {
+ dynamic_cast<ValueEncoderType*>(current_encoder_.get())
+ ->PutSpaced(values, static_cast<int>(num_spaced_values), valid_bits,
+ valid_bits_offset);
+ } else {
+ dynamic_cast<ValueEncoderType*>(current_encoder_.get())
+ ->Put(values, static_cast<int>(num_values));
+ }
+ if (page_statistics_ != nullptr) {
+ const int64_t num_nulls = num_spaced_values - num_values;
+ page_statistics_->UpdateSpaced(values, valid_bits, valid_bits_offset, num_values,
+ num_nulls);
+ }
+ }
+};
+
+template <typename DType>
+Status TypedColumnWriterImpl<DType>::WriteArrowDictionary(
+ const int16_t* def_levels, const int16_t* rep_levels, int64_t num_levels,
+ const ::arrow::Array& array, ArrowWriteContext* ctx, bool maybe_parent_nulls) {
+ // If this is the first time writing a DictionaryArray, then there are
+ // a few possible paths to take:
+ //
+ // - If dictionary encoding is not enabled, convert to densely
+ // encoded and call WriteArrow
+ // - Dictionary encoding enabled
+ // - If this is the first time this is called, then we call
+ // PutDictionary into the encoder and then PutIndices on each
+ // chunk. We store the dictionary that was written in
+ // preserved_dictionary_ so that subsequent calls to this method
+ // can make sure the dictionary has not changed
+ // - On subsequent calls, we have to check whether the dictionary
+ // has changed. If it has, then we trigger the varying
+ // dictionary path and materialize each chunk and then call
+ // WriteArrow with that
+ auto WriteDense = [&] {
+ std::shared_ptr<::arrow::Array> dense_array;
+ RETURN_NOT_OK(
+ ConvertDictionaryToDense(array, properties_->memory_pool(), &dense_array));
+ return WriteArrowDense(def_levels, rep_levels, num_levels, *dense_array, ctx,
+ maybe_parent_nulls);
+ };
+
+ if (!IsDictionaryEncoding(current_encoder_->encoding()) ||
+ !DictionaryDirectWriteSupported(array)) {
+ // No longer dictionary-encoding for whatever reason, maybe we never were
+ // or we decided to stop. Note that WriteArrow can be invoked multiple
+ // times with both dense and dictionary-encoded versions of the same data
+ // without a problem. Any dense data will be hashed to indices until the
+ // dictionary page limit is reached, at which point everything (dictionary and
+ // dense) will fall back to plain encoding
+ return WriteDense();
+ }
+
+ auto dict_encoder = dynamic_cast<DictEncoder<DType>*>(current_encoder_.get());
+ const auto& data = checked_cast<const ::arrow::DictionaryArray&>(array);
+ std::shared_ptr<::arrow::Array> dictionary = data.dictionary();
+ std::shared_ptr<::arrow::Array> indices = data.indices();
+
+ int64_t value_offset = 0;
+ auto WriteIndicesChunk = [&](int64_t offset, int64_t batch_size) {
+ int64_t batch_num_values = 0;
+ int64_t batch_num_spaced_values = 0;
+ int64_t null_count = ::arrow::kUnknownNullCount;
+ // bits_buffer_ is non-null for nullable values. At this point in the code we can't
+ // determine whether the leaf array has the same null values as any parents it might
+ // have had, so we need to recompute the validity bitmap from the def levels.
+ MaybeCalculateValidityBits(AddIfNotNull(def_levels, offset), batch_size,
+ &batch_num_values, &batch_num_spaced_values, &null_count);
+ WriteLevelsSpaced(batch_size, AddIfNotNull(def_levels, offset),
+ AddIfNotNull(rep_levels, offset));
+ std::shared_ptr<Array> writeable_indices =
+ indices->Slice(value_offset, batch_num_spaced_values);
+ PARQUET_ASSIGN_OR_THROW(
+ writeable_indices,
+ MaybeReplaceValidity(writeable_indices, null_count, ctx->memory_pool));
+ dict_encoder->PutIndices(*writeable_indices);
+ CommitWriteAndCheckPageLimit(batch_size, batch_num_values);
+ value_offset += batch_num_spaced_values;
+ };
+
+ // Handle seeing dictionary for the first time
+ if (!preserved_dictionary_) {
+ // It's a new dictionary. Call PutDictionary and keep track of it
+ PARQUET_CATCH_NOT_OK(dict_encoder->PutDictionary(*dictionary));
+
+ // If there were duplicate values in the dictionary, the encoder's memo table
+ // will be out of sync with the indices in the Arrow array.
+ // The easiest solution for this uncommon case is to fall back to plain encoding.
+ if (dict_encoder->num_entries() != dictionary->length()) {
+ PARQUET_CATCH_NOT_OK(FallbackToPlainEncoding());
+ return WriteDense();
+ }
+
+ // TODO(wesm): If some dictionary values are unobserved, then the
+ // statistics will be inaccurate. Do we care enough to fix it?
+ if (page_statistics_ != nullptr) {
+ PARQUET_CATCH_NOT_OK(page_statistics_->Update(*dictionary));
+ }
+ preserved_dictionary_ = dictionary;
+ } else if (!dictionary->Equals(*preserved_dictionary_)) {
+ // Dictionary has changed
+ PARQUET_CATCH_NOT_OK(FallbackToPlainEncoding());
+ return WriteDense();
+ }
+
+ PARQUET_CATCH_NOT_OK(
+ DoInBatches(num_levels, properties_->write_batch_size(), WriteIndicesChunk));
+ return Status::OK();
+}
+
+// ----------------------------------------------------------------------
+// Direct Arrow write path
+
+template <typename ParquetType, typename ArrowType, typename Enable = void>
+struct SerializeFunctor {
+ using ArrowCType = typename ArrowType::c_type;
+ using ArrayType = typename ::arrow::TypeTraits<ArrowType>::ArrayType;
+ using ParquetCType = typename ParquetType::c_type;
+ Status Serialize(const ArrayType& array, ArrowWriteContext*, ParquetCType* out) {
+ const ArrowCType* input = array.raw_values();
+ if (array.null_count() > 0) {
+ for (int i = 0; i < array.length(); i++) {
+ out[i] = static_cast<ParquetCType>(input[i]);
+ }
+ } else {
+ std::copy(input, input + array.length(), out);
+ }
+ return Status::OK();
+ }
+};
+
+template <typename ParquetType, typename ArrowType>
+Status WriteArrowSerialize(const ::arrow::Array& array, int64_t num_levels,
+ const int16_t* def_levels, const int16_t* rep_levels,
+ ArrowWriteContext* ctx, TypedColumnWriter<ParquetType>* writer,
+ bool maybe_parent_nulls) {
+ using ParquetCType = typename ParquetType::c_type;
+ using ArrayType = typename ::arrow::TypeTraits<ArrowType>::ArrayType;
+
+ ParquetCType* buffer = nullptr;
+ PARQUET_THROW_NOT_OK(ctx->GetScratchData<ParquetCType>(array.length(), &buffer));
+
+ SerializeFunctor<ParquetType, ArrowType> functor;
+ RETURN_NOT_OK(functor.Serialize(checked_cast<const ArrayType&>(array), ctx, buffer));
+ bool no_nulls =
+ writer->descr()->schema_node()->is_required() || (array.null_count() == 0);
+ if (!maybe_parent_nulls && no_nulls) {
+ PARQUET_CATCH_NOT_OK(writer->WriteBatch(num_levels, def_levels, rep_levels, buffer));
+ } else {
+ PARQUET_CATCH_NOT_OK(writer->WriteBatchSpaced(num_levels, def_levels, rep_levels,
+ array.null_bitmap_data(),
+ array.offset(), buffer));
+ }
+ return Status::OK();
+}
+
+template <typename ParquetType>
+Status WriteArrowZeroCopy(const ::arrow::Array& array, int64_t num_levels,
+ const int16_t* def_levels, const int16_t* rep_levels,
+ ArrowWriteContext* ctx, TypedColumnWriter<ParquetType>* writer,
+ bool maybe_parent_nulls) {
+ using T = typename ParquetType::c_type;
+ const auto& data = static_cast<const ::arrow::PrimitiveArray&>(array);
+ const T* values = nullptr;
+ // The values buffer may be null if the array is empty (ARROW-2744)
+ if (data.values() != nullptr) {
+ values = reinterpret_cast<const T*>(data.values()->data()) + data.offset();
+ } else {
+ DCHECK_EQ(data.length(), 0);
+ }
+ bool no_nulls =
+ writer->descr()->schema_node()->is_required() || (array.null_count() == 0);
+
+ if (!maybe_parent_nulls && no_nulls) {
+ PARQUET_CATCH_NOT_OK(writer->WriteBatch(num_levels, def_levels, rep_levels, values));
+ } else {
+ PARQUET_CATCH_NOT_OK(writer->WriteBatchSpaced(num_levels, def_levels, rep_levels,
+ data.null_bitmap_data(), data.offset(),
+ values));
+ }
+ return Status::OK();
+}
+
+#define WRITE_SERIALIZE_CASE(ArrowEnum, ArrowType, ParquetType) \
+ case ::arrow::Type::ArrowEnum: \
+ return WriteArrowSerialize<ParquetType, ::arrow::ArrowType>( \
+ array, num_levels, def_levels, rep_levels, ctx, this, maybe_parent_nulls);
+
+#define WRITE_ZERO_COPY_CASE(ArrowEnum, ArrowType, ParquetType) \
+ case ::arrow::Type::ArrowEnum: \
+ return WriteArrowZeroCopy<ParquetType>(array, num_levels, def_levels, rep_levels, \
+ ctx, this, maybe_parent_nulls);
+
+#define ARROW_UNSUPPORTED() \
+ std::stringstream ss; \
+ ss << "Arrow type " << array.type()->ToString() \
+ << " cannot be written to Parquet type " << descr_->ToString(); \
+ return Status::Invalid(ss.str());
+
+// ----------------------------------------------------------------------
+// Write Arrow to BooleanType
+
+template <>
+struct SerializeFunctor<BooleanType, ::arrow::BooleanType> {
+ Status Serialize(const ::arrow::BooleanArray& data, ArrowWriteContext*, bool* out) {
+ for (int i = 0; i < data.length(); i++) {
+ *out++ = data.Value(i);
+ }
+ return Status::OK();
+ }
+};
+
+template <>
+Status TypedColumnWriterImpl<BooleanType>::WriteArrowDense(
+ const int16_t* def_levels, const int16_t* rep_levels, int64_t num_levels,
+ const ::arrow::Array& array, ArrowWriteContext* ctx, bool maybe_parent_nulls) {
+ if (array.type_id() != ::arrow::Type::BOOL) {
+ ARROW_UNSUPPORTED();
+ }
+ return WriteArrowSerialize<BooleanType, ::arrow::BooleanType>(
+ array, num_levels, def_levels, rep_levels, ctx, this, maybe_parent_nulls);
+}
+
+// ----------------------------------------------------------------------
+// Write Arrow types to INT32
+
+template <>
+struct SerializeFunctor<Int32Type, ::arrow::Date64Type> {
+ Status Serialize(const ::arrow::Date64Array& array, ArrowWriteContext*, int32_t* out) {
+ const int64_t* input = array.raw_values();
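+ // Convert milliseconds since the epoch to days (86400000 ms per day),
+ // matching the Parquet DATE logical type stored as INT32.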
+ for (int i = 0; i < array.length(); i++) {
+ *out++ = static_cast<int32_t>(*input++ / 86400000);
+ }
+ return Status::OK();
+ }
+};
+
+template <>
+struct SerializeFunctor<Int32Type, ::arrow::Time32Type> {
+ Status Serialize(const ::arrow::Time32Array& array, ArrowWriteContext*, int32_t* out) {
+ const int32_t* input = array.raw_values();
+ const auto& type = static_cast<const ::arrow::Time32Type&>(*array.type());
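+ // Parquet's INT32 time type stores milliseconds, so second-resolution
+ // values are scaled up by 1000; millisecond values are copied as-is.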
+ if (type.unit() == ::arrow::TimeUnit::SECOND) {
+ for (int i = 0; i < array.length(); i++) {
+ out[i] = input[i] * 1000;
+ }
+ } else {
+ std::copy(input, input + array.length(), out);
+ }
+ return Status::OK();
+ }
+};
+
+template <>
+Status TypedColumnWriterImpl<Int32Type>::WriteArrowDense(
+ const int16_t* def_levels, const int16_t* rep_levels, int64_t num_levels,
+ const ::arrow::Array& array, ArrowWriteContext* ctx, bool maybe_parent_nulls) {
+ switch (array.type()->id()) {
+ case ::arrow::Type::NA: {
+ PARQUET_CATCH_NOT_OK(WriteBatch(num_levels, def_levels, rep_levels, nullptr));
+ } break;
+ WRITE_SERIALIZE_CASE(INT8, Int8Type, Int32Type)
+ WRITE_SERIALIZE_CASE(UINT8, UInt8Type, Int32Type)
+ WRITE_SERIALIZE_CASE(INT16, Int16Type, Int32Type)
+ WRITE_SERIALIZE_CASE(UINT16, UInt16Type, Int32Type)
+ WRITE_SERIALIZE_CASE(UINT32, UInt32Type, Int32Type)
+ WRITE_ZERO_COPY_CASE(INT32, Int32Type, Int32Type)
+ WRITE_ZERO_COPY_CASE(DATE32, Date32Type, Int32Type)
+ WRITE_SERIALIZE_CASE(DATE64, Date64Type, Int32Type)
+ WRITE_SERIALIZE_CASE(TIME32, Time32Type, Int32Type)
+ default:
+ ARROW_UNSUPPORTED()
+ }
+ return Status::OK();
+}
+
+// ----------------------------------------------------------------------
+// Write Arrow to Int64 and Int96
+
+#define INT96_CONVERT_LOOP(ConversionFunction) \
+ for (int64_t i = 0; i < array.length(); i++) ConversionFunction(input[i], &out[i]);
+
+template <>
+struct SerializeFunctor<Int96Type, ::arrow::TimestampType> {
+ Status Serialize(const ::arrow::TimestampArray& array, ArrowWriteContext*, Int96* out) {
+ const int64_t* input = array.raw_values();
+ const auto& type = static_cast<const ::arrow::TimestampType&>(*array.type());
+ switch (type.unit()) {
+ case ::arrow::TimeUnit::NANO:
+ INT96_CONVERT_LOOP(internal::NanosecondsToImpalaTimestamp);
+ break;
+ case ::arrow::TimeUnit::MICRO:
+ INT96_CONVERT_LOOP(internal::MicrosecondsToImpalaTimestamp);
+ break;
+ case ::arrow::TimeUnit::MILLI:
+ INT96_CONVERT_LOOP(internal::MillisecondsToImpalaTimestamp);
+ break;
+ case ::arrow::TimeUnit::SECOND:
+ INT96_CONVERT_LOOP(internal::SecondsToImpalaTimestamp);
+ break;
+ }
+ return Status::OK();
+ }
+};
+
+#define COERCE_DIVIDE -1
+#define COERCE_INVALID 0
+#define COERCE_MULTIPLY +1
+
+static std::pair<int, int64_t> kTimestampCoercionFactors[4][4] = {
+ // from seconds ...
+ {{COERCE_INVALID, 0}, // ... to seconds
+ {COERCE_MULTIPLY, 1000}, // ... to millis
+ {COERCE_MULTIPLY, 1000000}, // ... to micros
+ {COERCE_MULTIPLY, INT64_C(1000000000)}}, // ... to nanos
+ // from millis ...
+ {{COERCE_INVALID, 0},
+ {COERCE_MULTIPLY, 1},
+ {COERCE_MULTIPLY, 1000},
+ {COERCE_MULTIPLY, 1000000}},
+ // from micros ...
+ {{COERCE_INVALID, 0},
+ {COERCE_DIVIDE, 1000},
+ {COERCE_MULTIPLY, 1},
+ {COERCE_MULTIPLY, 1000}},
+ // from nanos ...
+ {{COERCE_INVALID, 0},
+ {COERCE_DIVIDE, 1000000},
+ {COERCE_DIVIDE, 1000},
+ {COERCE_MULTIPLY, 1}}};
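+// Reading the table: e.g. coercing micros to millis uses {COERCE_DIVIDE, 1000},
+// so each value is divided by 1000 and, unless truncated timestamps are allowed,
+// a value not divisible by 1000 raises Status::Invalid (see DivideBy below).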
+
+template <>
+struct SerializeFunctor<Int64Type, ::arrow::TimestampType> {
+ Status Serialize(const ::arrow::TimestampArray& array, ArrowWriteContext* ctx,
+ int64_t* out) {
+ const auto& source_type = static_cast<const ::arrow::TimestampType&>(*array.type());
+ auto source_unit = source_type.unit();
+ const int64_t* values = array.raw_values();
+
+ ::arrow::TimeUnit::type target_unit = ctx->properties->coerce_timestamps_unit();
+ auto target_type = ::arrow::timestamp(target_unit);
+ bool truncation_allowed = ctx->properties->truncated_timestamps_allowed();
+
+ auto DivideBy = [&](const int64_t factor) {
+ for (int64_t i = 0; i < array.length(); i++) {
+ if (!truncation_allowed && array.IsValid(i) && (values[i] % factor != 0)) {
+ return Status::Invalid("Casting from ", source_type.ToString(), " to ",
+ target_type->ToString(),
+ " would lose data: ", values[i]);
+ }
+ out[i] = values[i] / factor;
+ }
+ return Status::OK();
+ };
+
+ auto MultiplyBy = [&](const int64_t factor) {
+ for (int64_t i = 0; i < array.length(); i++) {
+ out[i] = values[i] * factor;
+ }
+ return Status::OK();
+ };
+
+ const auto& coercion = kTimestampCoercionFactors[static_cast<int>(source_unit)]
+ [static_cast<int>(target_unit)];
+
+ // .first -> coercion operation; .second -> scale factor
+ DCHECK_NE(coercion.first, COERCE_INVALID);
+ return coercion.first == COERCE_DIVIDE ? DivideBy(coercion.second)
+ : MultiplyBy(coercion.second);
+ }
+};
+
+#undef COERCE_DIVIDE
+#undef COERCE_INVALID
+#undef COERCE_MULTIPLY
+
+Status WriteTimestamps(const ::arrow::Array& values, int64_t num_levels,
+ const int16_t* def_levels, const int16_t* rep_levels,
+ ArrowWriteContext* ctx, TypedColumnWriter<Int64Type>* writer,
+ bool maybe_parent_nulls) {
+ const auto& source_type = static_cast<const ::arrow::TimestampType&>(*values.type());
+
+ auto WriteCoerce = [&](const ArrowWriterProperties* properties) {
+ ArrowWriteContext temp_ctx = *ctx;
+ temp_ctx.properties = properties;
+ return WriteArrowSerialize<Int64Type, ::arrow::TimestampType>(
+ values, num_levels, def_levels, rep_levels, &temp_ctx, writer,
+ maybe_parent_nulls);
+ };
+
+ if (ctx->properties->coerce_timestamps_enabled()) {
+ // User explicitly requested coercion to specific unit
+ if (source_type.unit() == ctx->properties->coerce_timestamps_unit()) {
+ // No data conversion necessary
+ return WriteArrowZeroCopy<Int64Type>(values, num_levels, def_levels, rep_levels,
+ ctx, writer, maybe_parent_nulls);
+ } else {
+ return WriteCoerce(ctx->properties);
+ }
+ } else if (writer->properties()->version() == ParquetVersion::PARQUET_1_0 &&
+ source_type.unit() == ::arrow::TimeUnit::NANO) {
+ // Absent superseding user instructions, when writing Parquet version 1.0 files,
+ // timestamps in nanoseconds are coerced to microseconds
+ std::shared_ptr<ArrowWriterProperties> properties =
+ (ArrowWriterProperties::Builder())
+ .coerce_timestamps(::arrow::TimeUnit::MICRO)
+ ->disallow_truncated_timestamps()
+ ->build();
+ return WriteCoerce(properties.get());
+ } else if (source_type.unit() == ::arrow::TimeUnit::SECOND) {
+ // Absent superseding user instructions, timestamps in seconds are coerced to
+ // milliseconds
+ std::shared_ptr<ArrowWriterProperties> properties =
+ (ArrowWriterProperties::Builder())
+ .coerce_timestamps(::arrow::TimeUnit::MILLI)
+ ->build();
+ return WriteCoerce(properties.get());
+ } else {
+ // No data conversion necessary
+ return WriteArrowZeroCopy<Int64Type>(values, num_levels, def_levels, rep_levels, ctx,
+ writer, maybe_parent_nulls);
+ }
+}
+
+template <>
+Status TypedColumnWriterImpl<Int64Type>::WriteArrowDense(
+ const int16_t* def_levels, const int16_t* rep_levels, int64_t num_levels,
+ const ::arrow::Array& array, ArrowWriteContext* ctx, bool maybe_parent_nulls) {
+ switch (array.type()->id()) {
+ case ::arrow::Type::TIMESTAMP:
+ return WriteTimestamps(array, num_levels, def_levels, rep_levels, ctx, this,
+ maybe_parent_nulls);
+ WRITE_ZERO_COPY_CASE(INT64, Int64Type, Int64Type)
+ WRITE_SERIALIZE_CASE(UINT32, UInt32Type, Int64Type)
+ WRITE_SERIALIZE_CASE(UINT64, UInt64Type, Int64Type)
+ WRITE_ZERO_COPY_CASE(TIME64, Time64Type, Int64Type)
+ default:
+ ARROW_UNSUPPORTED();
+ }
+}
+
+template <>
+Status TypedColumnWriterImpl<Int96Type>::WriteArrowDense(
+ const int16_t* def_levels, const int16_t* rep_levels, int64_t num_levels,
+ const ::arrow::Array& array, ArrowWriteContext* ctx, bool maybe_parent_nulls) {
+ if (array.type_id() != ::arrow::Type::TIMESTAMP) {
+ ARROW_UNSUPPORTED();
+ }
+ return WriteArrowSerialize<Int96Type, ::arrow::TimestampType>(
+ array, num_levels, def_levels, rep_levels, ctx, this, maybe_parent_nulls);
+}
+
+// ----------------------------------------------------------------------
+// Floating point types
+
+template <>
+Status TypedColumnWriterImpl<FloatType>::WriteArrowDense(
+ const int16_t* def_levels, const int16_t* rep_levels, int64_t num_levels,
+ const ::arrow::Array& array, ArrowWriteContext* ctx, bool maybe_parent_nulls) {
+ if (array.type_id() != ::arrow::Type::FLOAT) {
+ ARROW_UNSUPPORTED();
+ }
+ return WriteArrowZeroCopy<FloatType>(array, num_levels, def_levels, rep_levels, ctx,
+ this, maybe_parent_nulls);
+}
+
+template <>
+Status TypedColumnWriterImpl<DoubleType>::WriteArrowDense(
+ const int16_t* def_levels, const int16_t* rep_levels, int64_t num_levels,
+ const ::arrow::Array& array, ArrowWriteContext* ctx, bool maybe_parent_nulls) {
+ if (array.type_id() != ::arrow::Type::DOUBLE) {
+ ARROW_UNSUPPORTED();
+ }
+ return WriteArrowZeroCopy<DoubleType>(array, num_levels, def_levels, rep_levels, ctx,
+ this, maybe_parent_nulls);
+}
+
+// ----------------------------------------------------------------------
+// Write Arrow to BYTE_ARRAY
+
+template <>
+Status TypedColumnWriterImpl<ByteArrayType>::WriteArrowDense(
+ const int16_t* def_levels, const int16_t* rep_levels, int64_t num_levels,
+ const ::arrow::Array& array, ArrowWriteContext* ctx, bool maybe_parent_nulls) {
+ if (!::arrow::is_base_binary_like(array.type()->id())) {
+ ARROW_UNSUPPORTED();
+ }
+
+ int64_t value_offset = 0;
+ auto WriteChunk = [&](int64_t offset, int64_t batch_size) {
+ int64_t batch_num_values = 0;
+ int64_t batch_num_spaced_values = 0;
+ int64_t null_count = 0;
+
+ MaybeCalculateValidityBits(AddIfNotNull(def_levels, offset), batch_size,
+ &batch_num_values, &batch_num_spaced_values, &null_count);
+ WriteLevelsSpaced(batch_size, AddIfNotNull(def_levels, offset),
+ AddIfNotNull(rep_levels, offset));
+ std::shared_ptr<Array> data_slice =
+ array.Slice(value_offset, batch_num_spaced_values);
+ PARQUET_ASSIGN_OR_THROW(
+ data_slice, MaybeReplaceValidity(data_slice, null_count, ctx->memory_pool));
+
+ current_encoder_->Put(*data_slice);
+ if (page_statistics_ != nullptr) {
+ page_statistics_->Update(*data_slice);
+ }
+ CommitWriteAndCheckPageLimit(batch_size, batch_num_values);
+ CheckDictionarySizeLimit();
+ value_offset += batch_num_spaced_values;
+ };
+
+ PARQUET_CATCH_NOT_OK(
+ DoInBatches(num_levels, properties_->write_batch_size(), WriteChunk));
+ return Status::OK();
+}
+
+// ----------------------------------------------------------------------
+// Write Arrow to FIXED_LEN_BYTE_ARRAY
+
+template <typename ParquetType, typename ArrowType>
+struct SerializeFunctor<
+ ParquetType, ArrowType,
+ ::arrow::enable_if_t<::arrow::is_fixed_size_binary_type<ArrowType>::value &&
+ !::arrow::is_decimal_type<ArrowType>::value>> {
+ Status Serialize(const ::arrow::FixedSizeBinaryArray& array, ArrowWriteContext*,
+ FLBA* out) {
+ if (array.null_count() == 0) {
+ // No nulls, just dump the data.
+ // TODO(advancedxy): use a writeBatch to avoid this step
+ for (int64_t i = 0; i < array.length(); i++) {
+ out[i] = FixedLenByteArray(array.GetValue(i));
+ }
+ } else {
+ for (int64_t i = 0; i < array.length(); i++) {
+ if (array.IsValid(i)) {
+ out[i] = FixedLenByteArray(array.GetValue(i));
+ }
+ }
+ }
+ return Status::OK();
+ }
+};
+
+// ----------------------------------------------------------------------
+// Write Arrow to Decimal128
+
+// Requires a custom serializer because decimals in Parquet are stored in
+// big-endian format. Thus, a temporary local buffer is required.
+template <typename ParquetType, typename ArrowType>
+struct SerializeFunctor<ParquetType, ArrowType, ::arrow::enable_if_decimal<ArrowType>> {
+ Status Serialize(const typename ::arrow::TypeTraits<ArrowType>::ArrayType& array,
+ ArrowWriteContext* ctx, FLBA* out) {
+ AllocateScratch(array, ctx);
+ auto offset = Offset(array);
+
+ if (array.null_count() == 0) {
+ for (int64_t i = 0; i < array.length(); i++) {
+ out[i] = FixDecimalEndianess<ArrowType::kByteWidth>(array.GetValue(i), offset);
+ }
+ } else {
+ for (int64_t i = 0; i < array.length(); i++) {
+ out[i] = array.IsValid(i) ? FixDecimalEndianess<ArrowType::kByteWidth>(
+ array.GetValue(i), offset)
+ : FixedLenByteArray();
+ }
+ }
+
+ return Status::OK();
+ }
+
+ // Parquet's Decimals are stored as fixed-length values whose length is
+ // proportional to the precision. Arrow's Decimals are always stored in 16/32
+ // bytes. Thus the internal FLBA pointer must be adjusted by the offset
+ // calculated here.
+ int32_t Offset(const Array& array) {
+ auto decimal_type = checked_pointer_cast<::arrow::DecimalType>(array.type());
+ return decimal_type->byte_width() -
+ ::arrow::DecimalType::DecimalSize(decimal_type->precision());
+ }
+
+ void AllocateScratch(const typename ::arrow::TypeTraits<ArrowType>::ArrayType& array,
+ ArrowWriteContext* ctx) {
+ int64_t non_null_count = array.length() - array.null_count();
+ int64_t size = non_null_count * ArrowType::kByteWidth;
+ scratch_buffer = AllocateBuffer(ctx->memory_pool, size);
+ scratch = reinterpret_cast<int64_t*>(scratch_buffer->mutable_data());
+ }
+
+ template <int byte_width>
+ FixedLenByteArray FixDecimalEndianess(const uint8_t* in, int64_t offset) {
+ const auto* u64_in = reinterpret_cast<const int64_t*>(in);
+ auto out = reinterpret_cast<const uint8_t*>(scratch) + offset;
+ static_assert(byte_width == 16 || byte_width == 32,
+ "only 16 and 32 byte Decimals supported");
+ if (byte_width == 32) {
+ *scratch++ = ::arrow::BitUtil::ToBigEndian(u64_in[3]);
+ *scratch++ = ::arrow::BitUtil::ToBigEndian(u64_in[2]);
+ *scratch++ = ::arrow::BitUtil::ToBigEndian(u64_in[1]);
+ *scratch++ = ::arrow::BitUtil::ToBigEndian(u64_in[0]);
+ } else {
+ *scratch++ = ::arrow::BitUtil::ToBigEndian(u64_in[1]);
+ *scratch++ = ::arrow::BitUtil::ToBigEndian(u64_in[0]);
+ }
+ return FixedLenByteArray(out);
+ }
+
+ std::shared_ptr<ResizableBuffer> scratch_buffer;
+ int64_t* scratch;
+};
+
+template <>
+Status TypedColumnWriterImpl<FLBAType>::WriteArrowDense(
+ const int16_t* def_levels, const int16_t* rep_levels, int64_t num_levels,
+ const ::arrow::Array& array, ArrowWriteContext* ctx, bool maybe_parent_nulls) {
+ switch (array.type()->id()) {
+ WRITE_SERIALIZE_CASE(FIXED_SIZE_BINARY, FixedSizeBinaryType, FLBAType)
+ WRITE_SERIALIZE_CASE(DECIMAL128, Decimal128Type, FLBAType)
+ WRITE_SERIALIZE_CASE(DECIMAL256, Decimal256Type, FLBAType)
+ default:
+ ARROW_UNSUPPORTED();
+ }
+}
+
+// ----------------------------------------------------------------------
+// Dynamic column writer constructor
+
+std::shared_ptr<ColumnWriter> ColumnWriter::Make(ColumnChunkMetaDataBuilder* metadata,
+ std::unique_ptr<PageWriter> pager,
+ const WriterProperties* properties) {
+ const ColumnDescriptor* descr = metadata->descr();
+ const bool use_dictionary = properties->dictionary_enabled(descr->path()) &&
+ descr->physical_type() != Type::BOOLEAN;
+ Encoding::type encoding = properties->encoding(descr->path());
+ if (use_dictionary) {
+ encoding = properties->dictionary_index_encoding();
+ }
+ switch (descr->physical_type()) {
+ case Type::BOOLEAN:
+ return std::make_shared<TypedColumnWriterImpl<BooleanType>>(
+ metadata, std::move(pager), use_dictionary, encoding, properties);
+ case Type::INT32:
+ return std::make_shared<TypedColumnWriterImpl<Int32Type>>(
+ metadata, std::move(pager), use_dictionary, encoding, properties);
+ case Type::INT64:
+ return std::make_shared<TypedColumnWriterImpl<Int64Type>>(
+ metadata, std::move(pager), use_dictionary, encoding, properties);
+ case Type::INT96:
+ return std::make_shared<TypedColumnWriterImpl<Int96Type>>(
+ metadata, std::move(pager), use_dictionary, encoding, properties);
+ case Type::FLOAT:
+ return std::make_shared<TypedColumnWriterImpl<FloatType>>(
+ metadata, std::move(pager), use_dictionary, encoding, properties);
+ case Type::DOUBLE:
+ return std::make_shared<TypedColumnWriterImpl<DoubleType>>(
+ metadata, std::move(pager), use_dictionary, encoding, properties);
+ case Type::BYTE_ARRAY:
+ return std::make_shared<TypedColumnWriterImpl<ByteArrayType>>(
+ metadata, std::move(pager), use_dictionary, encoding, properties);
+ case Type::FIXED_LEN_BYTE_ARRAY:
+ return std::make_shared<TypedColumnWriterImpl<FLBAType>>(
+ metadata, std::move(pager), use_dictionary, encoding, properties);
+ default:
+ ParquetException::NYI("type reader not implemented");
+ }
+ // Unreachable code, but suppress compiler warning
+ return std::shared_ptr<ColumnWriter>(nullptr);
+}
+
+} // namespace parquet
diff --git a/contrib/libs/apache/arrow/cpp/src/parquet/column_writer.h b/contrib/libs/apache/arrow/cpp/src/parquet/column_writer.h
index 6661385abdb..0a609021739 100644
--- a/contrib/libs/apache/arrow/cpp/src/parquet/column_writer.h
+++ b/contrib/libs/apache/arrow/cpp/src/parquet/column_writer.h
@@ -1,270 +1,270 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-#pragma once
-
-#include <cstdint>
-#include <cstring>
-#include <memory>
-
-#include "parquet/exception.h"
-#include "parquet/platform.h"
-#include "parquet/types.h"
-
-namespace arrow {
-
-class Array;
-
-namespace BitUtil {
-class BitWriter;
-} // namespace BitUtil
-
-namespace util {
-class RleEncoder;
-} // namespace util
-
-} // namespace arrow
-
-namespace parquet {
-
-struct ArrowWriteContext;
-class ColumnDescriptor;
-class DataPage;
-class DictionaryPage;
-class ColumnChunkMetaDataBuilder;
-class Encryptor;
-class WriterProperties;
-
-class PARQUET_EXPORT LevelEncoder {
- public:
- LevelEncoder();
- ~LevelEncoder();
-
- static int MaxBufferSize(Encoding::type encoding, int16_t max_level,
- int num_buffered_values);
-
- // Initialize the LevelEncoder.
- void Init(Encoding::type encoding, int16_t max_level, int num_buffered_values,
- uint8_t* data, int data_size);
-
- // Encodes a batch of levels from an array and returns the number of levels encoded
- int Encode(int batch_size, const int16_t* levels);
-
- int32_t len() {
- if (encoding_ != Encoding::RLE) {
- throw ParquetException("Only implemented for RLE encoding");
- }
- return rle_length_;
- }
-
- private:
- int bit_width_;
- int rle_length_;
- Encoding::type encoding_;
- std::unique_ptr<::arrow::util::RleEncoder> rle_encoder_;
- std::unique_ptr<::arrow::BitUtil::BitWriter> bit_packed_encoder_;
-};
-
-class PARQUET_EXPORT PageWriter {
- public:
- virtual ~PageWriter() {}
-
- static std::unique_ptr<PageWriter> Open(
- std::shared_ptr<ArrowOutputStream> sink, Compression::type codec,
- int compression_level, ColumnChunkMetaDataBuilder* metadata,
- int16_t row_group_ordinal = -1, int16_t column_chunk_ordinal = -1,
- ::arrow::MemoryPool* pool = ::arrow::default_memory_pool(),
- bool buffered_row_group = false,
- std::shared_ptr<Encryptor> header_encryptor = NULLPTR,
- std::shared_ptr<Encryptor> data_encryptor = NULLPTR);
-
- // The Column Writer tells the page writer whether dictionary encoding was
- // used and whether dictionary encoding has fallen back to the default
- // encoding on reaching the dictionary page limit
- virtual void Close(bool has_dictionary, bool fallback) = 0;
-
- // Return the number of uncompressed bytes written (including header size)
- virtual int64_t WriteDataPage(const DataPage& page) = 0;
-
- // Return the number of uncompressed bytes written (including header size)
- virtual int64_t WriteDictionaryPage(const DictionaryPage& page) = 0;
-
- virtual bool has_compressor() = 0;
-
- virtual void Compress(const Buffer& src_buffer, ResizableBuffer* dest_buffer) = 0;
-};
-
-static constexpr int WRITE_BATCH_SIZE = 1000;
-class PARQUET_EXPORT ColumnWriter {
- public:
- virtual ~ColumnWriter() = default;
-
- static std::shared_ptr<ColumnWriter> Make(ColumnChunkMetaDataBuilder*,
- std::unique_ptr<PageWriter>,
- const WriterProperties* properties);
-
- /// \brief Closes the ColumnWriter, commits any buffered values to pages.
- /// \return Total size of the column in bytes
- virtual int64_t Close() = 0;
-
- /// \brief The physical Parquet type of the column
- virtual Type::type type() const = 0;
-
- /// \brief The schema for the column
- virtual const ColumnDescriptor* descr() const = 0;
-
- /// \brief The number of rows written so far
- virtual int64_t rows_written() const = 0;
-
- /// \brief The total size of the compressed pages + page headers. Some values
- /// might still be buffered and not written to a page yet
- virtual int64_t total_compressed_bytes() const = 0;
-
- /// \brief The total number of bytes written as serialized data and
- /// dictionary pages to the ColumnChunk so far
- virtual int64_t total_bytes_written() const = 0;
-
- /// \brief The file-level writer properties
- virtual const WriterProperties* properties() = 0;
-
- /// \brief Write Apache Arrow columnar data directly to ColumnWriter. Returns
- /// error status if the array data type is not compatible with the concrete
- /// writer type.
- ///
- /// leaf_array is always a primitive (possibly dictionary encoded) type.
- /// leaf_field_nullable indicates whether the leaf array is considered nullable
- /// according to its schema in a Table or its parent array.
- virtual ::arrow::Status WriteArrow(const int16_t* def_levels, const int16_t* rep_levels,
- int64_t num_levels, const ::arrow::Array& leaf_array,
- ArrowWriteContext* ctx,
- bool leaf_field_nullable) = 0;
-};
-
-// API to write values to a single column. This is the main client facing API.
-template <typename DType>
-class TypedColumnWriter : public ColumnWriter {
- public:
- using T = typename DType::c_type;
-
- // Write a batch of repetition levels, definition levels, and values to the
- // column.
- // `num_values` is the number of logical leaf values.
- // `def_levels` (resp. `rep_levels`) can be null if the column's max definition level
- // (resp. max repetition level) is 0.
- // If not null, each of `def_levels` and `rep_levels` must have at least
- // `num_values`.
- //
- // The number of physical values written (taken from `values`) is returned.
- // It can be smaller than `num_values` if there are some undefined values.
- virtual int64_t WriteBatch(int64_t num_values, const int16_t* def_levels,
- const int16_t* rep_levels, const T* values) = 0;
-
- /// Write a batch of repetition levels, definition levels, and values to the
- /// column.
- ///
- /// In comparison to WriteBatch, the length of the repetition and definition
- /// levels is the same as the number of values when max_definition_level == 1.
- /// In the case of max_definition_level > 1, the repetition and definition
- /// levels are longer than the values, but the values include the null entries
- /// with definition_level == (max_definition_level - 1). Thus we have to
- /// differentiate in the parameters of this function whether the input has the
- /// length of num_values or the _number of rows in the lowest nesting level_.
- ///
- /// If the innermost node of the Parquet schema is required, the _number of
- /// rows in the lowest nesting level_ equals the number of non-null values. If
- /// the innermost schema node is optional, the _number of rows in the lowest
- /// nesting level_ also includes all values with
- /// definition_level == (max_definition_level - 1).
- ///
- /// @param num_values number of levels to write.
- /// @param def_levels The Parquet definition levels, length is num_values
- /// @param rep_levels The Parquet repetition levels, length is num_values
- /// @param valid_bits Bitmap that indicates if the row is null on the lowest nesting
- /// level. The length is number of rows in the lowest nesting level.
- /// @param valid_bits_offset The offset in bits of the valid_bits where the
- /// first relevant bit resides.
- /// @param values The values in the lowest nested level including
- /// spacing for nulls on the lowest levels; input has the length
- /// of the number of rows on the lowest nesting level.
- virtual void WriteBatchSpaced(int64_t num_values, const int16_t* def_levels,
- const int16_t* rep_levels, const uint8_t* valid_bits,
- int64_t valid_bits_offset, const T* values) = 0;
-
- // Estimated size of the values that are not written to a page yet
- virtual int64_t EstimatedBufferedValueBytes() const = 0;
-};
-
-using BoolWriter = TypedColumnWriter<BooleanType>;
-using Int32Writer = TypedColumnWriter<Int32Type>;
-using Int64Writer = TypedColumnWriter<Int64Type>;
-using Int96Writer = TypedColumnWriter<Int96Type>;
-using FloatWriter = TypedColumnWriter<FloatType>;
-using DoubleWriter = TypedColumnWriter<DoubleType>;
-using ByteArrayWriter = TypedColumnWriter<ByteArrayType>;
-using FixedLenByteArrayWriter = TypedColumnWriter<FLBAType>;
-
-namespace internal {
-
-/**
- * Timestamp conversion constants
- */
-constexpr int64_t kJulianEpochOffsetDays = INT64_C(2440588);
-
-template <int64_t UnitPerDay, int64_t NanosecondsPerUnit>
-inline void ArrowTimestampToImpalaTimestamp(const int64_t time, Int96* impala_timestamp) {
- int64_t julian_days = (time / UnitPerDay) + kJulianEpochOffsetDays;
- (*impala_timestamp).value[2] = (uint32_t)julian_days;
-
- int64_t last_day_units = time % UnitPerDay;
- auto last_day_nanos = last_day_units * NanosecondsPerUnit;
- // impala_timestamp will be unaligned every other entry so do memcpy instead
- // of assign and reinterpret cast to avoid undefined behavior.
- std::memcpy(impala_timestamp, &last_day_nanos, sizeof(int64_t));
-}
-
-constexpr int64_t kSecondsInNanos = INT64_C(1000000000);
-
-inline void SecondsToImpalaTimestamp(const int64_t seconds, Int96* impala_timestamp) {
- ArrowTimestampToImpalaTimestamp<kSecondsPerDay, kSecondsInNanos>(seconds,
- impala_timestamp);
-}
-
-constexpr int64_t kMillisecondsInNanos = kSecondsInNanos / INT64_C(1000);
-
-inline void MillisecondsToImpalaTimestamp(const int64_t milliseconds,
- Int96* impala_timestamp) {
- ArrowTimestampToImpalaTimestamp<kMillisecondsPerDay, kMillisecondsInNanos>(
- milliseconds, impala_timestamp);
-}
-
-constexpr int64_t kMicrosecondsInNanos = kMillisecondsInNanos / INT64_C(1000);
-
-inline void MicrosecondsToImpalaTimestamp(const int64_t microseconds,
- Int96* impala_timestamp) {
- ArrowTimestampToImpalaTimestamp<kMicrosecondsPerDay, kMicrosecondsInNanos>(
- microseconds, impala_timestamp);
-}
-
-constexpr int64_t kNanosecondsInNanos = INT64_C(1);
-
-inline void NanosecondsToImpalaTimestamp(const int64_t nanoseconds,
- Int96* impala_timestamp) {
- ArrowTimestampToImpalaTimestamp<kNanosecondsPerDay, kNanosecondsInNanos>(
- nanoseconds, impala_timestamp);
-}
-
-} // namespace internal
-} // namespace parquet
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+
+#include <cstdint>
+#include <cstring>
+#include <memory>
+
+#include "parquet/exception.h"
+#include "parquet/platform.h"
+#include "parquet/types.h"
+
+namespace arrow {
+
+class Array;
+
+namespace BitUtil {
+class BitWriter;
+} // namespace BitUtil
+
+namespace util {
+class RleEncoder;
+} // namespace util
+
+} // namespace arrow
+
+namespace parquet {
+
+struct ArrowWriteContext;
+class ColumnDescriptor;
+class DataPage;
+class DictionaryPage;
+class ColumnChunkMetaDataBuilder;
+class Encryptor;
+class WriterProperties;
+
+class PARQUET_EXPORT LevelEncoder {
+ public:
+ LevelEncoder();
+ ~LevelEncoder();
+
+ static int MaxBufferSize(Encoding::type encoding, int16_t max_level,
+ int num_buffered_values);
+
+ // Initialize the LevelEncoder.
+ void Init(Encoding::type encoding, int16_t max_level, int num_buffered_values,
+ uint8_t* data, int data_size);
+
+ // Encodes a batch of levels from an array and returns the number of levels encoded
+ int Encode(int batch_size, const int16_t* levels);
+
+ int32_t len() {
+ if (encoding_ != Encoding::RLE) {
+ throw ParquetException("Only implemented for RLE encoding");
+ }
+ return rle_length_;
+ }
+
+ private:
+ int bit_width_;
+ int rle_length_;
+ Encoding::type encoding_;
+ std::unique_ptr<::arrow::util::RleEncoder> rle_encoder_;
+ std::unique_ptr<::arrow::BitUtil::BitWriter> bit_packed_encoder_;
+};
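As a rough usage sketch for the class above (not part of this header; the level data, count, and max level are invented for illustration), the encoder is sized with MaxBufferSize(), bound to a caller-owned buffer with Init(), and then fed batches with Encode():

    #include <cstdint>
    #include <vector>
    #include "parquet/column_writer.h"

    // Hypothetical driver: RLE-encode 100 definition levels with max level 1.
    int EncodeDefLevels(const int16_t* def_levels) {
      parquet::LevelEncoder encoder;
      int capacity = parquet::LevelEncoder::MaxBufferSize(
          parquet::Encoding::RLE, /*max_level=*/1, /*num_buffered_values=*/100);
      std::vector<uint8_t> buffer(capacity);
      encoder.Init(parquet::Encoding::RLE, /*max_level=*/1,
                   /*num_buffered_values=*/100, buffer.data(), capacity);
      int encoded = encoder.Encode(/*batch_size=*/100, def_levels);
      // encoder.len() gives the RLE byte length once encoding succeeded.
      return encoded;
    }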
+
+class PARQUET_EXPORT PageWriter {
+ public:
+ virtual ~PageWriter() {}
+
+ static std::unique_ptr<PageWriter> Open(
+ std::shared_ptr<ArrowOutputStream> sink, Compression::type codec,
+ int compression_level, ColumnChunkMetaDataBuilder* metadata,
+ int16_t row_group_ordinal = -1, int16_t column_chunk_ordinal = -1,
+ ::arrow::MemoryPool* pool = ::arrow::default_memory_pool(),
+ bool buffered_row_group = false,
+ std::shared_ptr<Encryptor> header_encryptor = NULLPTR,
+ std::shared_ptr<Encryptor> data_encryptor = NULLPTR);
+
+ // The Column Writer decides whether dictionary encoding is used, and whether
+ // the dictionary encoding has fallen back to the default encoding on reaching
+ // the dictionary page limit
+ virtual void Close(bool has_dictionary, bool fallback) = 0;
+
+ // Return the number of uncompressed bytes written (including header size)
+ virtual int64_t WriteDataPage(const DataPage& page) = 0;
+
+ // Return the number of uncompressed bytes written (including header size)
+ virtual int64_t WriteDictionaryPage(const DictionaryPage& page) = 0;
+
+ virtual bool has_compressor() = 0;
+
+ virtual void Compress(const Buffer& src_buffer, ResizableBuffer* dest_buffer) = 0;
+};
+
+static constexpr int WRITE_BATCH_SIZE = 1000;
+class PARQUET_EXPORT ColumnWriter {
+ public:
+ virtual ~ColumnWriter() = default;
+
+ static std::shared_ptr<ColumnWriter> Make(ColumnChunkMetaDataBuilder*,
+ std::unique_ptr<PageWriter>,
+ const WriterProperties* properties);
+
+ /// \brief Closes the ColumnWriter, commits any buffered values to pages.
+ /// \return Total size of the column in bytes
+ virtual int64_t Close() = 0;
+
+ /// \brief The physical Parquet type of the column
+ virtual Type::type type() const = 0;
+
+ /// \brief The schema for the column
+ virtual const ColumnDescriptor* descr() const = 0;
+
+ /// \brief The number of rows written so far
+ virtual int64_t rows_written() const = 0;
+
+ /// \brief The total size of the compressed pages + page headers. Some values
+ /// might still be buffered and not written to a page yet
+ virtual int64_t total_compressed_bytes() const = 0;
+
+ /// \brief The total number of bytes written as serialized data and
+ /// dictionary pages to the ColumnChunk so far
+ virtual int64_t total_bytes_written() const = 0;
+
+ /// \brief The file-level writer properties
+ virtual const WriterProperties* properties() = 0;
+
+ /// \brief Write Apache Arrow columnar data directly to ColumnWriter. Returns
+ /// error status if the array data type is not compatible with the concrete
+ /// writer type.
+ ///
+ /// leaf_array is always a primitive (possibly dictionary encoded) type.
+ /// leaf_field_nullable indicates whether the leaf array is considered nullable
+ /// according to its schema in a Table or its parent array.
+ virtual ::arrow::Status WriteArrow(const int16_t* def_levels, const int16_t* rep_levels,
+ int64_t num_levels, const ::arrow::Array& leaf_array,
+ ArrowWriteContext* ctx,
+ bool leaf_field_nullable) = 0;
+};
+
+// API to write values to a single column. This is the main client facing API.
+template <typename DType>
+class TypedColumnWriter : public ColumnWriter {
+ public:
+ using T = typename DType::c_type;
+
+ // Write a batch of repetition levels, definition levels, and values to the
+ // column.
+ // `num_values` is the number of logical leaf values.
+ // `def_levels` (resp. `rep_levels`) can be null if the column's max definition level
+ // (resp. max repetition level) is 0.
+ // If not null, each of `def_levels` and `rep_levels` must have at least
+ // `num_values`.
+ //
+ // The number of physical values written (taken from `values`) is returned.
+ // It can be smaller than `num_values` if there are some undefined values.
+ virtual int64_t WriteBatch(int64_t num_values, const int16_t* def_levels,
+ const int16_t* rep_levels, const T* values) = 0;
+
+ /// Write a batch of repetition levels, definition levels, and values to the
+ /// column.
+ ///
+ /// In comparison to WriteBatch, the length of the repetition and definition
+ /// levels is the same as the number of values when max_definition_level == 1.
+ /// In the case of max_definition_level > 1, the repetition and definition
+ /// levels are longer than the values, but the values include the null entries
+ /// with definition_level == (max_definition_level - 1). Thus we have to
+ /// differentiate in the parameters of this function whether the input has the
+ /// length of num_values or the _number of rows in the lowest nesting level_.
+ ///
+ /// If the innermost node of the Parquet schema is required, the _number of
+ /// rows in the lowest nesting level_ equals the number of non-null values. If
+ /// the innermost schema node is optional, the _number of rows in the lowest
+ /// nesting level_ also includes all values with
+ /// definition_level == (max_definition_level - 1).
+ ///
+ /// @param num_values number of levels to write.
+ /// @param def_levels The Parquet definition levels, length is num_values
+ /// @param rep_levels The Parquet repetition levels, length is num_values
+ /// @param valid_bits Bitmap that indicates if the row is null on the lowest nesting
+ /// level. The length is number of rows in the lowest nesting level.
+ /// @param valid_bits_offset The offset in bits of the valid_bits where the
+ /// first relevant bit resides.
+ /// @param values The values in the lowest nested level including
+ /// spacing for nulls on the lowest levels; input has the length
+ /// of the number of rows on the lowest nesting level.
+ virtual void WriteBatchSpaced(int64_t num_values, const int16_t* def_levels,
+ const int16_t* rep_levels, const uint8_t* valid_bits,
+ int64_t valid_bits_offset, const T* values) = 0;
+
+ // Estimated size of the values that are not written to a page yet
+ virtual int64_t EstimatedBufferedValueBytes() const = 0;
+};
+
+using BoolWriter = TypedColumnWriter<BooleanType>;
+using Int32Writer = TypedColumnWriter<Int32Type>;
+using Int64Writer = TypedColumnWriter<Int64Type>;
+using Int96Writer = TypedColumnWriter<Int96Type>;
+using FloatWriter = TypedColumnWriter<FloatType>;
+using DoubleWriter = TypedColumnWriter<DoubleType>;
+using ByteArrayWriter = TypedColumnWriter<ByteArrayType>;
+using FixedLenByteArrayWriter = TypedColumnWriter<FLBAType>;
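To make the WriteBatch contract concrete, here is a minimal sketch for an optional INT64 column with max_definition_level == 1 and no repetition; `rg_writer` is a hypothetical row group writer obtained elsewhere, not something defined in this header:

    // Sketch: write three logical values, the middle one null.
    auto* writer = static_cast<parquet::Int64Writer*>(rg_writer->NextColumn());

    int16_t def_levels[] = {1, 0, 1};     // 0 marks a null at this level
    int64_t values[] = {10, 20};          // only non-null values are supplied
    int64_t written = writer->WriteBatch(/*num_values=*/3, def_levels,
                                         /*rep_levels=*/nullptr, values);
    // written == 2: the number of physical values consumed from `values`.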
+
+namespace internal {
+
+/**
+ * Timestamp conversion constants
+ */
+constexpr int64_t kJulianEpochOffsetDays = INT64_C(2440588);
+
+template <int64_t UnitPerDay, int64_t NanosecondsPerUnit>
+inline void ArrowTimestampToImpalaTimestamp(const int64_t time, Int96* impala_timestamp) {
+ int64_t julian_days = (time / UnitPerDay) + kJulianEpochOffsetDays;
+ (*impala_timestamp).value[2] = (uint32_t)julian_days;
+
+ int64_t last_day_units = time % UnitPerDay;
+ auto last_day_nanos = last_day_units * NanosecondsPerUnit;
+ // impala_timestamp will be unaligned every other entry so do memcpy instead
+ // of assign and reinterpret cast to avoid undefined behavior.
+ std::memcpy(impala_timestamp, &last_day_nanos, sizeof(int64_t));
+}
+
+constexpr int64_t kSecondsInNanos = INT64_C(1000000000);
+
+inline void SecondsToImpalaTimestamp(const int64_t seconds, Int96* impala_timestamp) {
+ ArrowTimestampToImpalaTimestamp<kSecondsPerDay, kSecondsInNanos>(seconds,
+ impala_timestamp);
+}
+
+constexpr int64_t kMillisecondsInNanos = kSecondsInNanos / INT64_C(1000);
+
+inline void MillisecondsToImpalaTimestamp(const int64_t milliseconds,
+ Int96* impala_timestamp) {
+ ArrowTimestampToImpalaTimestamp<kMillisecondsPerDay, kMillisecondsInNanos>(
+ milliseconds, impala_timestamp);
+}
+
+constexpr int64_t kMicrosecondsInNanos = kMillisecondsInNanos / INT64_C(1000);
+
+inline void MicrosecondsToImpalaTimestamp(const int64_t microseconds,
+ Int96* impala_timestamp) {
+ ArrowTimestampToImpalaTimestamp<kMicrosecondsPerDay, kMicrosecondsInNanos>(
+ microseconds, impala_timestamp);
+}
+
+constexpr int64_t kNanosecondsInNanos = INT64_C(1);
+
+inline void NanosecondsToImpalaTimestamp(const int64_t nanoseconds,
+ Int96* impala_timestamp) {
+ ArrowTimestampToImpalaTimestamp<kNanosecondsPerDay, kNanosecondsInNanos>(
+ nanoseconds, impala_timestamp);
+}
+
+} // namespace internal
+} // namespace parquet
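A quick worked example of the Int96 conversion above (a sketch, not part of the change): 90061 seconds past the Unix epoch is one day plus 1h01m01s, and the helpers split it accordingly.

    #include "parquet/column_writer.h"

    parquet::Int96 ts;
    // 90061 s = 1 * 86400 s + 3661 s. With kJulianEpochOffsetDays == 2440588
    // (the Julian day of 1970-01-01), value[2] becomes 2440589 and the low
    // 8 bytes hold 3661 * 1000000000 nanoseconds within that day.
    parquet::internal::SecondsToImpalaTimestamp(90061, &ts);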
diff --git a/contrib/libs/apache/arrow/cpp/src/parquet/encoding.cc b/contrib/libs/apache/arrow/cpp/src/parquet/encoding.cc
index 3b615af706d..6e8f7ee5491 100644
--- a/contrib/libs/apache/arrow/cpp/src/parquet/encoding.cc
+++ b/contrib/libs/apache/arrow/cpp/src/parquet/encoding.cc
@@ -1,2547 +1,2547 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-#include "parquet/encoding.h"
-
-#include <algorithm>
-#include <cstdint>
-#include <cstdlib>
-#include <limits>
-#include <memory>
-#include <string>
-#include <utility>
-#include <vector>
-
-#include "arrow/array.h"
-#include "arrow/array/builder_dict.h"
-#include "arrow/stl_allocator.h"
-#include "arrow/type_traits.h"
-#include "arrow/util/bit_run_reader.h"
-#include "arrow/util/bit_stream_utils.h"
-#include "arrow/util/bit_util.h"
-#include "arrow/util/bitmap_ops.h"
-#include "arrow/util/bitmap_writer.h"
-#include "arrow/util/byte_stream_split.h"
-#include "arrow/util/checked_cast.h"
-#include "arrow/util/hashing.h"
-#include "arrow/util/logging.h"
-#include "arrow/util/rle_encoding.h"
-#include "arrow/util/ubsan.h"
-#include "arrow/visitor_inline.h"
-
-#include "parquet/exception.h"
-#include "parquet/platform.h"
-#include "parquet/schema.h"
-#include "parquet/types.h"
-
-namespace BitUtil = arrow::BitUtil;
-
-using arrow::Status;
-using arrow::VisitNullBitmapInline;
-using arrow::internal::checked_cast;
-
-template <typename T>
-using ArrowPoolVector = std::vector<T, ::arrow::stl::allocator<T>>;
-
-namespace parquet {
-namespace {
-
-constexpr int64_t kInMemoryDefaultCapacity = 1024;
-// The Parquet spec isn't very clear whether ByteArray lengths are signed or
-// unsigned, but the Java implementation uses signed ints.
-constexpr size_t kMaxByteArraySize = std::numeric_limits<int32_t>::max();
-
-class EncoderImpl : virtual public Encoder {
- public:
- EncoderImpl(const ColumnDescriptor* descr, Encoding::type encoding, MemoryPool* pool)
- : descr_(descr),
- encoding_(encoding),
- pool_(pool),
- type_length_(descr ? descr->type_length() : -1) {}
-
- Encoding::type encoding() const override { return encoding_; }
-
- MemoryPool* memory_pool() const override { return pool_; }
-
- protected:
- // For accessing type-specific metadata, like FIXED_LEN_BYTE_ARRAY
- const ColumnDescriptor* descr_;
- const Encoding::type encoding_;
- MemoryPool* pool_;
-
- /// Type length from descr
- int type_length_;
-};
-
-// ----------------------------------------------------------------------
-// Plain encoder implementation
-
-template <typename DType>
-class PlainEncoder : public EncoderImpl, virtual public TypedEncoder<DType> {
- public:
- using T = typename DType::c_type;
-
- explicit PlainEncoder(const ColumnDescriptor* descr, MemoryPool* pool)
- : EncoderImpl(descr, Encoding::PLAIN, pool), sink_(pool) {}
-
- int64_t EstimatedDataEncodedSize() override { return sink_.length(); }
-
- std::shared_ptr<Buffer> FlushValues() override {
- std::shared_ptr<Buffer> buffer;
- PARQUET_THROW_NOT_OK(sink_.Finish(&buffer));
- return buffer;
- }
-
- using TypedEncoder<DType>::Put;
-
- void Put(const T* buffer, int num_values) override;
-
- void Put(const ::arrow::Array& values) override;
-
- void PutSpaced(const T* src, int num_values, const uint8_t* valid_bits,
- int64_t valid_bits_offset) override {
- if (valid_bits != NULLPTR) {
- PARQUET_ASSIGN_OR_THROW(auto buffer, ::arrow::AllocateBuffer(num_values * sizeof(T),
- this->memory_pool()));
- T* data = reinterpret_cast<T*>(buffer->mutable_data());
- int num_valid_values = ::arrow::util::internal::SpacedCompress<T>(
- src, num_values, valid_bits, valid_bits_offset, data);
- Put(data, num_valid_values);
- } else {
- Put(src, num_values);
- }
- }
-
- void UnsafePutByteArray(const void* data, uint32_t length) {
- DCHECK(length == 0 || data != nullptr) << "Value ptr cannot be NULL";
- sink_.UnsafeAppend(&length, sizeof(uint32_t));
- sink_.UnsafeAppend(data, static_cast<int64_t>(length));
- }
-
- void Put(const ByteArray& val) {
- // Write the result to the output stream
- const int64_t increment = static_cast<int64_t>(val.len + sizeof(uint32_t));
- if (ARROW_PREDICT_FALSE(sink_.length() + increment > sink_.capacity())) {
- PARQUET_THROW_NOT_OK(sink_.Reserve(increment));
- }
- UnsafePutByteArray(val.ptr, val.len);
- }
-
- protected:
- template <typename ArrayType>
- void PutBinaryArray(const ArrayType& array) {
- const int64_t total_bytes =
- array.value_offset(array.length()) - array.value_offset(0);
- PARQUET_THROW_NOT_OK(sink_.Reserve(total_bytes + array.length() * sizeof(uint32_t)));
-
- PARQUET_THROW_NOT_OK(::arrow::VisitArrayDataInline<typename ArrayType::TypeClass>(
- *array.data(),
- [&](::arrow::util::string_view view) {
- if (ARROW_PREDICT_FALSE(view.size() > kMaxByteArraySize)) {
- return Status::Invalid("Parquet cannot store strings with size 2GB or more");
- }
- UnsafePutByteArray(view.data(), static_cast<uint32_t>(view.size()));
- return Status::OK();
- },
- []() { return Status::OK(); }));
- }
-
- ::arrow::BufferBuilder sink_;
-};
-
-template <typename DType>
-void PlainEncoder<DType>::Put(const T* buffer, int num_values) {
- if (num_values > 0) {
- PARQUET_THROW_NOT_OK(sink_.Append(buffer, num_values * sizeof(T)));
- }
-}
-
-template <>
-inline void PlainEncoder<ByteArrayType>::Put(const ByteArray* src, int num_values) {
- for (int i = 0; i < num_values; ++i) {
- Put(src[i]);
- }
-}
-
-template <typename ArrayType>
-void DirectPutImpl(const ::arrow::Array& values, ::arrow::BufferBuilder* sink) {
- if (values.type_id() != ArrayType::TypeClass::type_id) {
- std::string type_name = ArrayType::TypeClass::type_name();
- throw ParquetException("direct put to " + type_name + " from " +
- values.type()->ToString() + " not supported");
- }
-
- using value_type = typename ArrayType::value_type;
- constexpr auto value_size = sizeof(value_type);
- auto raw_values = checked_cast<const ArrayType&>(values).raw_values();
-
- if (values.null_count() == 0) {
- // no nulls, just dump the data
- PARQUET_THROW_NOT_OK(sink->Append(raw_values, values.length() * value_size));
- } else {
- PARQUET_THROW_NOT_OK(
- sink->Reserve((values.length() - values.null_count()) * value_size));
-
- for (int64_t i = 0; i < values.length(); i++) {
- if (values.IsValid(i)) {
- sink->UnsafeAppend(&raw_values[i], value_size);
- }
- }
- }
-}
-
-template <>
-void PlainEncoder<Int32Type>::Put(const ::arrow::Array& values) {
- DirectPutImpl<::arrow::Int32Array>(values, &sink_);
-}
-
-template <>
-void PlainEncoder<Int64Type>::Put(const ::arrow::Array& values) {
- DirectPutImpl<::arrow::Int64Array>(values, &sink_);
-}
-
-template <>
-void PlainEncoder<Int96Type>::Put(const ::arrow::Array& values) {
- ParquetException::NYI("direct put to Int96");
-}
-
-template <>
-void PlainEncoder<FloatType>::Put(const ::arrow::Array& values) {
- DirectPutImpl<::arrow::FloatArray>(values, &sink_);
-}
-
-template <>
-void PlainEncoder<DoubleType>::Put(const ::arrow::Array& values) {
- DirectPutImpl<::arrow::DoubleArray>(values, &sink_);
-}
-
-template <typename DType>
-void PlainEncoder<DType>::Put(const ::arrow::Array& values) {
- ParquetException::NYI("direct put of " + values.type()->ToString());
-}
-
-void AssertBaseBinary(const ::arrow::Array& values) {
- if (!::arrow::is_base_binary_like(values.type_id())) {
- throw ParquetException("Only BaseBinaryArray and subclasses supported");
- }
-}
-
-template <>
-inline void PlainEncoder<ByteArrayType>::Put(const ::arrow::Array& values) {
- AssertBaseBinary(values);
-
- if (::arrow::is_binary_like(values.type_id())) {
- PutBinaryArray(checked_cast<const ::arrow::BinaryArray&>(values));
- } else {
- DCHECK(::arrow::is_large_binary_like(values.type_id()));
- PutBinaryArray(checked_cast<const ::arrow::LargeBinaryArray&>(values));
- }
-}
-
-void AssertFixedSizeBinary(const ::arrow::Array& values, int type_length) {
- if (values.type_id() != ::arrow::Type::FIXED_SIZE_BINARY &&
- values.type_id() != ::arrow::Type::DECIMAL) {
- throw ParquetException("Only FixedSizeBinaryArray and subclasses supported");
- }
- if (checked_cast<const ::arrow::FixedSizeBinaryType&>(*values.type()).byte_width() !=
- type_length) {
- throw ParquetException("Size mismatch: " + values.type()->ToString() +
- " should have been " + std::to_string(type_length) + " wide");
- }
-}
-
-template <>
-inline void PlainEncoder<FLBAType>::Put(const ::arrow::Array& values) {
- AssertFixedSizeBinary(values, descr_->type_length());
- const auto& data = checked_cast<const ::arrow::FixedSizeBinaryArray&>(values);
-
- if (data.null_count() == 0) {
- // no nulls, just dump the data
- PARQUET_THROW_NOT_OK(
- sink_.Append(data.raw_values(), data.length() * data.byte_width()));
- } else {
- const int64_t total_bytes =
- data.length() * data.byte_width() - data.null_count() * data.byte_width();
- PARQUET_THROW_NOT_OK(sink_.Reserve(total_bytes));
- for (int64_t i = 0; i < data.length(); i++) {
- if (data.IsValid(i)) {
- sink_.UnsafeAppend(data.Value(i), data.byte_width());
- }
- }
- }
-}
-
-template <>
-inline void PlainEncoder<FLBAType>::Put(const FixedLenByteArray* src, int num_values) {
- if (descr_->type_length() == 0) {
- return;
- }
- for (int i = 0; i < num_values; ++i) {
- // Write the result to the output stream
- DCHECK(src[i].ptr != nullptr) << "Value ptr cannot be NULL";
- PARQUET_THROW_NOT_OK(sink_.Append(src[i].ptr, descr_->type_length()));
- }
-}
-
-template <>
-class PlainEncoder<BooleanType> : public EncoderImpl, virtual public BooleanEncoder {
- public:
- explicit PlainEncoder(const ColumnDescriptor* descr, MemoryPool* pool)
- : EncoderImpl(descr, Encoding::PLAIN, pool),
- bits_available_(kInMemoryDefaultCapacity * 8),
- bits_buffer_(AllocateBuffer(pool, kInMemoryDefaultCapacity)),
- sink_(pool),
- bit_writer_(bits_buffer_->mutable_data(),
- static_cast<int>(bits_buffer_->size())) {}
-
- int64_t EstimatedDataEncodedSize() override;
- std::shared_ptr<Buffer> FlushValues() override;
-
- void Put(const bool* src, int num_values) override;
-
- void Put(const std::vector<bool>& src, int num_values) override;
-
- void PutSpaced(const bool* src, int num_values, const uint8_t* valid_bits,
- int64_t valid_bits_offset) override {
- if (valid_bits != NULLPTR) {
- PARQUET_ASSIGN_OR_THROW(auto buffer, ::arrow::AllocateBuffer(num_values * sizeof(T),
- this->memory_pool()));
- T* data = reinterpret_cast<T*>(buffer->mutable_data());
- int num_valid_values = ::arrow::util::internal::SpacedCompress<T>(
- src, num_values, valid_bits, valid_bits_offset, data);
- Put(data, num_valid_values);
- } else {
- Put(src, num_values);
- }
- }
-
- void Put(const ::arrow::Array& values) override {
- if (values.type_id() != ::arrow::Type::BOOL) {
- throw ParquetException("direct put to boolean from " + values.type()->ToString() +
- " not supported");
- }
-
- const auto& data = checked_cast<const ::arrow::BooleanArray&>(values);
- if (data.null_count() == 0) {
- PARQUET_THROW_NOT_OK(sink_.Reserve(BitUtil::BytesForBits(data.length())));
- // no nulls, just dump the data
- ::arrow::internal::CopyBitmap(data.data()->GetValues<uint8_t>(1), data.offset(),
- data.length(), sink_.mutable_data(), sink_.length());
- } else {
- auto n_valid = BitUtil::BytesForBits(data.length() - data.null_count());
- PARQUET_THROW_NOT_OK(sink_.Reserve(n_valid));
- ::arrow::internal::FirstTimeBitmapWriter writer(sink_.mutable_data(),
- sink_.length(), n_valid);
-
- for (int64_t i = 0; i < data.length(); i++) {
- if (data.IsValid(i)) {
- if (data.Value(i)) {
- writer.Set();
- } else {
- writer.Clear();
- }
- writer.Next();
- }
- }
- writer.Finish();
- }
- sink_.UnsafeAdvance(data.length());
- }
-
- private:
- int bits_available_;
- std::shared_ptr<ResizableBuffer> bits_buffer_;
- ::arrow::BufferBuilder sink_;
- ::arrow::BitUtil::BitWriter bit_writer_;
-
- template <typename SequenceType>
- void PutImpl(const SequenceType& src, int num_values);
-};
-
-template <typename SequenceType>
-void PlainEncoder<BooleanType>::PutImpl(const SequenceType& src, int num_values) {
- int bit_offset = 0;
- if (bits_available_ > 0) {
- int bits_to_write = std::min(bits_available_, num_values);
- for (int i = 0; i < bits_to_write; i++) {
- bit_writer_.PutValue(src[i], 1);
- }
- bits_available_ -= bits_to_write;
- bit_offset = bits_to_write;
-
- if (bits_available_ == 0) {
- bit_writer_.Flush();
- PARQUET_THROW_NOT_OK(
- sink_.Append(bit_writer_.buffer(), bit_writer_.bytes_written()));
- bit_writer_.Clear();
- }
- }
-
- int bits_remaining = num_values - bit_offset;
- while (bit_offset < num_values) {
- bits_available_ = static_cast<int>(bits_buffer_->size()) * 8;
-
- int bits_to_write = std::min(bits_available_, bits_remaining);
- for (int i = bit_offset; i < bit_offset + bits_to_write; i++) {
- bit_writer_.PutValue(src[i], 1);
- }
- bit_offset += bits_to_write;
- bits_available_ -= bits_to_write;
- bits_remaining -= bits_to_write;
-
- if (bits_available_ == 0) {
- bit_writer_.Flush();
- PARQUET_THROW_NOT_OK(
- sink_.Append(bit_writer_.buffer(), bit_writer_.bytes_written()));
- bit_writer_.Clear();
- }
- }
-}
-
-int64_t PlainEncoder<BooleanType>::EstimatedDataEncodedSize() {
- int64_t position = sink_.length();
- return position + bit_writer_.bytes_written();
-}
-
-std::shared_ptr<Buffer> PlainEncoder<BooleanType>::FlushValues() {
- if (bits_available_ > 0) {
- bit_writer_.Flush();
- PARQUET_THROW_NOT_OK(sink_.Append(bit_writer_.buffer(), bit_writer_.bytes_written()));
- bit_writer_.Clear();
- bits_available_ = static_cast<int>(bits_buffer_->size()) * 8;
- }
-
- std::shared_ptr<Buffer> buffer;
- PARQUET_THROW_NOT_OK(sink_.Finish(&buffer));
- return buffer;
-}
-
-void PlainEncoder<BooleanType>::Put(const bool* src, int num_values) {
- PutImpl(src, num_values);
-}
-
-void PlainEncoder<BooleanType>::Put(const std::vector<bool>& src, int num_values) {
- PutImpl(src, num_values);
-}
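For orientation (a sketch under the assumption that `encoder` is a PlainEncoder<BooleanType> built elsewhere), PLAIN booleans are bit-packed one bit per value, least significant bit first:

    bool values[] = {true, false, true, true};
    encoder->Put(values, 4);
    std::shared_ptr<arrow::Buffer> buf = encoder->FlushValues();
    // Bits fill the byte LSB-first: 1,0,1,1 -> 0b00001101, so
    // buf->size() == 1 and buf->data()[0] == 0x0D.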
-
-// ----------------------------------------------------------------------
-// DictEncoder<T> implementations
-
-template <typename DType>
-struct DictEncoderTraits {
- using c_type = typename DType::c_type;
- using MemoTableType = ::arrow::internal::ScalarMemoTable<c_type>;
-};
-
-template <>
-struct DictEncoderTraits<ByteArrayType> {
- using MemoTableType = ::arrow::internal::BinaryMemoTable<::arrow::BinaryBuilder>;
-};
-
-template <>
-struct DictEncoderTraits<FLBAType> {
- using MemoTableType = ::arrow::internal::BinaryMemoTable<::arrow::BinaryBuilder>;
-};
-
-// Initially 1024 elements
-static constexpr int32_t kInitialHashTableSize = 1 << 10;
-
-/// See the dictionary encoding section of
-/// https://github.com/Parquet/parquet-format. The encoding supports
-/// streaming encoding. Values are encoded as they are added while the
-/// dictionary is being constructed. At any time, the buffered values
-/// can be written out with the current dictionary size. More values
-/// can then be added to the encoder, including new dictionary
-/// entries.
-template <typename DType>
-class DictEncoderImpl : public EncoderImpl, virtual public DictEncoder<DType> {
- using MemoTableType = typename DictEncoderTraits<DType>::MemoTableType;
-
- public:
- typedef typename DType::c_type T;
-
- explicit DictEncoderImpl(const ColumnDescriptor* desc, MemoryPool* pool)
- : EncoderImpl(desc, Encoding::PLAIN_DICTIONARY, pool),
- buffered_indices_(::arrow::stl::allocator<int32_t>(pool)),
- dict_encoded_size_(0),
- memo_table_(pool, kInitialHashTableSize) {}
-
- ~DictEncoderImpl() override { DCHECK(buffered_indices_.empty()); }
-
- int dict_encoded_size() override { return dict_encoded_size_; }
-
- int WriteIndices(uint8_t* buffer, int buffer_len) override {
- // Write bit width in first byte
- *buffer = static_cast<uint8_t>(bit_width());
- ++buffer;
- --buffer_len;
-
- ::arrow::util::RleEncoder encoder(buffer, buffer_len, bit_width());
-
- for (int32_t index : buffered_indices_) {
- if (!encoder.Put(index)) return -1;
- }
- encoder.Flush();
-
- ClearIndices();
- return 1 + encoder.len();
- }
-
- void set_type_length(int type_length) { this->type_length_ = type_length; }
-
- /// Returns a conservative estimate of the number of bytes needed to encode the buffered
- /// indices. Used to size the buffer passed to WriteIndices().
- int64_t EstimatedDataEncodedSize() override {
- // Note: because of the way RleEncoder::CheckBufferFull() is called, we have
- // to reserve an extra "RleEncoder::MinBufferSize" bytes. These extra bytes
- // won't be used but not reserving them would cause the encoder to fail.
- return 1 +
- ::arrow::util::RleEncoder::MaxBufferSize(
- bit_width(), static_cast<int>(buffered_indices_.size())) +
- ::arrow::util::RleEncoder::MinBufferSize(bit_width());
- }
-
- /// The minimum bit width required to encode the currently buffered indices.
- int bit_width() const override {
- if (ARROW_PREDICT_FALSE(num_entries() == 0)) return 0;
- if (ARROW_PREDICT_FALSE(num_entries() == 1)) return 1;
- return BitUtil::Log2(num_entries());
- }
-
- /// Encode value. Note that this does not actually write any data, just
- /// buffers the value's index to be written later.
- inline void Put(const T& value);
-
- // Not implemented for other data types
- inline void PutByteArray(const void* ptr, int32_t length);
-
- void Put(const T* src, int num_values) override {
- for (int32_t i = 0; i < num_values; i++) {
- Put(src[i]);
- }
- }
-
- void PutSpaced(const T* src, int num_values, const uint8_t* valid_bits,
- int64_t valid_bits_offset) override {
- ::arrow::internal::VisitSetBitRunsVoid(valid_bits, valid_bits_offset, num_values,
- [&](int64_t position, int64_t length) {
- for (int64_t i = 0; i < length; i++) {
- Put(src[i + position]);
- }
- });
- }
-
- using TypedEncoder<DType>::Put;
-
- void Put(const ::arrow::Array& values) override;
- void PutDictionary(const ::arrow::Array& values) override;
-
- template <typename ArrowType, typename T = typename ArrowType::c_type>
- void PutIndicesTyped(const ::arrow::Array& data) {
- auto values = data.data()->GetValues<T>(1);
- size_t buffer_position = buffered_indices_.size();
- buffered_indices_.resize(buffer_position +
- static_cast<size_t>(data.length() - data.null_count()));
- ::arrow::internal::VisitSetBitRunsVoid(
- data.null_bitmap_data(), data.offset(), data.length(),
- [&](int64_t position, int64_t length) {
- for (int64_t i = 0; i < length; ++i) {
- buffered_indices_[buffer_position++] =
- static_cast<int32_t>(values[i + position]);
- }
- });
- }
-
- void PutIndices(const ::arrow::Array& data) override {
- switch (data.type()->id()) {
- case ::arrow::Type::UINT8:
- case ::arrow::Type::INT8:
- return PutIndicesTyped<::arrow::UInt8Type>(data);
- case ::arrow::Type::UINT16:
- case ::arrow::Type::INT16:
- return PutIndicesTyped<::arrow::UInt16Type>(data);
- case ::arrow::Type::UINT32:
- case ::arrow::Type::INT32:
- return PutIndicesTyped<::arrow::UInt32Type>(data);
- case ::arrow::Type::UINT64:
- case ::arrow::Type::INT64:
- return PutIndicesTyped<::arrow::UInt64Type>(data);
- default:
- throw ParquetException("Passed non-integer array to PutIndices");
- }
- }
-
- std::shared_ptr<Buffer> FlushValues() override {
- std::shared_ptr<ResizableBuffer> buffer =
- AllocateBuffer(this->pool_, EstimatedDataEncodedSize());
- int result_size = WriteIndices(buffer->mutable_data(),
- static_cast<int>(EstimatedDataEncodedSize()));
- PARQUET_THROW_NOT_OK(buffer->Resize(result_size, false));
- return std::move(buffer);
- }
-
- /// Writes out the encoded dictionary to buffer. buffer must be preallocated to
- /// dict_encoded_size() bytes.
- void WriteDict(uint8_t* buffer) override;
-
- /// The number of entries in the dictionary.
- int num_entries() const override { return memo_table_.size(); }
-
- private:
- /// Clears all the indices (but leaves the dictionary).
- void ClearIndices() { buffered_indices_.clear(); }
-
- /// Indices that have not yet be written out by WriteIndices().
- ArrowPoolVector<int32_t> buffered_indices_;
-
- template <typename ArrayType>
- void PutBinaryArray(const ArrayType& array) {
- PARQUET_THROW_NOT_OK(::arrow::VisitArrayDataInline<typename ArrayType::TypeClass>(
- *array.data(),
- [&](::arrow::util::string_view view) {
- if (ARROW_PREDICT_FALSE(view.size() > kMaxByteArraySize)) {
- return Status::Invalid("Parquet cannot store strings with size 2GB or more");
- }
- PutByteArray(view.data(), static_cast<uint32_t>(view.size()));
- return Status::OK();
- },
- []() { return Status::OK(); }));
- }
-
- template <typename ArrayType>
- void PutBinaryDictionaryArray(const ArrayType& array) {
- DCHECK_EQ(array.null_count(), 0);
- for (int64_t i = 0; i < array.length(); i++) {
- auto v = array.GetView(i);
- if (ARROW_PREDICT_FALSE(v.size() > kMaxByteArraySize)) {
- throw ParquetException("Parquet cannot store strings with size 2GB or more");
- }
- dict_encoded_size_ += static_cast<int>(v.size() + sizeof(uint32_t));
- int32_t unused_memo_index;
- PARQUET_THROW_NOT_OK(memo_table_.GetOrInsert(
- v.data(), static_cast<int32_t>(v.size()), &unused_memo_index));
- }
- }
-
- /// The number of bytes needed to encode the dictionary.
- int dict_encoded_size_;
-
- MemoTableType memo_table_;
-};
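A hedged sketch of the streaming flow the class comment describes; MakeTypedEncoder comes from parquet/encoding.h, and `descr` stands in for a real ColumnDescriptor:

    // Sketch: dictionary-encode three INT32 values, one of them repeated.
    auto base = parquet::MakeTypedEncoder<parquet::Int32Type>(
        parquet::Encoding::PLAIN, /*use_dictionary=*/true, descr);
    auto* encoder =
        dynamic_cast<parquet::DictEncoder<parquet::Int32Type>*>(base.get());

    int32_t values[] = {7, 8, 7};
    encoder->Put(values, 3);          // dictionary {7, 8}; indices 0, 1, 0

    std::vector<uint8_t> dict(encoder->dict_encoded_size());
    encoder->WriteDict(dict.data());  // 8 bytes: the two distinct int32 values

    std::shared_ptr<arrow::Buffer> idx = encoder->FlushValues();
    // idx = one bit-width byte followed by the RLE-encoded indices.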
-
-template <typename DType>
-void DictEncoderImpl<DType>::WriteDict(uint8_t* buffer) {
- // For primitive types, only a memcpy
- DCHECK_EQ(static_cast<size_t>(dict_encoded_size_), sizeof(T) * memo_table_.size());
- memo_table_.CopyValues(0 /* start_pos */, reinterpret_cast<T*>(buffer));
-}
-
-// ByteArray and FLBA already have the dictionary encoded in their data heaps
-template <>
-void DictEncoderImpl<ByteArrayType>::WriteDict(uint8_t* buffer) {
- memo_table_.VisitValues(0, [&buffer](const ::arrow::util::string_view& v) {
- uint32_t len = static_cast<uint32_t>(v.length());
- memcpy(buffer, &len, sizeof(len));
- buffer += sizeof(len);
- memcpy(buffer, v.data(), len);
- buffer += len;
- });
-}
-
-template <>
-void DictEncoderImpl<FLBAType>::WriteDict(uint8_t* buffer) {
- memo_table_.VisitValues(0, [&](const ::arrow::util::string_view& v) {
- DCHECK_EQ(v.length(), static_cast<size_t>(type_length_));
- memcpy(buffer, v.data(), type_length_);
- buffer += type_length_;
- });
-}
-
-template <typename DType>
-inline void DictEncoderImpl<DType>::Put(const T& v) {
- // Put() implementation for primitive types
- auto on_found = [](int32_t memo_index) {};
- auto on_not_found = [this](int32_t memo_index) {
- dict_encoded_size_ += static_cast<int>(sizeof(T));
- };
-
- int32_t memo_index;
- PARQUET_THROW_NOT_OK(memo_table_.GetOrInsert(v, on_found, on_not_found, &memo_index));
- buffered_indices_.push_back(memo_index);
-}
-
-template <typename DType>
-inline void DictEncoderImpl<DType>::PutByteArray(const void* ptr, int32_t length) {
- DCHECK(false);
-}
-
-template <>
-inline void DictEncoderImpl<ByteArrayType>::PutByteArray(const void* ptr,
- int32_t length) {
- static const uint8_t empty[] = {0};
-
- auto on_found = [](int32_t memo_index) {};
- auto on_not_found = [&](int32_t memo_index) {
- dict_encoded_size_ += static_cast<int>(length + sizeof(uint32_t));
- };
-
- DCHECK(ptr != nullptr || length == 0);
- ptr = (ptr != nullptr) ? ptr : empty;
- int32_t memo_index;
- PARQUET_THROW_NOT_OK(
- memo_table_.GetOrInsert(ptr, length, on_found, on_not_found, &memo_index));
- buffered_indices_.push_back(memo_index);
-}
-
-template <>
-inline void DictEncoderImpl<ByteArrayType>::Put(const ByteArray& val) {
- return PutByteArray(val.ptr, static_cast<int32_t>(val.len));
-}
-
-template <>
-inline void DictEncoderImpl<FLBAType>::Put(const FixedLenByteArray& v) {
- static const uint8_t empty[] = {0};
-
- auto on_found = [](int32_t memo_index) {};
- auto on_not_found = [this](int32_t memo_index) { dict_encoded_size_ += type_length_; };
-
- DCHECK(v.ptr != nullptr || type_length_ == 0);
- const void* ptr = (v.ptr != nullptr) ? v.ptr : empty;
- int32_t memo_index;
- PARQUET_THROW_NOT_OK(
- memo_table_.GetOrInsert(ptr, type_length_, on_found, on_not_found, &memo_index));
- buffered_indices_.push_back(memo_index);
-}
-
-template <>
-void DictEncoderImpl<Int96Type>::Put(const ::arrow::Array& values) {
- ParquetException::NYI("Direct put to Int96");
-}
-
-template <>
-void DictEncoderImpl<Int96Type>::PutDictionary(const ::arrow::Array& values) {
- ParquetException::NYI("Direct put to Int96");
-}
-
-template <typename DType>
-void DictEncoderImpl<DType>::Put(const ::arrow::Array& values) {
- using ArrayType = typename ::arrow::CTypeTraits<typename DType::c_type>::ArrayType;
- const auto& data = checked_cast<const ArrayType&>(values);
- if (data.null_count() == 0) {
- // no nulls, just dump the data
- for (int64_t i = 0; i < data.length(); i++) {
- Put(data.Value(i));
- }
- } else {
- for (int64_t i = 0; i < data.length(); i++) {
- if (data.IsValid(i)) {
- Put(data.Value(i));
- }
- }
- }
-}
-
-template <>
-void DictEncoderImpl<FLBAType>::Put(const ::arrow::Array& values) {
- AssertFixedSizeBinary(values, type_length_);
- const auto& data = checked_cast<const ::arrow::FixedSizeBinaryArray&>(values);
- if (data.null_count() == 0) {
- // no nulls, just dump the data
- for (int64_t i = 0; i < data.length(); i++) {
- Put(FixedLenByteArray(data.Value(i)));
- }
- } else {
- std::vector<uint8_t> empty(type_length_, 0);
- for (int64_t i = 0; i < data.length(); i++) {
- if (data.IsValid(i)) {
- Put(FixedLenByteArray(data.Value(i)));
- }
- }
- }
-}
-
-template <>
-void DictEncoderImpl<ByteArrayType>::Put(const ::arrow::Array& values) {
- AssertBaseBinary(values);
- if (::arrow::is_binary_like(values.type_id())) {
- PutBinaryArray(checked_cast<const ::arrow::BinaryArray&>(values));
- } else {
- DCHECK(::arrow::is_large_binary_like(values.type_id()));
- PutBinaryArray(checked_cast<const ::arrow::LargeBinaryArray&>(values));
- }
-}
-
-template <typename DType>
-void AssertCanPutDictionary(DictEncoderImpl<DType>* encoder, const ::arrow::Array& dict) {
- if (dict.null_count() > 0) {
- throw ParquetException("Inserted dictionary cannot cannot contain nulls");
- }
-
- if (encoder->num_entries() > 0) {
- throw ParquetException("Can only call PutDictionary on an empty DictEncoder");
- }
-}
-
-template <typename DType>
-void DictEncoderImpl<DType>::PutDictionary(const ::arrow::Array& values) {
- AssertCanPutDictionary(this, values);
-
- using ArrayType = typename ::arrow::CTypeTraits<typename DType::c_type>::ArrayType;
- const auto& data = checked_cast<const ArrayType&>(values);
-
- dict_encoded_size_ += static_cast<int>(sizeof(typename DType::c_type) * data.length());
- for (int64_t i = 0; i < data.length(); i++) {
- int32_t unused_memo_index;
- PARQUET_THROW_NOT_OK(memo_table_.GetOrInsert(data.Value(i), &unused_memo_index));
- }
-}
-
-template <>
-void DictEncoderImpl<FLBAType>::PutDictionary(const ::arrow::Array& values) {
- AssertFixedSizeBinary(values, type_length_);
- AssertCanPutDictionary(this, values);
-
- const auto& data = checked_cast<const ::arrow::FixedSizeBinaryArray&>(values);
-
- dict_encoded_size_ += static_cast<int>(type_length_ * data.length());
- for (int64_t i = 0; i < data.length(); i++) {
- int32_t unused_memo_index;
- PARQUET_THROW_NOT_OK(
- memo_table_.GetOrInsert(data.Value(i), type_length_, &unused_memo_index));
- }
-}
-
-template <>
-void DictEncoderImpl<ByteArrayType>::PutDictionary(const ::arrow::Array& values) {
- AssertBaseBinary(values);
- AssertCanPutDictionary(this, values);
-
- if (::arrow::is_binary_like(values.type_id())) {
- PutBinaryDictionaryArray(checked_cast<const ::arrow::BinaryArray&>(values));
- } else {
- DCHECK(::arrow::is_large_binary_like(values.type_id()));
- PutBinaryDictionaryArray(checked_cast<const ::arrow::LargeBinaryArray&>(values));
- }
-}
-
-// ----------------------------------------------------------------------
-// ByteStreamSplitEncoder<T> implementations
-
-template <typename DType>
-class ByteStreamSplitEncoder : public EncoderImpl, virtual public TypedEncoder<DType> {
- public:
- using T = typename DType::c_type;
- using TypedEncoder<DType>::Put;
-
- explicit ByteStreamSplitEncoder(
- const ColumnDescriptor* descr,
- ::arrow::MemoryPool* pool = ::arrow::default_memory_pool());
-
- int64_t EstimatedDataEncodedSize() override;
- std::shared_ptr<Buffer> FlushValues() override;
-
- void Put(const T* buffer, int num_values) override;
- void Put(const ::arrow::Array& values) override;
- void PutSpaced(const T* src, int num_values, const uint8_t* valid_bits,
- int64_t valid_bits_offset) override;
-
- protected:
- template <typename ArrowType>
- void PutImpl(const ::arrow::Array& values) {
- if (values.type_id() != ArrowType::type_id) {
- throw ParquetException(std::string() + "direct put to " + ArrowType::type_name() +
- " from " + values.type()->ToString() + " not supported");
- }
- const auto& data = *values.data();
- PutSpaced(data.GetValues<typename ArrowType::c_type>(1),
- static_cast<int>(data.length), data.GetValues<uint8_t>(0, 0), data.offset);
- }
-
- ::arrow::BufferBuilder sink_;
- int64_t num_values_in_buffer_;
-};
-
-template <typename DType>
-ByteStreamSplitEncoder<DType>::ByteStreamSplitEncoder(const ColumnDescriptor* descr,
- ::arrow::MemoryPool* pool)
- : EncoderImpl(descr, Encoding::BYTE_STREAM_SPLIT, pool),
- sink_{pool},
- num_values_in_buffer_{0} {}
-
-template <typename DType>
-int64_t ByteStreamSplitEncoder<DType>::EstimatedDataEncodedSize() {
- return sink_.length();
-}
-
-template <typename DType>
-std::shared_ptr<Buffer> ByteStreamSplitEncoder<DType>::FlushValues() {
- std::shared_ptr<ResizableBuffer> output_buffer =
- AllocateBuffer(this->memory_pool(), EstimatedDataEncodedSize());
- uint8_t* output_buffer_raw = output_buffer->mutable_data();
- const uint8_t* raw_values = sink_.data();
- ::arrow::util::internal::ByteStreamSplitEncode<T>(raw_values, num_values_in_buffer_,
- output_buffer_raw);
- sink_.Reset();
- num_values_in_buffer_ = 0;
- return std::move(output_buffer);
-}
-
-template <typename DType>
-void ByteStreamSplitEncoder<DType>::Put(const T* buffer, int num_values) {
- if (num_values > 0) {
- PARQUET_THROW_NOT_OK(sink_.Append(buffer, num_values * sizeof(T)));
- num_values_in_buffer_ += num_values;
- }
-}
-
-template <>
-void ByteStreamSplitEncoder<FloatType>::Put(const ::arrow::Array& values) {
- PutImpl<::arrow::FloatType>(values);
-}
-
-template <>
-void ByteStreamSplitEncoder<DoubleType>::Put(const ::arrow::Array& values) {
- PutImpl<::arrow::DoubleType>(values);
-}
-
-template <typename DType>
-void ByteStreamSplitEncoder<DType>::PutSpaced(const T* src, int num_values,
- const uint8_t* valid_bits,
- int64_t valid_bits_offset) {
- if (valid_bits != NULLPTR) {
- PARQUET_ASSIGN_OR_THROW(auto buffer, ::arrow::AllocateBuffer(num_values * sizeof(T),
- this->memory_pool()));
- T* data = reinterpret_cast<T*>(buffer->mutable_data());
- int num_valid_values = ::arrow::util::internal::SpacedCompress<T>(
- src, num_values, valid_bits, valid_bits_offset, data);
- Put(data, num_valid_values);
- } else {
- Put(src, num_values);
- }
-}
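The byte transposition FlushValues() performs above is easiest to see on two floats (a sketch; `encoder` is assumed to be a ByteStreamSplitEncoder<FloatType>):

    float values[] = {1.0f, 2.0f};   // LE bytes: 00 00 80 3F and 00 00 00 40
    encoder->Put(values, 2);
    std::shared_ptr<arrow::Buffer> buf = encoder->FlushValues();
    // All first bytes are emitted, then all second bytes, and so on:
    //   buf = 00 00 | 00 00 | 80 00 | 3F 40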
-
-class DecoderImpl : virtual public Decoder {
- public:
- void SetData(int num_values, const uint8_t* data, int len) override {
- num_values_ = num_values;
- data_ = data;
- len_ = len;
- }
-
- int values_left() const override { return num_values_; }
- Encoding::type encoding() const override { return encoding_; }
-
- protected:
- explicit DecoderImpl(const ColumnDescriptor* descr, Encoding::type encoding)
- : descr_(descr), encoding_(encoding), num_values_(0), data_(NULLPTR), len_(0) {}
-
- // For accessing type-specific metadata, like FIXED_LEN_BYTE_ARRAY
- const ColumnDescriptor* descr_;
-
- const Encoding::type encoding_;
- int num_values_;
- const uint8_t* data_;
- int len_;
- int type_length_;
-};
-
-template <typename DType>
-class PlainDecoder : public DecoderImpl, virtual public TypedDecoder<DType> {
- public:
- using T = typename DType::c_type;
- explicit PlainDecoder(const ColumnDescriptor* descr);
-
- int Decode(T* buffer, int max_values) override;
-
- int DecodeArrow(int num_values, int null_count, const uint8_t* valid_bits,
- int64_t valid_bits_offset,
- typename EncodingTraits<DType>::Accumulator* builder) override;
-
- int DecodeArrow(int num_values, int null_count, const uint8_t* valid_bits,
- int64_t valid_bits_offset,
- typename EncodingTraits<DType>::DictAccumulator* builder) override;
-};
-
-template <>
-inline int PlainDecoder<Int96Type>::DecodeArrow(
- int num_values, int null_count, const uint8_t* valid_bits, int64_t valid_bits_offset,
- typename EncodingTraits<Int96Type>::Accumulator* builder) {
- ParquetException::NYI("DecodeArrow not supported for Int96");
-}
-
-template <>
-inline int PlainDecoder<Int96Type>::DecodeArrow(
- int num_values, int null_count, const uint8_t* valid_bits, int64_t valid_bits_offset,
- typename EncodingTraits<Int96Type>::DictAccumulator* builder) {
- ParquetException::NYI("DecodeArrow not supported for Int96");
-}
-
-template <>
-inline int PlainDecoder<BooleanType>::DecodeArrow(
- int num_values, int null_count, const uint8_t* valid_bits, int64_t valid_bits_offset,
- typename EncodingTraits<BooleanType>::DictAccumulator* builder) {
- ParquetException::NYI("dictionaries of BooleanType");
-}
-
-template <typename DType>
-int PlainDecoder<DType>::DecodeArrow(
- int num_values, int null_count, const uint8_t* valid_bits, int64_t valid_bits_offset,
- typename EncodingTraits<DType>::Accumulator* builder) {
- using value_type = typename DType::c_type;
-
- constexpr int value_size = static_cast<int>(sizeof(value_type));
- int values_decoded = num_values - null_count;
- if (ARROW_PREDICT_FALSE(len_ < value_size * values_decoded)) {
- ParquetException::EofException();
- }
-
- PARQUET_THROW_NOT_OK(builder->Reserve(num_values));
-
- VisitNullBitmapInline(
- valid_bits, valid_bits_offset, num_values, null_count,
- [&]() {
- builder->UnsafeAppend(::arrow::util::SafeLoadAs<value_type>(data_));
- data_ += sizeof(value_type);
- },
- [&]() { builder->UnsafeAppendNull(); });
-
- num_values_ -= values_decoded;
- len_ -= sizeof(value_type) * values_decoded;
- return values_decoded;
-}
-
-template <typename DType>
-int PlainDecoder<DType>::DecodeArrow(
- int num_values, int null_count, const uint8_t* valid_bits, int64_t valid_bits_offset,
- typename EncodingTraits<DType>::DictAccumulator* builder) {
- using value_type = typename DType::c_type;
-
- constexpr int value_size = static_cast<int>(sizeof(value_type));
- int values_decoded = num_values - null_count;
- if (ARROW_PREDICT_FALSE(len_ < value_size * values_decoded)) {
- ParquetException::EofException();
- }
-
- PARQUET_THROW_NOT_OK(builder->Reserve(num_values));
-
- VisitNullBitmapInline(
- valid_bits, valid_bits_offset, num_values, null_count,
- [&]() {
- PARQUET_THROW_NOT_OK(
- builder->Append(::arrow::util::SafeLoadAs<value_type>(data_)));
- data_ += sizeof(value_type);
- },
- [&]() { PARQUET_THROW_NOT_OK(builder->AppendNull()); });
-
- num_values_ -= values_decoded;
- len_ -= sizeof(value_type) * values_decoded;
- return values_decoded;
-}
-
-// Decode routine templated on C++ type rather than type enum
-template <typename T>
-inline int DecodePlain(const uint8_t* data, int64_t data_size, int num_values,
- int type_length, T* out) {
- int64_t bytes_to_decode = num_values * static_cast<int64_t>(sizeof(T));
- if (bytes_to_decode > data_size || bytes_to_decode > INT_MAX) {
- ParquetException::EofException();
- }
- // If bytes_to_decode == 0, data could be null
- if (bytes_to_decode > 0) {
- memcpy(out, data, bytes_to_decode);
- }
- return static_cast<int>(bytes_to_decode);
-}
-
-template <typename DType>
-PlainDecoder<DType>::PlainDecoder(const ColumnDescriptor* descr)
- : DecoderImpl(descr, Encoding::PLAIN) {
- if (descr_ && descr_->physical_type() == Type::FIXED_LEN_BYTE_ARRAY) {
- type_length_ = descr_->type_length();
- } else {
- type_length_ = -1;
- }
-}
-
-// Template specialization for BYTE_ARRAY. The written values do not own their
-// own data.
-
-static inline int64_t ReadByteArray(const uint8_t* data, int64_t data_size,
- ByteArray* out) {
- if (ARROW_PREDICT_FALSE(data_size < 4)) {
- ParquetException::EofException();
- }
- const int32_t len = ::arrow::util::SafeLoadAs<int32_t>(data);
- if (len < 0) {
- throw ParquetException("Invalid BYTE_ARRAY value");
- }
- const int64_t consumed_length = static_cast<int64_t>(len) + 4;
- if (ARROW_PREDICT_FALSE(data_size < consumed_length)) {
- ParquetException::EofException();
- }
- *out = ByteArray{static_cast<uint32_t>(len), data + 4};
- return consumed_length;
-}
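For reference, the PLAIN BYTE_ARRAY layout parsed above is a 4-byte little-endian length prefix followed by the raw bytes; a sketch (calling the file-local helper as if it were visible):

    const uint8_t data[] = {0x02, 0x00, 0x00, 0x00, 'h', 'i'};  // "hi"
    parquet::ByteArray out;
    int64_t consumed = ReadByteArray(data, sizeof(data), &out);
    // consumed == 6; out.len == 2; out.ptr aliases the payload, no copy.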
-
-template <>
-inline int DecodePlain<ByteArray>(const uint8_t* data, int64_t data_size, int num_values,
- int type_length, ByteArray* out) {
- int bytes_decoded = 0;
- for (int i = 0; i < num_values; ++i) {
- const auto increment = ReadByteArray(data, data_size, out + i);
- if (ARROW_PREDICT_FALSE(increment > INT_MAX - bytes_decoded)) {
- throw ParquetException("BYTE_ARRAY chunk too large");
- }
- data += increment;
- data_size -= increment;
- bytes_decoded += static_cast<int>(increment);
- }
- return bytes_decoded;
-}
-
-// Template specialization for FIXED_LEN_BYTE_ARRAY. The written values do not
-// own their own data.
-template <>
-inline int DecodePlain<FixedLenByteArray>(const uint8_t* data, int64_t data_size,
- int num_values, int type_length,
- FixedLenByteArray* out) {
- int64_t bytes_to_decode = static_cast<int64_t>(type_length) * num_values;
- if (bytes_to_decode > data_size || bytes_to_decode > INT_MAX) {
- ParquetException::EofException();
- }
- for (int i = 0; i < num_values; ++i) {
- out[i].ptr = data;
- data += type_length;
- data_size -= type_length;
- }
- return static_cast<int>(bytes_to_decode);
-}
-
-template <typename DType>
-int PlainDecoder<DType>::Decode(T* buffer, int max_values) {
- max_values = std::min(max_values, num_values_);
- int bytes_consumed = DecodePlain<T>(data_, len_, max_values, type_length_, buffer);
- data_ += bytes_consumed;
- len_ -= bytes_consumed;
- num_values_ -= max_values;
- return max_values;
-}
-
-class PlainBooleanDecoder : public DecoderImpl,
- virtual public TypedDecoder<BooleanType>,
- virtual public BooleanDecoder {
- public:
- explicit PlainBooleanDecoder(const ColumnDescriptor* descr);
- void SetData(int num_values, const uint8_t* data, int len) override;
-
- // Two flavors of bool decoding
- int Decode(uint8_t* buffer, int max_values) override;
- int Decode(bool* buffer, int max_values) override;
- int DecodeArrow(int num_values, int null_count, const uint8_t* valid_bits,
- int64_t valid_bits_offset,
- typename EncodingTraits<BooleanType>::Accumulator* out) override;
-
- int DecodeArrow(int num_values, int null_count, const uint8_t* valid_bits,
- int64_t valid_bits_offset,
- typename EncodingTraits<BooleanType>::DictAccumulator* out) override;
-
- private:
- std::unique_ptr<::arrow::BitUtil::BitReader> bit_reader_;
-};
-
-PlainBooleanDecoder::PlainBooleanDecoder(const ColumnDescriptor* descr)
- : DecoderImpl(descr, Encoding::PLAIN) {}
-
-void PlainBooleanDecoder::SetData(int num_values, const uint8_t* data, int len) {
- num_values_ = num_values;
- bit_reader_.reset(new BitUtil::BitReader(data, len));
-}
-
-int PlainBooleanDecoder::DecodeArrow(
- int num_values, int null_count, const uint8_t* valid_bits, int64_t valid_bits_offset,
- typename EncodingTraits<BooleanType>::Accumulator* builder) {
- int values_decoded = num_values - null_count;
- if (ARROW_PREDICT_FALSE(num_values_ < values_decoded)) {
- ParquetException::EofException();
- }
-
- PARQUET_THROW_NOT_OK(builder->Reserve(num_values));
-
- VisitNullBitmapInline(
- valid_bits, valid_bits_offset, num_values, null_count,
- [&]() {
- bool value;
- ARROW_IGNORE_EXPR(bit_reader_->GetValue(1, &value));
- builder->UnsafeAppend(value);
- },
- [&]() { builder->UnsafeAppendNull(); });
-
- num_values_ -= values_decoded;
- return values_decoded;
-}
-
-inline int PlainBooleanDecoder::DecodeArrow(
- int num_values, int null_count, const uint8_t* valid_bits, int64_t valid_bits_offset,
- typename EncodingTraits<BooleanType>::DictAccumulator* builder) {
- ParquetException::NYI("dictionaries of BooleanType");
-}
-
-int PlainBooleanDecoder::Decode(uint8_t* buffer, int max_values) {
- max_values = std::min(max_values, num_values_);
- bool val;
- ::arrow::internal::BitmapWriter bit_writer(buffer, 0, max_values);
- for (int i = 0; i < max_values; ++i) {
- if (!bit_reader_->GetValue(1, &val)) {
- ParquetException::EofException();
- }
- if (val) {
- bit_writer.Set();
- }
- bit_writer.Next();
- }
- bit_writer.Finish();
- num_values_ -= max_values;
- return max_values;
-}
-
-int PlainBooleanDecoder::Decode(bool* buffer, int max_values) {
- max_values = std::min(max_values, num_values_);
- if (bit_reader_->GetBatch(1, buffer, max_values) != max_values) {
- ParquetException::EofException();
- }
- num_values_ -= max_values;
- return max_values;
-}
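-
-// Illustration (not part of the decoder): PLAIN booleans are bit-packed
-// LSB-first, one bit per value, so the eight values T,F,T,T,F,F,F,T occupy
-// the single byte 0b10001101 = 0x8D. Decode(bool*) above just reads them
-// back one bit at a time through BitReader::GetBatch.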
-
-struct ArrowBinaryHelper {
- explicit ArrowBinaryHelper(typename EncodingTraits<ByteArrayType>::Accumulator* out) {
- this->out = out;
- this->builder = out->builder.get();
- this->chunk_space_remaining =
- ::arrow::kBinaryMemoryLimit - this->builder->value_data_length();
- }
-
- Status PushChunk() {
- std::shared_ptr<::arrow::Array> result;
- RETURN_NOT_OK(builder->Finish(&result));
- out->chunks.push_back(result);
- chunk_space_remaining = ::arrow::kBinaryMemoryLimit;
- return Status::OK();
- }
-
- bool CanFit(int64_t length) const { return length <= chunk_space_remaining; }
-
- void UnsafeAppend(const uint8_t* data, int32_t length) {
- chunk_space_remaining -= length;
- builder->UnsafeAppend(data, length);
- }
-
- void UnsafeAppendNull() { builder->UnsafeAppendNull(); }
-
- Status Append(const uint8_t* data, int32_t length) {
- chunk_space_remaining -= length;
- return builder->Append(data, length);
- }
-
- Status AppendNull() { return builder->AppendNull(); }
-
- typename EncodingTraits<ByteArrayType>::Accumulator* out;
- ::arrow::BinaryBuilder* builder;
- int64_t chunk_space_remaining;
-};
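-
-// Sketch of why ArrowBinaryHelper chunks its output (assuming the ~2 GiB cap
-// on an Arrow BinaryArray's value buffer that ::arrow::kBinaryMemoryLimit
-// holds): once appending a value would push value_data_length() past the
-// limit, the current builder is finished into out->chunks and a fresh one is
-// started, e.g.
-//
-//   if (!helper.CanFit(val.len)) {
-//     RETURN_NOT_OK(helper.PushChunk());  // seal the chunk, reset space
-//   }
-//   RETURN_NOT_OK(helper.Append(val.ptr, val.len));
-//
-// so a column with more than 2 GiB of string data decodes into several chunks.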
-
-template <>
-inline int PlainDecoder<ByteArrayType>::DecodeArrow(
- int num_values, int null_count, const uint8_t* valid_bits, int64_t valid_bits_offset,
- typename EncodingTraits<ByteArrayType>::Accumulator* builder) {
- ParquetException::NYI();
-}
-
-template <>
-inline int PlainDecoder<ByteArrayType>::DecodeArrow(
- int num_values, int null_count, const uint8_t* valid_bits, int64_t valid_bits_offset,
- typename EncodingTraits<ByteArrayType>::DictAccumulator* builder) {
- ParquetException::NYI();
-}
-
-template <>
-inline int PlainDecoder<FLBAType>::DecodeArrow(
- int num_values, int null_count, const uint8_t* valid_bits, int64_t valid_bits_offset,
- typename EncodingTraits<FLBAType>::Accumulator* builder) {
- int values_decoded = num_values - null_count;
- if (ARROW_PREDICT_FALSE(len_ < descr_->type_length() * values_decoded)) {
- ParquetException::EofException();
- }
-
- PARQUET_THROW_NOT_OK(builder->Reserve(num_values));
-
- VisitNullBitmapInline(
- valid_bits, valid_bits_offset, num_values, null_count,
- [&]() {
- builder->UnsafeAppend(data_);
- data_ += descr_->type_length();
- },
- [&]() { builder->UnsafeAppendNull(); });
-
- num_values_ -= values_decoded;
- len_ -= descr_->type_length() * values_decoded;
- return values_decoded;
-}
-
-template <>
-inline int PlainDecoder<FLBAType>::DecodeArrow(
- int num_values, int null_count, const uint8_t* valid_bits, int64_t valid_bits_offset,
- typename EncodingTraits<FLBAType>::DictAccumulator* builder) {
- int values_decoded = num_values - null_count;
- if (ARROW_PREDICT_FALSE(len_ < descr_->type_length() * values_decoded)) {
- ParquetException::EofException();
- }
-
- PARQUET_THROW_NOT_OK(builder->Reserve(num_values));
-
- VisitNullBitmapInline(
- valid_bits, valid_bits_offset, num_values, null_count,
- [&]() {
- PARQUET_THROW_NOT_OK(builder->Append(data_));
- data_ += descr_->type_length();
- },
- [&]() { PARQUET_THROW_NOT_OK(builder->AppendNull()); });
-
- num_values_ -= values_decoded;
- len_ -= descr_->type_length() * values_decoded;
- return values_decoded;
-}
-
-class PlainByteArrayDecoder : public PlainDecoder<ByteArrayType>,
- virtual public ByteArrayDecoder {
- public:
- using Base = PlainDecoder<ByteArrayType>;
- using Base::DecodeSpaced;
- using Base::PlainDecoder;
-
- // ----------------------------------------------------------------------
- // Dictionary read paths
-
- int DecodeArrow(int num_values, int null_count, const uint8_t* valid_bits,
- int64_t valid_bits_offset,
- ::arrow::BinaryDictionary32Builder* builder) override {
- int result = 0;
- PARQUET_THROW_NOT_OK(DecodeArrow(num_values, null_count, valid_bits,
- valid_bits_offset, builder, &result));
- return result;
- }
-
- // ----------------------------------------------------------------------
- // Optimized dense binary read paths
-
- int DecodeArrow(int num_values, int null_count, const uint8_t* valid_bits,
- int64_t valid_bits_offset,
- typename EncodingTraits<ByteArrayType>::Accumulator* out) override {
- int result = 0;
- PARQUET_THROW_NOT_OK(DecodeArrowDense(num_values, null_count, valid_bits,
- valid_bits_offset, out, &result));
- return result;
- }
-
- private:
- Status DecodeArrowDense(int num_values, int null_count, const uint8_t* valid_bits,
- int64_t valid_bits_offset,
- typename EncodingTraits<ByteArrayType>::Accumulator* out,
- int* out_values_decoded) {
- ArrowBinaryHelper helper(out);
- int values_decoded = 0;
-
- RETURN_NOT_OK(helper.builder->Reserve(num_values));
- RETURN_NOT_OK(helper.builder->ReserveData(
- std::min<int64_t>(len_, helper.chunk_space_remaining)));
-
- int i = 0;
- RETURN_NOT_OK(VisitNullBitmapInline(
- valid_bits, valid_bits_offset, num_values, null_count,
- [&]() {
- if (ARROW_PREDICT_FALSE(len_ < 4)) {
- ParquetException::EofException();
- }
- auto value_len = ::arrow::util::SafeLoadAs<int32_t>(data_);
- if (ARROW_PREDICT_FALSE(value_len < 0 || value_len > INT32_MAX - 4)) {
- return Status::Invalid("Invalid or corrupted value_len '", value_len, "'");
- }
- auto increment = value_len + 4;
- if (ARROW_PREDICT_FALSE(len_ < increment)) {
- ParquetException::EofException();
- }
- if (ARROW_PREDICT_FALSE(!helper.CanFit(value_len))) {
- // This element would exceed the capacity of a chunk
- RETURN_NOT_OK(helper.PushChunk());
- RETURN_NOT_OK(helper.builder->Reserve(num_values - i));
- RETURN_NOT_OK(helper.builder->ReserveData(
- std::min<int64_t>(len_, helper.chunk_space_remaining)));
- }
- helper.UnsafeAppend(data_ + 4, value_len);
- data_ += increment;
- len_ -= increment;
- ++values_decoded;
- ++i;
- return Status::OK();
- },
- [&]() {
- helper.UnsafeAppendNull();
- ++i;
- return Status::OK();
- }));
-
- num_values_ -= values_decoded;
- *out_values_decoded = values_decoded;
- return Status::OK();
- }
-
- template <typename BuilderType>
- Status DecodeArrow(int num_values, int null_count, const uint8_t* valid_bits,
- int64_t valid_bits_offset, BuilderType* builder,
- int* out_values_decoded) {
- RETURN_NOT_OK(builder->Reserve(num_values));
- int values_decoded = 0;
-
- RETURN_NOT_OK(VisitNullBitmapInline(
- valid_bits, valid_bits_offset, num_values, null_count,
- [&]() {
- if (ARROW_PREDICT_FALSE(len_ < 4)) {
- ParquetException::EofException();
- }
- auto value_len = ::arrow::util::SafeLoadAs<int32_t>(data_);
- if (ARROW_PREDICT_FALSE(value_len < 0 || value_len > INT32_MAX - 4)) {
- return Status::Invalid("Invalid or corrupted value_len '", value_len, "'");
- }
- auto increment = value_len + 4;
- if (ARROW_PREDICT_FALSE(len_ < increment)) {
- ParquetException::EofException();
- }
- RETURN_NOT_OK(builder->Append(data_ + 4, value_len));
- data_ += increment;
- len_ -= increment;
- ++values_decoded;
- return Status::OK();
- },
- [&]() { return builder->AppendNull(); }));
-
- num_values_ -= values_decoded;
- *out_values_decoded = values_decoded;
- return Status::OK();
- }
-};
-
-class PlainFLBADecoder : public PlainDecoder<FLBAType>, virtual public FLBADecoder {
- public:
- using Base = PlainDecoder<FLBAType>;
- using Base::PlainDecoder;
-};
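-
-// For illustration: PLAIN FIXED_LEN_BYTE_ARRAY carries no per-value length
-// prefix; values are concatenated at the width given by the column
-// descriptor. With type_length = 3, the values "abc" and "xyz" encode as the
-// six bytes 'a' 'b' 'c' 'x' 'y' 'z', and DecodePlain<FixedLenByteArray>
-// above just plants pointers into that buffer at 3-byte strides.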
-
-// ----------------------------------------------------------------------
-// Dictionary encoding and decoding
-
-template <typename Type>
-class DictDecoderImpl : public DecoderImpl, virtual public DictDecoder<Type> {
- public:
- typedef typename Type::c_type T;
-
- // Initializes the dictionary with values from 'dictionary'. The data in
- // dictionary is not guaranteed to persist in memory after this call so the
- // dictionary decoder needs to copy the data out if necessary.
- explicit DictDecoderImpl(const ColumnDescriptor* descr,
- MemoryPool* pool = ::arrow::default_memory_pool())
- : DecoderImpl(descr, Encoding::RLE_DICTIONARY),
- dictionary_(AllocateBuffer(pool, 0)),
- dictionary_length_(0),
- byte_array_data_(AllocateBuffer(pool, 0)),
- byte_array_offsets_(AllocateBuffer(pool, 0)),
- indices_scratch_space_(AllocateBuffer(pool, 0)) {}
-
- // Perform type-specific initialization
- void SetDict(TypedDecoder<Type>* dictionary) override;
-
- void SetData(int num_values, const uint8_t* data, int len) override {
- num_values_ = num_values;
- if (len == 0) {
- // Initialize dummy decoder to avoid crashes later on
- idx_decoder_ = ::arrow::util::RleDecoder(data, len, /*bit_width=*/1);
- return;
- }
- uint8_t bit_width = *data;
- if (ARROW_PREDICT_FALSE(bit_width >= 64)) {
- throw ParquetException("Invalid or corrupted bit_width");
- }
- idx_decoder_ = ::arrow::util::RleDecoder(++data, --len, bit_width);
- }
-
- int Decode(T* buffer, int num_values) override {
- num_values = std::min(num_values, num_values_);
- int decoded_values =
- idx_decoder_.GetBatchWithDict(reinterpret_cast<const T*>(dictionary_->data()),
- dictionary_length_, buffer, num_values);
- if (decoded_values != num_values) {
- ParquetException::EofException();
- }
- num_values_ -= num_values;
- return num_values;
- }
-
- int DecodeSpaced(T* buffer, int num_values, int null_count, const uint8_t* valid_bits,
- int64_t valid_bits_offset) override {
- num_values = std::min(num_values, num_values_);
- if (num_values != idx_decoder_.GetBatchWithDictSpaced(
- reinterpret_cast<const T*>(dictionary_->data()),
- dictionary_length_, buffer, num_values, null_count, valid_bits,
- valid_bits_offset)) {
- ParquetException::EofException();
- }
- num_values_ -= num_values;
- return num_values;
- }
-
- int DecodeArrow(int num_values, int null_count, const uint8_t* valid_bits,
- int64_t valid_bits_offset,
- typename EncodingTraits<Type>::Accumulator* out) override;
-
- int DecodeArrow(int num_values, int null_count, const uint8_t* valid_bits,
- int64_t valid_bits_offset,
- typename EncodingTraits<Type>::DictAccumulator* out) override;
-
- void InsertDictionary(::arrow::ArrayBuilder* builder) override;
-
- int DecodeIndicesSpaced(int num_values, int null_count, const uint8_t* valid_bits,
- int64_t valid_bits_offset,
- ::arrow::ArrayBuilder* builder) override {
- if (num_values > 0) {
- // TODO(wesm): Refactor to batch reads for improved memory use. It is not
- // trivial because the null_count is relative to the entire bitmap
- PARQUET_THROW_NOT_OK(indices_scratch_space_->TypedResize<int32_t>(
- num_values, /*shrink_to_fit=*/false));
- }
-
- auto indices_buffer =
- reinterpret_cast<int32_t*>(indices_scratch_space_->mutable_data());
-
- if (num_values != idx_decoder_.GetBatchSpaced(num_values, null_count, valid_bits,
- valid_bits_offset, indices_buffer)) {
- ParquetException::EofException();
- }
-
- /// XXX(wesm): Cannot append "valid bits" directly to the builder
- std::vector<uint8_t> valid_bytes(num_values);
- ::arrow::internal::BitmapReader bit_reader(valid_bits, valid_bits_offset, num_values);
- for (int64_t i = 0; i < num_values; ++i) {
- valid_bytes[i] = static_cast<uint8_t>(bit_reader.IsSet());
- bit_reader.Next();
- }
-
- auto binary_builder = checked_cast<::arrow::BinaryDictionary32Builder*>(builder);
- PARQUET_THROW_NOT_OK(
- binary_builder->AppendIndices(indices_buffer, num_values, valid_bytes.data()));
- num_values_ -= num_values - null_count;
- return num_values - null_count;
- }
-
- int DecodeIndices(int num_values, ::arrow::ArrayBuilder* builder) override {
- num_values = std::min(num_values, num_values_);
- if (num_values > 0) {
- // TODO(wesm): Refactor to batch reads for improved memory use. This is
- // relatively simple here because we don't have to do any bookkeeping of
- // nulls
- PARQUET_THROW_NOT_OK(indices_scratch_space_->TypedResize<int32_t>(
- num_values, /*shrink_to_fit=*/false));
- }
- auto indices_buffer =
- reinterpret_cast<int32_t*>(indices_scratch_space_->mutable_data());
- if (num_values != idx_decoder_.GetBatch(indices_buffer, num_values)) {
- ParquetException::EofException();
- }
- auto binary_builder = checked_cast<::arrow::BinaryDictionary32Builder*>(builder);
- PARQUET_THROW_NOT_OK(binary_builder->AppendIndices(indices_buffer, num_values));
- num_values_ -= num_values;
- return num_values;
- }
-
- int DecodeIndices(int num_values, int32_t* indices) override {
- if (num_values != idx_decoder_.GetBatch(indices, num_values)) {
- ParquetException::EofException();
- }
- num_values_ -= num_values;
- return num_values;
- }
-
- void GetDictionary(const T** dictionary, int32_t* dictionary_length) override {
- *dictionary_length = dictionary_length_;
- *dictionary = reinterpret_cast<T*>(dictionary_->mutable_data());
- }
-
- protected:
- Status IndexInBounds(int32_t index) {
- if (ARROW_PREDICT_TRUE(0 <= index && index < dictionary_length_)) {
- return Status::OK();
- }
- return Status::Invalid("Index not in dictionary bounds");
- }
-
- inline void DecodeDict(TypedDecoder<Type>* dictionary) {
- dictionary_length_ = static_cast<int32_t>(dictionary->values_left());
- PARQUET_THROW_NOT_OK(dictionary_->Resize(dictionary_length_ * sizeof(T),
- /*shrink_to_fit=*/false));
- dictionary->Decode(reinterpret_cast<T*>(dictionary_->mutable_data()),
- dictionary_length_);
- }
-
- // The decoded dictionary values. For BYTE_ARRAY and FIXED_LEN_BYTE_ARRAY
- // columns the ByteArray/FLBA structs stored here point into
- // byte_array_data_ below; for other types the values are stored inline.
- std::shared_ptr<ResizableBuffer> dictionary_;
-
- int32_t dictionary_length_;
-
- // Buffer that owns the actual byte array data (the ByteArray structs in
- // dictionary_ above just hold pointers into it).
- std::shared_ptr<ResizableBuffer> byte_array_data_;
-
- // Arrow-style byte offsets for each dictionary value. We maintain two
- // representations of the dictionary, one as ByteArray* for non-Arrow
- // consumers and this one for Arrow consumers. Since dictionaries are
- // generally pretty small to begin with this doesn't mean too much extra
- // memory use in most cases
- std::shared_ptr<ResizableBuffer> byte_array_offsets_;
-
- // Reusable buffer for decoding dictionary indices to be appended to a
- // BinaryDictionary32Builder
- std::shared_ptr<ResizableBuffer> indices_scratch_space_;
-
- ::arrow::util::RleDecoder idx_decoder_;
-};
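-
-// Sketch of the RLE_DICTIONARY data page that SetData above consumes (per
-// the Parquet spec, shown only to make the code concrete): the first byte is
-// the index bit width, and the rest is an RLE/bit-packed hybrid stream of
-// dictionary indices. With a 3-value dictionary (bit width 2), one hundred
-// repetitions of index 1 encode roughly as
-//
-//   02        <- bit width
-//   C8 01     <- RLE run header: varint(100 << 1)
-//   01        <- the repeated 2-bit index, padded to a byte
-//
-// which GetBatchWithDict then expands through the dictionary in one pass.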
-
-template <typename Type>
-void DictDecoderImpl<Type>::SetDict(TypedDecoder<Type>* dictionary) {
- DecodeDict(dictionary);
-}
-
-template <>
-void DictDecoderImpl<BooleanType>::SetDict(TypedDecoder<BooleanType>* dictionary) {
- ParquetException::NYI("Dictionary encoding is not implemented for boolean values");
-}
-
-template <>
-void DictDecoderImpl<ByteArrayType>::SetDict(TypedDecoder<ByteArrayType>* dictionary) {
- DecodeDict(dictionary);
-
- auto dict_values = reinterpret_cast<ByteArray*>(dictionary_->mutable_data());
-
- int total_size = 0;
- for (int i = 0; i < dictionary_length_; ++i) {
- total_size += dict_values[i].len;
- }
- PARQUET_THROW_NOT_OK(byte_array_data_->Resize(total_size,
- /*shrink_to_fit=*/false));
- PARQUET_THROW_NOT_OK(
- byte_array_offsets_->Resize((dictionary_length_ + 1) * sizeof(int32_t),
- /*shrink_to_fit=*/false));
-
- int32_t offset = 0;
- uint8_t* bytes_data = byte_array_data_->mutable_data();
- int32_t* bytes_offsets =
- reinterpret_cast<int32_t*>(byte_array_offsets_->mutable_data());
- for (int i = 0; i < dictionary_length_; ++i) {
- memcpy(bytes_data + offset, dict_values[i].ptr, dict_values[i].len);
- bytes_offsets[i] = offset;
- dict_values[i].ptr = bytes_data + offset;
- offset += dict_values[i].len;
- }
- bytes_offsets[dictionary_length_] = offset;
-}
-
-template <>
-inline void DictDecoderImpl<FLBAType>::SetDict(TypedDecoder<FLBAType>* dictionary) {
- DecodeDict(dictionary);
-
- auto dict_values = reinterpret_cast<FLBA*>(dictionary_->mutable_data());
-
- int fixed_len = descr_->type_length();
- int total_size = dictionary_length_ * fixed_len;
-
- PARQUET_THROW_NOT_OK(byte_array_data_->Resize(total_size,
- /*shrink_to_fit=*/false));
- uint8_t* bytes_data = byte_array_data_->mutable_data();
- for (int32_t i = 0, offset = 0; i < dictionary_length_; ++i, offset += fixed_len) {
- memcpy(bytes_data + offset, dict_values[i].ptr, fixed_len);
- dict_values[i].ptr = bytes_data + offset;
- }
-}
-
-template <>
-inline int DictDecoderImpl<Int96Type>::DecodeArrow(
- int num_values, int null_count, const uint8_t* valid_bits, int64_t valid_bits_offset,
- typename EncodingTraits<Int96Type>::Accumulator* builder) {
- ParquetException::NYI("DecodeArrow to Int96Type");
-}
-
-template <>
-inline int DictDecoderImpl<Int96Type>::DecodeArrow(
- int num_values, int null_count, const uint8_t* valid_bits, int64_t valid_bits_offset,
- typename EncodingTraits<Int96Type>::DictAccumulator* builder) {
- ParquetException::NYI("DecodeArrow to Int96Type");
-}
-
-template <>
-inline int DictDecoderImpl<ByteArrayType>::DecodeArrow(
- int num_values, int null_count, const uint8_t* valid_bits, int64_t valid_bits_offset,
- typename EncodingTraits<ByteArrayType>::Accumulator* builder) {
- ParquetException::NYI("DecodeArrow implemented elsewhere");
-}
-
-template <>
-inline int DictDecoderImpl<ByteArrayType>::DecodeArrow(
- int num_values, int null_count, const uint8_t* valid_bits, int64_t valid_bits_offset,
- typename EncodingTraits<ByteArrayType>::DictAccumulator* builder) {
- ParquetException::NYI("DecodeArrow implemented elsewhere");
-}
-
-template <typename DType>
-int DictDecoderImpl<DType>::DecodeArrow(
- int num_values, int null_count, const uint8_t* valid_bits, int64_t valid_bits_offset,
- typename EncodingTraits<DType>::DictAccumulator* builder) {
- PARQUET_THROW_NOT_OK(builder->Reserve(num_values));
-
- auto dict_values = reinterpret_cast<const typename DType::c_type*>(dictionary_->data());
-
- VisitNullBitmapInline(
- valid_bits, valid_bits_offset, num_values, null_count,
- [&]() {
- int32_t index;
- if (ARROW_PREDICT_FALSE(!idx_decoder_.Get(&index))) {
- throw ParquetException("");
- }
- PARQUET_THROW_NOT_OK(IndexInBounds(index));
- PARQUET_THROW_NOT_OK(builder->Append(dict_values[index]));
- },
- [&]() { PARQUET_THROW_NOT_OK(builder->AppendNull()); });
-
- return num_values - null_count;
-}
-
-template <>
-int DictDecoderImpl<BooleanType>::DecodeArrow(
- int num_values, int null_count, const uint8_t* valid_bits, int64_t valid_bits_offset,
- typename EncodingTraits<BooleanType>::DictAccumulator* builder) {
- ParquetException::NYI("No dictionary encoding for BooleanType");
-}
-
-template <>
-inline int DictDecoderImpl<FLBAType>::DecodeArrow(
- int num_values, int null_count, const uint8_t* valid_bits, int64_t valid_bits_offset,
- typename EncodingTraits<FLBAType>::Accumulator* builder) {
- if (builder->byte_width() != descr_->type_length()) {
- throw ParquetException("Byte width mismatch: builder was " +
- std::to_string(builder->byte_width()) + " but decoder was " +
- std::to_string(descr_->type_length()));
- }
-
- PARQUET_THROW_NOT_OK(builder->Reserve(num_values));
-
- auto dict_values = reinterpret_cast<const FLBA*>(dictionary_->data());
-
- VisitNullBitmapInline(
- valid_bits, valid_bits_offset, num_values, null_count,
- [&]() {
- int32_t index;
- if (ARROW_PREDICT_FALSE(!idx_decoder_.Get(&index))) {
- throw ParquetException("");
- }
- PARQUET_THROW_NOT_OK(IndexInBounds(index));
- builder->UnsafeAppend(dict_values[index].ptr);
- },
- [&]() { builder->UnsafeAppendNull(); });
-
- return num_values - null_count;
-}
-
-template <>
-int DictDecoderImpl<FLBAType>::DecodeArrow(
- int num_values, int null_count, const uint8_t* valid_bits, int64_t valid_bits_offset,
- typename EncodingTraits<FLBAType>::DictAccumulator* builder) {
- auto value_type =
- checked_cast<const ::arrow::DictionaryType&>(*builder->type()).value_type();
- auto byte_width =
- checked_cast<const ::arrow::FixedSizeBinaryType&>(*value_type).byte_width();
- if (byte_width != descr_->type_length()) {
- throw ParquetException("Byte width mismatch: builder was " +
- std::to_string(byte_width) + " but decoder was " +
- std::to_string(descr_->type_length()));
- }
-
- PARQUET_THROW_NOT_OK(builder->Reserve(num_values));
-
- auto dict_values = reinterpret_cast<const FLBA*>(dictionary_->data());
-
- VisitNullBitmapInline(
- valid_bits, valid_bits_offset, num_values, null_count,
- [&]() {
- int32_t index;
- if (ARROW_PREDICT_FALSE(!idx_decoder_.Get(&index))) {
- throw ParquetException("");
- }
- PARQUET_THROW_NOT_OK(IndexInBounds(index));
- PARQUET_THROW_NOT_OK(builder->Append(dict_values[index].ptr));
- },
- [&]() { PARQUET_THROW_NOT_OK(builder->AppendNull()); });
-
- return num_values - null_count;
-}
-
-template <typename Type>
-int DictDecoderImpl<Type>::DecodeArrow(
- int num_values, int null_count, const uint8_t* valid_bits, int64_t valid_bits_offset,
- typename EncodingTraits<Type>::Accumulator* builder) {
- PARQUET_THROW_NOT_OK(builder->Reserve(num_values));
-
- using value_type = typename Type::c_type;
- auto dict_values = reinterpret_cast<const value_type*>(dictionary_->data());
-
- VisitNullBitmapInline(
- valid_bits, valid_bits_offset, num_values, null_count,
- [&]() {
- int32_t index;
- if (ARROW_PREDICT_FALSE(!idx_decoder_.Get(&index))) {
- throw ParquetException("");
- }
- PARQUET_THROW_NOT_OK(IndexInBounds(index));
- builder->UnsafeAppend(dict_values[index]);
- },
- [&]() { builder->UnsafeAppendNull(); });
-
- return num_values - null_count;
-}
-
-template <typename Type>
-void DictDecoderImpl<Type>::InsertDictionary(::arrow::ArrayBuilder* builder) {
- ParquetException::NYI("InsertDictionary only implemented for BYTE_ARRAY types");
-}
-
-template <>
-void DictDecoderImpl<ByteArrayType>::InsertDictionary(::arrow::ArrayBuilder* builder) {
- auto binary_builder = checked_cast<::arrow::BinaryDictionary32Builder*>(builder);
-
- // Make a BinaryArray referencing the internal dictionary data
- auto arr = std::make_shared<::arrow::BinaryArray>(
- dictionary_length_, byte_array_offsets_, byte_array_data_);
- PARQUET_THROW_NOT_OK(binary_builder->InsertMemoValues(*arr));
-}
-
-class DictByteArrayDecoderImpl : public DictDecoderImpl<ByteArrayType>,
- virtual public ByteArrayDecoder {
- public:
- using BASE = DictDecoderImpl<ByteArrayType>;
- using BASE::DictDecoderImpl;
-
- int DecodeArrow(int num_values, int null_count, const uint8_t* valid_bits,
- int64_t valid_bits_offset,
- ::arrow::BinaryDictionary32Builder* builder) override {
- int result = 0;
- if (null_count == 0) {
- PARQUET_THROW_NOT_OK(DecodeArrowNonNull(num_values, builder, &result));
- } else {
- PARQUET_THROW_NOT_OK(DecodeArrow(num_values, null_count, valid_bits,
- valid_bits_offset, builder, &result));
- }
- return result;
- }
-
- int DecodeArrow(int num_values, int null_count, const uint8_t* valid_bits,
- int64_t valid_bits_offset,
- typename EncodingTraits<ByteArrayType>::Accumulator* out) override {
- int result = 0;
- if (null_count == 0) {
- PARQUET_THROW_NOT_OK(DecodeArrowDenseNonNull(num_values, out, &result));
- } else {
- PARQUET_THROW_NOT_OK(DecodeArrowDense(num_values, null_count, valid_bits,
- valid_bits_offset, out, &result));
- }
- return result;
- }
-
- private:
- Status DecodeArrowDense(int num_values, int null_count, const uint8_t* valid_bits,
- int64_t valid_bits_offset,
- typename EncodingTraits<ByteArrayType>::Accumulator* out,
- int* out_num_values) {
- constexpr int32_t kBufferSize = 1024;
- int32_t indices[kBufferSize];
-
- ArrowBinaryHelper helper(out);
-
- ::arrow::internal::BitmapReader bit_reader(valid_bits, valid_bits_offset, num_values);
-
- auto dict_values = reinterpret_cast<const ByteArray*>(dictionary_->data());
- int values_decoded = 0;
- int num_appended = 0;
- while (num_appended < num_values) {
- bool is_valid = bit_reader.IsSet();
- bit_reader.Next();
-
- if (is_valid) {
- int32_t batch_size =
- std::min<int32_t>(kBufferSize, num_values - num_appended - null_count);
- int num_indices = idx_decoder_.GetBatch(indices, batch_size);
-
- if (ARROW_PREDICT_FALSE(num_indices < 1)) {
- return Status::Invalid("Invalid number of indices '", num_indices, "'");
- }
-
- int i = 0;
- while (true) {
- // Consume all indices
- if (is_valid) {
- auto idx = indices[i];
- RETURN_NOT_OK(IndexInBounds(idx));
- const auto& val = dict_values[idx];
- if (ARROW_PREDICT_FALSE(!helper.CanFit(val.len))) {
- RETURN_NOT_OK(helper.PushChunk());
- }
- RETURN_NOT_OK(helper.Append(val.ptr, static_cast<int32_t>(val.len)));
- ++i;
- ++values_decoded;
- } else {
- RETURN_NOT_OK(helper.AppendNull());
- --null_count;
- }
- ++num_appended;
- if (i == num_indices) {
- // Do not advance the bit_reader if we have fulfilled the decode
- // request
- break;
- }
- is_valid = bit_reader.IsSet();
- bit_reader.Next();
- }
- } else {
- RETURN_NOT_OK(helper.AppendNull());
- --null_count;
- ++num_appended;
- }
- }
- *out_num_values = values_decoded;
- return Status::OK();
- }
-
- Status DecodeArrowDenseNonNull(int num_values,
- typename EncodingTraits<ByteArrayType>::Accumulator* out,
- int* out_num_values) {
- constexpr int32_t kBufferSize = 2048;
- int32_t indices[kBufferSize];
- int values_decoded = 0;
-
- ArrowBinaryHelper helper(out);
- auto dict_values = reinterpret_cast<const ByteArray*>(dictionary_->data());
-
- while (values_decoded < num_values) {
- int32_t batch_size = std::min<int32_t>(kBufferSize, num_values - values_decoded);
- int num_indices = idx_decoder_.GetBatch(indices, batch_size);
- if (num_indices == 0) ParquetException::EofException();
- for (int i = 0; i < num_indices; ++i) {
- auto idx = indices[i];
- RETURN_NOT_OK(IndexInBounds(idx));
- const auto& val = dict_values[idx];
- if (ARROW_PREDICT_FALSE(!helper.CanFit(val.len))) {
- RETURN_NOT_OK(helper.PushChunk());
- }
- RETURN_NOT_OK(helper.Append(val.ptr, static_cast<int32_t>(val.len)));
- }
- values_decoded += num_indices;
- }
- *out_num_values = values_decoded;
- return Status::OK();
- }
-
- template <typename BuilderType>
- Status DecodeArrow(int num_values, int null_count, const uint8_t* valid_bits,
- int64_t valid_bits_offset, BuilderType* builder,
- int* out_num_values) {
- constexpr int32_t kBufferSize = 1024;
- int32_t indices[kBufferSize];
-
- RETURN_NOT_OK(builder->Reserve(num_values));
- ::arrow::internal::BitmapReader bit_reader(valid_bits, valid_bits_offset, num_values);
-
- auto dict_values = reinterpret_cast<const ByteArray*>(dictionary_->data());
-
- int values_decoded = 0;
- int num_appended = 0;
- while (num_appended < num_values) {
- bool is_valid = bit_reader.IsSet();
- bit_reader.Next();
-
- if (is_valid) {
- int32_t batch_size =
- std::min<int32_t>(kBufferSize, num_values - num_appended - null_count);
- int num_indices = idx_decoder_.GetBatch(indices, batch_size);
-
- int i = 0;
- while (true) {
- // Consume all indices
- if (is_valid) {
- auto idx = indices[i];
- RETURN_NOT_OK(IndexInBounds(idx));
- const auto& val = dict_values[idx];
- RETURN_NOT_OK(builder->Append(val.ptr, val.len));
- ++i;
- ++values_decoded;
- } else {
- RETURN_NOT_OK(builder->AppendNull());
- --null_count;
- }
- ++num_appended;
- if (i == num_indices) {
- // Do not advance the bit_reader if we have fulfilled the decode
- // request
- break;
- }
- is_valid = bit_reader.IsSet();
- bit_reader.Next();
- }
- } else {
- RETURN_NOT_OK(builder->AppendNull());
- --null_count;
- ++num_appended;
- }
- }
- *out_num_values = values_decoded;
- return Status::OK();
- }
-
- template <typename BuilderType>
- Status DecodeArrowNonNull(int num_values, BuilderType* builder, int* out_num_values) {
- constexpr int32_t kBufferSize = 2048;
- int32_t indices[kBufferSize];
-
- RETURN_NOT_OK(builder->Reserve(num_values));
-
- auto dict_values = reinterpret_cast<const ByteArray*>(dictionary_->data());
-
- int values_decoded = 0;
- while (values_decoded < num_values) {
- int32_t batch_size = std::min<int32_t>(kBufferSize, num_values - values_decoded);
- int num_indices = idx_decoder_.GetBatch(indices, batch_size);
- if (num_indices == 0) ParquetException::EofException();
- for (int i = 0; i < num_indices; ++i) {
- auto idx = indices[i];
- RETURN_NOT_OK(IndexInBounds(idx));
- const auto& val = dict_values[idx];
- RETURN_NOT_OK(builder->Append(val.ptr, val.len));
- }
- values_decoded += num_indices;
- }
- *out_num_values = values_decoded;
- return Status::OK();
- }
-};
-
-// ----------------------------------------------------------------------
-// DeltaBitPackDecoder
-
-template <typename DType>
-class DeltaBitPackDecoder : public DecoderImpl, virtual public TypedDecoder<DType> {
- public:
- typedef typename DType::c_type T;
-
- explicit DeltaBitPackDecoder(const ColumnDescriptor* descr,
- MemoryPool* pool = ::arrow::default_memory_pool())
- : DecoderImpl(descr, Encoding::DELTA_BINARY_PACKED), pool_(pool) {
- if (DType::type_num != Type::INT32 && DType::type_num != Type::INT64) {
- throw ParquetException("Delta bit pack encoding should only be for integer data.");
- }
- }
-
- void SetData(int num_values, const uint8_t* data, int len) override {
- this->num_values_ = num_values;
- decoder_ = ::arrow::BitUtil::BitReader(data, len);
- values_current_block_ = 0;
- values_current_mini_block_ = 0;
- }
-
- int Decode(T* buffer, int max_values) override {
- return GetInternal(buffer, max_values);
- }
-
- int DecodeArrow(int num_values, int null_count, const uint8_t* valid_bits,
- int64_t valid_bits_offset,
- typename EncodingTraits<DType>::Accumulator* out) override {
- if (null_count != 0) {
- ParquetException::NYI("Delta bit pack DecodeArrow with null slots");
- }
- std::vector<T> values(num_values);
- GetInternal(values.data(), num_values);
- PARQUET_THROW_NOT_OK(out->AppendValues(values));
- return num_values;
- }
-
- int DecodeArrow(int num_values, int null_count, const uint8_t* valid_bits,
- int64_t valid_bits_offset,
- typename EncodingTraits<DType>::DictAccumulator* out) override {
- if (null_count != 0) {
- ParquetException::NYI("Delta bit pack DecodeArrow with null slots");
- }
- std::vector<T> values(num_values);
- GetInternal(values.data(), num_values);
- PARQUET_THROW_NOT_OK(out->Reserve(num_values));
- for (T value : values) {
- PARQUET_THROW_NOT_OK(out->Append(value));
- }
- return num_values;
- }
-
- private:
- void InitBlock() {
- // The number of values per block.
- uint32_t block_size;
- if (!decoder_.GetVlqInt(&block_size)) ParquetException::EofException();
- if (!decoder_.GetVlqInt(&num_mini_blocks_)) ParquetException::EofException();
- if (!decoder_.GetVlqInt(&values_current_block_)) {
- ParquetException::EofException();
- }
- if (!decoder_.GetZigZagVlqInt(&last_value_)) ParquetException::EofException();
-
- delta_bit_widths_ = AllocateBuffer(pool_, num_mini_blocks_);
- uint8_t* bit_width_data = delta_bit_widths_->mutable_data();
-
- if (!decoder_.GetZigZagVlqInt(&min_delta_)) ParquetException::EofException();
- for (uint32_t i = 0; i < num_mini_blocks_; ++i) {
- if (!decoder_.GetAligned<uint8_t>(1, bit_width_data + i)) {
- ParquetException::EofException();
- }
- }
- values_per_mini_block_ = block_size / num_mini_blocks_;
- mini_block_idx_ = 0;
- delta_bit_width_ = bit_width_data[0];
- values_current_mini_block_ = values_per_mini_block_;
- }
-
- template <typename T>
- int GetInternal(T* buffer, int max_values) {
- max_values = std::min(max_values, this->num_values_);
- const uint8_t* bit_width_data = delta_bit_widths_->data();
- for (int i = 0; i < max_values; ++i) {
- if (ARROW_PREDICT_FALSE(values_current_mini_block_ == 0)) {
- ++mini_block_idx_;
- if (mini_block_idx_ < static_cast<size_t>(delta_bit_widths_->size())) {
- delta_bit_width_ = bit_width_data[mini_block_idx_];
- values_current_mini_block_ = values_per_mini_block_;
- } else {
- InitBlock();
- buffer[i] = last_value_;
- continue;
- }
- }
-
- // TODO: for speed, decode an entire mini block at once here instead of
- // one value per GetValue() call.
- int64_t delta;
- if (!decoder_.GetValue(delta_bit_width_, &delta)) ParquetException::EofException();
- delta += min_delta_;
- last_value_ += static_cast<int32_t>(delta);
- buffer[i] = last_value_;
- --values_current_mini_block_;
- }
- this->num_values_ -= max_values;
- return max_values;
- }
-
- MemoryPool* pool_;
- ::arrow::BitUtil::BitReader decoder_;
- uint32_t values_current_block_;
- uint32_t num_mini_blocks_;
- uint64_t values_per_mini_block_;
- uint64_t values_current_mini_block_;
-
- int32_t min_delta_;
- size_t mini_block_idx_;
- std::shared_ptr<ResizableBuffer> delta_bit_widths_;
- int delta_bit_width_;
-
- int32_t last_value_;
-};
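-
-// Worked example of the DELTA_BINARY_PACKED stream that InitBlock and
-// GetInternal walk above (field names per the Parquet spec; illustration
-// only). The header is four varints:
-//
-//   block_size  num_mini_blocks  total_value_count  zigzag(first_value)
-//
-// and each block stores zigzag(min_delta), one bit width byte per mini
-// block, then the bit-packed deltas. For the values 7, 5, 3, 1:
-//
-//   deltas    = -2, -2, -2
-//   min_delta = -2, so the stored deltas are all 0 and the bit width is 0
-//
-// which is why GetInternal reconstructs each value as
-// last_value_ += min_delta_ + packed_delta.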
-
-// ----------------------------------------------------------------------
-// DELTA_LENGTH_BYTE_ARRAY
-
-class DeltaLengthByteArrayDecoder : public DecoderImpl,
- virtual public TypedDecoder<ByteArrayType> {
- public:
- explicit DeltaLengthByteArrayDecoder(const ColumnDescriptor* descr,
- MemoryPool* pool = ::arrow::default_memory_pool())
- : DecoderImpl(descr, Encoding::DELTA_LENGTH_BYTE_ARRAY),
- len_decoder_(nullptr, pool),
- pool_(pool) {}
-
- void SetData(int num_values, const uint8_t* data, int len) override {
- num_values_ = num_values;
- if (len == 0) return;
- int total_lengths_len = ::arrow::util::SafeLoadAs<int32_t>(data);
- data += 4;
- this->len_decoder_.SetData(num_values, data, total_lengths_len);
- data_ = data + total_lengths_len;
- this->len_ = len - 4 - total_lengths_len;
- }
-
- int Decode(ByteArray* buffer, int max_values) override {
- using VectorT = ArrowPoolVector<int>;
- max_values = std::min(max_values, num_values_);
- VectorT lengths(max_values, 0, ::arrow::stl::allocator<int>(pool_));
- len_decoder_.Decode(lengths.data(), max_values);
- for (int i = 0; i < max_values; ++i) {
- buffer[i].len = lengths[i];
- buffer[i].ptr = data_;
- this->data_ += lengths[i];
- this->len_ -= lengths[i];
- }
- this->num_values_ -= max_values;
- return max_values;
- }
-
- int DecodeArrow(int num_values, int null_count, const uint8_t* valid_bits,
- int64_t valid_bits_offset,
- typename EncodingTraits<ByteArrayType>::Accumulator* out) override {
- ParquetException::NYI("DecodeArrow for DeltaLengthByteArrayDecoder");
- }
-
- int DecodeArrow(int num_values, int null_count, const uint8_t* valid_bits,
- int64_t valid_bits_offset,
- typename EncodingTraits<ByteArrayType>::DictAccumulator* out) override {
- ParquetException::NYI("DecodeArrow for DeltaLengthByteArrayDecoder");
- }
-
- private:
- DeltaBitPackDecoder<Int32Type> len_decoder_;
- ::arrow::MemoryPool* pool_;
-};
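-
-// Illustration of the layout SetData above expects: a 4-byte prefix giving
-// the byte size of the length section, then the DELTA_BINARY_PACKED lengths,
-// then the concatenated value bytes:
-//
-//   [int32 total_lengths_len][delta-packed lengths ...][value bytes ...]
-//
-// Decode first pulls max_values lengths through len_decoder_, then slices
-// data_ into ByteArrays at those lengths without copying.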
-
-// ----------------------------------------------------------------------
-// DELTA_BYTE_ARRAY
-
-class DeltaByteArrayDecoder : public DecoderImpl,
- virtual public TypedDecoder<ByteArrayType> {
- public:
- explicit DeltaByteArrayDecoder(const ColumnDescriptor* descr,
- MemoryPool* pool = ::arrow::default_memory_pool())
- : DecoderImpl(descr, Encoding::DELTA_BYTE_ARRAY),
- prefix_len_decoder_(nullptr, pool),
- suffix_decoder_(nullptr, pool),
- last_value_(0, nullptr) {}
-
- virtual void SetData(int num_values, const uint8_t* data, int len) {
- num_values_ = num_values;
- if (len == 0) return;
- int prefix_len_length = ::arrow::util::SafeLoadAs<int32_t>(data);
- data += 4;
- len -= 4;
- prefix_len_decoder_.SetData(num_values, data, prefix_len_length);
- data += prefix_len_length;
- len -= prefix_len_length;
- suffix_decoder_.SetData(num_values, data, len);
- }
-
- // TODO: this is broken as-is and requires memory management: the buffers
- // malloc'ed below for the reconstructed values are never freed. We need to
- // allocate new strings to store the results.
- virtual int Decode(ByteArray* buffer, int max_values) {
- max_values = std::min(max_values, this->num_values_);
- for (int i = 0; i < max_values; ++i) {
- int prefix_len = 0;
- prefix_len_decoder_.Decode(&prefix_len, 1);
- ByteArray suffix = {0, nullptr};
- suffix_decoder_.Decode(&suffix, 1);
- buffer[i].len = prefix_len + suffix.len;
-
- uint8_t* result = reinterpret_cast<uint8_t*>(malloc(buffer[i].len));
- memcpy(result, last_value_.ptr, prefix_len);
- memcpy(result + prefix_len, suffix.ptr, suffix.len);
-
- buffer[i].ptr = result;
- last_value_ = buffer[i];
- }
- this->num_values_ -= max_values;
- return max_values;
- }
-
- private:
- DeltaBitPackDecoder<Int32Type> prefix_len_decoder_;
- DeltaLengthByteArrayDecoder suffix_decoder_;
- ByteArray last_value_;
-};
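-
-// Worked example of DELTA_BYTE_ARRAY (incremental encoding): each value is a
-// shared-prefix length plus a suffix relative to the previous value. For the
-// sorted strings "apple", "applied", "apply":
-//
-//   prefix_len  suffix
-//   0           "apple"
-//   4           "ied"
-//   4           "y"
-//
-// Decode above rebuilds each value from prefix_len bytes of last_value_
-// followed by the decoded suffix.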
-
-// ----------------------------------------------------------------------
-// BYTE_STREAM_SPLIT
-
-template <typename DType>
-class ByteStreamSplitDecoder : public DecoderImpl, virtual public TypedDecoder<DType> {
- public:
- using T = typename DType::c_type;
- explicit ByteStreamSplitDecoder(const ColumnDescriptor* descr);
-
- int Decode(T* buffer, int max_values) override;
-
- int DecodeArrow(int num_values, int null_count, const uint8_t* valid_bits,
- int64_t valid_bits_offset,
- typename EncodingTraits<DType>::Accumulator* builder) override;
-
- int DecodeArrow(int num_values, int null_count, const uint8_t* valid_bits,
- int64_t valid_bits_offset,
- typename EncodingTraits<DType>::DictAccumulator* builder) override;
-
- void SetData(int num_values, const uint8_t* data, int len) override;
-
- T* EnsureDecodeBuffer(int64_t min_values) {
- const int64_t size = sizeof(T) * min_values;
- if (!decode_buffer_ || decode_buffer_->size() < size) {
- PARQUET_ASSIGN_OR_THROW(decode_buffer_, ::arrow::AllocateBuffer(size));
- }
- return reinterpret_cast<T*>(decode_buffer_->mutable_data());
- }
-
- private:
- int num_values_in_buffer_{0};
- std::shared_ptr<Buffer> decode_buffer_;
-
- static constexpr size_t kNumStreams = sizeof(T);
-};
-
-template <typename DType>
-ByteStreamSplitDecoder<DType>::ByteStreamSplitDecoder(const ColumnDescriptor* descr)
- : DecoderImpl(descr, Encoding::BYTE_STREAM_SPLIT) {}
-
-template <typename DType>
-void ByteStreamSplitDecoder<DType>::SetData(int num_values, const uint8_t* data,
- int len) {
- DecoderImpl::SetData(num_values, data, len);
- if (num_values * static_cast<int64_t>(sizeof(T)) > len) {
- throw ParquetException("Data size too small for number of values (corrupted file?)");
- }
- num_values_in_buffer_ = num_values;
-}
-
-template <typename DType>
-int ByteStreamSplitDecoder<DType>::Decode(T* buffer, int max_values) {
- const int values_to_decode = std::min(num_values_, max_values);
- const int num_decoded_previously = num_values_in_buffer_ - num_values_;
- const uint8_t* data = data_ + num_decoded_previously;
-
- ::arrow::util::internal::ByteStreamSplitDecode<T>(data, values_to_decode,
- num_values_in_buffer_, buffer);
- num_values_ -= values_to_decode;
- len_ -= sizeof(T) * values_to_decode;
- return values_to_decode;
-}
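-
-// Illustration of BYTE_STREAM_SPLIT: the encoder scatters the k-th byte of
-// every value into the k-th of sizeof(T) contiguous streams, which tends to
-// compress better for floating point data. Two floats with bytes
-// [A0 A1 A2 A3] and [B0 B1 B2 B3] are stored as
-//
-//   A0 B0 | A1 B1 | A2 B2 | A3 B3
-//
-// Decode above gathers byte k of value i from stream k at offset i, which is
-// also why SetData checks len against num_values * sizeof(T).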
-
-template <typename DType>
-int ByteStreamSplitDecoder<DType>::DecodeArrow(
- int num_values, int null_count, const uint8_t* valid_bits, int64_t valid_bits_offset,
- typename EncodingTraits<DType>::Accumulator* builder) {
- constexpr int value_size = static_cast<int>(kNumStreams);
- int values_decoded = num_values - null_count;
- if (ARROW_PREDICT_FALSE(len_ < value_size * values_decoded)) {
- ParquetException::EofException();
- }
-
- PARQUET_THROW_NOT_OK(builder->Reserve(num_values));
-
- const int num_decoded_previously = num_values_in_buffer_ - num_values_;
- const uint8_t* data = data_ + num_decoded_previously;
- int offset = 0;
-
-#if defined(ARROW_HAVE_SIMD_SPLIT)
- // Use fast decoding into intermediate buffer. This will also decode
- // some null values, but it's fast enough that we don't care.
- T* decode_out = EnsureDecodeBuffer(values_decoded);
- ::arrow::util::internal::ByteStreamSplitDecode<T>(data, values_decoded,
- num_values_in_buffer_, decode_out);
-
- // XXX If null_count is 0, we could even append in bulk or decode directly into
- // builder
- VisitNullBitmapInline(
- valid_bits, valid_bits_offset, num_values, null_count,
- [&]() {
- builder->UnsafeAppend(decode_out[offset]);
- ++offset;
- },
- [&]() { builder->UnsafeAppendNull(); });
-
-#else
- VisitNullBitmapInline(
- valid_bits, valid_bits_offset, num_values, null_count,
- [&]() {
- uint8_t gathered_byte_data[kNumStreams];
- for (size_t b = 0; b < kNumStreams; ++b) {
- const size_t byte_index = b * num_values_in_buffer_ + offset;
- gathered_byte_data[b] = data[byte_index];
- }
- builder->UnsafeAppend(::arrow::util::SafeLoadAs<T>(&gathered_byte_data[0]));
- ++offset;
- },
- [&]() { builder->UnsafeAppendNull(); });
-#endif
-
- num_values_ -= values_decoded;
- len_ -= sizeof(T) * values_decoded;
- return values_decoded;
-}
-
-template <typename DType>
-int ByteStreamSplitDecoder<DType>::DecodeArrow(
- int num_values, int null_count, const uint8_t* valid_bits, int64_t valid_bits_offset,
- typename EncodingTraits<DType>::DictAccumulator* builder) {
- ParquetException::NYI("DecodeArrow for ByteStreamSplitDecoder");
-}
-
-} // namespace
-
-// ----------------------------------------------------------------------
-// Encoder and decoder factory functions
-
-std::unique_ptr<Encoder> MakeEncoder(Type::type type_num, Encoding::type encoding,
- bool use_dictionary, const ColumnDescriptor* descr,
- MemoryPool* pool) {
- if (use_dictionary) {
- switch (type_num) {
- case Type::INT32:
- return std::unique_ptr<Encoder>(new DictEncoderImpl<Int32Type>(descr, pool));
- case Type::INT64:
- return std::unique_ptr<Encoder>(new DictEncoderImpl<Int64Type>(descr, pool));
- case Type::INT96:
- return std::unique_ptr<Encoder>(new DictEncoderImpl<Int96Type>(descr, pool));
- case Type::FLOAT:
- return std::unique_ptr<Encoder>(new DictEncoderImpl<FloatType>(descr, pool));
- case Type::DOUBLE:
- return std::unique_ptr<Encoder>(new DictEncoderImpl<DoubleType>(descr, pool));
- case Type::BYTE_ARRAY:
- return std::unique_ptr<Encoder>(new DictEncoderImpl<ByteArrayType>(descr, pool));
- case Type::FIXED_LEN_BYTE_ARRAY:
- return std::unique_ptr<Encoder>(new DictEncoderImpl<FLBAType>(descr, pool));
- default:
- DCHECK(false) << "Encoder not implemented";
- break;
- }
- } else if (encoding == Encoding::PLAIN) {
- switch (type_num) {
- case Type::BOOLEAN:
- return std::unique_ptr<Encoder>(new PlainEncoder<BooleanType>(descr, pool));
- case Type::INT32:
- return std::unique_ptr<Encoder>(new PlainEncoder<Int32Type>(descr, pool));
- case Type::INT64:
- return std::unique_ptr<Encoder>(new PlainEncoder<Int64Type>(descr, pool));
- case Type::INT96:
- return std::unique_ptr<Encoder>(new PlainEncoder<Int96Type>(descr, pool));
- case Type::FLOAT:
- return std::unique_ptr<Encoder>(new PlainEncoder<FloatType>(descr, pool));
- case Type::DOUBLE:
- return std::unique_ptr<Encoder>(new PlainEncoder<DoubleType>(descr, pool));
- case Type::BYTE_ARRAY:
- return std::unique_ptr<Encoder>(new PlainEncoder<ByteArrayType>(descr, pool));
- case Type::FIXED_LEN_BYTE_ARRAY:
- return std::unique_ptr<Encoder>(new PlainEncoder<FLBAType>(descr, pool));
- default:
- DCHECK(false) << "Encoder not implemented";
- break;
- }
- } else if (encoding == Encoding::BYTE_STREAM_SPLIT) {
- switch (type_num) {
- case Type::FLOAT:
- return std::unique_ptr<Encoder>(
- new ByteStreamSplitEncoder<FloatType>(descr, pool));
- case Type::DOUBLE:
- return std::unique_ptr<Encoder>(
- new ByteStreamSplitEncoder<DoubleType>(descr, pool));
- default:
- throw ParquetException("BYTE_STREAM_SPLIT only supports FLOAT and DOUBLE");
- break;
- }
- } else {
- ParquetException::NYI("Selected encoding is not supported");
- }
- DCHECK(false) << "Should not be able to reach this code";
- return nullptr;
-}
-
-std::unique_ptr<Decoder> MakeDecoder(Type::type type_num, Encoding::type encoding,
- const ColumnDescriptor* descr) {
- if (encoding == Encoding::PLAIN) {
- switch (type_num) {
- case Type::BOOLEAN:
- return std::unique_ptr<Decoder>(new PlainBooleanDecoder(descr));
- case Type::INT32:
- return std::unique_ptr<Decoder>(new PlainDecoder<Int32Type>(descr));
- case Type::INT64:
- return std::unique_ptr<Decoder>(new PlainDecoder<Int64Type>(descr));
- case Type::INT96:
- return std::unique_ptr<Decoder>(new PlainDecoder<Int96Type>(descr));
- case Type::FLOAT:
- return std::unique_ptr<Decoder>(new PlainDecoder<FloatType>(descr));
- case Type::DOUBLE:
- return std::unique_ptr<Decoder>(new PlainDecoder<DoubleType>(descr));
- case Type::BYTE_ARRAY:
- return std::unique_ptr<Decoder>(new PlainByteArrayDecoder(descr));
- case Type::FIXED_LEN_BYTE_ARRAY:
- return std::unique_ptr<Decoder>(new PlainFLBADecoder(descr));
- default:
- break;
- }
- } else if (encoding == Encoding::BYTE_STREAM_SPLIT) {
- switch (type_num) {
- case Type::FLOAT:
- return std::unique_ptr<Decoder>(new ByteStreamSplitDecoder<FloatType>(descr));
- case Type::DOUBLE:
- return std::unique_ptr<Decoder>(new ByteStreamSplitDecoder<DoubleType>(descr));
- default:
- throw ParquetException("BYTE_STREAM_SPLIT only supports FLOAT and DOUBLE");
- break;
- }
- } else {
- ParquetException::NYI("Selected encoding is not supported");
- }
- DCHECK(false) << "Should not be able to reach this code";
- return nullptr;
-}
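-
-// Minimal usage sketch for the factory above (illustrative; assumes a
-// ColumnDescriptor* descr for an INT32 column and a raw PLAIN data page):
-//
-//   auto decoder = MakeDecoder(Type::INT32, Encoding::PLAIN, descr);
-//   auto typed = dynamic_cast<TypedDecoder<Int32Type>*>(decoder.get());
-//   typed->SetData(num_values, page_data, page_size);
-//   std::vector<int32_t> out(num_values);
-//   int decoded = typed->Decode(out.data(), num_values);
-//
-// Column readers drive decoders through this same SetData/Decode cycle, one
-// data page at a time.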
-
-namespace detail {
-std::unique_ptr<Decoder> MakeDictDecoder(Type::type type_num,
- const ColumnDescriptor* descr,
- MemoryPool* pool) {
- switch (type_num) {
- case Type::BOOLEAN:
- ParquetException::NYI("Dictionary encoding not implemented for boolean type");
- case Type::INT32:
- return std::unique_ptr<Decoder>(new DictDecoderImpl<Int32Type>(descr, pool));
- case Type::INT64:
- return std::unique_ptr<Decoder>(new DictDecoderImpl<Int64Type>(descr, pool));
- case Type::INT96:
- return std::unique_ptr<Decoder>(new DictDecoderImpl<Int96Type>(descr, pool));
- case Type::FLOAT:
- return std::unique_ptr<Decoder>(new DictDecoderImpl<FloatType>(descr, pool));
- case Type::DOUBLE:
- return std::unique_ptr<Decoder>(new DictDecoderImpl<DoubleType>(descr, pool));
- case Type::BYTE_ARRAY:
- return std::unique_ptr<Decoder>(new DictByteArrayDecoderImpl(descr, pool));
- case Type::FIXED_LEN_BYTE_ARRAY:
- return std::unique_ptr<Decoder>(new DictDecoderImpl<FLBAType>(descr, pool));
- default:
- break;
- }
- DCHECK(false) << "Should not be able to reach this code";
- return nullptr;
-}
-
-} // namespace detail
-} // namespace parquet
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "parquet/encoding.h"
+
+#include <algorithm>
+#include <cstdint>
+#include <cstdlib>
+#include <limits>
+#include <memory>
+#include <string>
+#include <utility>
+#include <vector>
+
+#include "arrow/array.h"
+#include "arrow/array/builder_dict.h"
+#include "arrow/stl_allocator.h"
+#include "arrow/type_traits.h"
+#include "arrow/util/bit_run_reader.h"
+#include "arrow/util/bit_stream_utils.h"
+#include "arrow/util/bit_util.h"
+#include "arrow/util/bitmap_ops.h"
+#include "arrow/util/bitmap_writer.h"
+#include "arrow/util/byte_stream_split.h"
+#include "arrow/util/checked_cast.h"
+#include "arrow/util/hashing.h"
+#include "arrow/util/logging.h"
+#include "arrow/util/rle_encoding.h"
+#include "arrow/util/ubsan.h"
+#include "arrow/visitor_inline.h"
+
+#include "parquet/exception.h"
+#include "parquet/platform.h"
+#include "parquet/schema.h"
+#include "parquet/types.h"
+
+namespace BitUtil = arrow::BitUtil;
+
+using arrow::Status;
+using arrow::VisitNullBitmapInline;
+using arrow::internal::checked_cast;
+
+template <typename T>
+using ArrowPoolVector = std::vector<T, ::arrow::stl::allocator<T>>;
+
+namespace parquet {
+namespace {
+
+constexpr int64_t kInMemoryDefaultCapacity = 1024;
+// The Parquet spec isn't very clear whether ByteArray lengths are signed or
+// unsigned, but the Java implementation uses signed ints.
+constexpr size_t kMaxByteArraySize = std::numeric_limits<int32_t>::max();
+
+class EncoderImpl : virtual public Encoder {
+ public:
+ EncoderImpl(const ColumnDescriptor* descr, Encoding::type encoding, MemoryPool* pool)
+ : descr_(descr),
+ encoding_(encoding),
+ pool_(pool),
+ type_length_(descr ? descr->type_length() : -1) {}
+
+ Encoding::type encoding() const override { return encoding_; }
+
+ MemoryPool* memory_pool() const override { return pool_; }
+
+ protected:
+ // For accessing type-specific metadata, like FIXED_LEN_BYTE_ARRAY
+ const ColumnDescriptor* descr_;
+ const Encoding::type encoding_;
+ MemoryPool* pool_;
+
+ /// Type length from descr
+ int type_length_;
+};
+
+// ----------------------------------------------------------------------
+// Plain encoder implementation
+
+template <typename DType>
+class PlainEncoder : public EncoderImpl, virtual public TypedEncoder<DType> {
+ public:
+ using T = typename DType::c_type;
+
+ explicit PlainEncoder(const ColumnDescriptor* descr, MemoryPool* pool)
+ : EncoderImpl(descr, Encoding::PLAIN, pool), sink_(pool) {}
+
+ int64_t EstimatedDataEncodedSize() override { return sink_.length(); }
+
+ std::shared_ptr<Buffer> FlushValues() override {
+ std::shared_ptr<Buffer> buffer;
+ PARQUET_THROW_NOT_OK(sink_.Finish(&buffer));
+ return buffer;
+ }
+
+ using TypedEncoder<DType>::Put;
+
+ void Put(const T* buffer, int num_values) override;
+
+ void Put(const ::arrow::Array& values) override;
+
+ void PutSpaced(const T* src, int num_values, const uint8_t* valid_bits,
+ int64_t valid_bits_offset) override {
+ if (valid_bits != NULLPTR) {
+ PARQUET_ASSIGN_OR_THROW(auto buffer, ::arrow::AllocateBuffer(num_values * sizeof(T),
+ this->memory_pool()));
+ T* data = reinterpret_cast<T*>(buffer->mutable_data());
+ int num_valid_values = ::arrow::util::internal::SpacedCompress<T>(
+ src, num_values, valid_bits, valid_bits_offset, data);
+ Put(data, num_valid_values);
+ } else {
+ Put(src, num_values);
+ }
+ }
+
+ void UnsafePutByteArray(const void* data, uint32_t length) {
+ DCHECK(length == 0 || data != nullptr) << "Value ptr cannot be NULL";
+ sink_.UnsafeAppend(&length, sizeof(uint32_t));
+ sink_.UnsafeAppend(data, static_cast<int64_t>(length));
+ }
+
+ void Put(const ByteArray& val) {
+ // Write the result to the output stream
+ const int64_t increment = static_cast<int64_t>(val.len + sizeof(uint32_t));
+ if (ARROW_PREDICT_FALSE(sink_.length() + increment > sink_.capacity())) {
+ PARQUET_THROW_NOT_OK(sink_.Reserve(increment));
+ }
+ UnsafePutByteArray(val.ptr, val.len);
+ }
+
+ protected:
+ template <typename ArrayType>
+ void PutBinaryArray(const ArrayType& array) {
+ const int64_t total_bytes =
+ array.value_offset(array.length()) - array.value_offset(0);
+ PARQUET_THROW_NOT_OK(sink_.Reserve(total_bytes + array.length() * sizeof(uint32_t)));
+
+ PARQUET_THROW_NOT_OK(::arrow::VisitArrayDataInline<typename ArrayType::TypeClass>(
+ *array.data(),
+ [&](::arrow::util::string_view view) {
+ if (ARROW_PREDICT_FALSE(view.size() > kMaxByteArraySize)) {
+ return Status::Invalid("Parquet cannot store strings with size 2GB or more");
+ }
+ UnsafePutByteArray(view.data(), static_cast<uint32_t>(view.size()));
+ return Status::OK();
+ },
+ []() { return Status::OK(); }));
+ }
+
+ ::arrow::BufferBuilder sink_;
+};
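+
+// Minimal usage sketch for PlainEncoder (illustrative only; column writers
+// drive encoders this way):
+//
+//   PlainEncoder<Int32Type> encoder(descr, ::arrow::default_memory_pool());
+//   int32_t values[] = {1, 2, 3};
+//   encoder.Put(values, 3);
+//   std::shared_ptr<Buffer> page = encoder.FlushValues();  // 12-byte buffer
+//
+// PLAIN for fixed-width types is just the little-endian memory image of the
+// values, which is why Put below can append the buffer in a single memcpy.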
+
+template <typename DType>
+void PlainEncoder<DType>::Put(const T* buffer, int num_values) {
+ if (num_values > 0) {
+ PARQUET_THROW_NOT_OK(sink_.Append(buffer, num_values * sizeof(T)));
+ }
+}
+
+template <>
+inline void PlainEncoder<ByteArrayType>::Put(const ByteArray* src, int num_values) {
+ for (int i = 0; i < num_values; ++i) {
+ Put(src[i]);
+ }
+}
+
+template <typename ArrayType>
+void DirectPutImpl(const ::arrow::Array& values, ::arrow::BufferBuilder* sink) {
+ if (values.type_id() != ArrayType::TypeClass::type_id) {
+ std::string type_name = ArrayType::TypeClass::type_name();
+ throw ParquetException("direct put to " + type_name + " from " +
+ values.type()->ToString() + " not supported");
+ }
+
+ using value_type = typename ArrayType::value_type;
+ constexpr auto value_size = sizeof(value_type);
+ auto raw_values = checked_cast<const ArrayType&>(values).raw_values();
+
+ if (values.null_count() == 0) {
+ // no nulls, just dump the data
+ PARQUET_THROW_NOT_OK(sink->Append(raw_values, values.length() * value_size));
+ } else {
+ PARQUET_THROW_NOT_OK(
+ sink->Reserve((values.length() - values.null_count()) * value_size));
+
+ for (int64_t i = 0; i < values.length(); i++) {
+ if (values.IsValid(i)) {
+ sink->UnsafeAppend(&raw_values[i], value_size);
+ }
+ }
+ }
+}
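+
+// Example of the dense layout produced above (illustrative): for an
+// Int32Array [1, null, 3], DirectPutImpl appends only the 8 bytes of {1, 3}.
+// PLAIN data pages carry no placeholder bytes for nulls; which slots were
+// null is recorded separately in the definition levels.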
+
+template <>
+void PlainEncoder<Int32Type>::Put(const ::arrow::Array& values) {
+ DirectPutImpl<::arrow::Int32Array>(values, &sink_);
+}
+
+template <>
+void PlainEncoder<Int64Type>::Put(const ::arrow::Array& values) {
+ DirectPutImpl<::arrow::Int64Array>(values, &sink_);
+}
+
+template <>
+void PlainEncoder<Int96Type>::Put(const ::arrow::Array& values) {
+ ParquetException::NYI("direct put to Int96");
+}
+
+template <>
+void PlainEncoder<FloatType>::Put(const ::arrow::Array& values) {
+ DirectPutImpl<::arrow::FloatArray>(values, &sink_);
+}
+
+template <>
+void PlainEncoder<DoubleType>::Put(const ::arrow::Array& values) {
+ DirectPutImpl<::arrow::DoubleArray>(values, &sink_);
+}
+
+template <typename DType>
+void PlainEncoder<DType>::Put(const ::arrow::Array& values) {
+ ParquetException::NYI("direct put of " + values.type()->ToString());
+}
+
+void AssertBaseBinary(const ::arrow::Array& values) {
+ if (!::arrow::is_base_binary_like(values.type_id())) {
+ throw ParquetException("Only BaseBinaryArray and subclasses supported");
+ }
+}
+
+template <>
+inline void PlainEncoder<ByteArrayType>::Put(const ::arrow::Array& values) {
+ AssertBaseBinary(values);
+
+ if (::arrow::is_binary_like(values.type_id())) {
+ PutBinaryArray(checked_cast<const ::arrow::BinaryArray&>(values));
+ } else {
+ DCHECK(::arrow::is_large_binary_like(values.type_id()));
+ PutBinaryArray(checked_cast<const ::arrow::LargeBinaryArray&>(values));
+ }
+}
+
+void AssertFixedSizeBinary(const ::arrow::Array& values, int type_length) {
+ if (values.type_id() != ::arrow::Type::FIXED_SIZE_BINARY &&
+ values.type_id() != ::arrow::Type::DECIMAL) {
+ throw ParquetException("Only FixedSizeBinaryArray and subclasses supported");
+ }
+ if (checked_cast<const ::arrow::FixedSizeBinaryType&>(*values.type()).byte_width() !=
+ type_length) {
+ throw ParquetException("Size mismatch: " + values.type()->ToString() +
+ " should have been " + std::to_string(type_length) + " wide");
+ }
+}
+
+template <>
+inline void PlainEncoder<FLBAType>::Put(const ::arrow::Array& values) {
+ AssertFixedSizeBinary(values, descr_->type_length());
+ const auto& data = checked_cast<const ::arrow::FixedSizeBinaryArray&>(values);
+
+ if (data.null_count() == 0) {
+ // no nulls, just dump the data
+ PARQUET_THROW_NOT_OK(
+ sink_.Append(data.raw_values(), data.length() * data.byte_width()));
+ } else {
+ const int64_t total_bytes =
+ data.length() * data.byte_width() - data.null_count() * data.byte_width();
+ PARQUET_THROW_NOT_OK(sink_.Reserve(total_bytes));
+ for (int64_t i = 0; i < data.length(); i++) {
+ if (data.IsValid(i)) {
+ sink_.UnsafeAppend(data.Value(i), data.byte_width());
+ }
+ }
+ }
+}
+
+template <>
+inline void PlainEncoder<FLBAType>::Put(const FixedLenByteArray* src, int num_values) {
+ if (descr_->type_length() == 0) {
+ return;
+ }
+ for (int i = 0; i < num_values; ++i) {
+ // Write the result to the output stream
+ DCHECK(src[i].ptr != nullptr) << "Value ptr cannot be NULL";
+ PARQUET_THROW_NOT_OK(sink_.Append(src[i].ptr, descr_->type_length()));
+ }
+}
+
+template <>
+class PlainEncoder<BooleanType> : public EncoderImpl, virtual public BooleanEncoder {
+ public:
+ explicit PlainEncoder(const ColumnDescriptor* descr, MemoryPool* pool)
+ : EncoderImpl(descr, Encoding::PLAIN, pool),
+ bits_available_(kInMemoryDefaultCapacity * 8),
+ bits_buffer_(AllocateBuffer(pool, kInMemoryDefaultCapacity)),
+ sink_(pool),
+ bit_writer_(bits_buffer_->mutable_data(),
+ static_cast<int>(bits_buffer_->size())) {}
+
+ int64_t EstimatedDataEncodedSize() override;
+ std::shared_ptr<Buffer> FlushValues() override;
+
+ void Put(const bool* src, int num_values) override;
+
+ void Put(const std::vector<bool>& src, int num_values) override;
+
+ void PutSpaced(const bool* src, int num_values, const uint8_t* valid_bits,
+ int64_t valid_bits_offset) override {
+ if (valid_bits != NULLPTR) {
+ PARQUET_ASSIGN_OR_THROW(auto buffer, ::arrow::AllocateBuffer(num_values * sizeof(T),
+ this->memory_pool()));
+ T* data = reinterpret_cast<T*>(buffer->mutable_data());
+ int num_valid_values = ::arrow::util::internal::SpacedCompress<T>(
+ src, num_values, valid_bits, valid_bits_offset, data);
+ Put(data, num_valid_values);
+ } else {
+ Put(src, num_values);
+ }
+ }
+
+ void Put(const ::arrow::Array& values) override {
+ if (values.type_id() != ::arrow::Type::BOOL) {
+ throw ParquetException("direct put to boolean from " + values.type()->ToString() +
+ " not supported");
+ }
+
+ const auto& data = checked_cast<const ::arrow::BooleanArray&>(values);
+ if (data.null_count() == 0) {
+ PARQUET_THROW_NOT_OK(sink_.Reserve(BitUtil::BytesForBits(data.length())));
+ // no nulls, just dump the data
+ ::arrow::internal::CopyBitmap(data.data()->GetValues<uint8_t>(1), data.offset(),
+ data.length(), sink_.mutable_data(), sink_.length());
+ } else {
+ auto n_valid = BitUtil::BytesForBits(data.length() - data.null_count());
+ PARQUET_THROW_NOT_OK(sink_.Reserve(n_valid));
+ ::arrow::internal::FirstTimeBitmapWriter writer(sink_.mutable_data(),
+ sink_.length(), n_valid);
+
+ for (int64_t i = 0; i < data.length(); i++) {
+ if (data.IsValid(i)) {
+ if (data.Value(i)) {
+ writer.Set();
+ } else {
+ writer.Clear();
+ }
+ writer.Next();
+ }
+ }
+ writer.Finish();
+ }
+ sink_.UnsafeAdvance(data.length());
+ }
+
+ private:
+ int bits_available_;
+ std::shared_ptr<ResizableBuffer> bits_buffer_;
+ ::arrow::BufferBuilder sink_;
+ ::arrow::BitUtil::BitWriter bit_writer_;
+
+ template <typename SequenceType>
+ void PutImpl(const SequenceType& src, int num_values);
+};
+
+template <typename SequenceType>
+void PlainEncoder<BooleanType>::PutImpl(const SequenceType& src, int num_values) {
+ int bit_offset = 0;
+ if (bits_available_ > 0) {
+ int bits_to_write = std::min(bits_available_, num_values);
+ for (int i = 0; i < bits_to_write; i++) {
+ bit_writer_.PutValue(src[i], 1);
+ }
+ bits_available_ -= bits_to_write;
+ bit_offset = bits_to_write;
+
+ if (bits_available_ == 0) {
+ bit_writer_.Flush();
+ PARQUET_THROW_NOT_OK(
+ sink_.Append(bit_writer_.buffer(), bit_writer_.bytes_written()));
+ bit_writer_.Clear();
+ }
+ }
+
+ int bits_remaining = num_values - bit_offset;
+ while (bit_offset < num_values) {
+ bits_available_ = static_cast<int>(bits_buffer_->size()) * 8;
+
+ int bits_to_write = std::min(bits_available_, bits_remaining);
+ for (int i = bit_offset; i < bit_offset + bits_to_write; i++) {
+ bit_writer_.PutValue(src[i], 1);
+ }
+ bit_offset += bits_to_write;
+ bits_available_ -= bits_to_write;
+ bits_remaining -= bits_to_write;
+
+ if (bits_available_ == 0) {
+ bit_writer_.Flush();
+ PARQUET_THROW_NOT_OK(
+ sink_.Append(bit_writer_.buffer(), bit_writer_.bytes_written()));
+ bit_writer_.Clear();
+ }
+ }
+}
+
+int64_t PlainEncoder<BooleanType>::EstimatedDataEncodedSize() {
+ int64_t position = sink_.length();
+ return position + bit_writer_.bytes_written();
+}
+
+std::shared_ptr<Buffer> PlainEncoder<BooleanType>::FlushValues() {
+ if (bits_available_ > 0) {
+ bit_writer_.Flush();
+ PARQUET_THROW_NOT_OK(sink_.Append(bit_writer_.buffer(), bit_writer_.bytes_written()));
+ bit_writer_.Clear();
+ bits_available_ = static_cast<int>(bits_buffer_->size()) * 8;
+ }
+
+ std::shared_ptr<Buffer> buffer;
+ PARQUET_THROW_NOT_OK(sink_.Finish(&buffer));
+ return buffer;
+}
+
+void PlainEncoder<BooleanType>::Put(const bool* src, int num_values) {
+ PutImpl(src, num_values);
+}
+
+void PlainEncoder<BooleanType>::Put(const std::vector<bool>& src, int num_values) {
+ PutImpl(src, num_values);
+}
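+
+// For reference, PLAIN booleans are bit-packed LSB-first (per the Parquet
+// spec); a sketch of the layout produced by the PutImpl() loop above:
+//
+//   values = {true, false, true, true}
+//   output = 0b00001101   // bit i of byte i/8 holds value i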
+
+// ----------------------------------------------------------------------
+// DictEncoder<T> implementations
+
+template <typename DType>
+struct DictEncoderTraits {
+ using c_type = typename DType::c_type;
+ using MemoTableType = ::arrow::internal::ScalarMemoTable<c_type>;
+};
+
+template <>
+struct DictEncoderTraits<ByteArrayType> {
+ using MemoTableType = ::arrow::internal::BinaryMemoTable<::arrow::BinaryBuilder>;
+};
+
+template <>
+struct DictEncoderTraits<FLBAType> {
+ using MemoTableType = ::arrow::internal::BinaryMemoTable<::arrow::BinaryBuilder>;
+};
+
+// Initially 1024 elements
+static constexpr int32_t kInitialHashTableSize = 1 << 10;
+
+/// See the dictionary encoding section of
+/// https://github.com/Parquet/parquet-format. The encoding supports
+/// streaming encoding. Values are encoded as they are added while the
+/// dictionary is being constructed. At any time, the buffered values
+/// can be written out with the current dictionary size. More values
+/// can then be added to the encoder, including new dictionary
+/// entries.
+template <typename DType>
+class DictEncoderImpl : public EncoderImpl, virtual public DictEncoder<DType> {
+ using MemoTableType = typename DictEncoderTraits<DType>::MemoTableType;
+
+ public:
+ typedef typename DType::c_type T;
+
+ explicit DictEncoderImpl(const ColumnDescriptor* desc, MemoryPool* pool)
+ : EncoderImpl(desc, Encoding::PLAIN_DICTIONARY, pool),
+ buffered_indices_(::arrow::stl::allocator<int32_t>(pool)),
+ dict_encoded_size_(0),
+ memo_table_(pool, kInitialHashTableSize) {}
+
+ ~DictEncoderImpl() override { DCHECK(buffered_indices_.empty()); }
+
+ int dict_encoded_size() override { return dict_encoded_size_; }
+
+ int WriteIndices(uint8_t* buffer, int buffer_len) override {
+ // Write bit width in first byte
+ *buffer = static_cast<uint8_t>(bit_width());
+ ++buffer;
+ --buffer_len;
+
+ ::arrow::util::RleEncoder encoder(buffer, buffer_len, bit_width());
+
+ for (int32_t index : buffered_indices_) {
+ if (!encoder.Put(index)) return -1;
+ }
+ encoder.Flush();
+
+ ClearIndices();
+ return 1 + encoder.len();
+ }
+
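+  // A sketch of the buffer produced by WriteIndices(), following the Parquet
+  // RLE_DICTIONARY layout (values here are illustrative):
+  //
+  //   indices     = {0, 0, 0, 1}, bit_width() == 1
+  //   buffer[0]   = 0x01                        // the bit width
+  //   buffer[1..] = RLE/bit-packed runs: 3 x 0, then 1 x 1
+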
+ void set_type_length(int type_length) { this->type_length_ = type_length; }
+
+ /// Returns a conservative estimate of the number of bytes needed to encode the buffered
+ /// indices. Used to size the buffer passed to WriteIndices().
+ int64_t EstimatedDataEncodedSize() override {
+    // Note: because of the way RleEncoder::CheckBufferFull() is called, we have to
+    // reserve an extra "RleEncoder::MinBufferSize" bytes. These extra bytes won't
+    // be used, but not reserving them would cause the encoder to fail.
+ return 1 +
+ ::arrow::util::RleEncoder::MaxBufferSize(
+ bit_width(), static_cast<int>(buffered_indices_.size())) +
+ ::arrow::util::RleEncoder::MinBufferSize(bit_width());
+ }
+
+ /// The minimum bit width required to encode the currently buffered indices.
+ int bit_width() const override {
+ if (ARROW_PREDICT_FALSE(num_entries() == 0)) return 0;
+ if (ARROW_PREDICT_FALSE(num_entries() == 1)) return 1;
+ return BitUtil::Log2(num_entries());
+ }
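+
+  // Note: BitUtil::Log2 here rounds up, and Log2(1) == 0, which is why a
+  // single-entry dictionary is special-cased to one bit above. Illustrative
+  // values, assuming that ceiling behavior:
+  //
+  //   num_entries() == 1000  ->  bit_width() == 10
+  //   num_entries() == 1024  ->  bit_width() == 10
+  //   num_entries() == 1025  ->  bit_width() == 11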
+
+ /// Encode value. Note that this does not actually write any data, just
+ /// buffers the value's index to be written later.
+ inline void Put(const T& value);
+
+  // Only meaningful for BYTE_ARRAY; the generic version below DCHECKs
+ inline void PutByteArray(const void* ptr, int32_t length);
+
+ void Put(const T* src, int num_values) override {
+ for (int32_t i = 0; i < num_values; i++) {
+ Put(src[i]);
+ }
+ }
+
+ void PutSpaced(const T* src, int num_values, const uint8_t* valid_bits,
+ int64_t valid_bits_offset) override {
+ ::arrow::internal::VisitSetBitRunsVoid(valid_bits, valid_bits_offset, num_values,
+ [&](int64_t position, int64_t length) {
+ for (int64_t i = 0; i < length; i++) {
+ Put(src[i + position]);
+ }
+ });
+ }
+
+ using TypedEncoder<DType>::Put;
+
+ void Put(const ::arrow::Array& values) override;
+ void PutDictionary(const ::arrow::Array& values) override;
+
+ template <typename ArrowType, typename T = typename ArrowType::c_type>
+ void PutIndicesTyped(const ::arrow::Array& data) {
+ auto values = data.data()->GetValues<T>(1);
+ size_t buffer_position = buffered_indices_.size();
+ buffered_indices_.resize(buffer_position +
+ static_cast<size_t>(data.length() - data.null_count()));
+ ::arrow::internal::VisitSetBitRunsVoid(
+ data.null_bitmap_data(), data.offset(), data.length(),
+ [&](int64_t position, int64_t length) {
+ for (int64_t i = 0; i < length; ++i) {
+ buffered_indices_[buffer_position++] =
+ static_cast<int32_t>(values[i + position]);
+ }
+ });
+ }
+
+ void PutIndices(const ::arrow::Array& data) override {
+ switch (data.type()->id()) {
+ case ::arrow::Type::UINT8:
+ case ::arrow::Type::INT8:
+ return PutIndicesTyped<::arrow::UInt8Type>(data);
+ case ::arrow::Type::UINT16:
+ case ::arrow::Type::INT16:
+ return PutIndicesTyped<::arrow::UInt16Type>(data);
+ case ::arrow::Type::UINT32:
+ case ::arrow::Type::INT32:
+ return PutIndicesTyped<::arrow::UInt32Type>(data);
+ case ::arrow::Type::UINT64:
+ case ::arrow::Type::INT64:
+ return PutIndicesTyped<::arrow::UInt64Type>(data);
+ default:
+ throw ParquetException("Passed non-integer array to PutIndices");
+ }
+ }
+
+ std::shared_ptr<Buffer> FlushValues() override {
+ std::shared_ptr<ResizableBuffer> buffer =
+ AllocateBuffer(this->pool_, EstimatedDataEncodedSize());
+ int result_size = WriteIndices(buffer->mutable_data(),
+ static_cast<int>(EstimatedDataEncodedSize()));
+ PARQUET_THROW_NOT_OK(buffer->Resize(result_size, false));
+ return std::move(buffer);
+ }
+
+ /// Writes out the encoded dictionary to buffer. buffer must be preallocated to
+ /// dict_encoded_size() bytes.
+ void WriteDict(uint8_t* buffer) override;
+
+ /// The number of entries in the dictionary.
+ int num_entries() const override { return memo_table_.size(); }
+
+ private:
+ /// Clears all the indices (but leaves the dictionary).
+ void ClearIndices() { buffered_indices_.clear(); }
+
+  /// Indices that have not yet been written out by WriteIndices().
+ ArrowPoolVector<int32_t> buffered_indices_;
+
+ template <typename ArrayType>
+ void PutBinaryArray(const ArrayType& array) {
+ PARQUET_THROW_NOT_OK(::arrow::VisitArrayDataInline<typename ArrayType::TypeClass>(
+ *array.data(),
+ [&](::arrow::util::string_view view) {
+ if (ARROW_PREDICT_FALSE(view.size() > kMaxByteArraySize)) {
+ return Status::Invalid("Parquet cannot store strings with size 2GB or more");
+ }
+ PutByteArray(view.data(), static_cast<uint32_t>(view.size()));
+ return Status::OK();
+ },
+ []() { return Status::OK(); }));
+ }
+
+ template <typename ArrayType>
+ void PutBinaryDictionaryArray(const ArrayType& array) {
+ DCHECK_EQ(array.null_count(), 0);
+ for (int64_t i = 0; i < array.length(); i++) {
+ auto v = array.GetView(i);
+ if (ARROW_PREDICT_FALSE(v.size() > kMaxByteArraySize)) {
+ throw ParquetException("Parquet cannot store strings with size 2GB or more");
+ }
+ dict_encoded_size_ += static_cast<int>(v.size() + sizeof(uint32_t));
+ int32_t unused_memo_index;
+ PARQUET_THROW_NOT_OK(memo_table_.GetOrInsert(
+ v.data(), static_cast<int32_t>(v.size()), &unused_memo_index));
+ }
+ }
+
+ /// The number of bytes needed to encode the dictionary.
+ int dict_encoded_size_;
+
+ MemoTableType memo_table_;
+};
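+
+// A minimal usage sketch of the encoder above (identifiers other than the
+// class members are hypothetical):
+//
+//   DictEncoderImpl<Int32Type> encoder(descr, pool);
+//   encoder.Put(values, num_values);              // grows dict, buffers indices
+//   auto dict_buf = AllocateBuffer(pool, encoder.dict_encoded_size());
+//   encoder.WriteDict(dict_buf->mutable_data());  // dictionary page payload
+//   auto data_page = encoder.FlushValues();       // bit width byte + RLE runs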
+
+template <typename DType>
+void DictEncoderImpl<DType>::WriteDict(uint8_t* buffer) {
+  // For primitive types, this is just a memcpy of the memo table's values
+ DCHECK_EQ(static_cast<size_t>(dict_encoded_size_), sizeof(T) * memo_table_.size());
+ memo_table_.CopyValues(0 /* start_pos */, reinterpret_cast<T*>(buffer));
+}
+
+// ByteArray and FLBA already have the dictionary encoded in their data heaps
+template <>
+void DictEncoderImpl<ByteArrayType>::WriteDict(uint8_t* buffer) {
+ memo_table_.VisitValues(0, [&buffer](const ::arrow::util::string_view& v) {
+ uint32_t len = static_cast<uint32_t>(v.length());
+ memcpy(buffer, &len, sizeof(len));
+ buffer += sizeof(len);
+ memcpy(buffer, v.data(), len);
+ buffer += len;
+ });
+}
+
+template <>
+void DictEncoderImpl<FLBAType>::WriteDict(uint8_t* buffer) {
+ memo_table_.VisitValues(0, [&](const ::arrow::util::string_view& v) {
+ DCHECK_EQ(v.length(), static_cast<size_t>(type_length_));
+ memcpy(buffer, v.data(), type_length_);
+ buffer += type_length_;
+ });
+}
+
+template <typename DType>
+inline void DictEncoderImpl<DType>::Put(const T& v) {
+ // Put() implementation for primitive types
+ auto on_found = [](int32_t memo_index) {};
+ auto on_not_found = [this](int32_t memo_index) {
+ dict_encoded_size_ += static_cast<int>(sizeof(T));
+ };
+
+ int32_t memo_index;
+ PARQUET_THROW_NOT_OK(memo_table_.GetOrInsert(v, on_found, on_not_found, &memo_index));
+ buffered_indices_.push_back(memo_index);
+}
+
+template <typename DType>
+inline void DictEncoderImpl<DType>::PutByteArray(const void* ptr, int32_t length) {
+ DCHECK(false);
+}
+
+template <>
+inline void DictEncoderImpl<ByteArrayType>::PutByteArray(const void* ptr,
+ int32_t length) {
+ static const uint8_t empty[] = {0};
+
+ auto on_found = [](int32_t memo_index) {};
+ auto on_not_found = [&](int32_t memo_index) {
+ dict_encoded_size_ += static_cast<int>(length + sizeof(uint32_t));
+ };
+
+ DCHECK(ptr != nullptr || length == 0);
+ ptr = (ptr != nullptr) ? ptr : empty;
+ int32_t memo_index;
+ PARQUET_THROW_NOT_OK(
+ memo_table_.GetOrInsert(ptr, length, on_found, on_not_found, &memo_index));
+ buffered_indices_.push_back(memo_index);
+}
+
+template <>
+inline void DictEncoderImpl<ByteArrayType>::Put(const ByteArray& val) {
+ return PutByteArray(val.ptr, static_cast<int32_t>(val.len));
+}
+
+template <>
+inline void DictEncoderImpl<FLBAType>::Put(const FixedLenByteArray& v) {
+ static const uint8_t empty[] = {0};
+
+ auto on_found = [](int32_t memo_index) {};
+ auto on_not_found = [this](int32_t memo_index) { dict_encoded_size_ += type_length_; };
+
+ DCHECK(v.ptr != nullptr || type_length_ == 0);
+ const void* ptr = (v.ptr != nullptr) ? v.ptr : empty;
+ int32_t memo_index;
+ PARQUET_THROW_NOT_OK(
+ memo_table_.GetOrInsert(ptr, type_length_, on_found, on_not_found, &memo_index));
+ buffered_indices_.push_back(memo_index);
+}
+
+template <>
+void DictEncoderImpl<Int96Type>::Put(const ::arrow::Array& values) {
+ ParquetException::NYI("Direct put to Int96");
+}
+
+template <>
+void DictEncoderImpl<Int96Type>::PutDictionary(const ::arrow::Array& values) {
+ ParquetException::NYI("Direct put to Int96");
+}
+
+template <typename DType>
+void DictEncoderImpl<DType>::Put(const ::arrow::Array& values) {
+ using ArrayType = typename ::arrow::CTypeTraits<typename DType::c_type>::ArrayType;
+ const auto& data = checked_cast<const ArrayType&>(values);
+ if (data.null_count() == 0) {
+ // no nulls, just dump the data
+ for (int64_t i = 0; i < data.length(); i++) {
+ Put(data.Value(i));
+ }
+ } else {
+ for (int64_t i = 0; i < data.length(); i++) {
+ if (data.IsValid(i)) {
+ Put(data.Value(i));
+ }
+ }
+ }
+}
+
+template <>
+void DictEncoderImpl<FLBAType>::Put(const ::arrow::Array& values) {
+ AssertFixedSizeBinary(values, type_length_);
+ const auto& data = checked_cast<const ::arrow::FixedSizeBinaryArray&>(values);
+ if (data.null_count() == 0) {
+ // no nulls, just dump the data
+ for (int64_t i = 0; i < data.length(); i++) {
+ Put(FixedLenByteArray(data.Value(i)));
+ }
+ } else {
+ std::vector<uint8_t> empty(type_length_, 0);
+ for (int64_t i = 0; i < data.length(); i++) {
+ if (data.IsValid(i)) {
+ Put(FixedLenByteArray(data.Value(i)));
+ }
+ }
+ }
+}
+
+template <>
+void DictEncoderImpl<ByteArrayType>::Put(const ::arrow::Array& values) {
+ AssertBaseBinary(values);
+ if (::arrow::is_binary_like(values.type_id())) {
+ PutBinaryArray(checked_cast<const ::arrow::BinaryArray&>(values));
+ } else {
+ DCHECK(::arrow::is_large_binary_like(values.type_id()));
+ PutBinaryArray(checked_cast<const ::arrow::LargeBinaryArray&>(values));
+ }
+}
+
+template <typename DType>
+void AssertCanPutDictionary(DictEncoderImpl<DType>* encoder, const ::arrow::Array& dict) {
+ if (dict.null_count() > 0) {
+ throw ParquetException("Inserted dictionary cannot cannot contain nulls");
+ }
+
+ if (encoder->num_entries() > 0) {
+ throw ParquetException("Can only call PutDictionary on an empty DictEncoder");
+ }
+}
+
+template <typename DType>
+void DictEncoderImpl<DType>::PutDictionary(const ::arrow::Array& values) {
+ AssertCanPutDictionary(this, values);
+
+ using ArrayType = typename ::arrow::CTypeTraits<typename DType::c_type>::ArrayType;
+ const auto& data = checked_cast<const ArrayType&>(values);
+
+ dict_encoded_size_ += static_cast<int>(sizeof(typename DType::c_type) * data.length());
+ for (int64_t i = 0; i < data.length(); i++) {
+ int32_t unused_memo_index;
+ PARQUET_THROW_NOT_OK(memo_table_.GetOrInsert(data.Value(i), &unused_memo_index));
+ }
+}
+
+template <>
+void DictEncoderImpl<FLBAType>::PutDictionary(const ::arrow::Array& values) {
+ AssertFixedSizeBinary(values, type_length_);
+ AssertCanPutDictionary(this, values);
+
+ const auto& data = checked_cast<const ::arrow::FixedSizeBinaryArray&>(values);
+
+ dict_encoded_size_ += static_cast<int>(type_length_ * data.length());
+ for (int64_t i = 0; i < data.length(); i++) {
+ int32_t unused_memo_index;
+ PARQUET_THROW_NOT_OK(
+ memo_table_.GetOrInsert(data.Value(i), type_length_, &unused_memo_index));
+ }
+}
+
+template <>
+void DictEncoderImpl<ByteArrayType>::PutDictionary(const ::arrow::Array& values) {
+ AssertBaseBinary(values);
+ AssertCanPutDictionary(this, values);
+
+ if (::arrow::is_binary_like(values.type_id())) {
+ PutBinaryDictionaryArray(checked_cast<const ::arrow::BinaryArray&>(values));
+ } else {
+ DCHECK(::arrow::is_large_binary_like(values.type_id()));
+ PutBinaryDictionaryArray(checked_cast<const ::arrow::LargeBinaryArray&>(values));
+ }
+}
+
+// ----------------------------------------------------------------------
+// ByteStreamSplitEncoder<T> implementations
+
+template <typename DType>
+class ByteStreamSplitEncoder : public EncoderImpl, virtual public TypedEncoder<DType> {
+ public:
+ using T = typename DType::c_type;
+ using TypedEncoder<DType>::Put;
+
+ explicit ByteStreamSplitEncoder(
+ const ColumnDescriptor* descr,
+ ::arrow::MemoryPool* pool = ::arrow::default_memory_pool());
+
+ int64_t EstimatedDataEncodedSize() override;
+ std::shared_ptr<Buffer> FlushValues() override;
+
+ void Put(const T* buffer, int num_values) override;
+ void Put(const ::arrow::Array& values) override;
+ void PutSpaced(const T* src, int num_values, const uint8_t* valid_bits,
+ int64_t valid_bits_offset) override;
+
+ protected:
+ template <typename ArrowType>
+ void PutImpl(const ::arrow::Array& values) {
+ if (values.type_id() != ArrowType::type_id) {
+ throw ParquetException(std::string() + "direct put to " + ArrowType::type_name() +
+ " from " + values.type()->ToString() + " not supported");
+ }
+ const auto& data = *values.data();
+ PutSpaced(data.GetValues<typename ArrowType::c_type>(1),
+ static_cast<int>(data.length), data.GetValues<uint8_t>(0, 0), data.offset);
+ }
+
+ ::arrow::BufferBuilder sink_;
+ int64_t num_values_in_buffer_;
+};
+
+template <typename DType>
+ByteStreamSplitEncoder<DType>::ByteStreamSplitEncoder(const ColumnDescriptor* descr,
+ ::arrow::MemoryPool* pool)
+ : EncoderImpl(descr, Encoding::BYTE_STREAM_SPLIT, pool),
+ sink_{pool},
+ num_values_in_buffer_{0} {}
+
+template <typename DType>
+int64_t ByteStreamSplitEncoder<DType>::EstimatedDataEncodedSize() {
+ return sink_.length();
+}
+
+template <typename DType>
+std::shared_ptr<Buffer> ByteStreamSplitEncoder<DType>::FlushValues() {
+ std::shared_ptr<ResizableBuffer> output_buffer =
+ AllocateBuffer(this->memory_pool(), EstimatedDataEncodedSize());
+ uint8_t* output_buffer_raw = output_buffer->mutable_data();
+ const uint8_t* raw_values = sink_.data();
+ ::arrow::util::internal::ByteStreamSplitEncode<T>(raw_values, num_values_in_buffer_,
+ output_buffer_raw);
+ sink_.Reset();
+ num_values_in_buffer_ = 0;
+ return std::move(output_buffer);
+}
+
+template <typename DType>
+void ByteStreamSplitEncoder<DType>::Put(const T* buffer, int num_values) {
+ if (num_values > 0) {
+ PARQUET_THROW_NOT_OK(sink_.Append(buffer, num_values * sizeof(T)));
+ num_values_in_buffer_ += num_values;
+ }
+}
+
+template <>
+void ByteStreamSplitEncoder<FloatType>::Put(const ::arrow::Array& values) {
+ PutImpl<::arrow::FloatType>(values);
+}
+
+template <>
+void ByteStreamSplitEncoder<DoubleType>::Put(const ::arrow::Array& values) {
+ PutImpl<::arrow::DoubleType>(values);
+}
+
+template <typename DType>
+void ByteStreamSplitEncoder<DType>::PutSpaced(const T* src, int num_values,
+ const uint8_t* valid_bits,
+ int64_t valid_bits_offset) {
+ if (valid_bits != NULLPTR) {
+ PARQUET_ASSIGN_OR_THROW(auto buffer, ::arrow::AllocateBuffer(num_values * sizeof(T),
+ this->memory_pool()));
+ T* data = reinterpret_cast<T*>(buffer->mutable_data());
+ int num_valid_values = ::arrow::util::internal::SpacedCompress<T>(
+ src, num_values, valid_bits, valid_bits_offset, data);
+ Put(data, num_valid_values);
+ } else {
+ Put(src, num_values);
+ }
+}
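+
+// For reference, BYTE_STREAM_SPLIT (applied in FlushValues() above) scatters
+// the k-th byte of every value into the k-th output stream; for two
+// little-endian floats a and b:
+//
+//   input  = a0 a1 a2 a3 b0 b1 b2 b3
+//   output = a0 b0 | a1 b1 | a2 b2 | a3 b3
+//
+// which tends to compress better under general-purpose codecs.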
+
+class DecoderImpl : virtual public Decoder {
+ public:
+ void SetData(int num_values, const uint8_t* data, int len) override {
+ num_values_ = num_values;
+ data_ = data;
+ len_ = len;
+ }
+
+ int values_left() const override { return num_values_; }
+ Encoding::type encoding() const override { return encoding_; }
+
+ protected:
+ explicit DecoderImpl(const ColumnDescriptor* descr, Encoding::type encoding)
+ : descr_(descr), encoding_(encoding), num_values_(0), data_(NULLPTR), len_(0) {}
+
+ // For accessing type-specific metadata, like FIXED_LEN_BYTE_ARRAY
+ const ColumnDescriptor* descr_;
+
+ const Encoding::type encoding_;
+ int num_values_;
+ const uint8_t* data_;
+ int len_;
+ int type_length_;
+};
+
+template <typename DType>
+class PlainDecoder : public DecoderImpl, virtual public TypedDecoder<DType> {
+ public:
+ using T = typename DType::c_type;
+ explicit PlainDecoder(const ColumnDescriptor* descr);
+
+ int Decode(T* buffer, int max_values) override;
+
+ int DecodeArrow(int num_values, int null_count, const uint8_t* valid_bits,
+ int64_t valid_bits_offset,
+ typename EncodingTraits<DType>::Accumulator* builder) override;
+
+ int DecodeArrow(int num_values, int null_count, const uint8_t* valid_bits,
+ int64_t valid_bits_offset,
+ typename EncodingTraits<DType>::DictAccumulator* builder) override;
+};
+
+template <>
+inline int PlainDecoder<Int96Type>::DecodeArrow(
+ int num_values, int null_count, const uint8_t* valid_bits, int64_t valid_bits_offset,
+ typename EncodingTraits<Int96Type>::Accumulator* builder) {
+ ParquetException::NYI("DecodeArrow not supported for Int96");
+}
+
+template <>
+inline int PlainDecoder<Int96Type>::DecodeArrow(
+ int num_values, int null_count, const uint8_t* valid_bits, int64_t valid_bits_offset,
+ typename EncodingTraits<Int96Type>::DictAccumulator* builder) {
+ ParquetException::NYI("DecodeArrow not supported for Int96");
+}
+
+template <>
+inline int PlainDecoder<BooleanType>::DecodeArrow(
+ int num_values, int null_count, const uint8_t* valid_bits, int64_t valid_bits_offset,
+ typename EncodingTraits<BooleanType>::DictAccumulator* builder) {
+ ParquetException::NYI("dictionaries of BooleanType");
+}
+
+template <typename DType>
+int PlainDecoder<DType>::DecodeArrow(
+ int num_values, int null_count, const uint8_t* valid_bits, int64_t valid_bits_offset,
+ typename EncodingTraits<DType>::Accumulator* builder) {
+ using value_type = typename DType::c_type;
+
+ constexpr int value_size = static_cast<int>(sizeof(value_type));
+ int values_decoded = num_values - null_count;
+ if (ARROW_PREDICT_FALSE(len_ < value_size * values_decoded)) {
+ ParquetException::EofException();
+ }
+
+ PARQUET_THROW_NOT_OK(builder->Reserve(num_values));
+
+ VisitNullBitmapInline(
+ valid_bits, valid_bits_offset, num_values, null_count,
+ [&]() {
+ builder->UnsafeAppend(::arrow::util::SafeLoadAs<value_type>(data_));
+ data_ += sizeof(value_type);
+ },
+ [&]() { builder->UnsafeAppendNull(); });
+
+ num_values_ -= values_decoded;
+ len_ -= sizeof(value_type) * values_decoded;
+ return values_decoded;
+}
+
+template <typename DType>
+int PlainDecoder<DType>::DecodeArrow(
+ int num_values, int null_count, const uint8_t* valid_bits, int64_t valid_bits_offset,
+ typename EncodingTraits<DType>::DictAccumulator* builder) {
+ using value_type = typename DType::c_type;
+
+ constexpr int value_size = static_cast<int>(sizeof(value_type));
+ int values_decoded = num_values - null_count;
+ if (ARROW_PREDICT_FALSE(len_ < value_size * values_decoded)) {
+ ParquetException::EofException();
+ }
+
+ PARQUET_THROW_NOT_OK(builder->Reserve(num_values));
+
+ VisitNullBitmapInline(
+ valid_bits, valid_bits_offset, num_values, null_count,
+ [&]() {
+ PARQUET_THROW_NOT_OK(
+ builder->Append(::arrow::util::SafeLoadAs<value_type>(data_)));
+ data_ += sizeof(value_type);
+ },
+ [&]() { PARQUET_THROW_NOT_OK(builder->AppendNull()); });
+
+ num_values_ -= values_decoded;
+ len_ -= sizeof(value_type) * values_decoded;
+ return values_decoded;
+}
+
+// Decode routine templated on C++ type rather than type enum
+template <typename T>
+inline int DecodePlain(const uint8_t* data, int64_t data_size, int num_values,
+ int type_length, T* out) {
+ int64_t bytes_to_decode = num_values * static_cast<int64_t>(sizeof(T));
+ if (bytes_to_decode > data_size || bytes_to_decode > INT_MAX) {
+ ParquetException::EofException();
+ }
+ // If bytes_to_decode == 0, data could be null
+ if (bytes_to_decode > 0) {
+ memcpy(out, data, bytes_to_decode);
+ }
+ return static_cast<int>(bytes_to_decode);
+}
+
+template <typename DType>
+PlainDecoder<DType>::PlainDecoder(const ColumnDescriptor* descr)
+ : DecoderImpl(descr, Encoding::PLAIN) {
+ if (descr_ && descr_->physical_type() == Type::FIXED_LEN_BYTE_ARRAY) {
+ type_length_ = descr_->type_length();
+ } else {
+ type_length_ = -1;
+ }
+}
+
+// Template specialization for BYTE_ARRAY. The written values do not own their
+// own data.
+
+static inline int64_t ReadByteArray(const uint8_t* data, int64_t data_size,
+ ByteArray* out) {
+ if (ARROW_PREDICT_FALSE(data_size < 4)) {
+ ParquetException::EofException();
+ }
+ const int32_t len = ::arrow::util::SafeLoadAs<int32_t>(data);
+ if (len < 0) {
+ throw ParquetException("Invalid BYTE_ARRAY value");
+ }
+ const int64_t consumed_length = static_cast<int64_t>(len) + 4;
+ if (ARROW_PREDICT_FALSE(data_size < consumed_length)) {
+ ParquetException::EofException();
+ }
+ *out = ByteArray{static_cast<uint32_t>(len), data + 4};
+ return consumed_length;
+}
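+
+// Sketch of the PLAIN BYTE_ARRAY layout consumed above: a 4-byte
+// little-endian length prefix followed by that many bytes, e.g.
+//
+//   "foo"  ->  03 00 00 00 66 6f 6f   // consumed_length == 7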
+
+template <>
+inline int DecodePlain<ByteArray>(const uint8_t* data, int64_t data_size, int num_values,
+ int type_length, ByteArray* out) {
+ int bytes_decoded = 0;
+ for (int i = 0; i < num_values; ++i) {
+ const auto increment = ReadByteArray(data, data_size, out + i);
+ if (ARROW_PREDICT_FALSE(increment > INT_MAX - bytes_decoded)) {
+ throw ParquetException("BYTE_ARRAY chunk too large");
+ }
+ data += increment;
+ data_size -= increment;
+ bytes_decoded += static_cast<int>(increment);
+ }
+ return bytes_decoded;
+}
+
+// Template specialization for FIXED_LEN_BYTE_ARRAY. The written values do not
+// own their own data.
+template <>
+inline int DecodePlain<FixedLenByteArray>(const uint8_t* data, int64_t data_size,
+ int num_values, int type_length,
+ FixedLenByteArray* out) {
+ int64_t bytes_to_decode = static_cast<int64_t>(type_length) * num_values;
+ if (bytes_to_decode > data_size || bytes_to_decode > INT_MAX) {
+ ParquetException::EofException();
+ }
+ for (int i = 0; i < num_values; ++i) {
+ out[i].ptr = data;
+ data += type_length;
+ data_size -= type_length;
+ }
+ return static_cast<int>(bytes_to_decode);
+}
+
+template <typename DType>
+int PlainDecoder<DType>::Decode(T* buffer, int max_values) {
+ max_values = std::min(max_values, num_values_);
+ int bytes_consumed = DecodePlain<T>(data_, len_, max_values, type_length_, buffer);
+ data_ += bytes_consumed;
+ len_ -= bytes_consumed;
+ num_values_ -= max_values;
+ return max_values;
+}
+
+class PlainBooleanDecoder : public DecoderImpl,
+ virtual public TypedDecoder<BooleanType>,
+ virtual public BooleanDecoder {
+ public:
+ explicit PlainBooleanDecoder(const ColumnDescriptor* descr);
+ void SetData(int num_values, const uint8_t* data, int len) override;
+
+ // Two flavors of bool decoding
+ int Decode(uint8_t* buffer, int max_values) override;
+ int Decode(bool* buffer, int max_values) override;
+ int DecodeArrow(int num_values, int null_count, const uint8_t* valid_bits,
+ int64_t valid_bits_offset,
+ typename EncodingTraits<BooleanType>::Accumulator* out) override;
+
+ int DecodeArrow(int num_values, int null_count, const uint8_t* valid_bits,
+ int64_t valid_bits_offset,
+ typename EncodingTraits<BooleanType>::DictAccumulator* out) override;
+
+ private:
+ std::unique_ptr<::arrow::BitUtil::BitReader> bit_reader_;
+};
+
+PlainBooleanDecoder::PlainBooleanDecoder(const ColumnDescriptor* descr)
+ : DecoderImpl(descr, Encoding::PLAIN) {}
+
+void PlainBooleanDecoder::SetData(int num_values, const uint8_t* data, int len) {
+ num_values_ = num_values;
+ bit_reader_.reset(new BitUtil::BitReader(data, len));
+}
+
+int PlainBooleanDecoder::DecodeArrow(
+ int num_values, int null_count, const uint8_t* valid_bits, int64_t valid_bits_offset,
+ typename EncodingTraits<BooleanType>::Accumulator* builder) {
+ int values_decoded = num_values - null_count;
+ if (ARROW_PREDICT_FALSE(num_values_ < values_decoded)) {
+ ParquetException::EofException();
+ }
+
+ PARQUET_THROW_NOT_OK(builder->Reserve(num_values));
+
+ VisitNullBitmapInline(
+ valid_bits, valid_bits_offset, num_values, null_count,
+ [&]() {
+ bool value;
+ ARROW_IGNORE_EXPR(bit_reader_->GetValue(1, &value));
+ builder->UnsafeAppend(value);
+ },
+ [&]() { builder->UnsafeAppendNull(); });
+
+ num_values_ -= values_decoded;
+ return values_decoded;
+}
+
+inline int PlainBooleanDecoder::DecodeArrow(
+ int num_values, int null_count, const uint8_t* valid_bits, int64_t valid_bits_offset,
+ typename EncodingTraits<BooleanType>::DictAccumulator* builder) {
+ ParquetException::NYI("dictionaries of BooleanType");
+}
+
+int PlainBooleanDecoder::Decode(uint8_t* buffer, int max_values) {
+ max_values = std::min(max_values, num_values_);
+ bool val;
+ ::arrow::internal::BitmapWriter bit_writer(buffer, 0, max_values);
+ for (int i = 0; i < max_values; ++i) {
+ if (!bit_reader_->GetValue(1, &val)) {
+ ParquetException::EofException();
+ }
+ if (val) {
+ bit_writer.Set();
+ }
+ bit_writer.Next();
+ }
+ bit_writer.Finish();
+ num_values_ -= max_values;
+ return max_values;
+}
+
+int PlainBooleanDecoder::Decode(bool* buffer, int max_values) {
+ max_values = std::min(max_values, num_values_);
+ if (bit_reader_->GetBatch(1, buffer, max_values) != max_values) {
+ ParquetException::EofException();
+ }
+ num_values_ -= max_values;
+ return max_values;
+}
+
+struct ArrowBinaryHelper {
+ explicit ArrowBinaryHelper(typename EncodingTraits<ByteArrayType>::Accumulator* out) {
+ this->out = out;
+ this->builder = out->builder.get();
+ this->chunk_space_remaining =
+ ::arrow::kBinaryMemoryLimit - this->builder->value_data_length();
+ }
+
+ Status PushChunk() {
+ std::shared_ptr<::arrow::Array> result;
+ RETURN_NOT_OK(builder->Finish(&result));
+ out->chunks.push_back(result);
+ chunk_space_remaining = ::arrow::kBinaryMemoryLimit;
+ return Status::OK();
+ }
+
+ bool CanFit(int64_t length) const { return length <= chunk_space_remaining; }
+
+ void UnsafeAppend(const uint8_t* data, int32_t length) {
+ chunk_space_remaining -= length;
+ builder->UnsafeAppend(data, length);
+ }
+
+ void UnsafeAppendNull() { builder->UnsafeAppendNull(); }
+
+ Status Append(const uint8_t* data, int32_t length) {
+ chunk_space_remaining -= length;
+ return builder->Append(data, length);
+ }
+
+ Status AppendNull() { return builder->AppendNull(); }
+
+ typename EncodingTraits<ByteArrayType>::Accumulator* out;
+ ::arrow::BinaryBuilder* builder;
+ int64_t chunk_space_remaining;
+};
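+
+// ArrowBinaryHelper exists because a single ::arrow::BinaryArray can hold at
+// most ::arrow::kBinaryMemoryLimit (close to 2 GB) bytes of value data; the
+// decoders below roll over to a fresh chunk when the next value would not
+// fit, following the pattern:
+//
+//   ArrowBinaryHelper helper(out);
+//   if (!helper.CanFit(value_len)) {
+//     PARQUET_THROW_NOT_OK(helper.PushChunk());   // finish the current chunk
+//   }
+//   PARQUET_THROW_NOT_OK(helper.Append(data, value_len));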
+
+template <>
+inline int PlainDecoder<ByteArrayType>::DecodeArrow(
+ int num_values, int null_count, const uint8_t* valid_bits, int64_t valid_bits_offset,
+ typename EncodingTraits<ByteArrayType>::Accumulator* builder) {
+ ParquetException::NYI();
+}
+
+template <>
+inline int PlainDecoder<ByteArrayType>::DecodeArrow(
+ int num_values, int null_count, const uint8_t* valid_bits, int64_t valid_bits_offset,
+ typename EncodingTraits<ByteArrayType>::DictAccumulator* builder) {
+ ParquetException::NYI();
+}
+
+template <>
+inline int PlainDecoder<FLBAType>::DecodeArrow(
+ int num_values, int null_count, const uint8_t* valid_bits, int64_t valid_bits_offset,
+ typename EncodingTraits<FLBAType>::Accumulator* builder) {
+ int values_decoded = num_values - null_count;
+ if (ARROW_PREDICT_FALSE(len_ < descr_->type_length() * values_decoded)) {
+ ParquetException::EofException();
+ }
+
+ PARQUET_THROW_NOT_OK(builder->Reserve(num_values));
+
+ VisitNullBitmapInline(
+ valid_bits, valid_bits_offset, num_values, null_count,
+ [&]() {
+ builder->UnsafeAppend(data_);
+ data_ += descr_->type_length();
+ },
+ [&]() { builder->UnsafeAppendNull(); });
+
+ num_values_ -= values_decoded;
+ len_ -= descr_->type_length() * values_decoded;
+ return values_decoded;
+}
+
+template <>
+inline int PlainDecoder<FLBAType>::DecodeArrow(
+ int num_values, int null_count, const uint8_t* valid_bits, int64_t valid_bits_offset,
+ typename EncodingTraits<FLBAType>::DictAccumulator* builder) {
+ int values_decoded = num_values - null_count;
+ if (ARROW_PREDICT_FALSE(len_ < descr_->type_length() * values_decoded)) {
+ ParquetException::EofException();
+ }
+
+ PARQUET_THROW_NOT_OK(builder->Reserve(num_values));
+
+ VisitNullBitmapInline(
+ valid_bits, valid_bits_offset, num_values, null_count,
+ [&]() {
+ PARQUET_THROW_NOT_OK(builder->Append(data_));
+ data_ += descr_->type_length();
+ },
+ [&]() { PARQUET_THROW_NOT_OK(builder->AppendNull()); });
+
+ num_values_ -= values_decoded;
+ len_ -= descr_->type_length() * values_decoded;
+ return values_decoded;
+}
+
+class PlainByteArrayDecoder : public PlainDecoder<ByteArrayType>,
+ virtual public ByteArrayDecoder {
+ public:
+ using Base = PlainDecoder<ByteArrayType>;
+ using Base::DecodeSpaced;
+ using Base::PlainDecoder;
+
+ // ----------------------------------------------------------------------
+ // Dictionary read paths
+
+ int DecodeArrow(int num_values, int null_count, const uint8_t* valid_bits,
+ int64_t valid_bits_offset,
+ ::arrow::BinaryDictionary32Builder* builder) override {
+ int result = 0;
+ PARQUET_THROW_NOT_OK(DecodeArrow(num_values, null_count, valid_bits,
+ valid_bits_offset, builder, &result));
+ return result;
+ }
+
+ // ----------------------------------------------------------------------
+ // Optimized dense binary read paths
+
+ int DecodeArrow(int num_values, int null_count, const uint8_t* valid_bits,
+ int64_t valid_bits_offset,
+ typename EncodingTraits<ByteArrayType>::Accumulator* out) override {
+ int result = 0;
+ PARQUET_THROW_NOT_OK(DecodeArrowDense(num_values, null_count, valid_bits,
+ valid_bits_offset, out, &result));
+ return result;
+ }
+
+ private:
+ Status DecodeArrowDense(int num_values, int null_count, const uint8_t* valid_bits,
+ int64_t valid_bits_offset,
+ typename EncodingTraits<ByteArrayType>::Accumulator* out,
+ int* out_values_decoded) {
+ ArrowBinaryHelper helper(out);
+ int values_decoded = 0;
+
+ RETURN_NOT_OK(helper.builder->Reserve(num_values));
+ RETURN_NOT_OK(helper.builder->ReserveData(
+ std::min<int64_t>(len_, helper.chunk_space_remaining)));
+
+ int i = 0;
+ RETURN_NOT_OK(VisitNullBitmapInline(
+ valid_bits, valid_bits_offset, num_values, null_count,
+ [&]() {
+ if (ARROW_PREDICT_FALSE(len_ < 4)) {
+ ParquetException::EofException();
+ }
+ auto value_len = ::arrow::util::SafeLoadAs<int32_t>(data_);
+ if (ARROW_PREDICT_FALSE(value_len < 0 || value_len > INT32_MAX - 4)) {
+ return Status::Invalid("Invalid or corrupted value_len '", value_len, "'");
+ }
+ auto increment = value_len + 4;
+ if (ARROW_PREDICT_FALSE(len_ < increment)) {
+ ParquetException::EofException();
+ }
+ if (ARROW_PREDICT_FALSE(!helper.CanFit(value_len))) {
+ // This element would exceed the capacity of a chunk
+ RETURN_NOT_OK(helper.PushChunk());
+ RETURN_NOT_OK(helper.builder->Reserve(num_values - i));
+ RETURN_NOT_OK(helper.builder->ReserveData(
+ std::min<int64_t>(len_, helper.chunk_space_remaining)));
+ }
+ helper.UnsafeAppend(data_ + 4, value_len);
+ data_ += increment;
+ len_ -= increment;
+ ++values_decoded;
+ ++i;
+ return Status::OK();
+ },
+ [&]() {
+ helper.UnsafeAppendNull();
+ ++i;
+ return Status::OK();
+ }));
+
+ num_values_ -= values_decoded;
+ *out_values_decoded = values_decoded;
+ return Status::OK();
+ }
+
+ template <typename BuilderType>
+ Status DecodeArrow(int num_values, int null_count, const uint8_t* valid_bits,
+ int64_t valid_bits_offset, BuilderType* builder,
+ int* out_values_decoded) {
+ RETURN_NOT_OK(builder->Reserve(num_values));
+ int values_decoded = 0;
+
+ RETURN_NOT_OK(VisitNullBitmapInline(
+ valid_bits, valid_bits_offset, num_values, null_count,
+ [&]() {
+ if (ARROW_PREDICT_FALSE(len_ < 4)) {
+ ParquetException::EofException();
+ }
+ auto value_len = ::arrow::util::SafeLoadAs<int32_t>(data_);
+ if (ARROW_PREDICT_FALSE(value_len < 0 || value_len > INT32_MAX - 4)) {
+ return Status::Invalid("Invalid or corrupted value_len '", value_len, "'");
+ }
+ auto increment = value_len + 4;
+ if (ARROW_PREDICT_FALSE(len_ < increment)) {
+ ParquetException::EofException();
+ }
+ RETURN_NOT_OK(builder->Append(data_ + 4, value_len));
+ data_ += increment;
+ len_ -= increment;
+ ++values_decoded;
+ return Status::OK();
+ },
+ [&]() { return builder->AppendNull(); }));
+
+ num_values_ -= values_decoded;
+ *out_values_decoded = values_decoded;
+ return Status::OK();
+ }
+};
+
+class PlainFLBADecoder : public PlainDecoder<FLBAType>, virtual public FLBADecoder {
+ public:
+ using Base = PlainDecoder<FLBAType>;
+ using Base::PlainDecoder;
+};
+
+// ----------------------------------------------------------------------
+// Dictionary encoding and decoding
+
+template <typename Type>
+class DictDecoderImpl : public DecoderImpl, virtual public DictDecoder<Type> {
+ public:
+ typedef typename Type::c_type T;
+
+ // Initializes the dictionary with values from 'dictionary'. The data in
+ // dictionary is not guaranteed to persist in memory after this call so the
+ // dictionary decoder needs to copy the data out if necessary.
+ explicit DictDecoderImpl(const ColumnDescriptor* descr,
+ MemoryPool* pool = ::arrow::default_memory_pool())
+ : DecoderImpl(descr, Encoding::RLE_DICTIONARY),
+ dictionary_(AllocateBuffer(pool, 0)),
+ dictionary_length_(0),
+ byte_array_data_(AllocateBuffer(pool, 0)),
+ byte_array_offsets_(AllocateBuffer(pool, 0)),
+ indices_scratch_space_(AllocateBuffer(pool, 0)) {}
+
+  // Perform type-specific initialization
+ void SetDict(TypedDecoder<Type>* dictionary) override;
+
+ void SetData(int num_values, const uint8_t* data, int len) override {
+ num_values_ = num_values;
+ if (len == 0) {
+ // Initialize dummy decoder to avoid crashes later on
+ idx_decoder_ = ::arrow::util::RleDecoder(data, len, /*bit_width=*/1);
+ return;
+ }
+ uint8_t bit_width = *data;
+ if (ARROW_PREDICT_FALSE(bit_width >= 64)) {
+ throw ParquetException("Invalid or corrupted bit_width");
+ }
+ idx_decoder_ = ::arrow::util::RleDecoder(++data, --len, bit_width);
+ }
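+
+  // The data handed to SetData() for an RLE_DICTIONARY-encoded data page is,
+  // per the Parquet spec:
+  //
+  //   data[0]   -- bit width of the indices (rejected above if >= 64)
+  //   data[1..] -- RLE/bit-packed index runs, consumed by idx_decoder_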
+
+ int Decode(T* buffer, int num_values) override {
+ num_values = std::min(num_values, num_values_);
+ int decoded_values =
+ idx_decoder_.GetBatchWithDict(reinterpret_cast<const T*>(dictionary_->data()),
+ dictionary_length_, buffer, num_values);
+ if (decoded_values != num_values) {
+ ParquetException::EofException();
+ }
+ num_values_ -= num_values;
+ return num_values;
+ }
+
+ int DecodeSpaced(T* buffer, int num_values, int null_count, const uint8_t* valid_bits,
+ int64_t valid_bits_offset) override {
+ num_values = std::min(num_values, num_values_);
+ if (num_values != idx_decoder_.GetBatchWithDictSpaced(
+ reinterpret_cast<const T*>(dictionary_->data()),
+ dictionary_length_, buffer, num_values, null_count, valid_bits,
+ valid_bits_offset)) {
+ ParquetException::EofException();
+ }
+ num_values_ -= num_values;
+ return num_values;
+ }
+
+ int DecodeArrow(int num_values, int null_count, const uint8_t* valid_bits,
+ int64_t valid_bits_offset,
+ typename EncodingTraits<Type>::Accumulator* out) override;
+
+ int DecodeArrow(int num_values, int null_count, const uint8_t* valid_bits,
+ int64_t valid_bits_offset,
+ typename EncodingTraits<Type>::DictAccumulator* out) override;
+
+ void InsertDictionary(::arrow::ArrayBuilder* builder) override;
+
+ int DecodeIndicesSpaced(int num_values, int null_count, const uint8_t* valid_bits,
+ int64_t valid_bits_offset,
+ ::arrow::ArrayBuilder* builder) override {
+ if (num_values > 0) {
+ // TODO(wesm): Refactor to batch reads for improved memory use. It is not
+ // trivial because the null_count is relative to the entire bitmap
+ PARQUET_THROW_NOT_OK(indices_scratch_space_->TypedResize<int32_t>(
+ num_values, /*shrink_to_fit=*/false));
+ }
+
+ auto indices_buffer =
+ reinterpret_cast<int32_t*>(indices_scratch_space_->mutable_data());
+
+ if (num_values != idx_decoder_.GetBatchSpaced(num_values, null_count, valid_bits,
+ valid_bits_offset, indices_buffer)) {
+ ParquetException::EofException();
+ }
+
+ /// XXX(wesm): Cannot append "valid bits" directly to the builder
+ std::vector<uint8_t> valid_bytes(num_values);
+ ::arrow::internal::BitmapReader bit_reader(valid_bits, valid_bits_offset, num_values);
+ for (int64_t i = 0; i < num_values; ++i) {
+ valid_bytes[i] = static_cast<uint8_t>(bit_reader.IsSet());
+ bit_reader.Next();
+ }
+
+ auto binary_builder = checked_cast<::arrow::BinaryDictionary32Builder*>(builder);
+ PARQUET_THROW_NOT_OK(
+ binary_builder->AppendIndices(indices_buffer, num_values, valid_bytes.data()));
+ num_values_ -= num_values - null_count;
+ return num_values - null_count;
+ }
+
+ int DecodeIndices(int num_values, ::arrow::ArrayBuilder* builder) override {
+ num_values = std::min(num_values, num_values_);
+ if (num_values > 0) {
+ // TODO(wesm): Refactor to batch reads for improved memory use. This is
+ // relatively simple here because we don't have to do any bookkeeping of
+ // nulls
+ PARQUET_THROW_NOT_OK(indices_scratch_space_->TypedResize<int32_t>(
+ num_values, /*shrink_to_fit=*/false));
+ }
+ auto indices_buffer =
+ reinterpret_cast<int32_t*>(indices_scratch_space_->mutable_data());
+ if (num_values != idx_decoder_.GetBatch(indices_buffer, num_values)) {
+ ParquetException::EofException();
+ }
+ auto binary_builder = checked_cast<::arrow::BinaryDictionary32Builder*>(builder);
+ PARQUET_THROW_NOT_OK(binary_builder->AppendIndices(indices_buffer, num_values));
+ num_values_ -= num_values;
+ return num_values;
+ }
+
+ int DecodeIndices(int num_values, int32_t* indices) override {
+ if (num_values != idx_decoder_.GetBatch(indices, num_values)) {
+ ParquetException::EofException();
+ }
+ num_values_ -= num_values;
+ return num_values;
+ }
+
+ void GetDictionary(const T** dictionary, int32_t* dictionary_length) override {
+ *dictionary_length = dictionary_length_;
+ *dictionary = reinterpret_cast<T*>(dictionary_->mutable_data());
+ }
+
+ protected:
+ Status IndexInBounds(int32_t index) {
+ if (ARROW_PREDICT_TRUE(0 <= index && index < dictionary_length_)) {
+ return Status::OK();
+ }
+ return Status::Invalid("Index not in dictionary bounds");
+ }
+
+ inline void DecodeDict(TypedDecoder<Type>* dictionary) {
+ dictionary_length_ = static_cast<int32_t>(dictionary->values_left());
+ PARQUET_THROW_NOT_OK(dictionary_->Resize(dictionary_length_ * sizeof(T),
+ /*shrink_to_fit=*/false));
+ dictionary->Decode(reinterpret_cast<T*>(dictionary_->mutable_data()),
+ dictionary_length_);
+ }
+
+  // The decoded dictionary values. For primitive types this is an array of T;
+  // for byte arrays it holds ByteArray entries whose pointers are rewritten in
+  // SetDict() to point into byte_array_data_.
+ std::shared_ptr<ResizableBuffer> dictionary_;
+
+ int32_t dictionary_length_;
+
+ // Data that contains the byte array data (byte_array_dictionary_ just has the
+ // pointers).
+ std::shared_ptr<ResizableBuffer> byte_array_data_;
+
+ // Arrow-style byte offsets for each dictionary value. We maintain two
+ // representations of the dictionary, one as ByteArray* for non-Arrow
+ // consumers and this one for Arrow consumers. Since dictionaries are
+ // generally pretty small to begin with this doesn't mean too much extra
+ // memory use in most cases
+ std::shared_ptr<ResizableBuffer> byte_array_offsets_;
+
+ // Reusable buffer for decoding dictionary indices to be appended to a
+ // BinaryDictionary32Builder
+ std::shared_ptr<ResizableBuffer> indices_scratch_space_;
+
+ ::arrow::util::RleDecoder idx_decoder_;
+};
+
+template <typename Type>
+void DictDecoderImpl<Type>::SetDict(TypedDecoder<Type>* dictionary) {
+ DecodeDict(dictionary);
+}
+
+template <>
+void DictDecoderImpl<BooleanType>::SetDict(TypedDecoder<BooleanType>* dictionary) {
+ ParquetException::NYI("Dictionary encoding is not implemented for boolean values");
+}
+
+template <>
+void DictDecoderImpl<ByteArrayType>::SetDict(TypedDecoder<ByteArrayType>* dictionary) {
+ DecodeDict(dictionary);
+
+ auto dict_values = reinterpret_cast<ByteArray*>(dictionary_->mutable_data());
+
+ int total_size = 0;
+ for (int i = 0; i < dictionary_length_; ++i) {
+ total_size += dict_values[i].len;
+ }
+ PARQUET_THROW_NOT_OK(byte_array_data_->Resize(total_size,
+ /*shrink_to_fit=*/false));
+ PARQUET_THROW_NOT_OK(
+ byte_array_offsets_->Resize((dictionary_length_ + 1) * sizeof(int32_t),
+ /*shrink_to_fit=*/false));
+
+ int32_t offset = 0;
+ uint8_t* bytes_data = byte_array_data_->mutable_data();
+ int32_t* bytes_offsets =
+ reinterpret_cast<int32_t*>(byte_array_offsets_->mutable_data());
+ for (int i = 0; i < dictionary_length_; ++i) {
+ memcpy(bytes_data + offset, dict_values[i].ptr, dict_values[i].len);
+ bytes_offsets[i] = offset;
+ dict_values[i].ptr = bytes_data + offset;
+ offset += dict_values[i].len;
+ }
+ bytes_offsets[dictionary_length_] = offset;
+}
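+
+// A sketch of the two dictionary representations built above, for the values
+// {"a", "bc"}:
+//
+//   byte_array_data_    = 'a' 'b' 'c'
+//   byte_array_offsets_ = {0, 1, 3}     // dictionary_length_ + 1 entries
+//   dict_values[i]      = {len, ptr}    // ptr into byte_array_data_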
+
+template <>
+inline void DictDecoderImpl<FLBAType>::SetDict(TypedDecoder<FLBAType>* dictionary) {
+ DecodeDict(dictionary);
+
+ auto dict_values = reinterpret_cast<FLBA*>(dictionary_->mutable_data());
+
+ int fixed_len = descr_->type_length();
+ int total_size = dictionary_length_ * fixed_len;
+
+ PARQUET_THROW_NOT_OK(byte_array_data_->Resize(total_size,
+ /*shrink_to_fit=*/false));
+ uint8_t* bytes_data = byte_array_data_->mutable_data();
+ for (int32_t i = 0, offset = 0; i < dictionary_length_; ++i, offset += fixed_len) {
+ memcpy(bytes_data + offset, dict_values[i].ptr, fixed_len);
+ dict_values[i].ptr = bytes_data + offset;
+ }
+}
+
+template <>
+inline int DictDecoderImpl<Int96Type>::DecodeArrow(
+ int num_values, int null_count, const uint8_t* valid_bits, int64_t valid_bits_offset,
+ typename EncodingTraits<Int96Type>::Accumulator* builder) {
+ ParquetException::NYI("DecodeArrow to Int96Type");
+}
+
+template <>
+inline int DictDecoderImpl<Int96Type>::DecodeArrow(
+ int num_values, int null_count, const uint8_t* valid_bits, int64_t valid_bits_offset,
+ typename EncodingTraits<Int96Type>::DictAccumulator* builder) {
+ ParquetException::NYI("DecodeArrow to Int96Type");
+}
+
+template <>
+inline int DictDecoderImpl<ByteArrayType>::DecodeArrow(
+ int num_values, int null_count, const uint8_t* valid_bits, int64_t valid_bits_offset,
+ typename EncodingTraits<ByteArrayType>::Accumulator* builder) {
+ ParquetException::NYI("DecodeArrow implemented elsewhere");
+}
+
+template <>
+inline int DictDecoderImpl<ByteArrayType>::DecodeArrow(
+ int num_values, int null_count, const uint8_t* valid_bits, int64_t valid_bits_offset,
+ typename EncodingTraits<ByteArrayType>::DictAccumulator* builder) {
+ ParquetException::NYI("DecodeArrow implemented elsewhere");
+}
+
+template <typename DType>
+int DictDecoderImpl<DType>::DecodeArrow(
+ int num_values, int null_count, const uint8_t* valid_bits, int64_t valid_bits_offset,
+ typename EncodingTraits<DType>::DictAccumulator* builder) {
+ PARQUET_THROW_NOT_OK(builder->Reserve(num_values));
+
+ auto dict_values = reinterpret_cast<const typename DType::c_type*>(dictionary_->data());
+
+ VisitNullBitmapInline(
+ valid_bits, valid_bits_offset, num_values, null_count,
+ [&]() {
+ int32_t index;
+ if (ARROW_PREDICT_FALSE(!idx_decoder_.Get(&index))) {
+ throw ParquetException("");
+ }
+ PARQUET_THROW_NOT_OK(IndexInBounds(index));
+ PARQUET_THROW_NOT_OK(builder->Append(dict_values[index]));
+ },
+ [&]() { PARQUET_THROW_NOT_OK(builder->AppendNull()); });
+
+ return num_values - null_count;
+}
+
+template <>
+int DictDecoderImpl<BooleanType>::DecodeArrow(
+ int num_values, int null_count, const uint8_t* valid_bits, int64_t valid_bits_offset,
+ typename EncodingTraits<BooleanType>::DictAccumulator* builder) {
+ ParquetException::NYI("No dictionary encoding for BooleanType");
+}
+
+template <>
+inline int DictDecoderImpl<FLBAType>::DecodeArrow(
+ int num_values, int null_count, const uint8_t* valid_bits, int64_t valid_bits_offset,
+ typename EncodingTraits<FLBAType>::Accumulator* builder) {
+ if (builder->byte_width() != descr_->type_length()) {
+ throw ParquetException("Byte width mismatch: builder was " +
+ std::to_string(builder->byte_width()) + " but decoder was " +
+ std::to_string(descr_->type_length()));
+ }
+
+ PARQUET_THROW_NOT_OK(builder->Reserve(num_values));
+
+ auto dict_values = reinterpret_cast<const FLBA*>(dictionary_->data());
+
+ VisitNullBitmapInline(
+ valid_bits, valid_bits_offset, num_values, null_count,
+ [&]() {
+ int32_t index;
+ if (ARROW_PREDICT_FALSE(!idx_decoder_.Get(&index))) {
+ throw ParquetException("");
+ }
+ PARQUET_THROW_NOT_OK(IndexInBounds(index));
+ builder->UnsafeAppend(dict_values[index].ptr);
+ },
+ [&]() { builder->UnsafeAppendNull(); });
+
+ return num_values - null_count;
+}
+
+template <>
+int DictDecoderImpl<FLBAType>::DecodeArrow(
+ int num_values, int null_count, const uint8_t* valid_bits, int64_t valid_bits_offset,
+ typename EncodingTraits<FLBAType>::DictAccumulator* builder) {
+ auto value_type =
+ checked_cast<const ::arrow::DictionaryType&>(*builder->type()).value_type();
+ auto byte_width =
+ checked_cast<const ::arrow::FixedSizeBinaryType&>(*value_type).byte_width();
+ if (byte_width != descr_->type_length()) {
+ throw ParquetException("Byte width mismatch: builder was " +
+ std::to_string(byte_width) + " but decoder was " +
+ std::to_string(descr_->type_length()));
+ }
+
+ PARQUET_THROW_NOT_OK(builder->Reserve(num_values));
+
+ auto dict_values = reinterpret_cast<const FLBA*>(dictionary_->data());
+
+ VisitNullBitmapInline(
+ valid_bits, valid_bits_offset, num_values, null_count,
+ [&]() {
+ int32_t index;
+ if (ARROW_PREDICT_FALSE(!idx_decoder_.Get(&index))) {
+ throw ParquetException("");
+ }
+ PARQUET_THROW_NOT_OK(IndexInBounds(index));
+ PARQUET_THROW_NOT_OK(builder->Append(dict_values[index].ptr));
+ },
+ [&]() { PARQUET_THROW_NOT_OK(builder->AppendNull()); });
+
+ return num_values - null_count;
+}
+
+template <typename Type>
+int DictDecoderImpl<Type>::DecodeArrow(
+ int num_values, int null_count, const uint8_t* valid_bits, int64_t valid_bits_offset,
+ typename EncodingTraits<Type>::Accumulator* builder) {
+ PARQUET_THROW_NOT_OK(builder->Reserve(num_values));
+
+ using value_type = typename Type::c_type;
+ auto dict_values = reinterpret_cast<const value_type*>(dictionary_->data());
+
+ VisitNullBitmapInline(
+ valid_bits, valid_bits_offset, num_values, null_count,
+ [&]() {
+ int32_t index;
+ if (ARROW_PREDICT_FALSE(!idx_decoder_.Get(&index))) {
+ throw ParquetException("");
+ }
+ PARQUET_THROW_NOT_OK(IndexInBounds(index));
+ builder->UnsafeAppend(dict_values[index]);
+ },
+ [&]() { builder->UnsafeAppendNull(); });
+
+ return num_values - null_count;
+}
+
+template <typename Type>
+void DictDecoderImpl<Type>::InsertDictionary(::arrow::ArrayBuilder* builder) {
+ ParquetException::NYI("InsertDictionary only implemented for BYTE_ARRAY types");
+}
+
+template <>
+void DictDecoderImpl<ByteArrayType>::InsertDictionary(::arrow::ArrayBuilder* builder) {
+ auto binary_builder = checked_cast<::arrow::BinaryDictionary32Builder*>(builder);
+
+ // Make a BinaryArray referencing the internal dictionary data
+ auto arr = std::make_shared<::arrow::BinaryArray>(
+ dictionary_length_, byte_array_offsets_, byte_array_data_);
+ PARQUET_THROW_NOT_OK(binary_builder->InsertMemoValues(*arr));
+}
+
+class DictByteArrayDecoderImpl : public DictDecoderImpl<ByteArrayType>,
+ virtual public ByteArrayDecoder {
+ public:
+ using BASE = DictDecoderImpl<ByteArrayType>;
+ using BASE::DictDecoderImpl;
+
+ int DecodeArrow(int num_values, int null_count, const uint8_t* valid_bits,
+ int64_t valid_bits_offset,
+ ::arrow::BinaryDictionary32Builder* builder) override {
+ int result = 0;
+ if (null_count == 0) {
+ PARQUET_THROW_NOT_OK(DecodeArrowNonNull(num_values, builder, &result));
+ } else {
+ PARQUET_THROW_NOT_OK(DecodeArrow(num_values, null_count, valid_bits,
+ valid_bits_offset, builder, &result));
+ }
+ return result;
+ }
+
+ int DecodeArrow(int num_values, int null_count, const uint8_t* valid_bits,
+ int64_t valid_bits_offset,
+ typename EncodingTraits<ByteArrayType>::Accumulator* out) override {
+ int result = 0;
+ if (null_count == 0) {
+ PARQUET_THROW_NOT_OK(DecodeArrowDenseNonNull(num_values, out, &result));
+ } else {
+ PARQUET_THROW_NOT_OK(DecodeArrowDense(num_values, null_count, valid_bits,
+ valid_bits_offset, out, &result));
+ }
+ return result;
+ }
+
+ private:
+ Status DecodeArrowDense(int num_values, int null_count, const uint8_t* valid_bits,
+ int64_t valid_bits_offset,
+ typename EncodingTraits<ByteArrayType>::Accumulator* out,
+ int* out_num_values) {
+ constexpr int32_t kBufferSize = 1024;
+ int32_t indices[kBufferSize];
+
+ ArrowBinaryHelper helper(out);
+
+ ::arrow::internal::BitmapReader bit_reader(valid_bits, valid_bits_offset, num_values);
+
+ auto dict_values = reinterpret_cast<const ByteArray*>(dictionary_->data());
+ int values_decoded = 0;
+ int num_appended = 0;
+ while (num_appended < num_values) {
+ bool is_valid = bit_reader.IsSet();
+ bit_reader.Next();
+
+ if (is_valid) {
+ int32_t batch_size =
+ std::min<int32_t>(kBufferSize, num_values - num_appended - null_count);
+ int num_indices = idx_decoder_.GetBatch(indices, batch_size);
+
+ if (ARROW_PREDICT_FALSE(num_indices < 1)) {
+ return Status::Invalid("Invalid number of indices '", num_indices, "'");
+ }
+
+ int i = 0;
+ while (true) {
+ // Consume all indices
+ if (is_valid) {
+ auto idx = indices[i];
+ RETURN_NOT_OK(IndexInBounds(idx));
+ const auto& val = dict_values[idx];
+ if (ARROW_PREDICT_FALSE(!helper.CanFit(val.len))) {
+ RETURN_NOT_OK(helper.PushChunk());
+ }
+ RETURN_NOT_OK(helper.Append(val.ptr, static_cast<int32_t>(val.len)));
+ ++i;
+ ++values_decoded;
+ } else {
+ RETURN_NOT_OK(helper.AppendNull());
+ --null_count;
+ }
+ ++num_appended;
+ if (i == num_indices) {
+ // Do not advance the bit_reader if we have fulfilled the decode
+ // request
+ break;
+ }
+ is_valid = bit_reader.IsSet();
+ bit_reader.Next();
+ }
+ } else {
+ RETURN_NOT_OK(helper.AppendNull());
+ --null_count;
+ ++num_appended;
+ }
+ }
+ *out_num_values = values_decoded;
+ return Status::OK();
+ }
+
+ Status DecodeArrowDenseNonNull(int num_values,
+ typename EncodingTraits<ByteArrayType>::Accumulator* out,
+ int* out_num_values) {
+ constexpr int32_t kBufferSize = 2048;
+ int32_t indices[kBufferSize];
+ int values_decoded = 0;
+
+ ArrowBinaryHelper helper(out);
+ auto dict_values = reinterpret_cast<const ByteArray*>(dictionary_->data());
+
+ while (values_decoded < num_values) {
+ int32_t batch_size = std::min<int32_t>(kBufferSize, num_values - values_decoded);
+ int num_indices = idx_decoder_.GetBatch(indices, batch_size);
+ if (num_indices == 0) ParquetException::EofException();
+ for (int i = 0; i < num_indices; ++i) {
+ auto idx = indices[i];
+ RETURN_NOT_OK(IndexInBounds(idx));
+ const auto& val = dict_values[idx];
+ if (ARROW_PREDICT_FALSE(!helper.CanFit(val.len))) {
+ RETURN_NOT_OK(helper.PushChunk());
+ }
+ RETURN_NOT_OK(helper.Append(val.ptr, static_cast<int32_t>(val.len)));
+ }
+ values_decoded += num_indices;
+ }
+ *out_num_values = values_decoded;
+ return Status::OK();
+ }
+
+ template <typename BuilderType>
+ Status DecodeArrow(int num_values, int null_count, const uint8_t* valid_bits,
+ int64_t valid_bits_offset, BuilderType* builder,
+ int* out_num_values) {
+ constexpr int32_t kBufferSize = 1024;
+ int32_t indices[kBufferSize];
+
+ RETURN_NOT_OK(builder->Reserve(num_values));
+ ::arrow::internal::BitmapReader bit_reader(valid_bits, valid_bits_offset, num_values);
+
+ auto dict_values = reinterpret_cast<const ByteArray*>(dictionary_->data());
+
+ int values_decoded = 0;
+ int num_appended = 0;
+ while (num_appended < num_values) {
+ bool is_valid = bit_reader.IsSet();
+ bit_reader.Next();
+
+ if (is_valid) {
+ int32_t batch_size =
+ std::min<int32_t>(kBufferSize, num_values - num_appended - null_count);
+ int num_indices = idx_decoder_.GetBatch(indices, batch_size);
+
+ int i = 0;
+ while (true) {
+ // Consume all indices
+ if (is_valid) {
+ auto idx = indices[i];
+ RETURN_NOT_OK(IndexInBounds(idx));
+ const auto& val = dict_values[idx];
+ RETURN_NOT_OK(builder->Append(val.ptr, val.len));
+ ++i;
+ ++values_decoded;
+ } else {
+ RETURN_NOT_OK(builder->AppendNull());
+ --null_count;
+ }
+ ++num_appended;
+ if (i == num_indices) {
+ // Do not advance the bit_reader if we have fulfilled the decode
+ // request
+ break;
+ }
+ is_valid = bit_reader.IsSet();
+ bit_reader.Next();
+ }
+ } else {
+ RETURN_NOT_OK(builder->AppendNull());
+ --null_count;
+ ++num_appended;
+ }
+ }
+ *out_num_values = values_decoded;
+ return Status::OK();
+ }
+
+ template <typename BuilderType>
+ Status DecodeArrowNonNull(int num_values, BuilderType* builder, int* out_num_values) {
+ constexpr int32_t kBufferSize = 2048;
+ int32_t indices[kBufferSize];
+
+ RETURN_NOT_OK(builder->Reserve(num_values));
+
+ auto dict_values = reinterpret_cast<const ByteArray*>(dictionary_->data());
+
+ int values_decoded = 0;
+ while (values_decoded < num_values) {
+ int32_t batch_size = std::min<int32_t>(kBufferSize, num_values - values_decoded);
+ int num_indices = idx_decoder_.GetBatch(indices, batch_size);
+ if (num_indices == 0) ParquetException::EofException();
+ for (int i = 0; i < num_indices; ++i) {
+ auto idx = indices[i];
+ RETURN_NOT_OK(IndexInBounds(idx));
+ const auto& val = dict_values[idx];
+ RETURN_NOT_OK(builder->Append(val.ptr, val.len));
+ }
+ values_decoded += num_indices;
+ }
+ *out_num_values = values_decoded;
+ return Status::OK();
+ }
+};
+
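The dense decode loop above interleaves two streams: a validity bit is consumed for every output slot, while the RLE-packed dictionary indices are fetched in batches and consumed only on valid slots. A minimal standalone model of that interleaving, with std types standing in for the Arrow bitmap reader, RLE decoder, and builder, might look like this:

#include <cstdint>
#include <optional>
#include <string>
#include <vector>

// Sketch: 'valid' plays the role of the decoded validity bitmap and
// 'indices' the role of the already-decoded index stream. One validity
// bit is consumed per slot; one index is consumed per *valid* slot.
std::vector<std::optional<std::string>> DecodeDictSpaced(
    const std::vector<std::string>& dict, const std::vector<int32_t>& indices,
    const std::vector<bool>& valid) {
  std::vector<std::optional<std::string>> out;
  size_t next = 0;
  for (bool is_valid : valid) {
    if (is_valid) {
      out.emplace_back(dict.at(indices.at(next++)));  // at() mirrors IndexInBounds
    } else {
      out.emplace_back(std::nullopt);  // AppendNull in the real decoder
    }
  }
  return out;
}
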
+// ----------------------------------------------------------------------
+// DeltaBitPackDecoder
+
+template <typename DType>
+class DeltaBitPackDecoder : public DecoderImpl, virtual public TypedDecoder<DType> {
+ public:
+ typedef typename DType::c_type T;
+
+ explicit DeltaBitPackDecoder(const ColumnDescriptor* descr,
+ MemoryPool* pool = ::arrow::default_memory_pool())
+ : DecoderImpl(descr, Encoding::DELTA_BINARY_PACKED), pool_(pool) {
+ if (DType::type_num != Type::INT32 && DType::type_num != Type::INT64) {
+ throw ParquetException("Delta bit pack encoding should only be for integer data.");
+ }
+ }
+
+ void SetData(int num_values, const uint8_t* data, int len) override {
+ this->num_values_ = num_values;
+ decoder_ = ::arrow::BitUtil::BitReader(data, len);
+ values_current_block_ = 0;
+ values_current_mini_block_ = 0;
+ }
+
+ int Decode(T* buffer, int max_values) override {
+ return GetInternal(buffer, max_values);
+ }
+
+ int DecodeArrow(int num_values, int null_count, const uint8_t* valid_bits,
+ int64_t valid_bits_offset,
+ typename EncodingTraits<DType>::Accumulator* out) override {
+ if (null_count != 0) {
+ ParquetException::NYI("Delta bit pack DecodeArrow with null slots");
+ }
+ std::vector<T> values(num_values);
+ GetInternal(values.data(), num_values);
+ PARQUET_THROW_NOT_OK(out->AppendValues(values));
+ return num_values;
+ }
+
+ int DecodeArrow(int num_values, int null_count, const uint8_t* valid_bits,
+ int64_t valid_bits_offset,
+ typename EncodingTraits<DType>::DictAccumulator* out) override {
+ if (null_count != 0) {
+ ParquetException::NYI("Delta bit pack DecodeArrow with null slots");
+ }
+ std::vector<T> values(num_values);
+ GetInternal(values.data(), num_values);
+ PARQUET_THROW_NOT_OK(out->Reserve(num_values));
+ for (T value : values) {
+ PARQUET_THROW_NOT_OK(out->Append(value));
+ }
+ return num_values;
+ }
+
+ private:
+ void InitBlock() {
+ // The number of values per block.
+ uint32_t block_size;
+ if (!decoder_.GetVlqInt(&block_size)) ParquetException::EofException();
+ if (!decoder_.GetVlqInt(&num_mini_blocks_)) ParquetException::EofException();
+ if (!decoder_.GetVlqInt(&values_current_block_)) {
+ ParquetException::EofException();
+ }
+ if (!decoder_.GetZigZagVlqInt(&last_value_)) ParquetException::EofException();
+
+ delta_bit_widths_ = AllocateBuffer(pool_, num_mini_blocks_);
+ uint8_t* bit_width_data = delta_bit_widths_->mutable_data();
+
+ if (!decoder_.GetZigZagVlqInt(&min_delta_)) ParquetException::EofException();
+ for (uint32_t i = 0; i < num_mini_blocks_; ++i) {
+ if (!decoder_.GetAligned<uint8_t>(1, bit_width_data + i)) {
+ ParquetException::EofException();
+ }
+ }
+ values_per_mini_block_ = block_size / num_mini_blocks_;
+ mini_block_idx_ = 0;
+ delta_bit_width_ = bit_width_data[0];
+ values_current_mini_block_ = values_per_mini_block_;
+ }
+
+ template <typename T>
+ int GetInternal(T* buffer, int max_values) {
+ max_values = std::min(max_values, this->num_values_);
+ const uint8_t* bit_width_data = delta_bit_widths_->data();
+ for (int i = 0; i < max_values; ++i) {
+ if (ARROW_PREDICT_FALSE(values_current_mini_block_ == 0)) {
+ ++mini_block_idx_;
+ if (mini_block_idx_ < static_cast<size_t>(delta_bit_widths_->size())) {
+ delta_bit_width_ = bit_width_data[mini_block_idx_];
+ values_current_mini_block_ = values_per_mini_block_;
+ } else {
+ InitBlock();
+ buffer[i] = last_value_;
+ continue;
+ }
+ }
+
+ // TODO: the key to this algorithm is to decode the entire miniblock at once.
+ int64_t delta;
+ if (!decoder_.GetValue(delta_bit_width_, &delta)) ParquetException::EofException();
+ delta += min_delta_;
+ last_value_ += static_cast<int32_t>(delta);
+ buffer[i] = last_value_;
+ --values_current_mini_block_;
+ }
+ this->num_values_ -= max_values;
+ return max_values;
+ }
+
+ MemoryPool* pool_;
+ ::arrow::BitUtil::BitReader decoder_;
+ uint32_t values_current_block_;
+ uint32_t num_mini_blocks_;
+ uint64_t values_per_mini_block_;
+ uint64_t values_current_mini_block_;
+
+ int32_t min_delta_;
+ size_t mini_block_idx_;
+ std::shared_ptr<ResizableBuffer> delta_bit_widths_;
+ int delta_bit_width_;
+
+ int32_t last_value_;
+};
+
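InitBlock above reads the block header (block size, mini-block count, remaining value count, and a zigzag-coded seed value), then min_delta and one bit width per mini block; GetInternal then reconstructs each value as last_value += min_delta + packed_delta. A self-contained sketch of that recurrence, with the bit-unpacking replaced by a vector of already-unpacked deltas:

#include <cstdint>
#include <vector>

// Sketch of the DELTA_BINARY_PACKED recurrence. 'packed_deltas' stands in
// for the values read by decoder_.GetValue(delta_bit_width_, ...); the
// stored deltas are non-negative because min_delta was subtracted out at
// encoding time.
std::vector<int32_t> ReconstructBlock(int32_t first_value, int32_t min_delta,
                                      const std::vector<uint32_t>& packed_deltas) {
  std::vector<int32_t> values = {first_value};
  int32_t last_value = first_value;
  for (uint32_t packed : packed_deltas) {
    last_value += min_delta + static_cast<int32_t>(packed);
    values.push_back(last_value);
  }
  return values;
}
// e.g. first_value=7, min_delta=-2, packed_deltas={0,3,0} yields {7, 5, 6, 4}.
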
+// ----------------------------------------------------------------------
+// DELTA_LENGTH_BYTE_ARRAY
+
+class DeltaLengthByteArrayDecoder : public DecoderImpl,
+ virtual public TypedDecoder<ByteArrayType> {
+ public:
+ explicit DeltaLengthByteArrayDecoder(const ColumnDescriptor* descr,
+ MemoryPool* pool = ::arrow::default_memory_pool())
+ : DecoderImpl(descr, Encoding::DELTA_LENGTH_BYTE_ARRAY),
+ len_decoder_(nullptr, pool),
+ pool_(pool) {}
+
+ void SetData(int num_values, const uint8_t* data, int len) override {
+ num_values_ = num_values;
+ if (len == 0) return;
+ int total_lengths_len = ::arrow::util::SafeLoadAs<int32_t>(data);
+ data += 4;
+ this->len_decoder_.SetData(num_values, data, total_lengths_len);
+ data_ = data + total_lengths_len;
+ this->len_ = len - 4 - total_lengths_len;
+ }
+
+ int Decode(ByteArray* buffer, int max_values) override {
+ using VectorT = ArrowPoolVector<int>;
+ max_values = std::min(max_values, num_values_);
+ VectorT lengths(max_values, 0, ::arrow::stl::allocator<int>(pool_));
+ len_decoder_.Decode(lengths.data(), max_values);
+ for (int i = 0; i < max_values; ++i) {
+ buffer[i].len = lengths[i];
+ buffer[i].ptr = data_;
+ this->data_ += lengths[i];
+ this->len_ -= lengths[i];
+ }
+ this->num_values_ -= max_values;
+ return max_values;
+ }
+
+ int DecodeArrow(int num_values, int null_count, const uint8_t* valid_bits,
+ int64_t valid_bits_offset,
+ typename EncodingTraits<ByteArrayType>::Accumulator* out) override {
+ ParquetException::NYI("DecodeArrow for DeltaLengthByteArrayDecoder");
+ }
+
+ int DecodeArrow(int num_values, int null_count, const uint8_t* valid_bits,
+ int64_t valid_bits_offset,
+ typename EncodingTraits<ByteArrayType>::DictAccumulator* out) override {
+ ParquetException::NYI("DecodeArrow for DeltaLengthByteArrayDecoder");
+ }
+
+ private:
+ DeltaBitPackDecoder<Int32Type> len_decoder_;
+ ::arrow::MemoryPool* pool_;
+};
+
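SetData above carves the page into a 4-byte length prefix, a DELTA_BINARY_PACKED block holding the value lengths, and the concatenated value bytes; Decode then slices the byte region by the decoded lengths. A standalone sketch of that final slicing step, assuming the lengths have already been decoded:

#include <cstdint>
#include <string>
#include <vector>

// Sketch: slice a concatenated byte region into strings using
// already-decoded lengths (the job of len_decoder_ above). 'data' must
// hold at least the sum of 'lengths' bytes.
std::vector<std::string> SliceByLengths(const uint8_t* data,
                                        const std::vector<int>& lengths) {
  std::vector<std::string> out;
  for (int len : lengths) {
    out.emplace_back(reinterpret_cast<const char*>(data), len);
    data += len;  // advance past this value, like data_ += lengths[i]
  }
  return out;
}
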
+// ----------------------------------------------------------------------
+// DELTA_BYTE_ARRAY
+
+class DeltaByteArrayDecoder : public DecoderImpl,
+ virtual public TypedDecoder<ByteArrayType> {
+ public:
+ explicit DeltaByteArrayDecoder(const ColumnDescriptor* descr,
+ MemoryPool* pool = ::arrow::default_memory_pool())
+ : DecoderImpl(descr, Encoding::DELTA_BYTE_ARRAY),
+ prefix_len_decoder_(nullptr, pool),
+ suffix_decoder_(nullptr, pool),
+ last_value_(0, nullptr) {}
+
+ virtual void SetData(int num_values, const uint8_t* data, int len) {
+ num_values_ = num_values;
+ if (len == 0) return;
+ int prefix_len_length = ::arrow::util::SafeLoadAs<int32_t>(data);
+ data += 4;
+ len -= 4;
+ prefix_len_decoder_.SetData(num_values, data, prefix_len_length);
+ data += prefix_len_length;
+ len -= prefix_len_length;
+ suffix_decoder_.SetData(num_values, data, len);
+ }
+
+ // TODO: this doesn't work and requires memory management. We need to allocate
+ // new strings to store the results.
+ virtual int Decode(ByteArray* buffer, int max_values) {
+ max_values = std::min(max_values, this->num_values_);
+ for (int i = 0; i < max_values; ++i) {
+ int prefix_len = 0;
+ prefix_len_decoder_.Decode(&prefix_len, 1);
+ ByteArray suffix = {0, nullptr};
+ suffix_decoder_.Decode(&suffix, 1);
+ buffer[i].len = prefix_len + suffix.len;
+
+ uint8_t* result = reinterpret_cast<uint8_t*>(malloc(buffer[i].len));
+ memcpy(result, last_value_.ptr, prefix_len);
+ memcpy(result + prefix_len, suffix.ptr, suffix.len);
+
+ buffer[i].ptr = result;
+ last_value_ = buffer[i];
+ }
+ this->num_values_ -= max_values;
+ return max_values;
+ }
+
+ private:
+ DeltaBitPackDecoder<Int32Type> prefix_len_decoder_;
+ DeltaLengthByteArrayDecoder suffix_decoder_;
+ ByteArray last_value_;
+};
+
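The TODO above is accurate: each decoded value is malloc'ed and never freed, and ByteArray carries only a borrowed pointer. A standalone sketch of the same prefix+suffix reconstruction with owned std::string storage shows the intended algorithm without the leak:

#include <string>
#include <vector>

// DELTA_BYTE_ARRAY sketch: each value shares 'prefix_len' leading bytes
// with the previous value and appends a fresh suffix.
std::vector<std::string> ReconstructDeltaByteArray(
    const std::vector<int>& prefix_lens, const std::vector<std::string>& suffixes) {
  std::vector<std::string> out;
  std::string last;
  for (size_t i = 0; i < prefix_lens.size(); ++i) {
    std::string value = last.substr(0, prefix_lens[i]) + suffixes[i];
    out.push_back(value);
    last = std::move(value);
  }
  return out;
}
// e.g. prefix_lens={0,3,4}, suffixes={"parquet","uet2",""} yields
// {"parquet", "paruet2", "paru"}.
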
+// ----------------------------------------------------------------------
+// BYTE_STREAM_SPLIT
+
+template <typename DType>
+class ByteStreamSplitDecoder : public DecoderImpl, virtual public TypedDecoder<DType> {
+ public:
+ using T = typename DType::c_type;
+ explicit ByteStreamSplitDecoder(const ColumnDescriptor* descr);
+
+ int Decode(T* buffer, int max_values) override;
+
+ int DecodeArrow(int num_values, int null_count, const uint8_t* valid_bits,
+ int64_t valid_bits_offset,
+ typename EncodingTraits<DType>::Accumulator* builder) override;
+
+ int DecodeArrow(int num_values, int null_count, const uint8_t* valid_bits,
+ int64_t valid_bits_offset,
+ typename EncodingTraits<DType>::DictAccumulator* builder) override;
+
+ void SetData(int num_values, const uint8_t* data, int len) override;
+
+ T* EnsureDecodeBuffer(int64_t min_values) {
+ const int64_t size = sizeof(T) * min_values;
+ if (!decode_buffer_ || decode_buffer_->size() < size) {
+ PARQUET_ASSIGN_OR_THROW(decode_buffer_, ::arrow::AllocateBuffer(size));
+ }
+ return reinterpret_cast<T*>(decode_buffer_->mutable_data());
+ }
+
+ private:
+ int num_values_in_buffer_{0};
+ std::shared_ptr<Buffer> decode_buffer_;
+
+ static constexpr size_t kNumStreams = sizeof(T);
+};
+
+template <typename DType>
+ByteStreamSplitDecoder<DType>::ByteStreamSplitDecoder(const ColumnDescriptor* descr)
+ : DecoderImpl(descr, Encoding::BYTE_STREAM_SPLIT) {}
+
+template <typename DType>
+void ByteStreamSplitDecoder<DType>::SetData(int num_values, const uint8_t* data,
+ int len) {
+ DecoderImpl::SetData(num_values, data, len);
+ if (num_values * static_cast<int64_t>(sizeof(T)) > len) {
+ throw ParquetException("Data size too small for number of values (corrupted file?)");
+ }
+ num_values_in_buffer_ = num_values;
+}
+
+template <typename DType>
+int ByteStreamSplitDecoder<DType>::Decode(T* buffer, int max_values) {
+ const int values_to_decode = std::min(num_values_, max_values);
+ const int num_decoded_previously = num_values_in_buffer_ - num_values_;
+ const uint8_t* data = data_ + num_decoded_previously;
+
+ ::arrow::util::internal::ByteStreamSplitDecode<T>(data, values_to_decode,
+ num_values_in_buffer_, buffer);
+ num_values_ -= values_to_decode;
+ len_ -= sizeof(T) * values_to_decode;
+ return values_to_decode;
+}
+
+template <typename DType>
+int ByteStreamSplitDecoder<DType>::DecodeArrow(
+ int num_values, int null_count, const uint8_t* valid_bits, int64_t valid_bits_offset,
+ typename EncodingTraits<DType>::Accumulator* builder) {
+ constexpr int value_size = static_cast<int>(kNumStreams);
+ int values_decoded = num_values - null_count;
+ if (ARROW_PREDICT_FALSE(len_ < value_size * values_decoded)) {
+ ParquetException::EofException();
+ }
+
+ PARQUET_THROW_NOT_OK(builder->Reserve(num_values));
+
+ const int num_decoded_previously = num_values_in_buffer_ - num_values_;
+ const uint8_t* data = data_ + num_decoded_previously;
+ int offset = 0;
+
+#if defined(ARROW_HAVE_SIMD_SPLIT)
+ // Use fast decoding into intermediate buffer. This will also decode
+ // some null values, but it's fast enough that we don't care.
+ T* decode_out = EnsureDecodeBuffer(values_decoded);
+ ::arrow::util::internal::ByteStreamSplitDecode<T>(data, values_decoded,
+ num_values_in_buffer_, decode_out);
+
+ // XXX If null_count is 0, we could even append in bulk or decode directly into
+ // builder
+ VisitNullBitmapInline(
+ valid_bits, valid_bits_offset, num_values, null_count,
+ [&]() {
+ builder->UnsafeAppend(decode_out[offset]);
+ ++offset;
+ },
+ [&]() { builder->UnsafeAppendNull(); });
+
+#else
+ VisitNullBitmapInline(
+ valid_bits, valid_bits_offset, num_values, null_count,
+ [&]() {
+ uint8_t gathered_byte_data[kNumStreams];
+ for (size_t b = 0; b < kNumStreams; ++b) {
+ const size_t byte_index = b * num_values_in_buffer_ + offset;
+ gathered_byte_data[b] = data[byte_index];
+ }
+ builder->UnsafeAppend(::arrow::util::SafeLoadAs<T>(&gathered_byte_data[0]));
+ ++offset;
+ },
+ [&]() { builder->UnsafeAppendNull(); });
+#endif
+
+ num_values_ -= values_decoded;
+ len_ -= sizeof(T) * values_decoded;
+ return values_decoded;
+}
+
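In the scalar (#else) path above, byte b of value i lives at offset b * num_values_in_buffer_ + i; that is, the page stores sizeof(T) contiguous byte streams of num_values bytes each. A standalone gather sketch for float (kNumStreams == 4):

#include <cstdint>
#include <cstring>
#include <vector>

// BYTE_STREAM_SPLIT sketch: reassemble floats whose bytes were scattered
// into sizeof(float) streams of 'num_values' bytes each.
std::vector<float> GatherByteStreamSplit(const uint8_t* data, int num_values) {
  std::vector<float> out(num_values);
  for (int i = 0; i < num_values; ++i) {
    uint8_t gathered[sizeof(float)];
    for (size_t b = 0; b < sizeof(float); ++b) {
      gathered[b] = data[b * num_values + i];  // byte b of value i
    }
    std::memcpy(&out[i], gathered, sizeof(float));  // like SafeLoadAs<T>
  }
  return out;
}
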
+template <typename DType>
+int ByteStreamSplitDecoder<DType>::DecodeArrow(
+ int num_values, int null_count, const uint8_t* valid_bits, int64_t valid_bits_offset,
+ typename EncodingTraits<DType>::DictAccumulator* builder) {
+ ParquetException::NYI("DecodeArrow for ByteStreamSplitDecoder");
+}
+
+} // namespace
+
+// ----------------------------------------------------------------------
+// Encoder and decoder factory functions
+
+std::unique_ptr<Encoder> MakeEncoder(Type::type type_num, Encoding::type encoding,
+ bool use_dictionary, const ColumnDescriptor* descr,
+ MemoryPool* pool) {
+ if (use_dictionary) {
+ switch (type_num) {
+ case Type::INT32:
+ return std::unique_ptr<Encoder>(new DictEncoderImpl<Int32Type>(descr, pool));
+ case Type::INT64:
+ return std::unique_ptr<Encoder>(new DictEncoderImpl<Int64Type>(descr, pool));
+ case Type::INT96:
+ return std::unique_ptr<Encoder>(new DictEncoderImpl<Int96Type>(descr, pool));
+ case Type::FLOAT:
+ return std::unique_ptr<Encoder>(new DictEncoderImpl<FloatType>(descr, pool));
+ case Type::DOUBLE:
+ return std::unique_ptr<Encoder>(new DictEncoderImpl<DoubleType>(descr, pool));
+ case Type::BYTE_ARRAY:
+ return std::unique_ptr<Encoder>(new DictEncoderImpl<ByteArrayType>(descr, pool));
+ case Type::FIXED_LEN_BYTE_ARRAY:
+ return std::unique_ptr<Encoder>(new DictEncoderImpl<FLBAType>(descr, pool));
+ default:
+ DCHECK(false) << "Encoder not implemented";
+ break;
+ }
+ } else if (encoding == Encoding::PLAIN) {
+ switch (type_num) {
+ case Type::BOOLEAN:
+ return std::unique_ptr<Encoder>(new PlainEncoder<BooleanType>(descr, pool));
+ case Type::INT32:
+ return std::unique_ptr<Encoder>(new PlainEncoder<Int32Type>(descr, pool));
+ case Type::INT64:
+ return std::unique_ptr<Encoder>(new PlainEncoder<Int64Type>(descr, pool));
+ case Type::INT96:
+ return std::unique_ptr<Encoder>(new PlainEncoder<Int96Type>(descr, pool));
+ case Type::FLOAT:
+ return std::unique_ptr<Encoder>(new PlainEncoder<FloatType>(descr, pool));
+ case Type::DOUBLE:
+ return std::unique_ptr<Encoder>(new PlainEncoder<DoubleType>(descr, pool));
+ case Type::BYTE_ARRAY:
+ return std::unique_ptr<Encoder>(new PlainEncoder<ByteArrayType>(descr, pool));
+ case Type::FIXED_LEN_BYTE_ARRAY:
+ return std::unique_ptr<Encoder>(new PlainEncoder<FLBAType>(descr, pool));
+ default:
+ DCHECK(false) << "Encoder not implemented";
+ break;
+ }
+ } else if (encoding == Encoding::BYTE_STREAM_SPLIT) {
+ switch (type_num) {
+ case Type::FLOAT:
+ return std::unique_ptr<Encoder>(
+ new ByteStreamSplitEncoder<FloatType>(descr, pool));
+ case Type::DOUBLE:
+ return std::unique_ptr<Encoder>(
+ new ByteStreamSplitEncoder<DoubleType>(descr, pool));
+ default:
+ throw ParquetException("BYTE_STREAM_SPLIT only supports FLOAT and DOUBLE");
+ break;
+ }
+ } else {
+ ParquetException::NYI("Selected encoding is not supported");
+ }
+ DCHECK(false) << "Should not be able to reach this code";
+ return nullptr;
+}
+
+std::unique_ptr<Decoder> MakeDecoder(Type::type type_num, Encoding::type encoding,
+ const ColumnDescriptor* descr) {
+ if (encoding == Encoding::PLAIN) {
+ switch (type_num) {
+ case Type::BOOLEAN:
+ return std::unique_ptr<Decoder>(new PlainBooleanDecoder(descr));
+ case Type::INT32:
+ return std::unique_ptr<Decoder>(new PlainDecoder<Int32Type>(descr));
+ case Type::INT64:
+ return std::unique_ptr<Decoder>(new PlainDecoder<Int64Type>(descr));
+ case Type::INT96:
+ return std::unique_ptr<Decoder>(new PlainDecoder<Int96Type>(descr));
+ case Type::FLOAT:
+ return std::unique_ptr<Decoder>(new PlainDecoder<FloatType>(descr));
+ case Type::DOUBLE:
+ return std::unique_ptr<Decoder>(new PlainDecoder<DoubleType>(descr));
+ case Type::BYTE_ARRAY:
+ return std::unique_ptr<Decoder>(new PlainByteArrayDecoder(descr));
+ case Type::FIXED_LEN_BYTE_ARRAY:
+ return std::unique_ptr<Decoder>(new PlainFLBADecoder(descr));
+ default:
+ break;
+ }
+ } else if (encoding == Encoding::BYTE_STREAM_SPLIT) {
+ switch (type_num) {
+ case Type::FLOAT:
+ return std::unique_ptr<Decoder>(new ByteStreamSplitDecoder<FloatType>(descr));
+ case Type::DOUBLE:
+ return std::unique_ptr<Decoder>(new ByteStreamSplitDecoder<DoubleType>(descr));
+ default:
+ throw ParquetException("BYTE_STREAM_SPLIT only supports FLOAT and DOUBLE");
+ break;
+ }
+ } else {
+ ParquetException::NYI("Selected encoding is not supported");
+ }
+ DCHECK(false) << "Should not be able to reach this code";
+ return nullptr;
+}
+
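A hedged round-trip sketch using the factories above together with the typed wrappers declared in encoding.h; PLAIN INT32 is assumed not to need a ColumnDescriptor, so the defaulted NULLPTR descr is used:

#include <cstdint>
#include "parquet/encoding.h"

void PlainInt32RoundTrip() {
  auto encoder =
      parquet::MakeTypedEncoder<parquet::Int32Type>(parquet::Encoding::PLAIN);
  const int32_t values[] = {1, 2, 3, 4};
  encoder->Put(values, 4);
  auto buffer = encoder->FlushValues();  // std::shared_ptr<Buffer>

  auto decoder =
      parquet::MakeTypedDecoder<parquet::Int32Type>(parquet::Encoding::PLAIN);
  decoder->SetData(4, buffer->data(), static_cast<int>(buffer->size()));
  int32_t out[4];
  int decoded = decoder->Decode(out, 4);  // decoded == 4; out == {1, 2, 3, 4}
  (void)decoded;
}
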
+namespace detail {
+std::unique_ptr<Decoder> MakeDictDecoder(Type::type type_num,
+ const ColumnDescriptor* descr,
+ MemoryPool* pool) {
+ switch (type_num) {
+ case Type::BOOLEAN:
+ ParquetException::NYI("Dictionary encoding not implemented for boolean type");
+ case Type::INT32:
+ return std::unique_ptr<Decoder>(new DictDecoderImpl<Int32Type>(descr, pool));
+ case Type::INT64:
+ return std::unique_ptr<Decoder>(new DictDecoderImpl<Int64Type>(descr, pool));
+ case Type::INT96:
+ return std::unique_ptr<Decoder>(new DictDecoderImpl<Int96Type>(descr, pool));
+ case Type::FLOAT:
+ return std::unique_ptr<Decoder>(new DictDecoderImpl<FloatType>(descr, pool));
+ case Type::DOUBLE:
+ return std::unique_ptr<Decoder>(new DictDecoderImpl<DoubleType>(descr, pool));
+ case Type::BYTE_ARRAY:
+ return std::unique_ptr<Decoder>(new DictByteArrayDecoderImpl(descr, pool));
+ case Type::FIXED_LEN_BYTE_ARRAY:
+ return std::unique_ptr<Decoder>(new DictDecoderImpl<FLBAType>(descr, pool));
+ default:
+ break;
+ }
+ DCHECK(false) << "Should not be able to reach this code";
+ return nullptr;
+}
+
+} // namespace detail
+} // namespace parquet
diff --git a/contrib/libs/apache/arrow/cpp/src/parquet/encoding.h b/contrib/libs/apache/arrow/cpp/src/parquet/encoding.h
index bf5446e0174..b9ca7a7ee68 100644
--- a/contrib/libs/apache/arrow/cpp/src/parquet/encoding.h
+++ b/contrib/libs/apache/arrow/cpp/src/parquet/encoding.h
@@ -1,460 +1,460 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-#pragma once
-
-#include <cstdint>
-#include <cstring>
-#include <memory>
-#include <vector>
-
-#include "arrow/util/spaced.h"
-
-#include "parquet/exception.h"
-#include "parquet/platform.h"
-#include "parquet/types.h"
-
-namespace arrow {
-
-class Array;
-class ArrayBuilder;
-class BinaryArray;
-class BinaryBuilder;
-class BooleanBuilder;
-class Int32Type;
-class Int64Type;
-class FloatType;
-class DoubleType;
-class FixedSizeBinaryType;
-template <typename T>
-class NumericBuilder;
-class FixedSizeBinaryBuilder;
-template <typename T>
-class Dictionary32Builder;
-
-} // namespace arrow
-
-namespace parquet {
-
-template <typename DType>
-class TypedEncoder;
-
-using BooleanEncoder = TypedEncoder<BooleanType>;
-using Int32Encoder = TypedEncoder<Int32Type>;
-using Int64Encoder = TypedEncoder<Int64Type>;
-using Int96Encoder = TypedEncoder<Int96Type>;
-using FloatEncoder = TypedEncoder<FloatType>;
-using DoubleEncoder = TypedEncoder<DoubleType>;
-using ByteArrayEncoder = TypedEncoder<ByteArrayType>;
-using FLBAEncoder = TypedEncoder<FLBAType>;
-
-template <typename DType>
-class TypedDecoder;
-
-class BooleanDecoder;
-using Int32Decoder = TypedDecoder<Int32Type>;
-using Int64Decoder = TypedDecoder<Int64Type>;
-using Int96Decoder = TypedDecoder<Int96Type>;
-using FloatDecoder = TypedDecoder<FloatType>;
-using DoubleDecoder = TypedDecoder<DoubleType>;
-using ByteArrayDecoder = TypedDecoder<ByteArrayType>;
-class FLBADecoder;
-
-template <typename T>
-struct EncodingTraits;
-
-template <>
-struct EncodingTraits<BooleanType> {
- using Encoder = BooleanEncoder;
- using Decoder = BooleanDecoder;
-
- using ArrowType = ::arrow::BooleanType;
- using Accumulator = ::arrow::BooleanBuilder;
- struct DictAccumulator {};
-};
-
-template <>
-struct EncodingTraits<Int32Type> {
- using Encoder = Int32Encoder;
- using Decoder = Int32Decoder;
-
- using ArrowType = ::arrow::Int32Type;
- using Accumulator = ::arrow::NumericBuilder<::arrow::Int32Type>;
- using DictAccumulator = ::arrow::Dictionary32Builder<::arrow::Int32Type>;
-};
-
-template <>
-struct EncodingTraits<Int64Type> {
- using Encoder = Int64Encoder;
- using Decoder = Int64Decoder;
-
- using ArrowType = ::arrow::Int64Type;
- using Accumulator = ::arrow::NumericBuilder<::arrow::Int64Type>;
- using DictAccumulator = ::arrow::Dictionary32Builder<::arrow::Int64Type>;
-};
-
-template <>
-struct EncodingTraits<Int96Type> {
- using Encoder = Int96Encoder;
- using Decoder = Int96Decoder;
-
- struct Accumulator {};
- struct DictAccumulator {};
-};
-
-template <>
-struct EncodingTraits<FloatType> {
- using Encoder = FloatEncoder;
- using Decoder = FloatDecoder;
-
- using ArrowType = ::arrow::FloatType;
- using Accumulator = ::arrow::NumericBuilder<::arrow::FloatType>;
- using DictAccumulator = ::arrow::Dictionary32Builder<::arrow::FloatType>;
-};
-
-template <>
-struct EncodingTraits<DoubleType> {
- using Encoder = DoubleEncoder;
- using Decoder = DoubleDecoder;
-
- using ArrowType = ::arrow::DoubleType;
- using Accumulator = ::arrow::NumericBuilder<::arrow::DoubleType>;
- using DictAccumulator = ::arrow::Dictionary32Builder<::arrow::DoubleType>;
-};
-
-template <>
-struct EncodingTraits<ByteArrayType> {
- using Encoder = ByteArrayEncoder;
- using Decoder = ByteArrayDecoder;
-
- /// \brief Internal helper class for decoding BYTE_ARRAY data where we can
- /// overflow the capacity of a single arrow::BinaryArray
- struct Accumulator {
- std::unique_ptr<::arrow::BinaryBuilder> builder;
- std::vector<std::shared_ptr<::arrow::Array>> chunks;
- };
- using ArrowType = ::arrow::BinaryType;
- using DictAccumulator = ::arrow::Dictionary32Builder<::arrow::BinaryType>;
-};
-
-template <>
-struct EncodingTraits<FLBAType> {
- using Encoder = FLBAEncoder;
- using Decoder = FLBADecoder;
-
- using ArrowType = ::arrow::FixedSizeBinaryType;
- using Accumulator = ::arrow::FixedSizeBinaryBuilder;
- using DictAccumulator = ::arrow::Dictionary32Builder<::arrow::FixedSizeBinaryType>;
-};
-
-class ColumnDescriptor;
-
-// Untyped base for all encoders
-class Encoder {
- public:
- virtual ~Encoder() = default;
-
- virtual int64_t EstimatedDataEncodedSize() = 0;
- virtual std::shared_ptr<Buffer> FlushValues() = 0;
- virtual Encoding::type encoding() const = 0;
-
- virtual void Put(const ::arrow::Array& values) = 0;
-
- virtual MemoryPool* memory_pool() const = 0;
-};
-
-// Base class for value encoders. Since encoders may or may not have state (e.g.,
-// dictionary encoding) we use a class instance to maintain any state.
-//
-// Encode interfaces are internal, subject to change without deprecation.
-template <typename DType>
-class TypedEncoder : virtual public Encoder {
- public:
- typedef typename DType::c_type T;
-
- using Encoder::Put;
-
- virtual void Put(const T* src, int num_values) = 0;
-
- virtual void Put(const std::vector<T>& src, int num_values = -1);
-
- virtual void PutSpaced(const T* src, int num_values, const uint8_t* valid_bits,
- int64_t valid_bits_offset) = 0;
-};
-
-template <typename DType>
-void TypedEncoder<DType>::Put(const std::vector<T>& src, int num_values) {
- if (num_values == -1) {
- num_values = static_cast<int>(src.size());
- }
- Put(src.data(), num_values);
-}
-
-template <>
-inline void TypedEncoder<BooleanType>::Put(const std::vector<bool>& src, int num_values) {
- // NOTE(wesm): This stub is here only to satisfy the compiler; it is
- // overridden later with the actual implementation
-}
-
-// Base class for dictionary encoders
-template <typename DType>
-class DictEncoder : virtual public TypedEncoder<DType> {
- public:
- /// Writes out any buffered indices to buffer preceded by the bit width of this data.
- /// Returns the number of bytes written.
- /// If the supplied buffer is not big enough, returns -1.
- /// buffer must be preallocated with buffer_len bytes. Use EstimatedDataEncodedSize()
- /// to size buffer.
- virtual int WriteIndices(uint8_t* buffer, int buffer_len) = 0;
-
- virtual int dict_encoded_size() = 0;
- // virtual int dict_encoded_size() { return dict_encoded_size_; }
-
- virtual int bit_width() const = 0;
-
- /// Writes out the encoded dictionary to buffer. buffer must be preallocated to
- /// dict_encoded_size() bytes.
- virtual void WriteDict(uint8_t* buffer) = 0;
-
- virtual int num_entries() const = 0;
-
- /// \brief EXPERIMENTAL: Append dictionary indices into the encoder. It is
- /// assumed (without any bounds-checking) that the indices reference
- /// pre-existing dictionary values
- /// \param[in] indices the dictionary index values. Only Int32Array currently
- /// supported
- virtual void PutIndices(const ::arrow::Array& indices) = 0;
-
- /// \brief EXPERIMENTAL: Append dictionary into encoder, inserting indices
- /// separately. Currently throws an exception if the current dictionary memo is
- /// non-empty
- /// \param[in] values the dictionary values. Only valid for certain
- /// Parquet/Arrow type combinations, like BYTE_ARRAY/BinaryArray
- virtual void PutDictionary(const ::arrow::Array& values) = 0;
-};
-
-// ----------------------------------------------------------------------
-// Value decoding
-
-class Decoder {
- public:
- virtual ~Decoder() = default;
-
- // Sets the data for a new page. This will be called multiple times on the same
- // decoder and should reset all internal state.
- virtual void SetData(int num_values, const uint8_t* data, int len) = 0;
-
- // Returns the number of values left (for the last call to SetData()). This is
- // the number of values left in this page.
- virtual int values_left() const = 0;
- virtual Encoding::type encoding() const = 0;
-};
-
-template <typename DType>
-class TypedDecoder : virtual public Decoder {
- public:
- using T = typename DType::c_type;
-
- /// \brief Decode values into a buffer
- ///
- /// Subclasses may override the more specialized Decode methods below.
- ///
- /// \param[in] buffer destination for decoded values
- /// \param[in] max_values maximum number of values to decode
- /// \return The number of values decoded. Should be identical to max_values except
- /// at the end of the current data page.
- virtual int Decode(T* buffer, int max_values) = 0;
-
- /// \brief Decode the values in this data page but leave spaces for null entries.
- ///
- /// \param[in] buffer destination for decoded values
- /// \param[in] num_values size of the def_levels and buffer arrays including the number
- /// of null slots
- /// \param[in] null_count number of null slots
- /// \param[in] valid_bits bitmap data indicating position of valid slots
- /// \param[in] valid_bits_offset offset into valid_bits
- /// \return The number of values decoded, including nulls.
- virtual int DecodeSpaced(T* buffer, int num_values, int null_count,
- const uint8_t* valid_bits, int64_t valid_bits_offset) {
- if (null_count > 0) {
- int values_to_read = num_values - null_count;
- int values_read = Decode(buffer, values_to_read);
- if (values_read != values_to_read) {
- throw ParquetException("Number of values / definition_levels read did not match");
- }
-
- return ::arrow::util::internal::SpacedExpand<T>(buffer, num_values, null_count,
- valid_bits, valid_bits_offset);
- } else {
- return Decode(buffer, num_values);
- }
- }
-
- /// \brief Decode into an ArrayBuilder or other accumulator
- ///
- /// This function assumes the definition levels were already decoded
- /// as a validity bitmap in the given `valid_bits`. `null_count`
- /// is the number of 0s in `valid_bits`.
- /// As a space optimization, it is allowed for `valid_bits` to be null
- /// if `null_count` is zero.
- ///
- /// \return number of values decoded
- virtual int DecodeArrow(int num_values, int null_count, const uint8_t* valid_bits,
- int64_t valid_bits_offset,
- typename EncodingTraits<DType>::Accumulator* out) = 0;
-
- /// \brief Decode into an ArrayBuilder or other accumulator ignoring nulls
- ///
- /// \return number of values decoded
- int DecodeArrowNonNull(int num_values,
- typename EncodingTraits<DType>::Accumulator* out) {
- return DecodeArrow(num_values, 0, /*valid_bits=*/NULLPTR, 0, out);
- }
-
- /// \brief Decode into a DictionaryBuilder
- ///
- /// This function assumes the definition levels were already decoded
- /// as a validity bitmap in the given `valid_bits`. `null_count`
- /// is the number of 0s in `valid_bits`.
- /// As a space optimization, it is allowed for `valid_bits` to be null
- /// if `null_count` is zero.
- ///
- /// \return number of values decoded
- virtual int DecodeArrow(int num_values, int null_count, const uint8_t* valid_bits,
- int64_t valid_bits_offset,
- typename EncodingTraits<DType>::DictAccumulator* builder) = 0;
-
- /// \brief Decode into a DictionaryBuilder ignoring nulls
- ///
- /// \return number of values decoded
- int DecodeArrowNonNull(int num_values,
- typename EncodingTraits<DType>::DictAccumulator* builder) {
- return DecodeArrow(num_values, 0, /*valid_bits=*/NULLPTR, 0, builder);
- }
-};
-
-template <typename DType>
-class DictDecoder : virtual public TypedDecoder<DType> {
- public:
- using T = typename DType::c_type;
-
- virtual void SetDict(TypedDecoder<DType>* dictionary) = 0;
-
- /// \brief Insert dictionary values into the Arrow dictionary builder's memo,
- /// but do not append any indices
- virtual void InsertDictionary(::arrow::ArrayBuilder* builder) = 0;
-
- /// \brief Decode only dictionary indices and append to dictionary
- /// builder. The builder must have had the dictionary from this decoder
- /// inserted already.
- ///
- /// \warning Remember to reset the builder each time the dict decoder is initialized
- /// with a new dictionary page
- virtual int DecodeIndicesSpaced(int num_values, int null_count,
- const uint8_t* valid_bits, int64_t valid_bits_offset,
- ::arrow::ArrayBuilder* builder) = 0;
-
- /// \brief Decode only dictionary indices (no nulls)
- ///
- /// \warning Remember to reset the builder each time the dict decoder is initialized
- /// with a new dictionary page
- virtual int DecodeIndices(int num_values, ::arrow::ArrayBuilder* builder) = 0;
-
- /// \brief Decode only dictionary indices (no nulls). Same as above
- /// DecodeIndices but target is an array instead of a builder.
- ///
- /// \note API EXPERIMENTAL
- virtual int DecodeIndices(int num_values, int32_t* indices) = 0;
-
- /// \brief Get dictionary. The reader will call this API when it encounters a
- /// new dictionary.
- ///
- /// @param[out] dictionary The pointer to dictionary values. Dictionary is owned by
- /// the decoder and is destroyed when the decoder is destroyed.
- /// @param[out] dictionary_length The dictionary length.
- ///
- /// \note API EXPERIMENTAL
- virtual void GetDictionary(const T** dictionary, int32_t* dictionary_length) = 0;
-};
-
-// ----------------------------------------------------------------------
-// TypedEncoder specializations, traits, and factory functions
-
-class BooleanDecoder : virtual public TypedDecoder<BooleanType> {
- public:
- using TypedDecoder<BooleanType>::Decode;
- virtual int Decode(uint8_t* buffer, int max_values) = 0;
-};
-
-class FLBADecoder : virtual public TypedDecoder<FLBAType> {
- public:
- using TypedDecoder<FLBAType>::DecodeSpaced;
-
- // TODO(wesm): As a possible follow-up to PARQUET-1508, we should examine if
- // there is value in adding specialized read methods for
- // FIXED_LEN_BYTE_ARRAY. If only Decimal data can occur with this data type
- // then perhaps not
-};
-
-PARQUET_EXPORT
-std::unique_ptr<Encoder> MakeEncoder(
- Type::type type_num, Encoding::type encoding, bool use_dictionary = false,
- const ColumnDescriptor* descr = NULLPTR,
- ::arrow::MemoryPool* pool = ::arrow::default_memory_pool());
-
-template <typename DType>
-std::unique_ptr<typename EncodingTraits<DType>::Encoder> MakeTypedEncoder(
- Encoding::type encoding, bool use_dictionary = false,
- const ColumnDescriptor* descr = NULLPTR,
- ::arrow::MemoryPool* pool = ::arrow::default_memory_pool()) {
- using OutType = typename EncodingTraits<DType>::Encoder;
- std::unique_ptr<Encoder> base =
- MakeEncoder(DType::type_num, encoding, use_dictionary, descr, pool);
- return std::unique_ptr<OutType>(dynamic_cast<OutType*>(base.release()));
-}
-
-PARQUET_EXPORT
-std::unique_ptr<Decoder> MakeDecoder(Type::type type_num, Encoding::type encoding,
- const ColumnDescriptor* descr = NULLPTR);
-
-namespace detail {
-
-PARQUET_EXPORT
-std::unique_ptr<Decoder> MakeDictDecoder(Type::type type_num,
- const ColumnDescriptor* descr,
- ::arrow::MemoryPool* pool);
-
-} // namespace detail
-
-template <typename DType>
-std::unique_ptr<DictDecoder<DType>> MakeDictDecoder(
- const ColumnDescriptor* descr = NULLPTR,
- ::arrow::MemoryPool* pool = ::arrow::default_memory_pool()) {
- using OutType = DictDecoder<DType>;
- auto decoder = detail::MakeDictDecoder(DType::type_num, descr, pool);
- return std::unique_ptr<OutType>(dynamic_cast<OutType*>(decoder.release()));
-}
-
-template <typename DType>
-std::unique_ptr<typename EncodingTraits<DType>::Decoder> MakeTypedDecoder(
- Encoding::type encoding, const ColumnDescriptor* descr = NULLPTR) {
- using OutType = typename EncodingTraits<DType>::Decoder;
- std::unique_ptr<Decoder> base = MakeDecoder(DType::type_num, encoding, descr);
- return std::unique_ptr<OutType>(dynamic_cast<OutType*>(base.release()));
-}
-
-} // namespace parquet
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+
+#include <cstdint>
+#include <cstring>
+#include <memory>
+#include <vector>
+
+#include "arrow/util/spaced.h"
+
+#include "parquet/exception.h"
+#include "parquet/platform.h"
+#include "parquet/types.h"
+
+namespace arrow {
+
+class Array;
+class ArrayBuilder;
+class BinaryArray;
+class BinaryBuilder;
+class BooleanBuilder;
+class Int32Type;
+class Int64Type;
+class FloatType;
+class DoubleType;
+class FixedSizeBinaryType;
+template <typename T>
+class NumericBuilder;
+class FixedSizeBinaryBuilder;
+template <typename T>
+class Dictionary32Builder;
+
+} // namespace arrow
+
+namespace parquet {
+
+template <typename DType>
+class TypedEncoder;
+
+using BooleanEncoder = TypedEncoder<BooleanType>;
+using Int32Encoder = TypedEncoder<Int32Type>;
+using Int64Encoder = TypedEncoder<Int64Type>;
+using Int96Encoder = TypedEncoder<Int96Type>;
+using FloatEncoder = TypedEncoder<FloatType>;
+using DoubleEncoder = TypedEncoder<DoubleType>;
+using ByteArrayEncoder = TypedEncoder<ByteArrayType>;
+using FLBAEncoder = TypedEncoder<FLBAType>;
+
+template <typename DType>
+class TypedDecoder;
+
+class BooleanDecoder;
+using Int32Decoder = TypedDecoder<Int32Type>;
+using Int64Decoder = TypedDecoder<Int64Type>;
+using Int96Decoder = TypedDecoder<Int96Type>;
+using FloatDecoder = TypedDecoder<FloatType>;
+using DoubleDecoder = TypedDecoder<DoubleType>;
+using ByteArrayDecoder = TypedDecoder<ByteArrayType>;
+class FLBADecoder;
+
+template <typename T>
+struct EncodingTraits;
+
+template <>
+struct EncodingTraits<BooleanType> {
+ using Encoder = BooleanEncoder;
+ using Decoder = BooleanDecoder;
+
+ using ArrowType = ::arrow::BooleanType;
+ using Accumulator = ::arrow::BooleanBuilder;
+ struct DictAccumulator {};
+};
+
+template <>
+struct EncodingTraits<Int32Type> {
+ using Encoder = Int32Encoder;
+ using Decoder = Int32Decoder;
+
+ using ArrowType = ::arrow::Int32Type;
+ using Accumulator = ::arrow::NumericBuilder<::arrow::Int32Type>;
+ using DictAccumulator = ::arrow::Dictionary32Builder<::arrow::Int32Type>;
+};
+
+template <>
+struct EncodingTraits<Int64Type> {
+ using Encoder = Int64Encoder;
+ using Decoder = Int64Decoder;
+
+ using ArrowType = ::arrow::Int64Type;
+ using Accumulator = ::arrow::NumericBuilder<::arrow::Int64Type>;
+ using DictAccumulator = ::arrow::Dictionary32Builder<::arrow::Int64Type>;
+};
+
+template <>
+struct EncodingTraits<Int96Type> {
+ using Encoder = Int96Encoder;
+ using Decoder = Int96Decoder;
+
+ struct Accumulator {};
+ struct DictAccumulator {};
+};
+
+template <>
+struct EncodingTraits<FloatType> {
+ using Encoder = FloatEncoder;
+ using Decoder = FloatDecoder;
+
+ using ArrowType = ::arrow::FloatType;
+ using Accumulator = ::arrow::NumericBuilder<::arrow::FloatType>;
+ using DictAccumulator = ::arrow::Dictionary32Builder<::arrow::FloatType>;
+};
+
+template <>
+struct EncodingTraits<DoubleType> {
+ using Encoder = DoubleEncoder;
+ using Decoder = DoubleDecoder;
+
+ using ArrowType = ::arrow::DoubleType;
+ using Accumulator = ::arrow::NumericBuilder<::arrow::DoubleType>;
+ using DictAccumulator = ::arrow::Dictionary32Builder<::arrow::DoubleType>;
+};
+
+template <>
+struct EncodingTraits<ByteArrayType> {
+ using Encoder = ByteArrayEncoder;
+ using Decoder = ByteArrayDecoder;
+
+ /// \brief Internal helper class for decoding BYTE_ARRAY data where we can
+ /// overflow the capacity of a single arrow::BinaryArray
+ struct Accumulator {
+ std::unique_ptr<::arrow::BinaryBuilder> builder;
+ std::vector<std::shared_ptr<::arrow::Array>> chunks;
+ };
+ using ArrowType = ::arrow::BinaryType;
+ using DictAccumulator = ::arrow::Dictionary32Builder<::arrow::BinaryType>;
+};
+
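The Accumulator above exists because int32 offsets limit a single arrow::BinaryArray to roughly 2 GiB of data: when a value no longer fits, the live builder is finished into `chunks` and restarted (the PushChunk pattern in encoding.cc). A hedged sketch of draining it once decoding ends:

#include <memory>
#include <utility>
#include <vector>
#include "arrow/array/builder_binary.h"
#include "parquet/encoding.h"
#include "parquet/exception.h"

// Sketch: flush the live builder and return all finished chunks.
std::vector<std::shared_ptr<::arrow::Array>> FinishAccumulator(
    parquet::EncodingTraits<parquet::ByteArrayType>::Accumulator* acc) {
  std::shared_ptr<::arrow::Array> last;
  PARQUET_THROW_NOT_OK(acc->builder->Finish(&last));
  acc->chunks.push_back(std::move(last));
  return std::move(acc->chunks);
}
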
+template <>
+struct EncodingTraits<FLBAType> {
+ using Encoder = FLBAEncoder;
+ using Decoder = FLBADecoder;
+
+ using ArrowType = ::arrow::FixedSizeBinaryType;
+ using Accumulator = ::arrow::FixedSizeBinaryBuilder;
+ using DictAccumulator = ::arrow::Dictionary32Builder<::arrow::FixedSizeBinaryType>;
+};
+
+class ColumnDescriptor;
+
+// Untyped base for all encoders
+class Encoder {
+ public:
+ virtual ~Encoder() = default;
+
+ virtual int64_t EstimatedDataEncodedSize() = 0;
+ virtual std::shared_ptr<Buffer> FlushValues() = 0;
+ virtual Encoding::type encoding() const = 0;
+
+ virtual void Put(const ::arrow::Array& values) = 0;
+
+ virtual MemoryPool* memory_pool() const = 0;
+};
+
+// Base class for value encoders. Since encoders may or may not have state (e.g.,
+// dictionary encoding) we use a class instance to maintain any state.
+//
+// Encode interfaces are internal, subject to change without deprecation.
+template <typename DType>
+class TypedEncoder : virtual public Encoder {
+ public:
+ typedef typename DType::c_type T;
+
+ using Encoder::Put;
+
+ virtual void Put(const T* src, int num_values) = 0;
+
+ virtual void Put(const std::vector<T>& src, int num_values = -1);
+
+ virtual void PutSpaced(const T* src, int num_values, const uint8_t* valid_bits,
+ int64_t valid_bits_offset) = 0;
+};
+
+template <typename DType>
+void TypedEncoder<DType>::Put(const std::vector<T>& src, int num_values) {
+ if (num_values == -1) {
+ num_values = static_cast<int>(src.size());
+ }
+ Put(src.data(), num_values);
+}
+
+template <>
+inline void TypedEncoder<BooleanType>::Put(const std::vector<bool>& src, int num_values) {
+ // NOTE(wesm): This stub is here only to satisfy the compiler; it is
+ // overridden later with the actual implementation
+}
+
+// Base class for dictionary encoders
+template <typename DType>
+class DictEncoder : virtual public TypedEncoder<DType> {
+ public:
+ /// Writes out any buffered indices to buffer preceded by the bit width of this data.
+ /// Returns the number of bytes written.
+ /// If the supplied buffer is not big enough, returns -1.
+ /// buffer must be preallocated with buffer_len bytes. Use EstimatedDataEncodedSize()
+ /// to size buffer.
+ virtual int WriteIndices(uint8_t* buffer, int buffer_len) = 0;
+
+ virtual int dict_encoded_size() = 0;
+ // virtual int dict_encoded_size() { return dict_encoded_size_; }
+
+ virtual int bit_width() const = 0;
+
+ /// Writes out the encoded dictionary to buffer. buffer must be preallocated to
+ /// dict_encoded_size() bytes.
+ virtual void WriteDict(uint8_t* buffer) = 0;
+
+ virtual int num_entries() const = 0;
+
+ /// \brief EXPERIMENTAL: Append dictionary indices into the encoder. It is
+ /// assumed (without any bounds-checking) that the indices reference
+ /// pre-existing dictionary values
+ /// \param[in] indices the dictionary index values. Only Int32Array currently
+ /// supported
+ virtual void PutIndices(const ::arrow::Array& indices) = 0;
+
+ /// \brief EXPERIMENTAL: Append dictionary into encoder, inserting indices
+ /// separately. Currently throws an exception if the current dictionary memo is
+ /// non-empty
+ /// \param[in] values the dictionary values. Only valid for certain
+ /// Parquet/Arrow type combinations, like BYTE_ARRAY/BinaryArray
+ virtual void PutDictionary(const ::arrow::Array& values) = 0;
+};
+
+// ----------------------------------------------------------------------
+// Value decoding
+
+class Decoder {
+ public:
+ virtual ~Decoder() = default;
+
+ // Sets the data for a new page. This will be called multiple times on the same
+ // decoder and should reset all internal state.
+ virtual void SetData(int num_values, const uint8_t* data, int len) = 0;
+
+ // Returns the number of values left (for the last call to SetData()). This is
+ // the number of values left in this page.
+ virtual int values_left() const = 0;
+ virtual Encoding::type encoding() const = 0;
+};
+
+template <typename DType>
+class TypedDecoder : virtual public Decoder {
+ public:
+ using T = typename DType::c_type;
+
+ /// \brief Decode values into a buffer
+ ///
+ /// Subclasses may override the more specialized Decode methods below.
+ ///
+ /// \param[in] buffer destination for decoded values
+ /// \param[in] max_values maximum number of values to decode
+ /// \return The number of values decoded. Should be identical to max_values except
+ /// at the end of the current data page.
+ virtual int Decode(T* buffer, int max_values) = 0;
+
+ /// \brief Decode the values in this data page but leave spaces for null entries.
+ ///
+ /// \param[in] buffer destination for decoded values
+ /// \param[in] num_values size of the def_levels and buffer arrays including the number
+ /// of null slots
+ /// \param[in] null_count number of null slots
+ /// \param[in] valid_bits bitmap data indicating position of valid slots
+ /// \param[in] valid_bits_offset offset into valid_bits
+ /// \return The number of values decoded, including nulls.
+ virtual int DecodeSpaced(T* buffer, int num_values, int null_count,
+ const uint8_t* valid_bits, int64_t valid_bits_offset) {
+ if (null_count > 0) {
+ int values_to_read = num_values - null_count;
+ int values_read = Decode(buffer, values_to_read);
+ if (values_read != values_to_read) {
+ throw ParquetException("Number of values / definition_levels read did not match");
+ }
+
+ return ::arrow::util::internal::SpacedExpand<T>(buffer, num_values, null_count,
+ valid_bits, valid_bits_offset);
+ } else {
+ return Decode(buffer, num_values);
+ }
+ }
+
+ /// \brief Decode into an ArrayBuilder or other accumulator
+ ///
+ /// This function assumes the definition levels were already decoded
+ /// as a validity bitmap in the given `valid_bits`. `null_count`
+ /// is the number of 0s in `valid_bits`.
+ /// As a space optimization, it is allowed for `valid_bits` to be null
+ /// if `null_count` is zero.
+ ///
+ /// \return number of values decoded
+ virtual int DecodeArrow(int num_values, int null_count, const uint8_t* valid_bits,
+ int64_t valid_bits_offset,
+ typename EncodingTraits<DType>::Accumulator* out) = 0;
+
+ /// \brief Decode into an ArrayBuilder or other accumulator ignoring nulls
+ ///
+ /// \return number of values decoded
+ int DecodeArrowNonNull(int num_values,
+ typename EncodingTraits<DType>::Accumulator* out) {
+ return DecodeArrow(num_values, 0, /*valid_bits=*/NULLPTR, 0, out);
+ }
+
+ /// \brief Decode into a DictionaryBuilder
+ ///
+ /// This function assumes the definition levels were already decoded
+ /// as a validity bitmap in the given `valid_bits`. `null_count`
+ /// is the number of 0s in `valid_bits`.
+ /// As a space optimization, it is allowed for `valid_bits` to be null
+ /// if `null_count` is zero.
+ ///
+ /// \return number of values decoded
+ virtual int DecodeArrow(int num_values, int null_count, const uint8_t* valid_bits,
+ int64_t valid_bits_offset,
+ typename EncodingTraits<DType>::DictAccumulator* builder) = 0;
+
+ /// \brief Decode into a DictionaryBuilder ignoring nulls
+ ///
+ /// \return number of values decoded
+ int DecodeArrowNonNull(int num_values,
+ typename EncodingTraits<DType>::DictAccumulator* builder) {
+ return DecodeArrow(num_values, 0, /*valid_bits=*/NULLPTR, 0, builder);
+ }
+};
+
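DecodeSpaced above decodes the non-null values densely at the front of the buffer, then SpacedExpand spreads them to their final slots. Expanding in place must run back-to-front so a dense value is never overwritten before it is read; a standalone sketch of that step:

#include <vector>

// Sketch of the SpacedExpand step: 'buffer' holds the dense (non-null)
// values at the front; spread them to the slots whose validity flag is set.
template <typename T>
void SpacedExpandSketch(T* buffer, int num_values, const std::vector<bool>& valid) {
  int src = -1;
  for (bool v : valid) src += v;  // index of the last dense value
  for (int dst = num_values - 1; dst >= 0; --dst) {
    if (valid[dst]) {
      buffer[dst] = buffer[src--];  // move a dense value into its spaced slot
    } else {
      buffer[dst] = T{};  // null slot; the real code leaves it unspecified
    }
  }
}
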
+template <typename DType>
+class DictDecoder : virtual public TypedDecoder<DType> {
+ public:
+ using T = typename DType::c_type;
+
+ virtual void SetDict(TypedDecoder<DType>* dictionary) = 0;
+
+ /// \brief Insert dictionary values into the Arrow dictionary builder's memo,
+ /// but do not append any indices
+ virtual void InsertDictionary(::arrow::ArrayBuilder* builder) = 0;
+
+ /// \brief Decode only dictionary indices and append to dictionary
+ /// builder. The builder must have had the dictionary from this decoder
+ /// inserted already.
+ ///
+ /// \warning Remember to reset the builder each time the dict decoder is initialized
+ /// with a new dictionary page
+ virtual int DecodeIndicesSpaced(int num_values, int null_count,
+ const uint8_t* valid_bits, int64_t valid_bits_offset,
+ ::arrow::ArrayBuilder* builder) = 0;
+
+ /// \brief Decode only dictionary indices (no nulls)
+ ///
+ /// \warning Remember to reset the builder each time the dict decoder is initialized
+ /// with a new dictionary page
+ virtual int DecodeIndices(int num_values, ::arrow::ArrayBuilder* builder) = 0;
+
+ /// \brief Decode only dictionary indices (no nulls). Same as above
+ /// DecodeIndices but target is an array instead of a builder.
+ ///
+ /// \note API EXPERIMENTAL
+ virtual int DecodeIndices(int num_values, int32_t* indices) = 0;
+
+ /// \brief Get dictionary. The reader will call this API when it encounters a
+ /// new dictionary.
+ ///
+ /// @param[out] dictionary The pointer to dictionary values. Dictionary is owned by
+ /// the decoder and is destroyed when the decoder is destroyed.
+ /// @param[out] dictionary_length The dictionary length.
+ ///
+ /// \note API EXPERIMENTAL
+ virtual void GetDictionary(const T** dictionary, int32_t* dictionary_length) = 0;
+};
+
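A typical flow for the DictDecoder interface above: load the dictionary values, set the data page of packed indices, memoize the dictionary into the builder once, then append only indices. A hedged sketch, where the PLAIN decoder holding the dictionary page is assumed to be prepared elsewhere:

#include <cstdint>
#include "arrow/array/builder_dict.h"
#include "parquet/encoding.h"

// Sketch: 'dict_page_decoder' already holds the dictionary page's values.
void DecodeDictColumn(parquet::TypedDecoder<parquet::ByteArrayType>* dict_page_decoder,
                      const uint8_t* data_page, int data_len, int num_values,
                      ::arrow::BinaryDictionary32Builder* builder) {
  auto decoder = parquet::MakeDictDecoder<parquet::ByteArrayType>();
  decoder->SetDict(dict_page_decoder);                // decode dictionary values
  decoder->SetData(num_values, data_page, data_len);  // RLE-packed indices
  decoder->InsertDictionary(builder);                 // once per dictionary page
  decoder->DecodeIndices(num_values, builder);        // then indices only
}
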
+// ----------------------------------------------------------------------
+// TypedEncoder specializations, traits, and factory functions
+
+class BooleanDecoder : virtual public TypedDecoder<BooleanType> {
+ public:
+ using TypedDecoder<BooleanType>::Decode;
+ virtual int Decode(uint8_t* buffer, int max_values) = 0;
+};
+
+class FLBADecoder : virtual public TypedDecoder<FLBAType> {
+ public:
+ using TypedDecoder<FLBAType>::DecodeSpaced;
+
+ // TODO(wesm): As a possible follow-up to PARQUET-1508, we should examine if
+ // there is value in adding specialized read methods for
+ // FIXED_LEN_BYTE_ARRAY. If only Decimal data can occur with this data type
+ // then perhaps not
+};
+
+PARQUET_EXPORT
+std::unique_ptr<Encoder> MakeEncoder(
+ Type::type type_num, Encoding::type encoding, bool use_dictionary = false,
+ const ColumnDescriptor* descr = NULLPTR,
+ ::arrow::MemoryPool* pool = ::arrow::default_memory_pool());
+
+template <typename DType>
+std::unique_ptr<typename EncodingTraits<DType>::Encoder> MakeTypedEncoder(
+ Encoding::type encoding, bool use_dictionary = false,
+ const ColumnDescriptor* descr = NULLPTR,
+ ::arrow::MemoryPool* pool = ::arrow::default_memory_pool()) {
+ using OutType = typename EncodingTraits<DType>::Encoder;
+ std::unique_ptr<Encoder> base =
+ MakeEncoder(DType::type_num, encoding, use_dictionary, descr, pool);
+ return std::unique_ptr<OutType>(dynamic_cast<OutType*>(base.release()));
+}
+
+PARQUET_EXPORT
+std::unique_ptr<Decoder> MakeDecoder(Type::type type_num, Encoding::type encoding,
+ const ColumnDescriptor* descr = NULLPTR);
+
+namespace detail {
+
+PARQUET_EXPORT
+std::unique_ptr<Decoder> MakeDictDecoder(Type::type type_num,
+ const ColumnDescriptor* descr,
+ ::arrow::MemoryPool* pool);
+
+} // namespace detail
+
+template <typename DType>
+std::unique_ptr<DictDecoder<DType>> MakeDictDecoder(
+ const ColumnDescriptor* descr = NULLPTR,
+ ::arrow::MemoryPool* pool = ::arrow::default_memory_pool()) {
+ using OutType = DictDecoder<DType>;
+ auto decoder = detail::MakeDictDecoder(DType::type_num, descr, pool);
+ return std::unique_ptr<OutType>(dynamic_cast<OutType*>(decoder.release()));
+}
+
+template <typename DType>
+std::unique_ptr<typename EncodingTraits<DType>::Decoder> MakeTypedDecoder(
+ Encoding::type encoding, const ColumnDescriptor* descr = NULLPTR) {
+ using OutType = typename EncodingTraits<DType>::Decoder;
+ std::unique_ptr<Decoder> base = MakeDecoder(DType::type_num, encoding, descr);
+ return std::unique_ptr<OutType>(dynamic_cast<OutType*>(base.release()));
+}
+
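Note that MakeTypedDecoder and MakeDictDecoder above downcast with dynamic_cast on the released base pointer: if the DType/encoding pairing does not match what the untyped factory built, the result is null and the base object leaks, so callers are expected to pass consistent arguments. A minimal call-site sketch:

#include "parquet/encoding.h"

// Sketch: BYTE_STREAM_SPLIT is only built for FLOAT and DOUBLE, so the
// template argument must be FloatType or DoubleType here.
void MakeFloatSplitDecoder() {
  auto decoder = parquet::MakeTypedDecoder<parquet::FloatType>(
      parquet::Encoding::BYTE_STREAM_SPLIT);
  // decoder->SetData(...) / decoder->DecodeArrow(...) as with any TypedDecoder.
}
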
+} // namespace parquet
diff --git a/contrib/libs/apache/arrow/cpp/src/parquet/encryption/encryption.cc b/contrib/libs/apache/arrow/cpp/src/parquet/encryption/encryption.cc
index 829b0e778f1..5927503aba3 100644
--- a/contrib/libs/apache/arrow/cpp/src/parquet/encryption/encryption.cc
+++ b/contrib/libs/apache/arrow/cpp/src/parquet/encryption/encryption.cc
@@ -1,412 +1,412 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-#include "parquet/encryption/encryption.h"
-
-#include <string.h>
-
-#include <map>
-#include <utility>
-
-#include "arrow/util/logging.h"
-#include "arrow/util/utf8.h"
-#include "parquet/encryption/encryption_internal.h"
-
-namespace parquet {
-
-// integer key retriever
-void IntegerKeyIdRetriever::PutKey(uint32_t key_id, const std::string& key) {
- key_map_.insert({key_id, key});
-}
-
-std::string IntegerKeyIdRetriever::GetKey(const std::string& key_metadata) {
- uint32_t key_id;
- memcpy(reinterpret_cast<uint8_t*>(&key_id), key_metadata.c_str(), 4);
-
- return key_map_.at(key_id);
-}
-
-// string key retriever
-void StringKeyIdRetriever::PutKey(const std::string& key_id, const std::string& key) {
- key_map_.insert({key_id, key});
-}
-
-std::string StringKeyIdRetriever::GetKey(const std::string& key_id) {
- return key_map_.at(key_id);
-}
-
-ColumnEncryptionProperties::Builder* ColumnEncryptionProperties::Builder::key(
- std::string column_key) {
- if (column_key.empty()) return this;
-
- DCHECK(key_.empty());
- key_ = column_key;
- return this;
-}
-
-ColumnEncryptionProperties::Builder* ColumnEncryptionProperties::Builder::key_metadata(
- const std::string& key_metadata) {
- DCHECK(!key_metadata.empty());
- DCHECK(key_metadata_.empty());
- key_metadata_ = key_metadata;
- return this;
-}
-
-ColumnEncryptionProperties::Builder* ColumnEncryptionProperties::Builder::key_id(
- const std::string& key_id) {
- // key_id is expected to be in UTF8 encoding
- ::arrow::util::InitializeUTF8();
- const uint8_t* data = reinterpret_cast<const uint8_t*>(key_id.c_str());
- if (!::arrow::util::ValidateUTF8(data, key_id.size())) {
- throw ParquetException("key id should be in UTF8 encoding");
- }
-
- DCHECK(!key_id.empty());
- this->key_metadata(key_id);
- return this;
-}
-
-FileDecryptionProperties::Builder* FileDecryptionProperties::Builder::column_keys(
- const ColumnPathToDecryptionPropertiesMap& column_decryption_properties) {
- if (column_decryption_properties.size() == 0) return this;
-
- if (column_decryption_properties_.size() != 0)
- throw ParquetException("Column properties already set");
-
- for (const auto& element : column_decryption_properties) {
- if (element.second->is_utilized()) {
- throw ParquetException("Column properties utilized in another file");
- }
- element.second->set_utilized();
- }
-
- column_decryption_properties_ = column_decryption_properties;
- return this;
-}
-
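The column_keys builder above expects one ColumnDecryptionProperties per encrypted column path. A hedged usage sketch, assuming the Builder constructors and build() methods declared in encryption.h (not shown in this diff) and using placeholder key strings:

#include <memory>
#include <string>
#include "parquet/encryption/encryption.h"

// Sketch only: the key literals are placeholders; real AES keys must be
// 16, 24, or 32 bytes.
std::shared_ptr<parquet::FileDecryptionProperties> MakeDecryptionProps() {
  auto col_props = parquet::ColumnDecryptionProperties::Builder("secret_col")
                       .key("0123456789012345")
                       ->build();
  parquet::ColumnPathToDecryptionPropertiesMap column_keys;
  column_keys["secret_col"] = col_props;
  return parquet::FileDecryptionProperties::Builder()
      .footer_key("0123456789012345")
      ->column_keys(column_keys)
      ->build();
}
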
-void FileDecryptionProperties::WipeOutDecryptionKeys() {
- footer_key_.clear();
-
- for (const auto& element : column_decryption_properties_) {
- element.second->WipeOutDecryptionKey();
- }
-}
-
-bool FileDecryptionProperties::is_utilized() {
- if (footer_key_.empty() && column_decryption_properties_.size() == 0 &&
- aad_prefix_.empty())
- return false;
-
- return utilized_;
-}
-
-std::shared_ptr<FileDecryptionProperties> FileDecryptionProperties::DeepClone(
- std::string new_aad_prefix) {
- std::string footer_key_copy = footer_key_;
- ColumnPathToDecryptionPropertiesMap column_decryption_properties_map_copy;
-
- for (const auto& element : column_decryption_properties_) {
- column_decryption_properties_map_copy.insert(
- {element.second->column_path(), element.second->DeepClone()});
- }
-
- if (new_aad_prefix.empty()) new_aad_prefix = aad_prefix_;
- return std::shared_ptr<FileDecryptionProperties>(new FileDecryptionProperties(
- footer_key_copy, key_retriever_, check_plaintext_footer_integrity_, new_aad_prefix,
- aad_prefix_verifier_, column_decryption_properties_map_copy,
- plaintext_files_allowed_));
-}
-
-FileDecryptionProperties::Builder* FileDecryptionProperties::Builder::footer_key(
- const std::string footer_key) {
- if (footer_key.empty()) {
- return this;
- }
- DCHECK(footer_key_.empty());
- footer_key_ = footer_key;
- return this;
-}
-
-FileDecryptionProperties::Builder* FileDecryptionProperties::Builder::key_retriever(
- const std::shared_ptr<DecryptionKeyRetriever>& key_retriever) {
- if (key_retriever == nullptr) return this;
-
- DCHECK(key_retriever_ == nullptr);
- key_retriever_ = key_retriever;
- return this;
-}
-
-FileDecryptionProperties::Builder* FileDecryptionProperties::Builder::aad_prefix(
- const std::string& aad_prefix) {
- if (aad_prefix.empty()) {
- return this;
- }
- DCHECK(aad_prefix_.empty());
- aad_prefix_ = aad_prefix;
- return this;
-}
-
-FileDecryptionProperties::Builder* FileDecryptionProperties::Builder::aad_prefix_verifier(
- std::shared_ptr<AADPrefixVerifier> aad_prefix_verifier) {
- if (aad_prefix_verifier == nullptr) return this;
-
- DCHECK(aad_prefix_verifier_ == nullptr);
- aad_prefix_verifier_ = std::move(aad_prefix_verifier);
- return this;
-}
-
-ColumnDecryptionProperties::Builder* ColumnDecryptionProperties::Builder::key(
- const std::string& key) {
- if (key.empty()) return this;
-
- DCHECK(key_.empty());
- key_ = key;
- return this;
-}
-
-std::shared_ptr<ColumnDecryptionProperties> ColumnDecryptionProperties::Builder::build() {
- return std::shared_ptr<ColumnDecryptionProperties>(
- new ColumnDecryptionProperties(column_path_, key_));
-}
-
-void ColumnDecryptionProperties::WipeOutDecryptionKey() { key_.clear(); }
-
-std::shared_ptr<ColumnDecryptionProperties> ColumnDecryptionProperties::DeepClone() {
- std::string key_copy = key_;
- return std::shared_ptr<ColumnDecryptionProperties>(
- new ColumnDecryptionProperties(column_path_, key_copy));
-}
-
-FileEncryptionProperties::Builder* FileEncryptionProperties::Builder::footer_key_metadata(
- const std::string& footer_key_metadata) {
- if (footer_key_metadata.empty()) return this;
-
- DCHECK(footer_key_metadata_.empty());
- footer_key_metadata_ = footer_key_metadata;
- return this;
-}
-
-FileEncryptionProperties::Builder* FileEncryptionProperties::Builder::encrypted_columns(
- const ColumnPathToEncryptionPropertiesMap& encrypted_columns) {
- if (encrypted_columns.size() == 0) return this;
-
- if (encrypted_columns_.size() != 0)
- throw ParquetException("Column properties already set");
-
- for (const auto& element : encrypted_columns) {
- if (element.second->is_utilized()) {
- throw ParquetException("Column properties utilized in another file");
- }
- element.second->set_utilized();
- }
- encrypted_columns_ = encrypted_columns;
- return this;
-}
-
-void FileEncryptionProperties::WipeOutEncryptionKeys() {
- footer_key_.clear();
- for (const auto& element : encrypted_columns_) {
- element.second->WipeOutEncryptionKey();
- }
-}
-
-std::shared_ptr<FileEncryptionProperties> FileEncryptionProperties::DeepClone(
- std::string new_aad_prefix) {
- std::string footer_key_copy = footer_key_;
- ColumnPathToEncryptionPropertiesMap encrypted_columns_map_copy;
-
- for (const auto& element : encrypted_columns_) {
- encrypted_columns_map_copy.insert(
- {element.second->column_path(), element.second->DeepClone()});
- }
-
- if (new_aad_prefix.empty()) new_aad_prefix = aad_prefix_;
- return std::shared_ptr<FileEncryptionProperties>(new FileEncryptionProperties(
- algorithm_.algorithm, footer_key_copy, footer_key_metadata_, encrypted_footer_,
- new_aad_prefix, store_aad_prefix_in_file_, encrypted_columns_map_copy));
-}
-
-FileEncryptionProperties::Builder* FileEncryptionProperties::Builder::aad_prefix(
- const std::string& aad_prefix) {
- if (aad_prefix.empty()) return this;
-
- DCHECK(aad_prefix_.empty());
- aad_prefix_ = aad_prefix;
- store_aad_prefix_in_file_ = true;
- return this;
-}
-
-FileEncryptionProperties::Builder*
-FileEncryptionProperties::Builder::disable_aad_prefix_storage() {
- DCHECK(!aad_prefix_.empty());
-
- store_aad_prefix_in_file_ = false;
- return this;
-}
-
-ColumnEncryptionProperties::ColumnEncryptionProperties(bool encrypted,
- const std::string& column_path,
- const std::string& key,
- const std::string& key_metadata)
- : column_path_(column_path) {
- // column encryption properties object (with a column key) can be used for writing only
- // one file.
- // Upon completion of file writing, the encryption keys in the properties will be wiped
- // out (set to 0 in memory).
- utilized_ = false;
-
- DCHECK(!column_path.empty());
- if (!encrypted) {
- DCHECK(key.empty() && key_metadata.empty());
- }
-
- if (!key.empty()) {
- DCHECK(key.length() == 16 || key.length() == 24 || key.length() == 32);
- }
-
- encrypted_with_footer_key_ = (encrypted && key.empty());
- if (encrypted_with_footer_key_) {
- DCHECK(key_metadata.empty());
- }
-
- encrypted_ = encrypted;
- key_metadata_ = key_metadata;
- key_ = key;
-}
-
-ColumnDecryptionProperties::ColumnDecryptionProperties(const std::string& column_path,
- const std::string& key)
- : column_path_(column_path) {
- utilized_ = false;
- DCHECK(!column_path.empty());
-
- if (!key.empty()) {
- DCHECK(key.length() == 16 || key.length() == 24 || key.length() == 32);
- }
-
- key_ = key;
-}
-
-std::string FileDecryptionProperties::column_key(const std::string& column_path) const {
- if (column_decryption_properties_.find(column_path) !=
- column_decryption_properties_.end()) {
- auto column_prop = column_decryption_properties_.at(column_path);
- if (column_prop != nullptr) {
- return column_prop->key();
- }
- }
- return empty_string_;
-}
-
-FileDecryptionProperties::FileDecryptionProperties(
- const std::string& footer_key, std::shared_ptr<DecryptionKeyRetriever> key_retriever,
- bool check_plaintext_footer_integrity, const std::string& aad_prefix,
- std::shared_ptr<AADPrefixVerifier> aad_prefix_verifier,
- const ColumnPathToDecryptionPropertiesMap& column_decryption_properties,
- bool plaintext_files_allowed) {
- DCHECK(!footer_key.empty() || nullptr != key_retriever ||
- 0 != column_decryption_properties.size());
-
- if (!footer_key.empty()) {
- DCHECK(footer_key.length() == 16 || footer_key.length() == 24 ||
- footer_key.length() == 32);
- }
- if (footer_key.empty() && check_plaintext_footer_integrity) {
- DCHECK(nullptr != key_retriever);
- }
- aad_prefix_verifier_ = std::move(aad_prefix_verifier);
- footer_key_ = footer_key;
- check_plaintext_footer_integrity_ = check_plaintext_footer_integrity;
- key_retriever_ = std::move(key_retriever);
- aad_prefix_ = aad_prefix;
- column_decryption_properties_ = column_decryption_properties;
- plaintext_files_allowed_ = plaintext_files_allowed;
- utilized_ = false;
-}
-
-FileEncryptionProperties::Builder* FileEncryptionProperties::Builder::footer_key_id(
- const std::string& key_id) {
- // key_id is expected to be in UTF8 encoding
- ::arrow::util::InitializeUTF8();
- const uint8_t* data = reinterpret_cast<const uint8_t*>(key_id.c_str());
- if (!::arrow::util::ValidateUTF8(data, key_id.size())) {
- throw ParquetException("footer key id should be in UTF8 encoding");
- }
-
- if (key_id.empty()) {
- return this;
- }
-
- return footer_key_metadata(key_id);
-}
-
-std::shared_ptr<ColumnEncryptionProperties>
-FileEncryptionProperties::column_encryption_properties(const std::string& column_path) {
- if (encrypted_columns_.size() == 0) {
- auto builder = std::make_shared<ColumnEncryptionProperties::Builder>(column_path);
- return builder->build();
- }
- if (encrypted_columns_.find(column_path) != encrypted_columns_.end()) {
- return encrypted_columns_[column_path];
- }
-
- return nullptr;
-}
-
-FileEncryptionProperties::FileEncryptionProperties(
- ParquetCipher::type cipher, const std::string& footer_key,
- const std::string& footer_key_metadata, bool encrypted_footer,
- const std::string& aad_prefix, bool store_aad_prefix_in_file,
- const ColumnPathToEncryptionPropertiesMap& encrypted_columns)
- : footer_key_(footer_key),
- footer_key_metadata_(footer_key_metadata),
- encrypted_footer_(encrypted_footer),
- aad_prefix_(aad_prefix),
- store_aad_prefix_in_file_(store_aad_prefix_in_file),
- encrypted_columns_(encrypted_columns) {
- // file encryption properties object can be used for writing only one file.
- // Upon completion of file writing, the encryption keys in the properties will be wiped
- // out (set to 0 in memory).
- utilized_ = false;
-
- DCHECK(!footer_key.empty());
- // footer_key must be either 16, 24 or 32 bytes.
- DCHECK(footer_key.length() == 16 || footer_key.length() == 24 ||
- footer_key.length() == 32);
-
- uint8_t aad_file_unique[kAadFileUniqueLength];
- memset(aad_file_unique, 0, kAadFileUniqueLength);
- encryption::RandBytes(aad_file_unique, kAadFileUniqueLength);
- std::string aad_file_unique_str(reinterpret_cast<char const*>(aad_file_unique),
- kAadFileUniqueLength);
-
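- // If an AAD prefix is used but not stored in the file, readers must supply it explicitly.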
- bool supply_aad_prefix = false;
- if (aad_prefix.empty()) {
- file_aad_ = aad_file_unique_str;
- } else {
- file_aad_ = aad_prefix + aad_file_unique_str;
- if (!store_aad_prefix_in_file) supply_aad_prefix = true;
- }
- algorithm_.algorithm = cipher;
- algorithm_.aad.aad_file_unique = aad_file_unique_str;
- algorithm_.aad.supply_aad_prefix = supply_aad_prefix;
- if (!aad_prefix.empty() && store_aad_prefix_in_file) {
- algorithm_.aad.aad_prefix = aad_prefix;
- }
-}
-
-} // namespace parquet
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "parquet/encryption/encryption.h"
+
+#include <string.h>
+
+#include <map>
+#include <utility>
+
+#include "arrow/util/logging.h"
+#include "arrow/util/utf8.h"
+#include "parquet/encryption/encryption_internal.h"
+
+namespace parquet {
+
+// integer key retriever
+void IntegerKeyIdRetriever::PutKey(uint32_t key_id, const std::string& key) {
+ key_map_.insert({key_id, key});
+}
+
+std::string IntegerKeyIdRetriever::GetKey(const std::string& key_metadata) {
+ uint32_t key_id;
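+ // Reads the first four bytes of key_metadata as a native-endian uint32 key id.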
+ memcpy(reinterpret_cast<uint8_t*>(&key_id), key_metadata.c_str(), 4);
+
+ return key_map_.at(key_id);
+}
+
+// string key retriever
+void StringKeyIdRetriever::PutKey(const std::string& key_id, const std::string& key) {
+ key_map_.insert({key_id, key});
+}
+
+std::string StringKeyIdRetriever::GetKey(const std::string& key_id) {
+ return key_map_.at(key_id);
+}
+
+ColumnEncryptionProperties::Builder* ColumnEncryptionProperties::Builder::key(
+ std::string column_key) {
+ if (column_key.empty()) return this;
+
+ DCHECK(key_.empty());
+ key_ = column_key;
+ return this;
+}
+
+ColumnEncryptionProperties::Builder* ColumnEncryptionProperties::Builder::key_metadata(
+ const std::string& key_metadata) {
+ DCHECK(!key_metadata.empty());
+ DCHECK(key_metadata_.empty());
+ key_metadata_ = key_metadata;
+ return this;
+}
+
+ColumnEncryptionProperties::Builder* ColumnEncryptionProperties::Builder::key_id(
+ const std::string& key_id) {
+ // key_id is expected to be in UTF8 encoding
+ ::arrow::util::InitializeUTF8();
+ const uint8_t* data = reinterpret_cast<const uint8_t*>(key_id.c_str());
+ if (!::arrow::util::ValidateUTF8(data, key_id.size())) {
+ throw ParquetException("key id should be in UTF8 encoding");
+ }
+
+ DCHECK(!key_id.empty());
+ this->key_metadata(key_id);
+ return this;
+}
+
+FileDecryptionProperties::Builder* FileDecryptionProperties::Builder::column_keys(
+ const ColumnPathToDecryptionPropertiesMap& column_decryption_properties) {
+ if (column_decryption_properties.size() == 0) return this;
+
+ if (column_decryption_properties_.size() != 0)
+ throw ParquetException("Column properties already set");
+
+ for (const auto& element : column_decryption_properties) {
+ if (element.second->is_utilized()) {
+ throw ParquetException("Column properties utilized in another file");
+ }
+ element.second->set_utilized();
+ }
+
+ column_decryption_properties_ = column_decryption_properties;
+ return this;
+}
+
+void FileDecryptionProperties::WipeOutDecryptionKeys() {
+ footer_key_.clear();
+
+ for (const auto& element : column_decryption_properties_) {
+ element.second->WipeOutDecryptionKey();
+ }
+}
+
+bool FileDecryptionProperties::is_utilized() {
+ if (footer_key_.empty() && column_decryption_properties_.size() == 0 &&
+ aad_prefix_.empty())
+ return false;
+
+ return utilized_;
+}
+
+std::shared_ptr<FileDecryptionProperties> FileDecryptionProperties::DeepClone(
+ std::string new_aad_prefix) {
+ std::string footer_key_copy = footer_key_;
+ ColumnPathToDecryptionPropertiesMap column_decryption_properties_map_copy;
+
+ for (const auto& element : column_decryption_properties_) {
+ column_decryption_properties_map_copy.insert(
+ {element.second->column_path(), element.second->DeepClone()});
+ }
+
+ if (new_aad_prefix.empty()) new_aad_prefix = aad_prefix_;
+ return std::shared_ptr<FileDecryptionProperties>(new FileDecryptionProperties(
+ footer_key_copy, key_retriever_, check_plaintext_footer_integrity_, new_aad_prefix,
+ aad_prefix_verifier_, column_decryption_properties_map_copy,
+ plaintext_files_allowed_));
+}
+
+FileDecryptionProperties::Builder* FileDecryptionProperties::Builder::footer_key(
+ const std::string footer_key) {
+ if (footer_key.empty()) {
+ return this;
+ }
+ DCHECK(footer_key_.empty());
+ footer_key_ = footer_key;
+ return this;
+}
+
+FileDecryptionProperties::Builder* FileDecryptionProperties::Builder::key_retriever(
+ const std::shared_ptr<DecryptionKeyRetriever>& key_retriever) {
+ if (key_retriever == nullptr) return this;
+
+ DCHECK(key_retriever_ == nullptr);
+ key_retriever_ = key_retriever;
+ return this;
+}
+
+FileDecryptionProperties::Builder* FileDecryptionProperties::Builder::aad_prefix(
+ const std::string& aad_prefix) {
+ if (aad_prefix.empty()) {
+ return this;
+ }
+ DCHECK(aad_prefix_.empty());
+ aad_prefix_ = aad_prefix;
+ return this;
+}
+
+FileDecryptionProperties::Builder* FileDecryptionProperties::Builder::aad_prefix_verifier(
+ std::shared_ptr<AADPrefixVerifier> aad_prefix_verifier) {
+ if (aad_prefix_verifier == nullptr) return this;
+
+ DCHECK(aad_prefix_verifier_ == nullptr);
+ aad_prefix_verifier_ = std::move(aad_prefix_verifier);
+ return this;
+}
+
+ColumnDecryptionProperties::Builder* ColumnDecryptionProperties::Builder::key(
+ const std::string& key) {
+ if (key.empty()) return this;
+
+ DCHECK(key_.empty());
+ key_ = key;
+ return this;
+}
+
+std::shared_ptr<ColumnDecryptionProperties> ColumnDecryptionProperties::Builder::build() {
+ return std::shared_ptr<ColumnDecryptionProperties>(
+ new ColumnDecryptionProperties(column_path_, key_));
+}
+
+void ColumnDecryptionProperties::WipeOutDecryptionKey() { key_.clear(); }
+
+std::shared_ptr<ColumnDecryptionProperties> ColumnDecryptionProperties::DeepClone() {
+ std::string key_copy = key_;
+ return std::shared_ptr<ColumnDecryptionProperties>(
+ new ColumnDecryptionProperties(column_path_, key_copy));
+}
+
+FileEncryptionProperties::Builder* FileEncryptionProperties::Builder::footer_key_metadata(
+ const std::string& footer_key_metadata) {
+ if (footer_key_metadata.empty()) return this;
+
+ DCHECK(footer_key_metadata_.empty());
+ footer_key_metadata_ = footer_key_metadata;
+ return this;
+}
+
+FileEncryptionProperties::Builder* FileEncryptionProperties::Builder::encrypted_columns(
+ const ColumnPathToEncryptionPropertiesMap& encrypted_columns) {
+ if (encrypted_columns.size() == 0) return this;
+
+ if (encrypted_columns_.size() != 0)
+ throw ParquetException("Column properties already set");
+
+ for (const auto& element : encrypted_columns) {
+ if (element.second->is_utilized()) {
+ throw ParquetException("Column properties utilized in another file");
+ }
+ element.second->set_utilized();
+ }
+ encrypted_columns_ = encrypted_columns;
+ return this;
+}
+
+void FileEncryptionProperties::WipeOutEncryptionKeys() {
+ footer_key_.clear();
+ for (const auto& element : encrypted_columns_) {
+ element.second->WipeOutEncryptionKey();
+ }
+}
+
+std::shared_ptr<FileEncryptionProperties> FileEncryptionProperties::DeepClone(
+ std::string new_aad_prefix) {
+ std::string footer_key_copy = footer_key_;
+ ColumnPathToEncryptionPropertiesMap encrypted_columns_map_copy;
+
+ for (const auto& element : encrypted_columns_) {
+ encrypted_columns_map_copy.insert(
+ {element.second->column_path(), element.second->DeepClone()});
+ }
+
+ if (new_aad_prefix.empty()) new_aad_prefix = aad_prefix_;
+ return std::shared_ptr<FileEncryptionProperties>(new FileEncryptionProperties(
+ algorithm_.algorithm, footer_key_copy, footer_key_metadata_, encrypted_footer_,
+ new_aad_prefix, store_aad_prefix_in_file_, encrypted_columns_map_copy));
+}
+
+FileEncryptionProperties::Builder* FileEncryptionProperties::Builder::aad_prefix(
+ const std::string& aad_prefix) {
+ if (aad_prefix.empty()) return this;
+
+ DCHECK(aad_prefix_.empty());
+ aad_prefix_ = aad_prefix;
+ store_aad_prefix_in_file_ = true;
+ return this;
+}
+
+FileEncryptionProperties::Builder*
+FileEncryptionProperties::Builder::disable_aad_prefix_storage() {
+ DCHECK(!aad_prefix_.empty());
+
+ store_aad_prefix_in_file_ = false;
+ return this;
+}
+
+ColumnEncryptionProperties::ColumnEncryptionProperties(bool encrypted,
+ const std::string& column_path,
+ const std::string& key,
+ const std::string& key_metadata)
+ : column_path_(column_path) {
+ // column encryption properties object (with a column key) can be used for writing only
+ // one file.
+ // Upon completion of file writing, the encryption keys in the properties will be wiped
+ // out (set to 0 in memory).
+ utilized_ = false;
+
+ DCHECK(!column_path.empty());
+ if (!encrypted) {
+ DCHECK(key.empty() && key_metadata.empty());
+ }
+
+ if (!key.empty()) {
+ DCHECK(key.length() == 16 || key.length() == 24 || key.length() == 32);
+ }
+
+ encrypted_with_footer_key_ = (encrypted && key.empty());
+ if (encrypted_with_footer_key_) {
+ DCHECK(key_metadata.empty());
+ }
+
+ encrypted_ = encrypted;
+ key_metadata_ = key_metadata;
+ key_ = key;
+}
+
+ColumnDecryptionProperties::ColumnDecryptionProperties(const std::string& column_path,
+ const std::string& key)
+ : column_path_(column_path) {
+ utilized_ = false;
+ DCHECK(!column_path.empty());
+
+ if (!key.empty()) {
+ DCHECK(key.length() == 16 || key.length() == 24 || key.length() == 32);
+ }
+
+ key_ = key;
+}
+
+std::string FileDecryptionProperties::column_key(const std::string& column_path) const {
+ if (column_decryption_properties_.find(column_path) !=
+ column_decryption_properties_.end()) {
+ auto column_prop = column_decryption_properties_.at(column_path);
+ if (column_prop != nullptr) {
+ return column_prop->key();
+ }
+ }
+ return empty_string_;
+}
+
+FileDecryptionProperties::FileDecryptionProperties(
+ const std::string& footer_key, std::shared_ptr<DecryptionKeyRetriever> key_retriever,
+ bool check_plaintext_footer_integrity, const std::string& aad_prefix,
+ std::shared_ptr<AADPrefixVerifier> aad_prefix_verifier,
+ const ColumnPathToDecryptionPropertiesMap& column_decryption_properties,
+ bool plaintext_files_allowed) {
+ DCHECK(!footer_key.empty() || nullptr != key_retriever ||
+ 0 != column_decryption_properties.size());
+
+ if (!footer_key.empty()) {
+ DCHECK(footer_key.length() == 16 || footer_key.length() == 24 ||
+ footer_key.length() == 32);
+ }
+ if (footer_key.empty() && check_plaintext_footer_integrity) {
+ DCHECK(nullptr != key_retriever);
+ }
+ aad_prefix_verifier_ = std::move(aad_prefix_verifier);
+ footer_key_ = footer_key;
+ check_plaintext_footer_integrity_ = check_plaintext_footer_integrity;
+ key_retriever_ = std::move(key_retriever);
+ aad_prefix_ = aad_prefix;
+ column_decryption_properties_ = column_decryption_properties;
+ plaintext_files_allowed_ = plaintext_files_allowed;
+ utilized_ = false;
+}
+
+FileEncryptionProperties::Builder* FileEncryptionProperties::Builder::footer_key_id(
+ const std::string& key_id) {
+ // key_id is expected to be in UTF8 encoding
+ ::arrow::util::InitializeUTF8();
+ const uint8_t* data = reinterpret_cast<const uint8_t*>(key_id.c_str());
+ if (!::arrow::util::ValidateUTF8(data, key_id.size())) {
+ throw ParquetException("footer key id should be in UTF8 encoding");
+ }
+
+ if (key_id.empty()) {
+ return this;
+ }
+
+ return footer_key_metadata(key_id);
+}
+
+std::shared_ptr<ColumnEncryptionProperties>
+FileEncryptionProperties::column_encryption_properties(const std::string& column_path) {
+ if (encrypted_columns_.size() == 0) {
+ auto builder = std::make_shared<ColumnEncryptionProperties::Builder>(column_path);
+ return builder->build();
+ }
+ if (encrypted_columns_.find(column_path) != encrypted_columns_.end()) {
+ return encrypted_columns_[column_path];
+ }
+
+ return nullptr;
+}
+
+FileEncryptionProperties::FileEncryptionProperties(
+ ParquetCipher::type cipher, const std::string& footer_key,
+ const std::string& footer_key_metadata, bool encrypted_footer,
+ const std::string& aad_prefix, bool store_aad_prefix_in_file,
+ const ColumnPathToEncryptionPropertiesMap& encrypted_columns)
+ : footer_key_(footer_key),
+ footer_key_metadata_(footer_key_metadata),
+ encrypted_footer_(encrypted_footer),
+ aad_prefix_(aad_prefix),
+ store_aad_prefix_in_file_(store_aad_prefix_in_file),
+ encrypted_columns_(encrypted_columns) {
+ // file encryption properties object can be used for writing only one file.
+ // Upon completion of file writing, the encryption keys in the properties will be wiped
+ // out (set to 0 in memory).
+ utilized_ = false;
+
+ DCHECK(!footer_key.empty());
+ // footer_key must be either 16, 24 or 32 bytes.
+ DCHECK(footer_key.length() == 16 || footer_key.length() == 24 ||
+ footer_key.length() == 32);
+
+ uint8_t aad_file_unique[kAadFileUniqueLength];
+ memset(aad_file_unique, 0, kAadFileUniqueLength);
+ encryption::RandBytes(aad_file_unique, kAadFileUniqueLength);
+ std::string aad_file_unique_str(reinterpret_cast<char const*>(aad_file_unique),
+ kAadFileUniqueLength);
+
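+ // If an AAD prefix is used but not stored in the file, readers must supply it explicitly.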
+ bool supply_aad_prefix = false;
+ if (aad_prefix.empty()) {
+ file_aad_ = aad_file_unique_str;
+ } else {
+ file_aad_ = aad_prefix + aad_file_unique_str;
+ if (!store_aad_prefix_in_file) supply_aad_prefix = true;
+ }
+ algorithm_.algorithm = cipher;
+ algorithm_.aad.aad_file_unique = aad_file_unique_str;
+ algorithm_.aad.supply_aad_prefix = supply_aad_prefix;
+ if (!aad_prefix.empty() && store_aad_prefix_in_file) {
+ algorithm_.aad.aad_prefix = aad_prefix;
+ }
+}
+
+} // namespace parquet
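
For context, a minimal usage sketch of how the builders above are typically combined (not part of this diff; the key bytes, the key ids "kf"/"kc1", the column name "ss.links" and the helper function names are illustrative placeholders only):

    #include <memory>
    #include <string>

    #include "parquet/encryption/encryption.h"

    std::shared_ptr<parquet::FileEncryptionProperties> SampleEncryptionProperties() {
      const std::string footer_key = "0123456789012345";  // 16-byte AES key
      const std::string column_key = "1234567890123450";  // 16-byte AES key

      // Once an explicit column list is set, columns not in the map stay unencrypted.
      parquet::ColumnPathToEncryptionPropertiesMap encrypted_columns;
      auto column_props = parquet::ColumnEncryptionProperties::Builder("ss.links")
                              .key(column_key)
                              ->key_id("kc1")
                              ->build();
      encrypted_columns[column_props->column_path()] = column_props;

      return parquet::FileEncryptionProperties::Builder(footer_key)
          .footer_key_id("kf")
          ->encrypted_columns(encrypted_columns)
          ->build();
    }

    std::shared_ptr<parquet::FileDecryptionProperties> SampleDecryptionProperties() {
      // The retriever maps key ids stored in the file metadata back to key bytes.
      auto key_retriever = std::make_shared<parquet::StringKeyIdRetriever>();
      key_retriever->PutKey("kf", "0123456789012345");
      key_retriever->PutKey("kc1", "1234567890123450");

      return parquet::FileDecryptionProperties::Builder()
          .key_retriever(key_retriever)
          ->build();
    }
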
diff --git a/contrib/libs/apache/arrow/cpp/src/parquet/encryption/encryption.h b/contrib/libs/apache/arrow/cpp/src/parquet/encryption/encryption.h
index 840c669e6bd..8fd7ec8d3d0 100644
--- a/contrib/libs/apache/arrow/cpp/src/parquet/encryption/encryption.h
+++ b/contrib/libs/apache/arrow/cpp/src/parquet/encryption/encryption.h
@@ -1,510 +1,510 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-#pragma once
-
-#include <map>
-#include <memory>
-#include <string>
-#include <utility>
-
-#include "parquet/exception.h"
-#include "parquet/schema.h"
-#include "parquet/types.h"
-
-namespace parquet {
-
-static constexpr ParquetCipher::type kDefaultEncryptionAlgorithm =
- ParquetCipher::AES_GCM_V1;
-static constexpr int32_t kMaximalAadMetadataLength = 256;
-static constexpr bool kDefaultEncryptedFooter = true;
-static constexpr bool kDefaultCheckSignature = true;
-static constexpr bool kDefaultAllowPlaintextFiles = false;
-static constexpr int32_t kAadFileUniqueLength = 8;
-
-class ColumnDecryptionProperties;
-using ColumnPathToDecryptionPropertiesMap =
- std::map<std::string, std::shared_ptr<ColumnDecryptionProperties>>;
-
-class ColumnEncryptionProperties;
-using ColumnPathToEncryptionPropertiesMap =
- std::map<std::string, std::shared_ptr<ColumnEncryptionProperties>>;
-
-class PARQUET_EXPORT DecryptionKeyRetriever {
- public:
- virtual std::string GetKey(const std::string& key_metadata) = 0;
- virtual ~DecryptionKeyRetriever() {}
-};
-
-/// Simple integer key retriever
-class PARQUET_EXPORT IntegerKeyIdRetriever : public DecryptionKeyRetriever {
- public:
- void PutKey(uint32_t key_id, const std::string& key);
- std::string GetKey(const std::string& key_metadata) override;
-
- private:
- std::map<uint32_t, std::string> key_map_;
-};
-
-// Simple string key retriever
-class PARQUET_EXPORT StringKeyIdRetriever : public DecryptionKeyRetriever {
- public:
- void PutKey(const std::string& key_id, const std::string& key);
- std::string GetKey(const std::string& key_metadata) override;
-
- private:
- std::map<std::string, std::string> key_map_;
-};
-
-class PARQUET_EXPORT HiddenColumnException : public ParquetException {
- public:
- explicit HiddenColumnException(const std::string& columnPath)
- : ParquetException(columnPath.c_str()) {}
-};
-
-class PARQUET_EXPORT KeyAccessDeniedException : public ParquetException {
- public:
- explicit KeyAccessDeniedException(const std::string& columnPath)
- : ParquetException(columnPath.c_str()) {}
-};
-
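-// Returns a pointer into str's internal buffer; valid only while str is alive.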
-inline const uint8_t* str2bytes(const std::string& str) {
- if (str.empty()) return NULLPTR;
-
- char* cbytes = const_cast<char*>(str.c_str());
- return reinterpret_cast<const uint8_t*>(cbytes);
-}
-
-class PARQUET_EXPORT ColumnEncryptionProperties {
- public:
- class PARQUET_EXPORT Builder {
- public:
- /// Convenience builder for encrypted columns.
- explicit Builder(const std::string& name) : Builder(name, true) {}
-
- /// Convenience builder for encrypted columns.
- explicit Builder(const std::shared_ptr<schema::ColumnPath>& path)
- : Builder(path->ToDotString(), true) {}
-
- /// Set a column-specific key.
- /// If key is not set on an encrypted column, the column will
- /// be encrypted with the footer key.
- /// The key length must be either 16, 24 or 32 bytes.
- /// The key is cloned, and will be wiped out (array values set to 0) upon completion
- /// of file writing.
- /// Caller is responsible for wiping out the input key array.
- Builder* key(std::string column_key);
-
- /// Set key retrieval metadata.
- /// Use either key_metadata() or key_id(), not both.
- Builder* key_metadata(const std::string& key_metadata);
-
- /// A convenience function to set key metadata using a string id.
- /// The key_id will be converted to key retrieval metadata (UTF-8 array).
- /// Use either key_metadata() or key_id(), not both.
- Builder* key_id(const std::string& key_id);
-
- std::shared_ptr<ColumnEncryptionProperties> build() {
- return std::shared_ptr<ColumnEncryptionProperties>(
- new ColumnEncryptionProperties(encrypted_, column_path_, key_, key_metadata_));
- }
-
- private:
- const std::string column_path_;
- bool encrypted_;
- std::string key_;
- std::string key_metadata_;
-
- Builder(const std::string path, bool encrypted)
- : column_path_(path), encrypted_(encrypted) {}
- };
-
- std::string column_path() const { return column_path_; }
- bool is_encrypted() const { return encrypted_; }
- bool is_encrypted_with_footer_key() const { return encrypted_with_footer_key_; }
- std::string key() const { return key_; }
- std::string key_metadata() const { return key_metadata_; }
-
- /// Upon completion of file writing, the encryption key
- /// will be wiped out.
- void WipeOutEncryptionKey() { key_.clear(); }
-
- bool is_utilized() {
- if (key_.empty())
- return false; // can re-use column properties without encryption keys
- return utilized_;
- }
-
- /// ColumnEncryptionProperties object can be used for writing one file only.
- /// Mark ColumnEncryptionProperties as utilized once it is used in
- /// FileEncryptionProperties as the encryption key will be wiped out upon
- /// completion of file writing.
- void set_utilized() { utilized_ = true; }
-
- std::shared_ptr<ColumnEncryptionProperties> DeepClone() {
- std::string key_copy = key_;
- return std::shared_ptr<ColumnEncryptionProperties>(new ColumnEncryptionProperties(
- encrypted_, column_path_, key_copy, key_metadata_));
- }
-
- ColumnEncryptionProperties() = default;
- ColumnEncryptionProperties(const ColumnEncryptionProperties& other) = default;
- ColumnEncryptionProperties(ColumnEncryptionProperties&& other) = default;
-
- private:
- const std::string column_path_;
- bool encrypted_;
- bool encrypted_with_footer_key_;
- std::string key_;
- std::string key_metadata_;
- bool utilized_;
- explicit ColumnEncryptionProperties(bool encrypted, const std::string& column_path,
- const std::string& key,
- const std::string& key_metadata);
-};
-
-class PARQUET_EXPORT ColumnDecryptionProperties {
- public:
- class PARQUET_EXPORT Builder {
- public:
- explicit Builder(const std::string& name) : column_path_(name) {}
-
- explicit Builder(const std::shared_ptr<schema::ColumnPath>& path)
- : Builder(path->ToDotString()) {}
-
- /// Set an explicit column key. If applied to a file that contains
- /// key metadata for this column, the metadata will be ignored and
- /// the column will be decrypted with this key.
- /// The key length must be either 16, 24 or 32 bytes.
- Builder* key(const std::string& key);
-
- std::shared_ptr<ColumnDecryptionProperties> build();
-
- private:
- const std::string column_path_;
- std::string key_;
- };
-
- ColumnDecryptionProperties() = default;
- ColumnDecryptionProperties(const ColumnDecryptionProperties& other) = default;
- ColumnDecryptionProperties(ColumnDecryptionProperties&& other) = default;
-
- std::string column_path() const { return column_path_; }
- std::string key() const { return key_; }
- bool is_utilized() { return utilized_; }
-
- /// ColumnDecryptionProperties object can be used for reading one file only.
- /// Mark ColumnDecryptionProperties as utilized once it is used in
- /// FileDecryptionProperties as the encryption key will be wiped out upon
- /// completion of file reading.
- void set_utilized() { utilized_ = true; }
-
- /// Upon completion of file reading, the encryption key
- /// will be wiped out.
- void WipeOutDecryptionKey();
-
- std::shared_ptr<ColumnDecryptionProperties> DeepClone();
-
- private:
- const std::string column_path_;
- std::string key_;
- bool utilized_;
-
- /// This class is only required for setting explicit column decryption keys -
- /// to override key retriever (or to provide keys when key metadata and/or
- /// key retriever are not available)
- explicit ColumnDecryptionProperties(const std::string& column_path,
- const std::string& key);
-};
-
-class PARQUET_EXPORT AADPrefixVerifier {
- public:
- /// Verifies the identity (AAD Prefix) of an individual file,
- /// or of a file collection in a data set.
- /// Throws an exception if an AAD prefix is wrong.
- /// In a data set, AAD Prefixes should be collected,
- /// and then checked for missing files.
- virtual void Verify(const std::string& aad_prefix) = 0;
- virtual ~AADPrefixVerifier() {}
-};
-
-class PARQUET_EXPORT FileDecryptionProperties {
- public:
- class PARQUET_EXPORT Builder {
- public:
- Builder() {
- check_plaintext_footer_integrity_ = kDefaultCheckSignature;
- plaintext_files_allowed_ = kDefaultAllowPlaintextFiles;
- }
-
- /// Set an explicit footer key. If applied to a file that contains
- /// footer key metadata, the metadata will be ignored and the footer
- /// will be decrypted/verified with this key.
- /// If explicit key is not set, footer key will be fetched from
- /// key retriever.
- /// With explicit keys or AAD prefix, new encryption properties object must be
- /// created for each encrypted file.
- /// Explicit encryption keys (footer and column) are cloned.
- /// Upon completion of file reading, the cloned encryption keys in the properties
- /// will be wiped out (array values set to 0).
- /// Caller is responsible for wiping out the input key array.
- /// The footer key length must be either 16, 24 or 32 bytes.
- Builder* footer_key(const std::string footer_key);
-
- /// Set explicit column keys (decryption properties).
- /// It's also possible to set a key retriever on this property object.
- /// Upon file decryption, availability of explicit keys is checked before
- /// invocation of the retriever callback.
- /// If an explicit key is available for a footer or a column,
- /// its key metadata will be ignored.
- Builder* column_keys(
- const ColumnPathToDecryptionPropertiesMap& column_decryption_properties);
-
- /// Set a key retriever callback. It's also possible to
- /// set explicit footer or column keys on this file property object.
- /// Upon file decryption, availability of explicit keys is checked before
- /// invocation of the retriever callback.
- /// If an explicit key is available for a footer or a column,
- /// its key metadata will be ignored.
- Builder* key_retriever(const std::shared_ptr<DecryptionKeyRetriever>& key_retriever);
-
- /// Skip integrity verification of plaintext footers.
- /// If not called, integrity of plaintext footers will be checked at runtime,
- /// and an exception will be thrown in the following situations:
- /// - footer signing key is not available
- /// (not passed, or not found by key retriever)
- /// - footer content and signature don't match
- Builder* disable_footer_signature_verification() {
- check_plaintext_footer_integrity_ = false;
- return this;
- }
-
- /// Explicitly supply the file AAD prefix.
- /// Required when a prefix is used for file encryption but not stored in the file.
- /// If AAD prefix is stored in file, it will be compared to the explicitly
- /// supplied value and an exception will be thrown if they differ.
- Builder* aad_prefix(const std::string& aad_prefix);
-
- /// Set callback for verification of AAD Prefixes stored in file.
- Builder* aad_prefix_verifier(std::shared_ptr<AADPrefixVerifier> aad_prefix_verifier);
-
- /// By default, reading plaintext (unencrypted) files is not
- /// allowed when using a decryptor,
- /// in order to detect files that were not encrypted by mistake.
- /// However, the default behavior can be overridden by calling this method.
- /// The caller should then use a different method to ensure encryption
- /// of files with sensitive data.
- Builder* plaintext_files_allowed() {
- plaintext_files_allowed_ = true;
- return this;
- }
-
- std::shared_ptr<FileDecryptionProperties> build() {
- return std::shared_ptr<FileDecryptionProperties>(new FileDecryptionProperties(
- footer_key_, key_retriever_, check_plaintext_footer_integrity_, aad_prefix_,
- aad_prefix_verifier_, column_decryption_properties_, plaintext_files_allowed_));
- }
-
- private:
- std::string footer_key_;
- std::string aad_prefix_;
- std::shared_ptr<AADPrefixVerifier> aad_prefix_verifier_;
- ColumnPathToDecryptionPropertiesMap column_decryption_properties_;
-
- std::shared_ptr<DecryptionKeyRetriever> key_retriever_;
- bool check_plaintext_footer_integrity_;
- bool plaintext_files_allowed_;
- };
-
- std::string column_key(const std::string& column_path) const;
-
- std::string footer_key() const { return footer_key_; }
-
- std::string aad_prefix() const { return aad_prefix_; }
-
- const std::shared_ptr<DecryptionKeyRetriever>& key_retriever() const {
- return key_retriever_;
- }
-
- bool check_plaintext_footer_integrity() const {
- return check_plaintext_footer_integrity_;
- }
-
- bool plaintext_files_allowed() const { return plaintext_files_allowed_; }
-
- const std::shared_ptr<AADPrefixVerifier>& aad_prefix_verifier() const {
- return aad_prefix_verifier_;
- }
-
- /// Upon completion of file reading, the encryption keys in the properties
- /// will be wiped out (array values set to 0).
- void WipeOutDecryptionKeys();
-
- bool is_utilized();
-
- /// FileDecryptionProperties object can be used for reading one file only.
- /// Mark FileDecryptionProperties as utilized once it is used to read a file as the
- /// encryption keys will be wiped out upon completion of file reading.
- void set_utilized() { utilized_ = true; }
-
- /// FileDecryptionProperties object can be used for reading one file only.
- /// (unless this object keeps the keyRetrieval callback only, and no explicit
- /// keys or aadPrefix).
- /// At the end, keys are wiped out in the memory.
- /// This method allows cloning identical properties for another file,
- /// with an option to update the aadPrefix (if newAadPrefix is empty,
- /// aadPrefix will be cloned too).
- std::shared_ptr<FileDecryptionProperties> DeepClone(std::string new_aad_prefix = "");
-
- private:
- std::string footer_key_;
- std::string aad_prefix_;
- std::shared_ptr<AADPrefixVerifier> aad_prefix_verifier_;
-
- const std::string empty_string_ = "";
- ColumnPathToDecryptionPropertiesMap column_decryption_properties_;
-
- std::shared_ptr<DecryptionKeyRetriever> key_retriever_;
- bool check_plaintext_footer_integrity_;
- bool plaintext_files_allowed_;
- bool utilized_;
-
- FileDecryptionProperties(
- const std::string& footer_key,
- std::shared_ptr<DecryptionKeyRetriever> key_retriever,
- bool check_plaintext_footer_integrity, const std::string& aad_prefix,
- std::shared_ptr<AADPrefixVerifier> aad_prefix_verifier,
- const ColumnPathToDecryptionPropertiesMap& column_decryption_properties,
- bool plaintext_files_allowed);
-};
-
-class PARQUET_EXPORT FileEncryptionProperties {
- public:
- class PARQUET_EXPORT Builder {
- public:
- explicit Builder(const std::string& footer_key)
- : parquet_cipher_(kDefaultEncryptionAlgorithm),
- encrypted_footer_(kDefaultEncryptedFooter) {
- footer_key_ = footer_key;
- store_aad_prefix_in_file_ = false;
- }
-
- /// Create files with plaintext footer.
- /// If not called, the files will be created with encrypted footer (default).
- Builder* set_plaintext_footer() {
- encrypted_footer_ = false;
- return this;
- }
-
- /// Set encryption algorithm.
- /// If not called, files will be encrypted with AES_GCM_V1 (default).
- Builder* algorithm(ParquetCipher::type parquet_cipher) {
- parquet_cipher_ = parquet_cipher;
- return this;
- }
-
- /// Set key retrieval metadata (converted from a string id).
- /// Use either footer_key_metadata or footer_key_id, not both.
- Builder* footer_key_id(const std::string& key_id);
-
- /// Set key retrieval metadata.
- /// Use either footer_key_metadata or footer_key_id, not both.
- Builder* footer_key_metadata(const std::string& footer_key_metadata);
-
- /// Set the file AAD Prefix.
- Builder* aad_prefix(const std::string& aad_prefix);
-
- /// Skip storing AAD Prefix in file.
- /// If not called, and if AAD Prefix is set, it will be stored.
- Builder* disable_aad_prefix_storage();
-
- /// Set the list of encrypted columns and their properties (keys etc).
- /// If not called, all columns will be encrypted with the footer key.
- /// If called, the file columns not in the list will be left unencrypted.
- Builder* encrypted_columns(
- const ColumnPathToEncryptionPropertiesMap& encrypted_columns);
-
- std::shared_ptr<FileEncryptionProperties> build() {
- return std::shared_ptr<FileEncryptionProperties>(new FileEncryptionProperties(
- parquet_cipher_, footer_key_, footer_key_metadata_, encrypted_footer_,
- aad_prefix_, store_aad_prefix_in_file_, encrypted_columns_));
- }
-
- private:
- ParquetCipher::type parquet_cipher_;
- bool encrypted_footer_;
- std::string footer_key_;
- std::string footer_key_metadata_;
-
- std::string aad_prefix_;
- bool store_aad_prefix_in_file_;
- ColumnPathToEncryptionPropertiesMap encrypted_columns_;
- };
- bool encrypted_footer() const { return encrypted_footer_; }
-
- EncryptionAlgorithm algorithm() const { return algorithm_; }
-
- std::string footer_key() const { return footer_key_; }
-
- std::string footer_key_metadata() const { return footer_key_metadata_; }
-
- std::string file_aad() const { return file_aad_; }
-
- std::shared_ptr<ColumnEncryptionProperties> column_encryption_properties(
- const std::string& column_path);
-
- bool is_utilized() const { return utilized_; }
-
- /// FileEncryptionProperties object can be used for writing one file only.
- /// Mark FileEncryptionProperties as utilized once it is used to write a file as the
- /// encryption keys will be wiped out upon completion of file writing.
- void set_utilized() { utilized_ = true; }
-
- /// Upon completion of file writing, the encryption keys
- /// will be wiped out (array values set to 0).
- void WipeOutEncryptionKeys();
-
- /// FileEncryptionProperties object can be used for writing one file only.
- /// (at the end, keys are wiped out in the memory).
- /// This method allows cloning identical properties for another file,
- /// with an option to update the aadPrefix (if newAadPrefix is empty,
- /// aadPrefix will be cloned too).
- std::shared_ptr<FileEncryptionProperties> DeepClone(std::string new_aad_prefix = "");
-
- ColumnPathToEncryptionPropertiesMap encrypted_columns() const {
- return encrypted_columns_;
- }
-
- private:
- EncryptionAlgorithm algorithm_;
- std::string footer_key_;
- std::string footer_key_metadata_;
- bool encrypted_footer_;
- std::string file_aad_;
- std::string aad_prefix_;
- bool utilized_;
- bool store_aad_prefix_in_file_;
- ColumnPathToEncryptionPropertiesMap encrypted_columns_;
-
- FileEncryptionProperties(ParquetCipher::type cipher, const std::string& footer_key,
- const std::string& footer_key_metadata, bool encrypted_footer,
- const std::string& aad_prefix, bool store_aad_prefix_in_file,
- const ColumnPathToEncryptionPropertiesMap& encrypted_columns);
-};
-
-} // namespace parquet
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+
+#include <map>
+#include <memory>
+#include <string>
+#include <utility>
+
+#include "parquet/exception.h"
+#include "parquet/schema.h"
+#include "parquet/types.h"
+
+namespace parquet {
+
+static constexpr ParquetCipher::type kDefaultEncryptionAlgorithm =
+ ParquetCipher::AES_GCM_V1;
+static constexpr int32_t kMaximalAadMetadataLength = 256;
+static constexpr bool kDefaultEncryptedFooter = true;
+static constexpr bool kDefaultCheckSignature = true;
+static constexpr bool kDefaultAllowPlaintextFiles = false;
+static constexpr int32_t kAadFileUniqueLength = 8;
+
+class ColumnDecryptionProperties;
+using ColumnPathToDecryptionPropertiesMap =
+ std::map<std::string, std::shared_ptr<ColumnDecryptionProperties>>;
+
+class ColumnEncryptionProperties;
+using ColumnPathToEncryptionPropertiesMap =
+ std::map<std::string, std::shared_ptr<ColumnEncryptionProperties>>;
+
+class PARQUET_EXPORT DecryptionKeyRetriever {
+ public:
+ virtual std::string GetKey(const std::string& key_metadata) = 0;
+ virtual ~DecryptionKeyRetriever() {}
+};
+
+/// Simple integer key retriever
+class PARQUET_EXPORT IntegerKeyIdRetriever : public DecryptionKeyRetriever {
+ public:
+ void PutKey(uint32_t key_id, const std::string& key);
+ std::string GetKey(const std::string& key_metadata) override;
+
+ private:
+ std::map<uint32_t, std::string> key_map_;
+};
+
+// Simple string key retriever
+class PARQUET_EXPORT StringKeyIdRetriever : public DecryptionKeyRetriever {
+ public:
+ void PutKey(const std::string& key_id, const std::string& key);
+ std::string GetKey(const std::string& key_metadata) override;
+
+ private:
+ std::map<std::string, std::string> key_map_;
+};
+
+class PARQUET_EXPORT HiddenColumnException : public ParquetException {
+ public:
+ explicit HiddenColumnException(const std::string& columnPath)
+ : ParquetException(columnPath.c_str()) {}
+};
+
+class PARQUET_EXPORT KeyAccessDeniedException : public ParquetException {
+ public:
+ explicit KeyAccessDeniedException(const std::string& columnPath)
+ : ParquetException(columnPath.c_str()) {}
+};
+
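+// Returns a pointer into str's internal buffer; valid only while str is alive.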
+inline const uint8_t* str2bytes(const std::string& str) {
+ if (str.empty()) return NULLPTR;
+
+ char* cbytes = const_cast<char*>(str.c_str());
+ return reinterpret_cast<const uint8_t*>(cbytes);
+}
+
+class PARQUET_EXPORT ColumnEncryptionProperties {
+ public:
+ class PARQUET_EXPORT Builder {
+ public:
+ /// Convenience builder for encrypted columns.
+ explicit Builder(const std::string& name) : Builder(name, true) {}
+
+ /// Convenience builder for encrypted columns.
+ explicit Builder(const std::shared_ptr<schema::ColumnPath>& path)
+ : Builder(path->ToDotString(), true) {}
+
+ /// Set a column-specific key.
+ /// If key is not set on an encrypted column, the column will
+ /// be encrypted with the footer key.
+ /// The key length must be either 16, 24 or 32 bytes.
+ /// The key is cloned, and will be wiped out (array values set to 0) upon completion
+ /// of file writing.
+ /// Caller is responsible for wiping out the input key array.
+ Builder* key(std::string column_key);
+
+ /// Set key retrieval metadata.
+ /// Use either key_metadata() or key_id(), not both.
+ Builder* key_metadata(const std::string& key_metadata);
+
+ /// A convenience function to set key metadata using a string id.
+ /// The key_id will be converted to key retrieval metadata (UTF-8 array).
+ /// Use either key_metadata() or key_id(), not both.
+ Builder* key_id(const std::string& key_id);
+
+ std::shared_ptr<ColumnEncryptionProperties> build() {
+ return std::shared_ptr<ColumnEncryptionProperties>(
+ new ColumnEncryptionProperties(encrypted_, column_path_, key_, key_metadata_));
+ }
+
+ private:
+ const std::string column_path_;
+ bool encrypted_;
+ std::string key_;
+ std::string key_metadata_;
+
+ Builder(const std::string path, bool encrypted)
+ : column_path_(path), encrypted_(encrypted) {}
+ };
+
+ std::string column_path() const { return column_path_; }
+ bool is_encrypted() const { return encrypted_; }
+ bool is_encrypted_with_footer_key() const { return encrypted_with_footer_key_; }
+ std::string key() const { return key_; }
+ std::string key_metadata() const { return key_metadata_; }
+
+ /// Upon completion of file writing, the encryption key
+ /// will be wiped out.
+ void WipeOutEncryptionKey() { key_.clear(); }
+
+ bool is_utilized() {
+ if (key_.empty())
+ return false; // can re-use column properties without encryption keys
+ return utilized_;
+ }
+
+ /// ColumnEncryptionProperties object can be used for writing one file only.
+ /// Mark ColumnEncryptionProperties as utilized once it is used in
+ /// FileEncryptionProperties as the encryption key will be wiped out upon
+ /// completion of file writing.
+ void set_utilized() { utilized_ = true; }
+
+ std::shared_ptr<ColumnEncryptionProperties> DeepClone() {
+ std::string key_copy = key_;
+ return std::shared_ptr<ColumnEncryptionProperties>(new ColumnEncryptionProperties(
+ encrypted_, column_path_, key_copy, key_metadata_));
+ }
+
+ ColumnEncryptionProperties() = default;
+ ColumnEncryptionProperties(const ColumnEncryptionProperties& other) = default;
+ ColumnEncryptionProperties(ColumnEncryptionProperties&& other) = default;
+
+ private:
+ const std::string column_path_;
+ bool encrypted_;
+ bool encrypted_with_footer_key_;
+ std::string key_;
+ std::string key_metadata_;
+ bool utilized_;
+ explicit ColumnEncryptionProperties(bool encrypted, const std::string& column_path,
+ const std::string& key,
+ const std::string& key_metadata);
+};
+
+class PARQUET_EXPORT ColumnDecryptionProperties {
+ public:
+ class PARQUET_EXPORT Builder {
+ public:
+ explicit Builder(const std::string& name) : column_path_(name) {}
+
+ explicit Builder(const std::shared_ptr<schema::ColumnPath>& path)
+ : Builder(path->ToDotString()) {}
+
+ /// Set an explicit column key. If applied to a file that contains
+ /// key metadata for this column, the metadata will be ignored and
+ /// the column will be decrypted with this key.
+ /// The key length must be either 16, 24 or 32 bytes.
+ Builder* key(const std::string& key);
+
+ std::shared_ptr<ColumnDecryptionProperties> build();
+
+ private:
+ const std::string column_path_;
+ std::string key_;
+ };
+
+ ColumnDecryptionProperties() = default;
+ ColumnDecryptionProperties(const ColumnDecryptionProperties& other) = default;
+ ColumnDecryptionProperties(ColumnDecryptionProperties&& other) = default;
+
+ std::string column_path() const { return column_path_; }
+ std::string key() const { return key_; }
+ bool is_utilized() { return utilized_; }
+
+ /// ColumnDecryptionProperties object can be used for reading one file only.
+ /// Mark ColumnDecryptionProperties as utilized once it is used in
+ /// FileDecryptionProperties as the encryption key will be wiped out upon
+ /// completion of file reading.
+ void set_utilized() { utilized_ = true; }
+
+ /// Upon completion of file reading, the encryption key
+ /// will be wiped out.
+ void WipeOutDecryptionKey();
+
+ std::shared_ptr<ColumnDecryptionProperties> DeepClone();
+
+ private:
+ const std::string column_path_;
+ std::string key_;
+ bool utilized_;
+
+ /// This class is only required for setting explicit column decryption keys -
+ /// to override key retriever (or to provide keys when key metadata and/or
+ /// key retriever are not available)
+ explicit ColumnDecryptionProperties(const std::string& column_path,
+ const std::string& key);
+};
+
+class PARQUET_EXPORT AADPrefixVerifier {
+ public:
+ /// Verifies the identity (AAD Prefix) of an individual file,
+ /// or of a file collection in a data set.
+ /// Throws an exception if an AAD prefix is wrong.
+ /// In a data set, AAD Prefixes should be collected,
+ /// and then checked for missing files.
+ virtual void Verify(const std::string& aad_prefix) = 0;
+ virtual ~AADPrefixVerifier() {}
+};
+
+class PARQUET_EXPORT FileDecryptionProperties {
+ public:
+ class PARQUET_EXPORT Builder {
+ public:
+ Builder() {
+ check_plaintext_footer_integrity_ = kDefaultCheckSignature;
+ plaintext_files_allowed_ = kDefaultAllowPlaintextFiles;
+ }
+
+ /// Set an explicit footer key. If applied to a file that contains
+ /// footer key metadata, the metadata will be ignored and the footer
+ /// will be decrypted/verified with this key.
+ /// If explicit key is not set, footer key will be fetched from
+ /// key retriever.
+ /// With explicit keys or AAD prefix, new encryption properties object must be
+ /// created for each encrypted file.
+ /// Explicit encryption keys (footer and column) are cloned.
+ /// Upon completion of file reading, the cloned encryption keys in the properties
+ /// will be wiped out (array values set to 0).
+ /// Caller is responsible for wiping out the input key array.
+ /// The footer key length must be either 16, 24 or 32 bytes.
+ Builder* footer_key(const std::string footer_key);
+
+ /// Set explicit column keys (decryption properties).
+ /// It's also possible to set a key retriever on this property object.
+ /// Upon file decryption, availability of explicit keys is checked before
+ /// invocation of the retriever callback.
+ /// If an explicit key is available for a footer or a column,
+ /// its key metadata will be ignored.
+ Builder* column_keys(
+ const ColumnPathToDecryptionPropertiesMap& column_decryption_properties);
+
+ /// Set a key retriever callback. It's also possible to
+ /// set explicit footer or column keys on this file property object.
+ /// Upon file decryption, availability of explicit keys is checked before
+ /// invocation of the retriever callback.
+ /// If an explicit key is available for a footer or a column,
+ /// its key metadata will be ignored.
+ Builder* key_retriever(const std::shared_ptr<DecryptionKeyRetriever>& key_retriever);
+
+ /// Skip integrity verification of plaintext footers.
+ /// If not called, integrity of plaintext footers will be checked at runtime,
+ /// and an exception will be thrown in the following situations:
+ /// - footer signing key is not available
+ /// (not passed, or not found by key retriever)
+ /// - footer content and signature don't match
+ Builder* disable_footer_signature_verification() {
+ check_plaintext_footer_integrity_ = false;
+ return this;
+ }
+
+ /// Explicitly supply the file AAD prefix.
+ /// Required when a prefix is used for file encryption but not stored in the file.
+ /// If AAD prefix is stored in file, it will be compared to the explicitly
+ /// supplied value and an exception will be thrown if they differ.
+ Builder* aad_prefix(const std::string& aad_prefix);
+
+ /// Set callback for verification of AAD Prefixes stored in file.
+ Builder* aad_prefix_verifier(std::shared_ptr<AADPrefixVerifier> aad_prefix_verifier);
+
+ /// By default, reading plaintext (unencrypted) files is not
+ /// allowed when using a decryptor,
+ /// in order to detect files that were not encrypted by mistake.
+ /// However, the default behavior can be overridden by calling this method.
+ /// The caller should then use a different method to ensure encryption
+ /// of files with sensitive data.
+ Builder* plaintext_files_allowed() {
+ plaintext_files_allowed_ = true;
+ return this;
+ }
+
+ std::shared_ptr<FileDecryptionProperties> build() {
+ return std::shared_ptr<FileDecryptionProperties>(new FileDecryptionProperties(
+ footer_key_, key_retriever_, check_plaintext_footer_integrity_, aad_prefix_,
+ aad_prefix_verifier_, column_decryption_properties_, plaintext_files_allowed_));
+ }
+
+ private:
+ std::string footer_key_;
+ std::string aad_prefix_;
+ std::shared_ptr<AADPrefixVerifier> aad_prefix_verifier_;
+ ColumnPathToDecryptionPropertiesMap column_decryption_properties_;
+
+ std::shared_ptr<DecryptionKeyRetriever> key_retriever_;
+ bool check_plaintext_footer_integrity_;
+ bool plaintext_files_allowed_;
+ };
+
+ std::string column_key(const std::string& column_path) const;
+
+ std::string footer_key() const { return footer_key_; }
+
+ std::string aad_prefix() const { return aad_prefix_; }
+
+ const std::shared_ptr<DecryptionKeyRetriever>& key_retriever() const {
+ return key_retriever_;
+ }
+
+ bool check_plaintext_footer_integrity() const {
+ return check_plaintext_footer_integrity_;
+ }
+
+ bool plaintext_files_allowed() const { return plaintext_files_allowed_; }
+
+ const std::shared_ptr<AADPrefixVerifier>& aad_prefix_verifier() const {
+ return aad_prefix_verifier_;
+ }
+
+ /// Upon completion of file reading, the encryption keys in the properties
+ /// will be wiped out (array values set to 0).
+ void WipeOutDecryptionKeys();
+
+ bool is_utilized();
+
+ /// FileDecryptionProperties object can be used for reading one file only.
+ /// Mark FileDecryptionProperties as utilized once it is used to read a file as the
+ /// encryption keys will be wiped out upon completion of file reading.
+ void set_utilized() { utilized_ = true; }
+
+ /// FileDecryptionProperties object can be used for reading one file only.
+ /// (unless this object keeps the keyRetrieval callback only, and no explicit
+ /// keys or aadPrefix).
+ /// At the end, keys are wiped out in the memory.
+ /// This method allows cloning identical properties for another file,
+ /// with an option to update the aadPrefix (if newAadPrefix is empty,
+ /// aadPrefix will be cloned too).
+ std::shared_ptr<FileDecryptionProperties> DeepClone(std::string new_aad_prefix = "");
+
+ private:
+ std::string footer_key_;
+ std::string aad_prefix_;
+ std::shared_ptr<AADPrefixVerifier> aad_prefix_verifier_;
+
+ const std::string empty_string_ = "";
+ ColumnPathToDecryptionPropertiesMap column_decryption_properties_;
+
+ std::shared_ptr<DecryptionKeyRetriever> key_retriever_;
+ bool check_plaintext_footer_integrity_;
+ bool plaintext_files_allowed_;
+ bool utilized_;
+
+ FileDecryptionProperties(
+ const std::string& footer_key,
+ std::shared_ptr<DecryptionKeyRetriever> key_retriever,
+ bool check_plaintext_footer_integrity, const std::string& aad_prefix,
+ std::shared_ptr<AADPrefixVerifier> aad_prefix_verifier,
+ const ColumnPathToDecryptionPropertiesMap& column_decryption_properties,
+ bool plaintext_files_allowed);
+};
+
+class PARQUET_EXPORT FileEncryptionProperties {
+ public:
+ class PARQUET_EXPORT Builder {
+ public:
+ explicit Builder(const std::string& footer_key)
+ : parquet_cipher_(kDefaultEncryptionAlgorithm),
+ encrypted_footer_(kDefaultEncryptedFooter) {
+ footer_key_ = footer_key;
+ store_aad_prefix_in_file_ = false;
+ }
+
+ /// Create files with plaintext footer.
+ /// If not called, the files will be created with encrypted footer (default).
+ Builder* set_plaintext_footer() {
+ encrypted_footer_ = false;
+ return this;
+ }
+
+ /// Set encryption algorithm.
+ /// If not called, files will be encrypted with AES_GCM_V1 (default).
+ Builder* algorithm(ParquetCipher::type parquet_cipher) {
+ parquet_cipher_ = parquet_cipher;
+ return this;
+ }
+
+ /// Set the key retrieval metadata (converted from a string key id).
+ /// Use either footer_key_metadata or footer_key_id, not both.
+ Builder* footer_key_id(const std::string& key_id);
+
+ /// Set the key retrieval metadata.
+ /// Use either footer_key_metadata or footer_key_id, not both.
+ Builder* footer_key_metadata(const std::string& footer_key_metadata);
+
+ /// Set the file AAD Prefix.
+ Builder* aad_prefix(const std::string& aad_prefix);
+
+ /// Skip storing AAD Prefix in file.
+ /// If not called, and if AAD Prefix is set, it will be stored.
+ Builder* disable_aad_prefix_storage();
+
+ /// Set the list of encrypted columns and their properties (keys etc).
+ /// If not called, all columns will be encrypted with the footer key.
+ /// If called, the file columns not in the list will be left unencrypted.
+ Builder* encrypted_columns(
+ const ColumnPathToEncryptionPropertiesMap& encrypted_columns);
+
+ std::shared_ptr<FileEncryptionProperties> build() {
+ return std::shared_ptr<FileEncryptionProperties>(new FileEncryptionProperties(
+ parquet_cipher_, footer_key_, footer_key_metadata_, encrypted_footer_,
+ aad_prefix_, store_aad_prefix_in_file_, encrypted_columns_));
+ }
+
+ private:
+ ParquetCipher::type parquet_cipher_;
+ bool encrypted_footer_;
+ std::string footer_key_;
+ std::string footer_key_metadata_;
+
+ std::string aad_prefix_;
+ bool store_aad_prefix_in_file_;
+ ColumnPathToEncryptionPropertiesMap encrypted_columns_;
+ };
+ bool encrypted_footer() const { return encrypted_footer_; }
+
+ EncryptionAlgorithm algorithm() const { return algorithm_; }
+
+ std::string footer_key() const { return footer_key_; }
+
+ std::string footer_key_metadata() const { return footer_key_metadata_; }
+
+ std::string file_aad() const { return file_aad_; }
+
+ std::shared_ptr<ColumnEncryptionProperties> column_encryption_properties(
+ const std::string& column_path);
+
+ bool is_utilized() const { return utilized_; }
+
+ /// A FileEncryptionProperties object can be used for writing one file only.
+ /// Mark it as utilized once it has been used to write a file; the encryption
+ /// keys will be wiped out upon completion of file writing.
+ void set_utilized() { utilized_ = true; }
+
+ /// Upon completion of file writing, the encryption keys
+ /// will be wiped out (array values set to 0).
+ void WipeOutEncryptionKeys();
+
+ /// A FileEncryptionProperties object can be used for writing one file only
+ /// (at the end, the keys are wiped from memory).
+ /// This method clones identical properties for another file, with an
+ /// option to update the AAD prefix (if new_aad_prefix is empty, the
+ /// AAD prefix is cloned too).
+ std::shared_ptr<FileEncryptionProperties> DeepClone(std::string new_aad_prefix = "");
+
+ ColumnPathToEncryptionPropertiesMap encrypted_columns() const {
+ return encrypted_columns_;
+ }
+
+ private:
+ EncryptionAlgorithm algorithm_;
+ std::string footer_key_;
+ std::string footer_key_metadata_;
+ bool encrypted_footer_;
+ std::string file_aad_;
+ std::string aad_prefix_;
+ bool utilized_;
+ bool store_aad_prefix_in_file_;
+ ColumnPathToEncryptionPropertiesMap encrypted_columns_;
+
+ FileEncryptionProperties(ParquetCipher::type cipher, const std::string& footer_key,
+ const std::string& footer_key_metadata, bool encrypted_footer,
+ const std::string& aad_prefix, bool store_aad_prefix_in_file,
+ const ColumnPathToEncryptionPropertiesMap& encrypted_columns);
+};
+
+} // namespace parquet
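Taken together, the two builders above form the writer/reader property surface of this header. The sketch below shows one plausible way to wire them up; `MakeEncryptionProps`/`MakeDecryptionProps` are illustrative helpers, the `"example.parquet"` AAD prefix is a placeholder, and the reader-side `footer_key()` setter is assumed from the part of this header that the hunk above does not show.

```cpp
#include <memory>
#include <string>

#include "parquet/encryption/encryption.h"

// Hypothetical helper: footer_key must be 16, 24 or 32 bytes (AES-128/192/256).
std::shared_ptr<parquet::FileEncryptionProperties> MakeEncryptionProps(
    const std::string& footer_key) {
  // encrypted_columns() is not called, so all columns fall back to the
  // footer key, per the Builder documentation above.
  return parquet::FileEncryptionProperties::Builder(footer_key)
      .algorithm(parquet::ParquetCipher::AES_GCM_V1)
      ->aad_prefix("example.parquet")  // placeholder prefix
      ->disable_aad_prefix_storage()   // prefix must then be supplied on read
      ->build();
}

// Hypothetical helper for the reader side. Because the AAD prefix was not
// stored in the file, it has to be supplied explicitly here.
std::shared_ptr<parquet::FileDecryptionProperties> MakeDecryptionProps(
    const std::string& footer_key) {
  return parquet::FileDecryptionProperties::Builder()
      .footer_key(footer_key)  // setter assumed from the unshown hunk
      ->aad_prefix("example.parquet")
      ->build();
}
```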
diff --git a/contrib/libs/apache/arrow/cpp/src/parquet/encryption/encryption_internal.h b/contrib/libs/apache/arrow/cpp/src/parquet/encryption/encryption_internal.h
index 48e64574b43..e50fb9d0b8a 100644
--- a/contrib/libs/apache/arrow/cpp/src/parquet/encryption/encryption_internal.h
+++ b/contrib/libs/apache/arrow/cpp/src/parquet/encryption/encryption_internal.h
@@ -1,116 +1,116 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-#pragma once
-
-#include <memory>
-#include <string>
-#include <vector>
-
-#include "parquet/properties.h"
-#include "parquet/types.h"
-
-using parquet::ParquetCipher;
-
-namespace parquet {
-namespace encryption {
-
-constexpr int kGcmTagLength = 16;
-constexpr int kNonceLength = 12;
-
-// Module types
-constexpr int8_t kFooter = 0;
-constexpr int8_t kColumnMetaData = 1;
-constexpr int8_t kDataPage = 2;
-constexpr int8_t kDictionaryPage = 3;
-constexpr int8_t kDataPageHeader = 4;
-constexpr int8_t kDictionaryPageHeader = 5;
-constexpr int8_t kColumnIndex = 6;
-constexpr int8_t kOffsetIndex = 7;
-
-/// Performs AES encryption operations with GCM or CTR ciphers.
-class AesEncryptor {
- public:
- /// Can serve one key length only. Possible values: 16, 24, 32 bytes.
- explicit AesEncryptor(ParquetCipher::type alg_id, int key_len, bool metadata);
-
- static AesEncryptor* Make(ParquetCipher::type alg_id, int key_len, bool metadata,
- std::vector<AesEncryptor*>* all_encryptors);
-
- ~AesEncryptor();
-
- /// Size difference between plaintext and ciphertext, for this cipher.
- int CiphertextSizeDelta();
-
- /// Encrypts plaintext with the key and aad. Key length is passed only for
- /// validation. If it differs from the value in the constructor, an exception is thrown.
- int Encrypt(const uint8_t* plaintext, int plaintext_len, const uint8_t* key,
- int key_len, const uint8_t* aad, int aad_len, uint8_t* ciphertext);
-
- /// Encrypts plaintext footer, in order to compute footer signature (tag).
- int SignedFooterEncrypt(const uint8_t* footer, int footer_len, const uint8_t* key,
- int key_len, const uint8_t* aad, int aad_len,
- const uint8_t* nonce, uint8_t* encrypted_footer);
-
- void WipeOut();
-
- private:
- // PIMPL Idiom
- class AesEncryptorImpl;
- std::unique_ptr<AesEncryptorImpl> impl_;
-};
-
-/// Performs AES decryption operations with GCM or CTR ciphers.
-class AesDecryptor {
- public:
- /// Can serve one key length only. Possible values: 16, 24, 32 bytes.
- explicit AesDecryptor(ParquetCipher::type alg_id, int key_len, bool metadata);
-
- static AesDecryptor* Make(ParquetCipher::type alg_id, int key_len, bool metadata,
- std::vector<AesDecryptor*>* all_decryptors);
-
- ~AesDecryptor();
- void WipeOut();
-
- /// Size difference between plaintext and ciphertext, for this cipher.
- int CiphertextSizeDelta();
-
- /// Decrypts ciphertext with the key and aad. Key length is passed only for
- /// validation. If it differs from the value in the constructor, an exception is thrown.
- int Decrypt(const uint8_t* ciphertext, int ciphertext_len, const uint8_t* key,
- int key_len, const uint8_t* aad, int aad_len, uint8_t* plaintext);
-
- private:
- // PIMPL Idiom
- class AesDecryptorImpl;
- std::unique_ptr<AesDecryptorImpl> impl_;
-};
-
-std::string CreateModuleAad(const std::string& file_aad, int8_t module_type,
- int16_t row_group_ordinal, int16_t column_ordinal,
- int16_t page_ordinal);
-
-std::string CreateFooterAad(const std::string& aad_prefix_bytes);
-
-// Update last two bytes of page (or page header) module AAD
-void QuickUpdatePageAad(const std::string& AAD, int16_t new_page_ordinal);
-
-// Wraps OpenSSL RAND_bytes function
-void RandBytes(unsigned char* buf, int num);
-
-} // namespace encryption
-} // namespace parquet
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+
+#include <memory>
+#include <string>
+#include <vector>
+
+#include "parquet/properties.h"
+#include "parquet/types.h"
+
+using parquet::ParquetCipher;
+
+namespace parquet {
+namespace encryption {
+
+constexpr int kGcmTagLength = 16;
+constexpr int kNonceLength = 12;
+
+// Module types
+constexpr int8_t kFooter = 0;
+constexpr int8_t kColumnMetaData = 1;
+constexpr int8_t kDataPage = 2;
+constexpr int8_t kDictionaryPage = 3;
+constexpr int8_t kDataPageHeader = 4;
+constexpr int8_t kDictionaryPageHeader = 5;
+constexpr int8_t kColumnIndex = 6;
+constexpr int8_t kOffsetIndex = 7;
+
+/// Performs AES encryption operations with GCM or CTR ciphers.
+class AesEncryptor {
+ public:
+ /// Can serve one key length only. Possible values: 16, 24, 32 bytes.
+ explicit AesEncryptor(ParquetCipher::type alg_id, int key_len, bool metadata);
+
+ static AesEncryptor* Make(ParquetCipher::type alg_id, int key_len, bool metadata,
+ std::vector<AesEncryptor*>* all_encryptors);
+
+ ~AesEncryptor();
+
+ /// Size difference between plaintext and ciphertext, for this cipher.
+ int CiphertextSizeDelta();
+
+ /// Encrypts plaintext with the key and aad. Key length is passed only for
+ /// validation. If it differs from the value in the constructor, an exception is thrown.
+ int Encrypt(const uint8_t* plaintext, int plaintext_len, const uint8_t* key,
+ int key_len, const uint8_t* aad, int aad_len, uint8_t* ciphertext);
+
+ /// Encrypts plaintext footer, in order to compute footer signature (tag).
+ int SignedFooterEncrypt(const uint8_t* footer, int footer_len, const uint8_t* key,
+ int key_len, const uint8_t* aad, int aad_len,
+ const uint8_t* nonce, uint8_t* encrypted_footer);
+
+ void WipeOut();
+
+ private:
+ // PIMPL Idiom
+ class AesEncryptorImpl;
+ std::unique_ptr<AesEncryptorImpl> impl_;
+};
+
+/// Performs AES decryption operations with GCM or CTR ciphers.
+class AesDecryptor {
+ public:
+ /// Can serve one key length only. Possible values: 16, 24, 32 bytes.
+ explicit AesDecryptor(ParquetCipher::type alg_id, int key_len, bool metadata);
+
+ static AesDecryptor* Make(ParquetCipher::type alg_id, int key_len, bool metadata,
+ std::vector<AesDecryptor*>* all_decryptors);
+
+ ~AesDecryptor();
+ void WipeOut();
+
+ /// Size difference between plaintext and ciphertext, for this cipher.
+ int CiphertextSizeDelta();
+
+ /// Decrypts ciphertext with the key and aad. Key length is passed only for
+ /// validation. If it differs from the value in the constructor, an exception is thrown.
+ int Decrypt(const uint8_t* ciphertext, int ciphertext_len, const uint8_t* key,
+ int key_len, const uint8_t* aad, int aad_len, uint8_t* plaintext);
+
+ private:
+ // PIMPL Idiom
+ class AesDecryptorImpl;
+ std::unique_ptr<AesDecryptorImpl> impl_;
+};
+
+std::string CreateModuleAad(const std::string& file_aad, int8_t module_type,
+ int16_t row_group_ordinal, int16_t column_ordinal,
+ int16_t page_ordinal);
+
+std::string CreateFooterAad(const std::string& aad_prefix_bytes);
+
+// Update last two bytes of page (or page header) module AAD
+void QuickUpdatePageAad(const std::string& AAD, int16_t new_page_ordinal);
+
+// Wraps OpenSSL RAND_bytes function
+void RandBytes(unsigned char* buf, int num);
+
+} // namespace encryption
+} // namespace parquet
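CreateModuleAad()'s signature above fixes the inputs of the AAD construction but not its byte layout. For orientation, here is a sketch of the layout implied by the Parquet modular-encryption format (file AAD, a one-byte module type, then little-endian two-byte ordinals); the authoritative implementation lives in encryption_internal.cc, which this diff does not include, so treat this as an assumption rather than the library's code.

```cpp
#include <cstdint>
#include <string>

// Illustrative sketch only: the assumed byte layout behind CreateModuleAad().
// QuickUpdatePageAad() then rewrites the trailing two page-ordinal bytes in
// place, which is why it only needs the AAD string and the new ordinal.
std::string SketchModuleAad(const std::string& file_aad, int8_t module_type,
                            int16_t row_group_ordinal, int16_t column_ordinal,
                            int16_t page_ordinal) {
  auto append_le16 = [](std::string* out, int16_t v) {
    out->push_back(static_cast<char>(v & 0xFF));
    out->push_back(static_cast<char>((v >> 8) & 0xFF));
  };
  std::string aad = file_aad;
  aad.push_back(static_cast<char>(module_type));  // e.g. kDataPage
  append_le16(&aad, row_group_ordinal);
  append_le16(&aad, column_ordinal);
  append_le16(&aad, page_ordinal);
  return aad;
}
```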
diff --git a/contrib/libs/apache/arrow/cpp/src/parquet/encryption/encryption_internal_nossl.cc b/contrib/libs/apache/arrow/cpp/src/parquet/encryption/encryption_internal_nossl.cc
index fd3c1775d25..7f2edfa1d78 100644
--- a/contrib/libs/apache/arrow/cpp/src/parquet/encryption/encryption_internal_nossl.cc
+++ b/contrib/libs/apache/arrow/cpp/src/parquet/encryption/encryption_internal_nossl.cc
@@ -1,110 +1,110 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-#include "parquet/encryption/encryption_internal.h"
-#include "parquet/exception.h"
-
-namespace parquet {
-namespace encryption {
-
-void ThrowOpenSSLRequiredException() {
- throw ParquetException(
- "Calling encryption method in Arrow/Parquet built without OpenSSL");
-}
-
-class AesEncryptor::AesEncryptorImpl {};
-
-AesEncryptor::~AesEncryptor() {}
-
-int AesEncryptor::SignedFooterEncrypt(const uint8_t* footer, int footer_len,
- const uint8_t* key, int key_len, const uint8_t* aad,
- int aad_len, const uint8_t* nonce,
- uint8_t* encrypted_footer) {
- ThrowOpenSSLRequiredException();
- return -1;
-}
-
-void AesEncryptor::WipeOut() { ThrowOpenSSLRequiredException(); }
-
-int AesEncryptor::CiphertextSizeDelta() {
- ThrowOpenSSLRequiredException();
- return -1;
-}
-
-int AesEncryptor::Encrypt(const uint8_t* plaintext, int plaintext_len, const uint8_t* key,
- int key_len, const uint8_t* aad, int aad_len,
- uint8_t* ciphertext) {
- ThrowOpenSSLRequiredException();
- return -1;
-}
-
-AesEncryptor::AesEncryptor(ParquetCipher::type alg_id, int key_len, bool metadata) {
- ThrowOpenSSLRequiredException();
-}
-
-class AesDecryptor::AesDecryptorImpl {};
-
-int AesDecryptor::Decrypt(const uint8_t* ciphertext, int ciphertext_len, const uint8_t* key,
- int key_len, const uint8_t* aad, int aad_len,
- uint8_t* plaintext) {
- ThrowOpenSSLRequiredException();
- return -1;
-}
-
-void AesDecryptor::WipeOut() { ThrowOpenSSLRequiredException(); }
-
-AesDecryptor::~AesDecryptor() {}
-
-AesEncryptor* AesEncryptor::Make(ParquetCipher::type alg_id, int key_len, bool metadata,
- std::vector<AesEncryptor*>* all_encryptors) {
- return NULLPTR;
-}
-
-AesDecryptor::AesDecryptor(ParquetCipher::type alg_id, int key_len, bool metadata) {
- ThrowOpenSSLRequiredException();
-}
-
-AesDecryptor* AesDecryptor::Make(ParquetCipher::type alg_id, int key_len, bool metadata,
- std::vector<AesDecryptor*>* all_decryptors) {
- return NULLPTR;
-}
-
-int AesDecryptor::CiphertextSizeDelta() {
- ThrowOpenSSLRequiredException();
- return -1;
-}
-
-std::string CreateModuleAad(const std::string& file_aad, int8_t module_type,
- int16_t row_group_ordinal, int16_t column_ordinal,
- int16_t page_ordinal) {
- ThrowOpenSSLRequiredException();
- return "";
-}
-
-std::string CreateFooterAad(const std::string& aad_prefix_bytes) {
- ThrowOpenSSLRequiredException();
- return "";
-}
-
-void QuickUpdatePageAad(const std::string& AAD, int16_t new_page_ordinal) {
- ThrowOpenSSLRequiredException();
-}
-
-void RandBytes(unsigned char* buf, int num) { ThrowOpenSSLRequiredException(); }
-
-} // namespace encryption
-} // namespace parquet
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "parquet/encryption/encryption_internal.h"
+#include "parquet/exception.h"
+
+namespace parquet {
+namespace encryption {
+
+void ThrowOpenSSLRequiredException() {
+ throw ParquetException(
+ "Calling encryption method in Arrow/Parquet built without OpenSSL");
+}
+
+class AesEncryptor::AesEncryptorImpl {};
+
+AesEncryptor::~AesEncryptor() {}
+
+int AesEncryptor::SignedFooterEncrypt(const uint8_t* footer, int footer_len,
+ const uint8_t* key, int key_len, const uint8_t* aad,
+ int aad_len, const uint8_t* nonce,
+ uint8_t* encrypted_footer) {
+ ThrowOpenSSLRequiredException();
+ return -1;
+}
+
+void AesEncryptor::WipeOut() { ThrowOpenSSLRequiredException(); }
+
+int AesEncryptor::CiphertextSizeDelta() {
+ ThrowOpenSSLRequiredException();
+ return -1;
+}
+
+int AesEncryptor::Encrypt(const uint8_t* plaintext, int plaintext_len, const uint8_t* key,
+ int key_len, const uint8_t* aad, int aad_len,
+ uint8_t* ciphertext) {
+ ThrowOpenSSLRequiredException();
+ return -1;
+}
+
+AesEncryptor::AesEncryptor(ParquetCipher::type alg_id, int key_len, bool metadata) {
+ ThrowOpenSSLRequiredException();
+}
+
+class AesDecryptor::AesDecryptorImpl {};
+
+int AesDecryptor::Decrypt(const uint8_t* ciphertext, int ciphertext_len, const uint8_t* key,
+ int key_len, const uint8_t* aad, int aad_len,
+ uint8_t* plaintext) {
+ ThrowOpenSSLRequiredException();
+ return -1;
+}
+
+void AesDecryptor::WipeOut() { ThrowOpenSSLRequiredException(); }
+
+AesDecryptor::~AesDecryptor() {}
+
+AesEncryptor* AesEncryptor::Make(ParquetCipher::type alg_id, int key_len, bool metadata,
+ std::vector<AesEncryptor*>* all_encryptors) {
+ return NULLPTR;
+}
+
+AesDecryptor::AesDecryptor(ParquetCipher::type alg_id, int key_len, bool metadata) {
+ ThrowOpenSSLRequiredException();
+}
+
+AesDecryptor* AesDecryptor::Make(ParquetCipher::type alg_id, int key_len, bool metadata,
+ std::vector<AesDecryptor*>* all_decryptors) {
+ return NULLPTR;
+}
+
+int AesDecryptor::CiphertextSizeDelta() {
+ ThrowOpenSSLRequiredException();
+ return -1;
+}
+
+std::string CreateModuleAad(const std::string& file_aad, int8_t module_type,
+ int16_t row_group_ordinal, int16_t column_ordinal,
+ int16_t page_ordinal) {
+ ThrowOpenSSLRequiredException();
+ return "";
+}
+
+std::string CreateFooterAad(const std::string& aad_prefix_bytes) {
+ ThrowOpenSSLRequiredException();
+ return "";
+}
+
+void QuickUpdatePageAad(const std::string& AAD, int16_t new_page_ordinal) {
+ ThrowOpenSSLRequiredException();
+}
+
+void RandBytes(unsigned char* buf, int num) { ThrowOpenSSLRequiredException(); }
+
+} // namespace encryption
+} // namespace parquet
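Every stub in this translation unit throws via ThrowOpenSSLRequiredException() (or returns a null/sentinel value), so a no-OpenSSL build fails loudly at the first encryption call instead of silently producing unprotected files. A minimal sketch of probing for that at runtime, assuming only the declarations from encryption_internal.h:

```cpp
#include <iostream>

#include "parquet/encryption/encryption_internal.h"
#include "parquet/exception.h"

int main() {
  try {
    // Any entry point works as a probe; RandBytes() has no other side effects.
    unsigned char buf[16];
    parquet::encryption::RandBytes(buf, static_cast<int>(sizeof(buf)));
    std::cout << "encryption support available\n";
  } catch (const parquet::ParquetException& e) {
    // Reached when this no-OpenSSL translation unit was linked in.
    std::cerr << "built without OpenSSL: " << e.what() << "\n";
  }
  return 0;
}
```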
diff --git a/contrib/libs/apache/arrow/cpp/src/parquet/encryption/internal_file_decryptor.cc b/contrib/libs/apache/arrow/cpp/src/parquet/encryption/internal_file_decryptor.cc
index 5aa7d010d3f..6381e4f37f7 100644
--- a/contrib/libs/apache/arrow/cpp/src/parquet/encryption/internal_file_decryptor.cc
+++ b/contrib/libs/apache/arrow/cpp/src/parquet/encryption/internal_file_decryptor.cc
@@ -1,240 +1,240 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-#include "parquet/encryption/internal_file_decryptor.h"
-#include "parquet/encryption/encryption.h"
-#include "parquet/encryption/encryption_internal.h"
-
-namespace parquet {
-
-// Decryptor
-Decryptor::Decryptor(encryption::AesDecryptor* aes_decryptor, const std::string& key,
- const std::string& file_aad, const std::string& aad,
- ::arrow::MemoryPool* pool)
- : aes_decryptor_(aes_decryptor),
- key_(key),
- file_aad_(file_aad),
- aad_(aad),
- pool_(pool) {}
-
-int Decryptor::CiphertextSizeDelta() { return aes_decryptor_->CiphertextSizeDelta(); }
-
-int Decryptor::Decrypt(const uint8_t* ciphertext, int ciphertext_len,
- uint8_t* plaintext) {
- return aes_decryptor_->Decrypt(ciphertext, ciphertext_len, str2bytes(key_),
- static_cast<int>(key_.size()), str2bytes(aad_),
- static_cast<int>(aad_.size()), plaintext);
-}
-
-// InternalFileDecryptor
-InternalFileDecryptor::InternalFileDecryptor(FileDecryptionProperties* properties,
- const std::string& file_aad,
- ParquetCipher::type algorithm,
- const std::string& footer_key_metadata,
- ::arrow::MemoryPool* pool)
- : properties_(properties),
- file_aad_(file_aad),
- algorithm_(algorithm),
- footer_key_metadata_(footer_key_metadata),
- pool_(pool) {
- if (properties_->is_utilized()) {
- throw ParquetException(
- "Re-using decryption properties with explicit keys for another file");
- }
- properties_->set_utilized();
-}
-
-void InternalFileDecryptor::WipeOutDecryptionKeys() {
- properties_->WipeOutDecryptionKeys();
- for (auto const& i : all_decryptors_) {
- i->WipeOut();
- }
-}
-
-std::string InternalFileDecryptor::GetFooterKey() {
- std::string footer_key = properties_->footer_key();
- // ignore footer key metadata if footer key is explicitly set via API
- if (footer_key.empty()) {
- if (footer_key_metadata_.empty())
- throw ParquetException("No footer key or key metadata");
- if (properties_->key_retriever() == nullptr)
- throw ParquetException("No footer key or key retriever");
- try {
- footer_key = properties_->key_retriever()->GetKey(footer_key_metadata_);
- } catch (KeyAccessDeniedException& e) {
- std::stringstream ss;
- ss << "Footer key: access denied " << e.what() << "\n";
- throw ParquetException(ss.str());
- }
- }
- if (footer_key.empty()) {
- throw ParquetException(
- "Footer key unavailable. Could not verify "
- "plaintext footer metadata");
- }
- return footer_key;
-}
-
-std::shared_ptr<Decryptor> InternalFileDecryptor::GetFooterDecryptor() {
- std::string aad = encryption::CreateFooterAad(file_aad_);
- return GetFooterDecryptor(aad, true);
-}
-
-std::shared_ptr<Decryptor> InternalFileDecryptor::GetFooterDecryptorForColumnMeta(
- const std::string& aad) {
- return GetFooterDecryptor(aad, true);
-}
-
-std::shared_ptr<Decryptor> InternalFileDecryptor::GetFooterDecryptorForColumnData(
- const std::string& aad) {
- return GetFooterDecryptor(aad, false);
-}
-
-std::shared_ptr<Decryptor> InternalFileDecryptor::GetFooterDecryptor(
- const std::string& aad, bool metadata) {
- if (metadata) {
- if (footer_metadata_decryptor_ != nullptr) return footer_metadata_decryptor_;
- } else {
- if (footer_data_decryptor_ != nullptr) return footer_data_decryptor_;
- }
-
- std::string footer_key = properties_->footer_key();
- if (footer_key.empty()) {
- if (footer_key_metadata_.empty())
- throw ParquetException("No footer key or key metadata");
- if (properties_->key_retriever() == nullptr)
- throw ParquetException("No footer key or key retriever");
- try {
- footer_key = properties_->key_retriever()->GetKey(footer_key_metadata_);
- } catch (KeyAccessDeniedException& e) {
- std::stringstream ss;
- ss << "Footer key: access denied " << e.what() << "\n";
- throw ParquetException(ss.str());
- }
- }
- if (footer_key.empty()) {
- throw ParquetException(
- "Invalid footer encryption key. "
- "Could not parse footer metadata");
- }
-
- // Create both data and metadata decryptors to avoid redundant retrieval of key
- // from the key_retriever.
- auto aes_metadata_decryptor = GetMetaAesDecryptor(footer_key.size());
- auto aes_data_decryptor = GetDataAesDecryptor(footer_key.size());
-
- footer_metadata_decryptor_ = std::make_shared<Decryptor>(
- aes_metadata_decryptor, footer_key, file_aad_, aad, pool_);
- footer_data_decryptor_ =
- std::make_shared<Decryptor>(aes_data_decryptor, footer_key, file_aad_, aad, pool_);
-
- if (metadata) return footer_metadata_decryptor_;
- return footer_data_decryptor_;
-}
-
-std::shared_ptr<Decryptor> InternalFileDecryptor::GetColumnMetaDecryptor(
- const std::string& column_path, const std::string& column_key_metadata,
- const std::string& aad) {
- return GetColumnDecryptor(column_path, column_key_metadata, aad, true);
-}
-
-std::shared_ptr<Decryptor> InternalFileDecryptor::GetColumnDataDecryptor(
- const std::string& column_path, const std::string& column_key_metadata,
- const std::string& aad) {
- return GetColumnDecryptor(column_path, column_key_metadata, aad, false);
-}
-
-std::shared_ptr<Decryptor> InternalFileDecryptor::GetColumnDecryptor(
- const std::string& column_path, const std::string& column_key_metadata,
- const std::string& aad, bool metadata) {
- std::string column_key;
- // First check whether this decryptor was already created earlier.
- if (metadata) {
- if (column_metadata_map_.find(column_path) != column_metadata_map_.end()) {
- auto res(column_metadata_map_.at(column_path));
- res->UpdateAad(aad);
- return res;
- }
- } else {
- if (column_data_map_.find(column_path) != column_data_map_.end()) {
- auto res(column_data_map_.at(column_path));
- res->UpdateAad(aad);
- return res;
- }
- }
-
- column_key = properties_->column_key(column_path);
- // No explicit column key given via API. Retrieve via key metadata.
- if (column_key.empty() && !column_key_metadata.empty() &&
- properties_->key_retriever() != nullptr) {
- try {
- column_key = properties_->key_retriever()->GetKey(column_key_metadata);
- } catch (KeyAccessDeniedException& e) {
- std::stringstream ss;
- ss << "HiddenColumnException, path=" + column_path + " " << e.what() << "\n";
- throw HiddenColumnException(ss.str());
- }
- }
- if (column_key.empty()) {
- throw HiddenColumnException("HiddenColumnException, path=" + column_path);
- }
-
- // Create both data and metadata decryptors to avoid redundant retrieval of key
- // using the key_retriever.
- auto aes_metadata_decryptor = GetMetaAesDecryptor(column_key.size());
- auto aes_data_decryptor = GetDataAesDecryptor(column_key.size());
-
- column_metadata_map_[column_path] = std::make_shared<Decryptor>(
- aes_metadata_decryptor, column_key, file_aad_, aad, pool_);
- column_data_map_[column_path] =
- std::make_shared<Decryptor>(aes_data_decryptor, column_key, file_aad_, aad, pool_);
-
- if (metadata) return column_metadata_map_[column_path];
- return column_data_map_[column_path];
-}
-
-int InternalFileDecryptor::MapKeyLenToDecryptorArrayIndex(int key_len) {
- if (key_len == 16)
- return 0;
- else if (key_len == 24)
- return 1;
- else if (key_len == 32)
- return 2;
- throw ParquetException("decryption key must be 16, 24 or 32 bytes in length");
-}
-
-encryption::AesDecryptor* InternalFileDecryptor::GetMetaAesDecryptor(size_t key_size) {
- int key_len = static_cast<int>(key_size);
- int index = MapKeyLenToDecryptorArrayIndex(key_len);
- if (meta_decryptor_[index] == nullptr) {
- meta_decryptor_[index].reset(
- encryption::AesDecryptor::Make(algorithm_, key_len, true, &all_decryptors_));
- }
- return meta_decryptor_[index].get();
-}
-
-encryption::AesDecryptor* InternalFileDecryptor::GetDataAesDecryptor(size_t key_size) {
- int key_len = static_cast<int>(key_size);
- int index = MapKeyLenToDecryptorArrayIndex(key_len);
- if (data_decryptor_[index] == nullptr) {
- data_decryptor_[index].reset(
- encryption::AesDecryptor::Make(algorithm_, key_len, false, &all_decryptors_));
- }
- return data_decryptor_[index].get();
-}
-
-} // namespace parquet
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "parquet/encryption/internal_file_decryptor.h"
+#include "parquet/encryption/encryption.h"
+#include "parquet/encryption/encryption_internal.h"
+
+namespace parquet {
+
+// Decryptor
+Decryptor::Decryptor(encryption::AesDecryptor* aes_decryptor, const std::string& key,
+ const std::string& file_aad, const std::string& aad,
+ ::arrow::MemoryPool* pool)
+ : aes_decryptor_(aes_decryptor),
+ key_(key),
+ file_aad_(file_aad),
+ aad_(aad),
+ pool_(pool) {}
+
+int Decryptor::CiphertextSizeDelta() { return aes_decryptor_->CiphertextSizeDelta(); }
+
+int Decryptor::Decrypt(const uint8_t* ciphertext, int ciphertext_len,
+ uint8_t* plaintext) {
+ return aes_decryptor_->Decrypt(ciphertext, ciphertext_len, str2bytes(key_),
+ static_cast<int>(key_.size()), str2bytes(aad_),
+ static_cast<int>(aad_.size()), plaintext);
+}
+
+// InternalFileDecryptor
+InternalFileDecryptor::InternalFileDecryptor(FileDecryptionProperties* properties,
+ const std::string& file_aad,
+ ParquetCipher::type algorithm,
+ const std::string& footer_key_metadata,
+ ::arrow::MemoryPool* pool)
+ : properties_(properties),
+ file_aad_(file_aad),
+ algorithm_(algorithm),
+ footer_key_metadata_(footer_key_metadata),
+ pool_(pool) {
+ if (properties_->is_utilized()) {
+ throw ParquetException(
+ "Re-using decryption properties with explicit keys for another file");
+ }
+ properties_->set_utilized();
+}
+
+void InternalFileDecryptor::WipeOutDecryptionKeys() {
+ properties_->WipeOutDecryptionKeys();
+ for (auto const& i : all_decryptors_) {
+ i->WipeOut();
+ }
+}
+
+std::string InternalFileDecryptor::GetFooterKey() {
+ std::string footer_key = properties_->footer_key();
+ // ignore footer key metadata if footer key is explicitly set via API
+ if (footer_key.empty()) {
+ if (footer_key_metadata_.empty())
+ throw ParquetException("No footer key or key metadata");
+ if (properties_->key_retriever() == nullptr)
+ throw ParquetException("No footer key or key retriever");
+ try {
+ footer_key = properties_->key_retriever()->GetKey(footer_key_metadata_);
+ } catch (KeyAccessDeniedException& e) {
+ std::stringstream ss;
+ ss << "Footer key: access denied " << e.what() << "\n";
+ throw ParquetException(ss.str());
+ }
+ }
+ if (footer_key.empty()) {
+ throw ParquetException(
+ "Footer key unavailable. Could not verify "
+ "plaintext footer metadata");
+ }
+ return footer_key;
+}
+
+std::shared_ptr<Decryptor> InternalFileDecryptor::GetFooterDecryptor() {
+ std::string aad = encryption::CreateFooterAad(file_aad_);
+ return GetFooterDecryptor(aad, true);
+}
+
+std::shared_ptr<Decryptor> InternalFileDecryptor::GetFooterDecryptorForColumnMeta(
+ const std::string& aad) {
+ return GetFooterDecryptor(aad, true);
+}
+
+std::shared_ptr<Decryptor> InternalFileDecryptor::GetFooterDecryptorForColumnData(
+ const std::string& aad) {
+ return GetFooterDecryptor(aad, false);
+}
+
+std::shared_ptr<Decryptor> InternalFileDecryptor::GetFooterDecryptor(
+ const std::string& aad, bool metadata) {
+ if (metadata) {
+ if (footer_metadata_decryptor_ != nullptr) return footer_metadata_decryptor_;
+ } else {
+ if (footer_data_decryptor_ != nullptr) return footer_data_decryptor_;
+ }
+
+ std::string footer_key = properties_->footer_key();
+ if (footer_key.empty()) {
+ if (footer_key_metadata_.empty())
+ throw ParquetException("No footer key or key metadata");
+ if (properties_->key_retriever() == nullptr)
+ throw ParquetException("No footer key or key retriever");
+ try {
+ footer_key = properties_->key_retriever()->GetKey(footer_key_metadata_);
+ } catch (KeyAccessDeniedException& e) {
+ std::stringstream ss;
+ ss << "Footer key: access denied " << e.what() << "\n";
+ throw ParquetException(ss.str());
+ }
+ }
+ if (footer_key.empty()) {
+ throw ParquetException(
+ "Invalid footer encryption key. "
+ "Could not parse footer metadata");
+ }
+
+ // Create both data and metadata decryptors to avoid redundant retrieval of key
+ // from the key_retriever.
+ auto aes_metadata_decryptor = GetMetaAesDecryptor(footer_key.size());
+ auto aes_data_decryptor = GetDataAesDecryptor(footer_key.size());
+
+ footer_metadata_decryptor_ = std::make_shared<Decryptor>(
+ aes_metadata_decryptor, footer_key, file_aad_, aad, pool_);
+ footer_data_decryptor_ =
+ std::make_shared<Decryptor>(aes_data_decryptor, footer_key, file_aad_, aad, pool_);
+
+ if (metadata) return footer_metadata_decryptor_;
+ return footer_data_decryptor_;
+}
+
+std::shared_ptr<Decryptor> InternalFileDecryptor::GetColumnMetaDecryptor(
+ const std::string& column_path, const std::string& column_key_metadata,
+ const std::string& aad) {
+ return GetColumnDecryptor(column_path, column_key_metadata, aad, true);
+}
+
+std::shared_ptr<Decryptor> InternalFileDecryptor::GetColumnDataDecryptor(
+ const std::string& column_path, const std::string& column_key_metadata,
+ const std::string& aad) {
+ return GetColumnDecryptor(column_path, column_key_metadata, aad, false);
+}
+
+std::shared_ptr<Decryptor> InternalFileDecryptor::GetColumnDecryptor(
+ const std::string& column_path, const std::string& column_key_metadata,
+ const std::string& aad, bool metadata) {
+ std::string column_key;
+ // First check whether this decryptor was already created earlier.
+ if (metadata) {
+ if (column_metadata_map_.find(column_path) != column_metadata_map_.end()) {
+ auto res(column_metadata_map_.at(column_path));
+ res->UpdateAad(aad);
+ return res;
+ }
+ } else {
+ if (column_data_map_.find(column_path) != column_data_map_.end()) {
+ auto res(column_data_map_.at(column_path));
+ res->UpdateAad(aad);
+ return res;
+ }
+ }
+
+ column_key = properties_->column_key(column_path);
+ // No explicit column key given via API. Retrieve via key metadata.
+ if (column_key.empty() && !column_key_metadata.empty() &&
+ properties_->key_retriever() != nullptr) {
+ try {
+ column_key = properties_->key_retriever()->GetKey(column_key_metadata);
+ } catch (KeyAccessDeniedException& e) {
+ std::stringstream ss;
+ ss << "HiddenColumnException, path=" + column_path + " " << e.what() << "\n";
+ throw HiddenColumnException(ss.str());
+ }
+ }
+ if (column_key.empty()) {
+ throw HiddenColumnException("HiddenColumnException, path=" + column_path);
+ }
+
+ // Create both data and metadata decryptors to avoid redundant retrieval of key
+ // using the key_retriever.
+ auto aes_metadata_decryptor = GetMetaAesDecryptor(column_key.size());
+ auto aes_data_decryptor = GetDataAesDecryptor(column_key.size());
+
+ column_metadata_map_[column_path] = std::make_shared<Decryptor>(
+ aes_metadata_decryptor, column_key, file_aad_, aad, pool_);
+ column_data_map_[column_path] =
+ std::make_shared<Decryptor>(aes_data_decryptor, column_key, file_aad_, aad, pool_);
+
+ if (metadata) return column_metadata_map_[column_path];
+ return column_data_map_[column_path];
+}
+
+int InternalFileDecryptor::MapKeyLenToDecryptorArrayIndex(int key_len) {
+ if (key_len == 16)
+ return 0;
+ else if (key_len == 24)
+ return 1;
+ else if (key_len == 32)
+ return 2;
+ throw ParquetException("decryption key must be 16, 24 or 32 bytes in length");
+}
+
+encryption::AesDecryptor* InternalFileDecryptor::GetMetaAesDecryptor(size_t key_size) {
+ int key_len = static_cast<int>(key_size);
+ int index = MapKeyLenToDecryptorArrayIndex(key_len);
+ if (meta_decryptor_[index] == nullptr) {
+ meta_decryptor_[index].reset(
+ encryption::AesDecryptor::Make(algorithm_, key_len, true, &all_decryptors_));
+ }
+ return meta_decryptor_[index].get();
+}
+
+encryption::AesDecryptor* InternalFileDecryptor::GetDataAesDecryptor(size_t key_size) {
+ int key_len = static_cast<int>(key_size);
+ int index = MapKeyLenToDecryptorArrayIndex(key_len);
+ if (data_decryptor_[index] == nullptr) {
+ data_decryptor_[index].reset(
+ encryption::AesDecryptor::Make(algorithm_, key_len, false, &all_decryptors_));
+ }
+ return data_decryptor_[index].get();
+}
+
+} // namespace parquet
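GetFooterKey() and GetColumnDecryptor() above fall back to properties_->key_retriever()->GetKey(key_metadata) whenever no explicit key was supplied. A minimal sketch of such a retriever follows, assuming the key metadata carries a plain key id (as with footer_key_id() in encryption.h) and that KeyAccessDeniedException is constructible from a string:

```cpp
#include <map>
#include <string>

#include "parquet/encryption/encryption.h"
#include "parquet/exception.h"

// Hypothetical retriever mapping key ids to keys. GetFooterKey() above turns
// a KeyAccessDeniedException into a "Footer key: access denied" ParquetException,
// and GetColumnDecryptor() turns it into a HiddenColumnException.
class MapKeyRetriever : public parquet::DecryptionKeyRetriever {
 public:
  void PutKey(const std::string& key_id, const std::string& key) {
    keys_[key_id] = key;
  }

  std::string GetKey(const std::string& key_metadata) override {
    auto it = keys_.find(key_metadata);
    if (it == keys_.end()) {
      throw parquet::KeyAccessDeniedException("unknown key id: " + key_metadata);
    }
    return it->second;  // must be 16, 24 or 32 bytes long
  }

 private:
  std::map<std::string, std::string> keys_;
};
```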
diff --git a/contrib/libs/apache/arrow/cpp/src/parquet/encryption/internal_file_decryptor.h b/contrib/libs/apache/arrow/cpp/src/parquet/encryption/internal_file_decryptor.h
index fc2bc433d92..011c4acbeb6 100644
--- a/contrib/libs/apache/arrow/cpp/src/parquet/encryption/internal_file_decryptor.h
+++ b/contrib/libs/apache/arrow/cpp/src/parquet/encryption/internal_file_decryptor.h
@@ -1,121 +1,121 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-#pragma once
-
-#include <map>
-#include <memory>
-#include <string>
-#include <vector>
-
-#include "parquet/schema.h"
-
-namespace parquet {
-
-namespace encryption {
-class AesDecryptor;
-class AesEncryptor;
-} // namespace encryption
-
-class FileDecryptionProperties;
-
-class PARQUET_EXPORT Decryptor {
- public:
- Decryptor(encryption::AesDecryptor* decryptor, const std::string& key,
- const std::string& file_aad, const std::string& aad,
- ::arrow::MemoryPool* pool);
-
- const std::string& file_aad() const { return file_aad_; }
- void UpdateAad(const std::string& aad) { aad_ = aad; }
- ::arrow::MemoryPool* pool() { return pool_; }
-
- int CiphertextSizeDelta();
- int Decrypt(const uint8_t* ciphertext, int ciphertext_len, uint8_t* plaintext);
-
- private:
- encryption::AesDecryptor* aes_decryptor_;
- std::string key_;
- std::string file_aad_;
- std::string aad_;
- ::arrow::MemoryPool* pool_;
-};
-
-class InternalFileDecryptor {
- public:
- explicit InternalFileDecryptor(FileDecryptionProperties* properties,
- const std::string& file_aad,
- ParquetCipher::type algorithm,
- const std::string& footer_key_metadata,
- ::arrow::MemoryPool* pool);
-
- std::string& file_aad() { return file_aad_; }
-
- std::string GetFooterKey();
-
- ParquetCipher::type algorithm() { return algorithm_; }
-
- std::string& footer_key_metadata() { return footer_key_metadata_; }
-
- FileDecryptionProperties* properties() { return properties_; }
-
- void WipeOutDecryptionKeys();
-
- ::arrow::MemoryPool* pool() { return pool_; }
-
- std::shared_ptr<Decryptor> GetFooterDecryptor();
- std::shared_ptr<Decryptor> GetFooterDecryptorForColumnMeta(const std::string& aad = "");
- std::shared_ptr<Decryptor> GetFooterDecryptorForColumnData(const std::string& aad = "");
- std::shared_ptr<Decryptor> GetColumnMetaDecryptor(
- const std::string& column_path, const std::string& column_key_metadata,
- const std::string& aad = "");
- std::shared_ptr<Decryptor> GetColumnDataDecryptor(
- const std::string& column_path, const std::string& column_key_metadata,
- const std::string& aad = "");
-
- private:
- FileDecryptionProperties* properties_;
- // Concatenation of aad_prefix (if exists) and aad_file_unique
- std::string file_aad_;
- std::map<std::string, std::shared_ptr<Decryptor>> column_data_map_;
- std::map<std::string, std::shared_ptr<Decryptor>> column_metadata_map_;
-
- std::shared_ptr<Decryptor> footer_metadata_decryptor_;
- std::shared_ptr<Decryptor> footer_data_decryptor_;
- ParquetCipher::type algorithm_;
- std::string footer_key_metadata_;
- std::vector<encryption::AesDecryptor*> all_decryptors_;
-
- /// A key must be 16, 24 or 32 bytes in length, so there can be up to
- /// three kinds of meta_decryptors and data_decryptors.
- std::unique_ptr<encryption::AesDecryptor> meta_decryptor_[3];
- std::unique_ptr<encryption::AesDecryptor> data_decryptor_[3];
-
- ::arrow::MemoryPool* pool_;
-
- std::shared_ptr<Decryptor> GetFooterDecryptor(const std::string& aad, bool metadata);
- std::shared_ptr<Decryptor> GetColumnDecryptor(const std::string& column_path,
- const std::string& column_key_metadata,
- const std::string& aad,
- bool metadata = false);
-
- encryption::AesDecryptor* GetMetaAesDecryptor(size_t key_size);
- encryption::AesDecryptor* GetDataAesDecryptor(size_t key_size);
-
- int MapKeyLenToDecryptorArrayIndex(int key_len);
-};
-
-} // namespace parquet
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+
+#include <map>
+#include <memory>
+#include <string>
+#include <vector>
+
+#include "parquet/schema.h"
+
+namespace parquet {
+
+namespace encryption {
+class AesDecryptor;
+class AesEncryptor;
+} // namespace encryption
+
+class FileDecryptionProperties;
+
+class PARQUET_EXPORT Decryptor {
+ public:
+ Decryptor(encryption::AesDecryptor* decryptor, const std::string& key,
+ const std::string& file_aad, const std::string& aad,
+ ::arrow::MemoryPool* pool);
+
+ const std::string& file_aad() const { return file_aad_; }
+ void UpdateAad(const std::string& aad) { aad_ = aad; }
+ ::arrow::MemoryPool* pool() { return pool_; }
+
+ int CiphertextSizeDelta();
+ int Decrypt(const uint8_t* ciphertext, int ciphertext_len, uint8_t* plaintext);
+
+ private:
+ encryption::AesDecryptor* aes_decryptor_;
+ std::string key_;
+ std::string file_aad_;
+ std::string aad_;
+ ::arrow::MemoryPool* pool_;
+};
+
+class InternalFileDecryptor {
+ public:
+ explicit InternalFileDecryptor(FileDecryptionProperties* properties,
+ const std::string& file_aad,
+ ParquetCipher::type algorithm,
+ const std::string& footer_key_metadata,
+ ::arrow::MemoryPool* pool);
+
+ std::string& file_aad() { return file_aad_; }
+
+ std::string GetFooterKey();
+
+ ParquetCipher::type algorithm() { return algorithm_; }
+
+ std::string& footer_key_metadata() { return footer_key_metadata_; }
+
+ FileDecryptionProperties* properties() { return properties_; }
+
+ void WipeOutDecryptionKeys();
+
+ ::arrow::MemoryPool* pool() { return pool_; }
+
+ std::shared_ptr<Decryptor> GetFooterDecryptor();
+ std::shared_ptr<Decryptor> GetFooterDecryptorForColumnMeta(const std::string& aad = "");
+ std::shared_ptr<Decryptor> GetFooterDecryptorForColumnData(const std::string& aad = "");
+ std::shared_ptr<Decryptor> GetColumnMetaDecryptor(
+ const std::string& column_path, const std::string& column_key_metadata,
+ const std::string& aad = "");
+ std::shared_ptr<Decryptor> GetColumnDataDecryptor(
+ const std::string& column_path, const std::string& column_key_metadata,
+ const std::string& aad = "");
+
+ private:
+ FileDecryptionProperties* properties_;
+ // Concatenation of aad_prefix (if exists) and aad_file_unique
+ std::string file_aad_;
+ std::map<std::string, std::shared_ptr<Decryptor>> column_data_map_;
+ std::map<std::string, std::shared_ptr<Decryptor>> column_metadata_map_;
+
+ std::shared_ptr<Decryptor> footer_metadata_decryptor_;
+ std::shared_ptr<Decryptor> footer_data_decryptor_;
+ ParquetCipher::type algorithm_;
+ std::string footer_key_metadata_;
+ std::vector<encryption::AesDecryptor*> all_decryptors_;
+
+ /// A key must be 16, 24 or 32 bytes in length, so there can be up to
+ /// three kinds of meta_decryptors and data_decryptors.
+ std::unique_ptr<encryption::AesDecryptor> meta_decryptor_[3];
+ std::unique_ptr<encryption::AesDecryptor> data_decryptor_[3];
+
+ ::arrow::MemoryPool* pool_;
+
+ std::shared_ptr<Decryptor> GetFooterDecryptor(const std::string& aad, bool metadata);
+ std::shared_ptr<Decryptor> GetColumnDecryptor(const std::string& column_path,
+ const std::string& column_key_metadata,
+ const std::string& aad,
+ bool metadata = false);
+
+ encryption::AesDecryptor* GetMetaAesDecryptor(size_t key_size);
+ encryption::AesDecryptor* GetDataAesDecryptor(size_t key_size);
+
+ int MapKeyLenToDecryptorArrayIndex(int key_len);
+};
+
+} // namespace parquet
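Decryptor's two-call protocol (CiphertextSizeDelta(), then Decrypt()) leaves buffer management to the caller. A sketch of the intended sizing, assuming a decryptor obtained from GetColumnDataDecryptor() above; how the delta splits between nonce and tag is cipher-specific:

```cpp
#include <cstdint>
#include <vector>

#include "parquet/encryption/internal_file_decryptor.h"

// Hypothetical helper: the plaintext can be at most ciphertext_len minus the
// cipher overhead reported by CiphertextSizeDelta(); Decrypt() returns the
// number of plaintext bytes actually written.
std::vector<uint8_t> DecryptModule(parquet::Decryptor* decryptor,
                                   const uint8_t* ciphertext,
                                   int ciphertext_len) {
  std::vector<uint8_t> plaintext(ciphertext_len - decryptor->CiphertextSizeDelta());
  int written = decryptor->Decrypt(ciphertext, ciphertext_len, plaintext.data());
  plaintext.resize(written);
  return plaintext;
}
```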
diff --git a/contrib/libs/apache/arrow/cpp/src/parquet/encryption/internal_file_encryptor.cc b/contrib/libs/apache/arrow/cpp/src/parquet/encryption/internal_file_encryptor.cc
index c9f265cf7f1..15bf52b84dd 100644
--- a/contrib/libs/apache/arrow/cpp/src/parquet/encryption/internal_file_encryptor.cc
+++ b/contrib/libs/apache/arrow/cpp/src/parquet/encryption/internal_file_encryptor.cc
@@ -1,170 +1,170 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-#include "parquet/encryption/internal_file_encryptor.h"
-#include "parquet/encryption/encryption.h"
-#include "parquet/encryption/encryption_internal.h"
-
-namespace parquet {
-
-// Encryptor
-Encryptor::Encryptor(encryption::AesEncryptor* aes_encryptor, const std::string& key,
- const std::string& file_aad, const std::string& aad,
- ::arrow::MemoryPool* pool)
- : aes_encryptor_(aes_encryptor),
- key_(key),
- file_aad_(file_aad),
- aad_(aad),
- pool_(pool) {}
-
-int Encryptor::CiphertextSizeDelta() { return aes_encryptor_->CiphertextSizeDelta(); }
-
-int Encryptor::Encrypt(const uint8_t* plaintext, int plaintext_len, uint8_t* ciphertext) {
- return aes_encryptor_->Encrypt(plaintext, plaintext_len, str2bytes(key_),
- static_cast<int>(key_.size()), str2bytes(aad_),
- static_cast<int>(aad_.size()), ciphertext);
-}
-
-// InternalFileEncryptor
-InternalFileEncryptor::InternalFileEncryptor(FileEncryptionProperties* properties,
- ::arrow::MemoryPool* pool)
- : properties_(properties), pool_(pool) {
- if (properties_->is_utilized()) {
- throw ParquetException("Re-using encryption properties for another file");
- }
- properties_->set_utilized();
-}
-
-void InternalFileEncryptor::WipeOutEncryptionKeys() {
- properties_->WipeOutEncryptionKeys();
-
- for (auto const& i : all_encryptors_) {
- i->WipeOut();
- }
-}
-
-std::shared_ptr<Encryptor> InternalFileEncryptor::GetFooterEncryptor() {
- if (footer_encryptor_ != nullptr) {
- return footer_encryptor_;
- }
-
- ParquetCipher::type algorithm = properties_->algorithm().algorithm;
- std::string footer_aad = encryption::CreateFooterAad(properties_->file_aad());
- std::string footer_key = properties_->footer_key();
- auto aes_encryptor = GetMetaAesEncryptor(algorithm, footer_key.size());
- footer_encryptor_ = std::make_shared<Encryptor>(
- aes_encryptor, footer_key, properties_->file_aad(), footer_aad, pool_);
- return footer_encryptor_;
-}
-
-std::shared_ptr<Encryptor> InternalFileEncryptor::GetFooterSigningEncryptor() {
- if (footer_signing_encryptor_ != nullptr) {
- return footer_signing_encryptor_;
- }
-
- ParquetCipher::type algorithm = properties_->algorithm().algorithm;
- std::string footer_aad = encryption::CreateFooterAad(properties_->file_aad());
- std::string footer_signing_key = properties_->footer_key();
- auto aes_encryptor = GetMetaAesEncryptor(algorithm, footer_signing_key.size());
- footer_signing_encryptor_ = std::make_shared<Encryptor>(
- aes_encryptor, footer_signing_key, properties_->file_aad(), footer_aad, pool_);
- return footer_signing_encryptor_;
-}
-
-std::shared_ptr<Encryptor> InternalFileEncryptor::GetColumnMetaEncryptor(
- const std::string& column_path) {
- return GetColumnEncryptor(column_path, true);
-}
-
-std::shared_ptr<Encryptor> InternalFileEncryptor::GetColumnDataEncryptor(
- const std::string& column_path) {
- return GetColumnEncryptor(column_path, false);
-}
-
-std::shared_ptr<Encryptor>
-InternalFileEncryptor::GetColumnEncryptor(
- const std::string& column_path, bool metadata) {
- // First check whether this encryptor was already created earlier.
- if (metadata) {
- if (column_metadata_map_.find(column_path) != column_metadata_map_.end()) {
- return column_metadata_map_.at(column_path);
- }
- } else {
- if (column_data_map_.find(column_path) != column_data_map_.end()) {
- return column_data_map_.at(column_path);
- }
- }
- auto column_prop = properties_->column_encryption_properties(column_path);
- if (column_prop == nullptr) {
- return nullptr;
- }
-
- std::string key;
- if (column_prop->is_encrypted_with_footer_key()) {
- key = properties_->footer_key();
- } else {
- key = column_prop->key();
- }
-
- ParquetCipher::type algorithm = properties_->algorithm().algorithm;
- auto aes_encryptor = metadata ? GetMetaAesEncryptor(algorithm, key.size())
- : GetDataAesEncryptor(algorithm, key.size());
-
- std::string file_aad = properties_->file_aad();
- std::shared_ptr<Encryptor> encryptor =
- std::make_shared<Encryptor>(aes_encryptor, key, file_aad, "", pool_);
- if (metadata)
- column_metadata_map_[column_path] = encryptor;
- else
- column_data_map_[column_path] = encryptor;
-
- return encryptor;
-}
-
-int InternalFileEncryptor::MapKeyLenToEncryptorArrayIndex(int key_len) {
- if (key_len == 16)
- return 0;
- else if (key_len == 24)
- return 1;
- else if (key_len == 32)
- return 2;
- throw ParquetException("encryption key must be 16, 24 or 32 bytes in length");
-}
-
-encryption::AesEncryptor* InternalFileEncryptor::GetMetaAesEncryptor(
- ParquetCipher::type algorithm, size_t key_size) {
- int key_len = static_cast<int>(key_size);
- int index = MapKeyLenToEncryptorArrayIndex(key_len);
- if (meta_encryptor_[index] == nullptr) {
- meta_encryptor_[index].reset(
- encryption::AesEncryptor::Make(algorithm, key_len, true, &all_encryptors_));
- }
- return meta_encryptor_[index].get();
-}
-
-encryption::AesEncryptor* InternalFileEncryptor::GetDataAesEncryptor(
- ParquetCipher::type algorithm, size_t key_size) {
- int key_len = static_cast<int>(key_size);
- int index = MapKeyLenToEncryptorArrayIndex(key_len);
- if (data_encryptor_[index] == nullptr) {
- data_encryptor_[index].reset(
- encryption::AesEncryptor::Make(algorithm, key_len, false, &all_encryptors_));
- }
- return data_encryptor_[index].get();
-}
-
-} // namespace parquet
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "parquet/encryption/internal_file_encryptor.h"
+#include "parquet/encryption/encryption.h"
+#include "parquet/encryption/encryption_internal.h"
+
+namespace parquet {
+
+// Encryptor
+Encryptor::Encryptor(encryption::AesEncryptor* aes_encryptor, const std::string& key,
+ const std::string& file_aad, const std::string& aad,
+ ::arrow::MemoryPool* pool)
+ : aes_encryptor_(aes_encryptor),
+ key_(key),
+ file_aad_(file_aad),
+ aad_(aad),
+ pool_(pool) {}
+
+int Encryptor::CiphertextSizeDelta() { return aes_encryptor_->CiphertextSizeDelta(); }
+
+int Encryptor::Encrypt(const uint8_t* plaintext, int plaintext_len, uint8_t* ciphertext) {
+ return aes_encryptor_->Encrypt(plaintext, plaintext_len, str2bytes(key_),
+ static_cast<int>(key_.size()), str2bytes(aad_),
+ static_cast<int>(aad_.size()), ciphertext);
+}
+
+// InternalFileEncryptor
+InternalFileEncryptor::InternalFileEncryptor(FileEncryptionProperties* properties,
+ ::arrow::MemoryPool* pool)
+ : properties_(properties), pool_(pool) {
+ if (properties_->is_utilized()) {
+ throw ParquetException("Re-using encryption properties for another file");
+ }
+ properties_->set_utilized();
+}
+
+void InternalFileEncryptor::WipeOutEncryptionKeys() {
+ properties_->WipeOutEncryptionKeys();
+
+ for (auto const& i : all_encryptors_) {
+ i->WipeOut();
+ }
+}
+
+std::shared_ptr<Encryptor> InternalFileEncryptor::GetFooterEncryptor() {
+ if (footer_encryptor_ != nullptr) {
+ return footer_encryptor_;
+ }
+
+ ParquetCipher::type algorithm = properties_->algorithm().algorithm;
+ std::string footer_aad = encryption::CreateFooterAad(properties_->file_aad());
+ std::string footer_key = properties_->footer_key();
+ auto aes_encryptor = GetMetaAesEncryptor(algorithm, footer_key.size());
+ footer_encryptor_ = std::make_shared<Encryptor>(
+ aes_encryptor, footer_key, properties_->file_aad(), footer_aad, pool_);
+ return footer_encryptor_;
+}
+
+std::shared_ptr<Encryptor> InternalFileEncryptor::GetFooterSigningEncryptor() {
+ if (footer_signing_encryptor_ != nullptr) {
+ return footer_signing_encryptor_;
+ }
+
+ ParquetCipher::type algorithm = properties_->algorithm().algorithm;
+ std::string footer_aad = encryption::CreateFooterAad(properties_->file_aad());
+ std::string footer_signing_key = properties_->footer_key();
+ auto aes_encryptor = GetMetaAesEncryptor(algorithm, footer_signing_key.size());
+ footer_signing_encryptor_ = std::make_shared<Encryptor>(
+ aes_encryptor, footer_signing_key, properties_->file_aad(), footer_aad, pool_);
+ return footer_signing_encryptor_;
+}
+
+std::shared_ptr<Encryptor> InternalFileEncryptor::GetColumnMetaEncryptor(
+ const std::string& column_path) {
+ return GetColumnEncryptor(column_path, true);
+}
+
+std::shared_ptr<Encryptor> InternalFileEncryptor::GetColumnDataEncryptor(
+ const std::string& column_path) {
+ return GetColumnEncryptor(column_path, false);
+}
+
+std::shared_ptr<Encryptor> InternalFileEncryptor::GetColumnEncryptor(
+ const std::string& column_path, bool metadata) {
+  // First check whether an encryptor for this column was already created.
+ if (metadata) {
+ if (column_metadata_map_.find(column_path) != column_metadata_map_.end()) {
+ return column_metadata_map_.at(column_path);
+ }
+ } else {
+ if (column_data_map_.find(column_path) != column_data_map_.end()) {
+ return column_data_map_.at(column_path);
+ }
+ }
+ auto column_prop = properties_->column_encryption_properties(column_path);
+ if (column_prop == nullptr) {
+ return nullptr;
+ }
+
+ std::string key;
+ if (column_prop->is_encrypted_with_footer_key()) {
+ key = properties_->footer_key();
+ } else {
+ key = column_prop->key();
+ }
+
+ ParquetCipher::type algorithm = properties_->algorithm().algorithm;
+ auto aes_encryptor = metadata ? GetMetaAesEncryptor(algorithm, key.size())
+ : GetDataAesEncryptor(algorithm, key.size());
+
+ std::string file_aad = properties_->file_aad();
+ std::shared_ptr<Encryptor> encryptor =
+ std::make_shared<Encryptor>(aes_encryptor, key, file_aad, "", pool_);
+ if (metadata)
+ column_metadata_map_[column_path] = encryptor;
+ else
+ column_data_map_[column_path] = encryptor;
+
+ return encryptor;
+}
+
+int InternalFileEncryptor::MapKeyLenToEncryptorArrayIndex(int key_len) {
+ if (key_len == 16)
+ return 0;
+ else if (key_len == 24)
+ return 1;
+ else if (key_len == 32)
+ return 2;
+ throw ParquetException("encryption key must be 16, 24 or 32 bytes in length");
+}
+
+encryption::AesEncryptor* InternalFileEncryptor::GetMetaAesEncryptor(
+ ParquetCipher::type algorithm, size_t key_size) {
+ int key_len = static_cast<int>(key_size);
+ int index = MapKeyLenToEncryptorArrayIndex(key_len);
+ if (meta_encryptor_[index] == nullptr) {
+ meta_encryptor_[index].reset(
+ encryption::AesEncryptor::Make(algorithm, key_len, true, &all_encryptors_));
+ }
+ return meta_encryptor_[index].get();
+}
+
+encryption::AesEncryptor* InternalFileEncryptor::GetDataAesEncryptor(
+ ParquetCipher::type algorithm, size_t key_size) {
+ int key_len = static_cast<int>(key_size);
+ int index = MapKeyLenToEncryptorArrayIndex(key_len);
+ if (data_encryptor_[index] == nullptr) {
+ data_encryptor_[index].reset(
+ encryption::AesEncryptor::Make(algorithm, key_len, false, &all_encryptors_));
+ }
+ return data_encryptor_[index].get();
+}
+
+} // namespace parquet
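
For orientation, here is a minimal sketch of how a writer-side caller might drive the restored encryptor above; `props` and `pool` stand for a fully built FileEncryptionProperties pointer and a valid Arrow memory pool, whose construction is outside this file:

    #include <cstdint>
    #include <memory>
    #include <vector>

    #include "parquet/encryption/internal_file_encryptor.h"

    // Sketch only: encrypt one plaintext buffer with the footer encryptor.
    std::vector<uint8_t> EncryptFooterSketch(parquet::FileEncryptionProperties* props,
                                             ::arrow::MemoryPool* pool,
                                             const uint8_t* plaintext, int plaintext_len) {
      parquet::InternalFileEncryptor file_encryptor(props, pool);
      std::shared_ptr<parquet::Encryptor> footer = file_encryptor.GetFooterEncryptor();
      // Ciphertext is larger than plaintext by a fixed, mode-dependent delta.
      std::vector<uint8_t> ciphertext(plaintext_len + footer->CiphertextSizeDelta());
      int written = footer->Encrypt(plaintext, plaintext_len, ciphertext.data());
      ciphertext.resize(written);
      file_encryptor.WipeOutEncryptionKeys();  // wipe keys once the file is written
      return ciphertext;
    }
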
diff --git a/contrib/libs/apache/arrow/cpp/src/parquet/encryption/internal_file_encryptor.h b/contrib/libs/apache/arrow/cpp/src/parquet/encryption/internal_file_encryptor.h
index 7cf513ca810..3cbe53500c2 100644
--- a/contrib/libs/apache/arrow/cpp/src/parquet/encryption/internal_file_encryptor.h
+++ b/contrib/libs/apache/arrow/cpp/src/parquet/encryption/internal_file_encryptor.h
@@ -1,109 +1,109 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-#pragma once
-
-#include <map>
-#include <memory>
-#include <string>
-#include <vector>
-
-#include "parquet/encryption/encryption.h"
-#include "parquet/schema.h"
-
-namespace parquet {
-
-namespace encryption {
-class AesEncryptor;
-} // namespace encryption
-
-class FileEncryptionProperties;
-class ColumnEncryptionProperties;
-
-class PARQUET_EXPORT Encryptor {
- public:
- Encryptor(encryption::AesEncryptor* aes_encryptor, const std::string& key,
- const std::string& file_aad, const std::string& aad,
- ::arrow::MemoryPool* pool);
- const std::string& file_aad() { return file_aad_; }
- void UpdateAad(const std::string& aad) { aad_ = aad; }
- ::arrow::MemoryPool* pool() { return pool_; }
-
- int CiphertextSizeDelta();
- int Encrypt(const uint8_t* plaintext, int plaintext_len, uint8_t* ciphertext);
-
- bool EncryptColumnMetaData(
- bool encrypted_footer,
- const std::shared_ptr<ColumnEncryptionProperties>& column_encryption_properties) {
- // if column is not encrypted then do not encrypt the column metadata
- if (!column_encryption_properties || !column_encryption_properties->is_encrypted())
- return false;
- // if plaintext footer then encrypt the column metadata
- if (!encrypted_footer) return true;
- // if column is not encrypted with footer key then encrypt the column metadata
- return !column_encryption_properties->is_encrypted_with_footer_key();
- }
-
- private:
- encryption::AesEncryptor* aes_encryptor_;
- std::string key_;
- std::string file_aad_;
- std::string aad_;
- ::arrow::MemoryPool* pool_;
-};
-
-class InternalFileEncryptor {
- public:
- explicit InternalFileEncryptor(FileEncryptionProperties* properties,
- ::arrow::MemoryPool* pool);
-
- std::shared_ptr<Encryptor> GetFooterEncryptor();
- std::shared_ptr<Encryptor> GetFooterSigningEncryptor();
- std::shared_ptr<Encryptor> GetColumnMetaEncryptor(const std::string& column_path);
- std::shared_ptr<Encryptor> GetColumnDataEncryptor(const std::string& column_path);
- void WipeOutEncryptionKeys();
-
- private:
- FileEncryptionProperties* properties_;
-
- std::map<std::string, std::shared_ptr<Encryptor>> column_data_map_;
- std::map<std::string, std::shared_ptr<Encryptor>> column_metadata_map_;
-
- std::shared_ptr<Encryptor> footer_signing_encryptor_;
- std::shared_ptr<Encryptor> footer_encryptor_;
-
- std::vector<encryption::AesEncryptor*> all_encryptors_;
-
- // Key must be 16, 24 or 32 bytes in length. Thus there could be up to three
- // types of meta_encryptors and data_encryptors.
- std::unique_ptr<encryption::AesEncryptor> meta_encryptor_[3];
- std::unique_ptr<encryption::AesEncryptor> data_encryptor_[3];
-
- ::arrow::MemoryPool* pool_;
-
- std::shared_ptr<Encryptor> GetColumnEncryptor(const std::string& column_path,
- bool metadata);
-
- encryption::AesEncryptor* GetMetaAesEncryptor(ParquetCipher::type algorithm,
- size_t key_len);
- encryption::AesEncryptor* GetDataAesEncryptor(ParquetCipher::type algorithm,
- size_t key_len);
-
- int MapKeyLenToEncryptorArrayIndex(int key_len);
-};
-
-} // namespace parquet
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+
+#include <map>
+#include <memory>
+#include <string>
+#include <vector>
+
+#include "parquet/encryption/encryption.h"
+#include "parquet/schema.h"
+
+namespace parquet {
+
+namespace encryption {
+class AesEncryptor;
+} // namespace encryption
+
+class FileEncryptionProperties;
+class ColumnEncryptionProperties;
+
+class PARQUET_EXPORT Encryptor {
+ public:
+ Encryptor(encryption::AesEncryptor* aes_encryptor, const std::string& key,
+ const std::string& file_aad, const std::string& aad,
+ ::arrow::MemoryPool* pool);
+ const std::string& file_aad() { return file_aad_; }
+ void UpdateAad(const std::string& aad) { aad_ = aad; }
+ ::arrow::MemoryPool* pool() { return pool_; }
+
+ int CiphertextSizeDelta();
+ int Encrypt(const uint8_t* plaintext, int plaintext_len, uint8_t* ciphertext);
+
+ bool EncryptColumnMetaData(
+ bool encrypted_footer,
+ const std::shared_ptr<ColumnEncryptionProperties>& column_encryption_properties) {
+    // If the column is not encrypted, do not encrypt its metadata.
+    if (!column_encryption_properties || !column_encryption_properties->is_encrypted())
+      return false;
+    // In plaintext footer mode, the column metadata must be encrypted.
+    if (!encrypted_footer) return true;
+    // With an encrypted footer, encrypt the metadata only when the column is
+    // not encrypted with the footer key.
+    return !column_encryption_properties->is_encrypted_with_footer_key();
+ }
+
+ private:
+ encryption::AesEncryptor* aes_encryptor_;
+ std::string key_;
+ std::string file_aad_;
+ std::string aad_;
+ ::arrow::MemoryPool* pool_;
+};
+
+class InternalFileEncryptor {
+ public:
+ explicit InternalFileEncryptor(FileEncryptionProperties* properties,
+ ::arrow::MemoryPool* pool);
+
+ std::shared_ptr<Encryptor> GetFooterEncryptor();
+ std::shared_ptr<Encryptor> GetFooterSigningEncryptor();
+ std::shared_ptr<Encryptor> GetColumnMetaEncryptor(const std::string& column_path);
+ std::shared_ptr<Encryptor> GetColumnDataEncryptor(const std::string& column_path);
+ void WipeOutEncryptionKeys();
+
+ private:
+ FileEncryptionProperties* properties_;
+
+ std::map<std::string, std::shared_ptr<Encryptor>> column_data_map_;
+ std::map<std::string, std::shared_ptr<Encryptor>> column_metadata_map_;
+
+ std::shared_ptr<Encryptor> footer_signing_encryptor_;
+ std::shared_ptr<Encryptor> footer_encryptor_;
+
+ std::vector<encryption::AesEncryptor*> all_encryptors_;
+
+ // Key must be 16, 24 or 32 bytes in length. Thus there could be up to three
+ // types of meta_encryptors and data_encryptors.
+ std::unique_ptr<encryption::AesEncryptor> meta_encryptor_[3];
+ std::unique_ptr<encryption::AesEncryptor> data_encryptor_[3];
+
+ ::arrow::MemoryPool* pool_;
+
+ std::shared_ptr<Encryptor> GetColumnEncryptor(const std::string& column_path,
+ bool metadata);
+
+ encryption::AesEncryptor* GetMetaAesEncryptor(ParquetCipher::type algorithm,
+ size_t key_len);
+ encryption::AesEncryptor* GetDataAesEncryptor(ParquetCipher::type algorithm,
+ size_t key_len);
+
+ int MapKeyLenToEncryptorArrayIndex(int key_len);
+};
+
+} // namespace parquet
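
As a reading aid, the EncryptColumnMetaData predicate declared above reduces to a pure function of three flags; the restatement below is illustrative only, and its names are not part of the API:

    // Illustrative restatement of the column-metadata encryption decision.
    bool ShouldEncryptColumnMetaData(bool column_is_encrypted, bool encrypted_footer,
                                     bool encrypted_with_footer_key) {
      if (!column_is_encrypted) return false;  // plaintext column: nothing to protect
      if (!encrypted_footer) return true;      // plaintext footer: metadata needs encryption
      return !encrypted_with_footer_key;       // footer encryption covers footer-key columns
    }
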
diff --git a/contrib/libs/apache/arrow/cpp/src/parquet/exception.cc b/contrib/libs/apache/arrow/cpp/src/parquet/exception.cc
index 909a5079c76..c333957dd1d 100644
--- a/contrib/libs/apache/arrow/cpp/src/parquet/exception.cc
+++ b/contrib/libs/apache/arrow/cpp/src/parquet/exception.cc
@@ -1,27 +1,27 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-#include "parquet/exception.h"
-
-namespace parquet {
-
-std::ostream& operator<<(std::ostream& os, const ParquetException& exception) {
- os << exception.what();
- return os;
-}
-
-} // namespace parquet
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "parquet/exception.h"
+
+namespace parquet {
+
+std::ostream& operator<<(std::ostream& os, const ParquetException& exception) {
+ os << exception.what();
+ return os;
+}
+
+} // namespace parquet
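
A minimal usage sketch for the printer defined above (the message text is made up):

    #include <iostream>

    #include "parquet/exception.h"

    int main() {
      try {
        throw parquet::ParquetException("example failure");
      } catch (const parquet::ParquetException& e) {
        std::cerr << e << "\n";  // resolves to the operator<< defined above
      }
      return 0;
    }
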
diff --git a/contrib/libs/apache/arrow/cpp/src/parquet/exception.h b/contrib/libs/apache/arrow/cpp/src/parquet/exception.h
index a76761c63c3..826f5bdc8bf 100644
--- a/contrib/libs/apache/arrow/cpp/src/parquet/exception.h
+++ b/contrib/libs/apache/arrow/cpp/src/parquet/exception.h
@@ -1,158 +1,158 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-#pragma once
-
-#include <exception>
-#include <sstream>
-#include <string>
-#include <utility>
-
-#include "arrow/type_fwd.h"
-#include "arrow/util/string_builder.h"
-#include "parquet/platform.h"
-
-// PARQUET-1085
-#if !defined(ARROW_UNUSED)
-#define ARROW_UNUSED(x) UNUSED(x)
-#endif
-
-// Parquet exception to Arrow Status
-
-#define BEGIN_PARQUET_CATCH_EXCEPTIONS try {
-#define END_PARQUET_CATCH_EXCEPTIONS \
- } \
- catch (const ::parquet::ParquetStatusException& e) { \
- return e.status(); \
- } \
- catch (const ::parquet::ParquetException& e) { \
- return ::arrow::Status::IOError(e.what()); \
- }
-
-// clang-format off
-
-#define PARQUET_CATCH_NOT_OK(s) \
- BEGIN_PARQUET_CATCH_EXCEPTIONS \
- (s); \
- END_PARQUET_CATCH_EXCEPTIONS
-
-// clang-format on
-
-#define PARQUET_CATCH_AND_RETURN(s) \
- BEGIN_PARQUET_CATCH_EXCEPTIONS \
- return (s); \
- END_PARQUET_CATCH_EXCEPTIONS
-
-// Arrow Status to Parquet exception
-
-#define PARQUET_IGNORE_NOT_OK(s) \
- do { \
- ::arrow::Status _s = ::arrow::internal::GenericToStatus(s); \
- ARROW_UNUSED(_s); \
- } while (0)
-
-#define PARQUET_THROW_NOT_OK(s) \
- do { \
- ::arrow::Status _s = ::arrow::internal::GenericToStatus(s); \
- if (!_s.ok()) { \
- throw ::parquet::ParquetStatusException(std::move(_s)); \
- } \
- } while (0)
-
-#define PARQUET_ASSIGN_OR_THROW_IMPL(status_name, lhs, rexpr) \
- auto status_name = (rexpr); \
- PARQUET_THROW_NOT_OK(status_name.status()); \
- lhs = std::move(status_name).ValueOrDie();
-
-#define PARQUET_ASSIGN_OR_THROW(lhs, rexpr) \
- PARQUET_ASSIGN_OR_THROW_IMPL(ARROW_ASSIGN_OR_RAISE_NAME(_error_or_value, __COUNTER__), \
- lhs, rexpr);
-
-namespace parquet {
-
-class ParquetException : public std::exception {
- public:
- PARQUET_NORETURN static void EofException(const std::string& msg = "") {
- static std::string prefix = "Unexpected end of stream";
- if (msg.empty()) {
- throw ParquetException(prefix);
- }
- throw ParquetException(prefix, ": ", msg);
- }
-
- PARQUET_NORETURN static void NYI(const std::string& msg = "") {
- throw ParquetException("Not yet implemented: ", msg, ".");
- }
-
- template <typename... Args>
- explicit ParquetException(Args&&... args)
- : msg_(::arrow::util::StringBuilder(std::forward<Args>(args)...)) {}
-
- explicit ParquetException(std::string msg) : msg_(std::move(msg)) {}
-
- explicit ParquetException(const char* msg, const std::exception&) : msg_(msg) {}
-
- ParquetException(const ParquetException&) = default;
- ParquetException& operator=(const ParquetException&) = default;
- ParquetException(ParquetException&&) = default;
- ParquetException& operator=(ParquetException&&) = default;
-
- const char* what() const noexcept override { return msg_.c_str(); }
-
- private:
- std::string msg_;
-};
-
-// Support printing a ParquetException.
-// This is needed for clang-on-MSVC as there operator<< is not defined for
-// std::exception.
-PARQUET_EXPORT
-std::ostream& operator<<(std::ostream& os, const ParquetException& exception);
-
-class ParquetStatusException : public ParquetException {
- public:
- explicit ParquetStatusException(::arrow::Status status)
- : ParquetException(status.ToString()), status_(std::move(status)) {}
-
- const ::arrow::Status& status() const { return status_; }
-
- private:
- ::arrow::Status status_;
-};
-
-// This class exists for the purpose of detecting an invalid or corrupted file.
-class ParquetInvalidOrCorruptedFileException : public ParquetStatusException {
- public:
- ParquetInvalidOrCorruptedFileException(const ParquetInvalidOrCorruptedFileException&) =
- default;
-
- template <typename Arg,
- typename std::enable_if<
- !std::is_base_of<ParquetInvalidOrCorruptedFileException, Arg>::value,
- int>::type = 0,
- typename... Args>
- explicit ParquetInvalidOrCorruptedFileException(Arg arg, Args&&... args)
- : ParquetStatusException(::arrow::Status::Invalid(std::forward<Arg>(arg),
- std::forward<Args>(args)...)) {}
-};
-
-template <typename StatusReturnBlock>
-void ThrowNotOk(StatusReturnBlock&& b) {
- PARQUET_THROW_NOT_OK(b());
-}
-
-} // namespace parquet
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+
+#include <exception>
+#include <sstream>
+#include <string>
+#include <utility>
+
+#include "arrow/type_fwd.h"
+#include "arrow/util/string_builder.h"
+#include "parquet/platform.h"
+
+// PARQUET-1085
+#if !defined(ARROW_UNUSED)
+#define ARROW_UNUSED(x) UNUSED(x)
+#endif
+
+// Parquet exception to Arrow Status
+
+#define BEGIN_PARQUET_CATCH_EXCEPTIONS try {
+#define END_PARQUET_CATCH_EXCEPTIONS \
+ } \
+ catch (const ::parquet::ParquetStatusException& e) { \
+ return e.status(); \
+ } \
+ catch (const ::parquet::ParquetException& e) { \
+ return ::arrow::Status::IOError(e.what()); \
+ }
+
+// clang-format off
+
+#define PARQUET_CATCH_NOT_OK(s) \
+ BEGIN_PARQUET_CATCH_EXCEPTIONS \
+ (s); \
+ END_PARQUET_CATCH_EXCEPTIONS
+
+// clang-format on
+
+#define PARQUET_CATCH_AND_RETURN(s) \
+ BEGIN_PARQUET_CATCH_EXCEPTIONS \
+ return (s); \
+ END_PARQUET_CATCH_EXCEPTIONS
+
+// Arrow Status to Parquet exception
+
+#define PARQUET_IGNORE_NOT_OK(s) \
+ do { \
+ ::arrow::Status _s = ::arrow::internal::GenericToStatus(s); \
+ ARROW_UNUSED(_s); \
+ } while (0)
+
+#define PARQUET_THROW_NOT_OK(s) \
+ do { \
+ ::arrow::Status _s = ::arrow::internal::GenericToStatus(s); \
+ if (!_s.ok()) { \
+ throw ::parquet::ParquetStatusException(std::move(_s)); \
+ } \
+ } while (0)
+
+#define PARQUET_ASSIGN_OR_THROW_IMPL(status_name, lhs, rexpr) \
+ auto status_name = (rexpr); \
+ PARQUET_THROW_NOT_OK(status_name.status()); \
+ lhs = std::move(status_name).ValueOrDie();
+
+#define PARQUET_ASSIGN_OR_THROW(lhs, rexpr) \
+ PARQUET_ASSIGN_OR_THROW_IMPL(ARROW_ASSIGN_OR_RAISE_NAME(_error_or_value, __COUNTER__), \
+ lhs, rexpr);
+
+namespace parquet {
+
+class ParquetException : public std::exception {
+ public:
+ PARQUET_NORETURN static void EofException(const std::string& msg = "") {
+ static std::string prefix = "Unexpected end of stream";
+ if (msg.empty()) {
+ throw ParquetException(prefix);
+ }
+ throw ParquetException(prefix, ": ", msg);
+ }
+
+ PARQUET_NORETURN static void NYI(const std::string& msg = "") {
+ throw ParquetException("Not yet implemented: ", msg, ".");
+ }
+
+ template <typename... Args>
+ explicit ParquetException(Args&&... args)
+ : msg_(::arrow::util::StringBuilder(std::forward<Args>(args)...)) {}
+
+ explicit ParquetException(std::string msg) : msg_(std::move(msg)) {}
+
+ explicit ParquetException(const char* msg, const std::exception&) : msg_(msg) {}
+
+ ParquetException(const ParquetException&) = default;
+ ParquetException& operator=(const ParquetException&) = default;
+ ParquetException(ParquetException&&) = default;
+ ParquetException& operator=(ParquetException&&) = default;
+
+ const char* what() const noexcept override { return msg_.c_str(); }
+
+ private:
+ std::string msg_;
+};
+
+// Support printing a ParquetException.
+// This is needed for clang-on-MSVC, where operator<< is not defined for
+// std::exception.
+PARQUET_EXPORT
+std::ostream& operator<<(std::ostream& os, const ParquetException& exception);
+
+class ParquetStatusException : public ParquetException {
+ public:
+ explicit ParquetStatusException(::arrow::Status status)
+ : ParquetException(status.ToString()), status_(std::move(status)) {}
+
+ const ::arrow::Status& status() const { return status_; }
+
+ private:
+ ::arrow::Status status_;
+};
+
+// This class signals an invalid or corrupted file.
+class ParquetInvalidOrCorruptedFileException : public ParquetStatusException {
+ public:
+ ParquetInvalidOrCorruptedFileException(const ParquetInvalidOrCorruptedFileException&) =
+ default;
+
+ template <typename Arg,
+ typename std::enable_if<
+ !std::is_base_of<ParquetInvalidOrCorruptedFileException, Arg>::value,
+ int>::type = 0,
+ typename... Args>
+ explicit ParquetInvalidOrCorruptedFileException(Arg arg, Args&&... args)
+ : ParquetStatusException(::arrow::Status::Invalid(std::forward<Arg>(arg),
+ std::forward<Args>(args)...)) {}
+};
+
+template <typename StatusReturnBlock>
+void ThrowNotOk(StatusReturnBlock&& b) {
+ PARQUET_THROW_NOT_OK(b());
+}
+
+} // namespace parquet
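
To illustrate the two directions these macros bridge, here is a sketch; ReadSomething is a hypothetical helper standing in for any Arrow API that returns a Result:

    #include "arrow/result.h"
    #include "arrow/status.h"
    #include "parquet/exception.h"

    // Hypothetical helper; any ::arrow::Result-returning call works the same way.
    ::arrow::Result<int> ReadSomething();

    void ThrowingCaller() {
      // Result -> exception: throws ParquetStatusException on error.
      PARQUET_ASSIGN_OR_THROW(int value, ReadSomething());
      (void)value;
    }

    ::arrow::Status StatusReturningCaller() {
      // Exception -> Status: converts a ParquetException back into a Status.
      PARQUET_CATCH_NOT_OK(ThrowingCaller());
      return ::arrow::Status::OK();
    }
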
diff --git a/contrib/libs/apache/arrow/cpp/src/parquet/file_reader.cc b/contrib/libs/apache/arrow/cpp/src/parquet/file_reader.cc
index 3c3c124987e..4e38901aa0d 100644
--- a/contrib/libs/apache/arrow/cpp/src/parquet/file_reader.cc
+++ b/contrib/libs/apache/arrow/cpp/src/parquet/file_reader.cc
@@ -1,868 +1,868 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-#include "parquet/file_reader.h"
-
-#include <algorithm>
-#include <cstdint>
-#include <cstring>
-#include <memory>
-#include <ostream>
-#include <string>
-#include <utility>
-
-#include "arrow/io/caching.h"
-#include "arrow/io/file.h"
-#include "arrow/io/memory.h"
-#include "arrow/util/checked_cast.h"
-#include "arrow/util/future.h"
-#include "arrow/util/int_util_internal.h"
-#include "arrow/util/logging.h"
-#include "arrow/util/ubsan.h"
-#include "parquet/column_reader.h"
-#include "parquet/column_scanner.h"
-#include "parquet/encryption/encryption_internal.h"
-#include "parquet/encryption/internal_file_decryptor.h"
-#include "parquet/exception.h"
-#include "parquet/file_writer.h"
-#include "parquet/metadata.h"
-#include "parquet/platform.h"
-#include "parquet/properties.h"
-#include "parquet/schema.h"
-#include "parquet/types.h"
-
-using arrow::internal::AddWithOverflow;
-
-namespace parquet {
-
-// PARQUET-978: Minimize footer reads by reading 64 KB from the end of the file
-static constexpr int64_t kDefaultFooterReadSize = 64 * 1024;
-static constexpr uint32_t kFooterSize = 8;
-
-// For PARQUET-816
-static constexpr int64_t kMaxDictHeaderSize = 100;
-
-// ----------------------------------------------------------------------
-// RowGroupReader public API
-
-RowGroupReader::RowGroupReader(std::unique_ptr<Contents> contents)
- : contents_(std::move(contents)) {}
-
-std::shared_ptr<ColumnReader> RowGroupReader::Column(int i) {
- if (i >= metadata()->num_columns()) {
- std::stringstream ss;
- ss << "Trying to read column index " << i << " but row group metadata has only "
- << metadata()->num_columns() << " columns";
- throw ParquetException(ss.str());
- }
- const ColumnDescriptor* descr = metadata()->schema()->Column(i);
-
- std::unique_ptr<PageReader> page_reader = contents_->GetColumnPageReader(i);
- return ColumnReader::Make(
- descr, std::move(page_reader),
- const_cast<ReaderProperties*>(contents_->properties())->memory_pool());
-}
-
-std::shared_ptr<ColumnReader> RowGroupReader::ColumnWithExposeEncoding(
- int i, ExposedEncoding encoding_to_expose) {
- std::shared_ptr<ColumnReader> reader = Column(i);
-
- if (encoding_to_expose == ExposedEncoding::DICTIONARY) {
- // Check the encoding_stats to see if all data pages are dictionary encoded.
- std::unique_ptr<ColumnChunkMetaData> col = metadata()->ColumnChunk(i);
- const std::vector<PageEncodingStats>& encoding_stats = col->encoding_stats();
- if (encoding_stats.empty()) {
-      // Some Parquet files have empty encoding_stats; in that case we cannot
-      // be sure that all data pages are dictionary encoded, so we do not
-      // expose the dictionary.
- return reader;
- }
- // The 1st page should be the dictionary page.
- if (encoding_stats[0].page_type != PageType::DICTIONARY_PAGE ||
- (encoding_stats[0].encoding != Encoding::PLAIN &&
- encoding_stats[0].encoding != Encoding::PLAIN_DICTIONARY)) {
- return reader;
- }
- // The following pages should be dictionary encoded data pages.
- for (size_t idx = 1; idx < encoding_stats.size(); ++idx) {
- if ((encoding_stats[idx].encoding != Encoding::RLE_DICTIONARY &&
- encoding_stats[idx].encoding != Encoding::PLAIN_DICTIONARY) ||
- (encoding_stats[idx].page_type != PageType::DATA_PAGE &&
- encoding_stats[idx].page_type != PageType::DATA_PAGE_V2)) {
- return reader;
- }
- }
- } else {
-    // Exposing other encodings is not supported for now.
- return reader;
- }
-
- // Set exposed encoding.
- reader->SetExposedEncoding(encoding_to_expose);
- return reader;
-}
-
-std::unique_ptr<PageReader> RowGroupReader::GetColumnPageReader(int i) {
- if (i >= metadata()->num_columns()) {
- std::stringstream ss;
- ss << "Trying to read column index " << i << " but row group metadata has only "
- << metadata()->num_columns() << " columns";
- throw ParquetException(ss.str());
- }
- return contents_->GetColumnPageReader(i);
-}
-
-// Returns the row group metadata
-const RowGroupMetaData* RowGroupReader::metadata() const { return contents_->metadata(); }
-
-/// Compute the section of the file that should be read for the given
-/// row group and column chunk.
-::arrow::io::ReadRange ComputeColumnChunkRange(FileMetaData* file_metadata,
- int64_t source_size, int row_group_index,
- int column_index) {
- auto row_group_metadata = file_metadata->RowGroup(row_group_index);
- auto column_metadata = row_group_metadata->ColumnChunk(column_index);
-
- int64_t col_start = column_metadata->data_page_offset();
- if (column_metadata->has_dictionary_page() &&
- column_metadata->dictionary_page_offset() > 0 &&
- col_start > column_metadata->dictionary_page_offset()) {
- col_start = column_metadata->dictionary_page_offset();
- }
-
- int64_t col_length = column_metadata->total_compressed_size();
- int64_t col_end;
- if (AddWithOverflow(col_start, col_length, &col_end) || col_end > source_size) {
- throw ParquetException("Invalid column metadata (corrupt file?)");
- }
-
- // PARQUET-816 workaround for old files created by older parquet-mr
- const ApplicationVersion& version = file_metadata->writer_version();
- if (version.VersionLt(ApplicationVersion::PARQUET_816_FIXED_VERSION())) {
- // The Parquet MR writer had a bug in 1.2.8 and below where it didn't include the
- // dictionary page header size in total_compressed_size and total_uncompressed_size
- // (see IMPALA-694). We add padding to compensate.
- int64_t bytes_remaining = source_size - col_end;
- int64_t padding = std::min<int64_t>(kMaxDictHeaderSize, bytes_remaining);
- col_length += padding;
- }
-
- return {col_start, col_length};
-}
-
-// RowGroupReader::Contents implementation for the Parquet file specification
-class SerializedRowGroup : public RowGroupReader::Contents {
- public:
- SerializedRowGroup(std::shared_ptr<ArrowInputFile> source,
- std::shared_ptr<::arrow::io::internal::ReadRangeCache> cached_source,
- int64_t source_size, FileMetaData* file_metadata,
- int row_group_number, const ReaderProperties& props,
- std::shared_ptr<InternalFileDecryptor> file_decryptor = nullptr)
- : source_(std::move(source)),
- cached_source_(std::move(cached_source)),
- source_size_(source_size),
- file_metadata_(file_metadata),
- properties_(props),
- row_group_ordinal_(row_group_number),
- file_decryptor_(file_decryptor) {
- row_group_metadata_ = file_metadata->RowGroup(row_group_number);
- }
-
- const RowGroupMetaData* metadata() const override { return row_group_metadata_.get(); }
-
- const ReaderProperties* properties() const override { return &properties_; }
-
- std::unique_ptr<PageReader> GetColumnPageReader(int i) override {
- // Read column chunk from the file
- auto col = row_group_metadata_->ColumnChunk(i);
-
- ::arrow::io::ReadRange col_range =
- ComputeColumnChunkRange(file_metadata_, source_size_, row_group_ordinal_, i);
- std::shared_ptr<ArrowInputStream> stream;
- if (cached_source_) {
- // PARQUET-1698: if read coalescing is enabled, read from pre-buffered
- // segments.
- PARQUET_ASSIGN_OR_THROW(auto buffer, cached_source_->Read(col_range));
- stream = std::make_shared<::arrow::io::BufferReader>(buffer);
- } else {
- stream = properties_.GetStream(source_, col_range.offset, col_range.length);
- }
-
- std::unique_ptr<ColumnCryptoMetaData> crypto_metadata = col->crypto_metadata();
-
- // Column is encrypted only if crypto_metadata exists.
- if (!crypto_metadata) {
- return PageReader::Open(stream, col->num_values(), col->compression(),
- properties_.memory_pool());
- }
-
- if (file_decryptor_ == nullptr) {
- throw ParquetException("RowGroup is noted as encrypted but no file decryptor");
- }
-
- constexpr auto kEncryptedRowGroupsLimit = 32767;
- if (i > kEncryptedRowGroupsLimit) {
- throw ParquetException("Encrypted files cannot contain more than 32767 row groups");
- }
-
- // The column is encrypted
- std::shared_ptr<Decryptor> meta_decryptor;
- std::shared_ptr<Decryptor> data_decryptor;
- // The column is encrypted with footer key
- if (crypto_metadata->encrypted_with_footer_key()) {
- meta_decryptor = file_decryptor_->GetFooterDecryptorForColumnMeta();
- data_decryptor = file_decryptor_->GetFooterDecryptorForColumnData();
- CryptoContext ctx(col->has_dictionary_page(), row_group_ordinal_,
- static_cast<int16_t>(i), meta_decryptor, data_decryptor);
- return PageReader::Open(stream, col->num_values(), col->compression(),
- properties_.memory_pool(), &ctx);
- }
-
- // The column is encrypted with its own key
- std::string column_key_metadata = crypto_metadata->key_metadata();
- const std::string column_path = crypto_metadata->path_in_schema()->ToDotString();
-
- meta_decryptor =
- file_decryptor_->GetColumnMetaDecryptor(column_path, column_key_metadata);
- data_decryptor =
- file_decryptor_->GetColumnDataDecryptor(column_path, column_key_metadata);
-
- CryptoContext ctx(col->has_dictionary_page(), row_group_ordinal_,
- static_cast<int16_t>(i), meta_decryptor, data_decryptor);
- return PageReader::Open(stream, col->num_values(), col->compression(),
- properties_.memory_pool(), &ctx);
- }
-
- private:
- std::shared_ptr<ArrowInputFile> source_;
- // Will be nullptr if PreBuffer() is not called.
- std::shared_ptr<::arrow::io::internal::ReadRangeCache> cached_source_;
- int64_t source_size_;
- FileMetaData* file_metadata_;
- std::unique_ptr<RowGroupMetaData> row_group_metadata_;
- ReaderProperties properties_;
- int row_group_ordinal_;
- std::shared_ptr<InternalFileDecryptor> file_decryptor_;
-};
-
-// ----------------------------------------------------------------------
-// SerializedFile: An implementation of ParquetFileReader::Contents that deals
-// with the Parquet file structure, Thrift deserialization, and other internal
-// matters
-
-// This class takes ownership of the provided data source
-class SerializedFile : public ParquetFileReader::Contents {
- public:
- SerializedFile(std::shared_ptr<ArrowInputFile> source,
- const ReaderProperties& props = default_reader_properties())
- : source_(std::move(source)), properties_(props) {
- PARQUET_ASSIGN_OR_THROW(source_size_, source_->GetSize());
- }
-
- ~SerializedFile() override {
- try {
- Close();
- } catch (...) {
- }
- }
-
- void Close() override {
- if (file_decryptor_) file_decryptor_->WipeOutDecryptionKeys();
- }
-
- std::shared_ptr<RowGroupReader> GetRowGroup(int i) override {
- std::unique_ptr<SerializedRowGroup> contents(
- new SerializedRowGroup(source_, cached_source_, source_size_,
- file_metadata_.get(), i, properties_, file_decryptor_));
- return std::make_shared<RowGroupReader>(std::move(contents));
- }
-
- std::shared_ptr<FileMetaData> metadata() const override { return file_metadata_; }
-
- void set_metadata(std::shared_ptr<FileMetaData> metadata) {
- file_metadata_ = std::move(metadata);
- }
-
- void PreBuffer(const std::vector<int>& row_groups,
- const std::vector<int>& column_indices,
- const ::arrow::io::IOContext& ctx,
- const ::arrow::io::CacheOptions& options) {
- cached_source_ =
- std::make_shared<::arrow::io::internal::ReadRangeCache>(source_, ctx, options);
- std::vector<::arrow::io::ReadRange> ranges;
- for (int row : row_groups) {
- for (int col : column_indices) {
- ranges.push_back(
- ComputeColumnChunkRange(file_metadata_.get(), source_size_, row, col));
- }
- }
- PARQUET_THROW_NOT_OK(cached_source_->Cache(ranges));
- }
-
- ::arrow::Future<> WhenBuffered(const std::vector<int>& row_groups,
- const std::vector<int>& column_indices) const {
- if (!cached_source_) {
- return ::arrow::Status::Invalid("Must call PreBuffer before WhenBuffered");
- }
- std::vector<::arrow::io::ReadRange> ranges;
- for (int row : row_groups) {
- for (int col : column_indices) {
- ranges.push_back(
- ComputeColumnChunkRange(file_metadata_.get(), source_size_, row, col));
- }
- }
- return cached_source_->WaitFor(ranges);
- }
-
- // Metadata/footer parsing. Divided up to separate sync/async paths, and to use
- // exceptions for error handling (with the async path converting to Future/Status).
-
- void ParseMetaData() {
- int64_t footer_read_size = GetFooterReadSize();
- PARQUET_ASSIGN_OR_THROW(
- auto footer_buffer,
- source_->ReadAt(source_size_ - footer_read_size, footer_read_size));
- uint32_t metadata_len = ParseFooterLength(footer_buffer, footer_read_size);
- int64_t metadata_start = source_size_ - kFooterSize - metadata_len;
-
- std::shared_ptr<::arrow::Buffer> metadata_buffer;
- if (footer_read_size >= (metadata_len + kFooterSize)) {
- metadata_buffer = SliceBuffer(
- footer_buffer, footer_read_size - metadata_len - kFooterSize, metadata_len);
- } else {
- PARQUET_ASSIGN_OR_THROW(metadata_buffer,
- source_->ReadAt(metadata_start, metadata_len));
- }
-
- // Parse the footer depending on encryption type
- const bool is_encrypted_footer =
- memcmp(footer_buffer->data() + footer_read_size - 4, kParquetEMagic, 4) == 0;
- if (is_encrypted_footer) {
- // Encrypted file with Encrypted footer.
- const std::pair<int64_t, uint32_t> read_size =
- ParseMetaDataOfEncryptedFileWithEncryptedFooter(metadata_buffer, metadata_len);
- // Read the actual footer
- metadata_start = read_size.first;
- metadata_len = read_size.second;
- PARQUET_ASSIGN_OR_THROW(metadata_buffer,
- source_->ReadAt(metadata_start, metadata_len));
- // Fall through
- }
-
- const uint32_t read_metadata_len =
- ParseUnencryptedFileMetadata(metadata_buffer, metadata_len);
- auto file_decryption_properties = properties_.file_decryption_properties().get();
- if (is_encrypted_footer) {
- // Nothing else to do here.
- return;
-    } else if (!file_metadata_->is_encryption_algorithm_set()) {  // Unencrypted file.
- if (file_decryption_properties != nullptr) {
- if (!file_decryption_properties->plaintext_files_allowed()) {
- throw ParquetException("Applying decryption properties on plaintext file");
- }
- }
- } else {
- // Encrypted file with plaintext footer mode.
- ParseMetaDataOfEncryptedFileWithPlaintextFooter(
- file_decryption_properties, metadata_buffer, metadata_len, read_metadata_len);
- }
- }
-
- // Validate the source size and get the initial read size.
- int64_t GetFooterReadSize() {
- if (source_size_ == 0) {
- throw ParquetInvalidOrCorruptedFileException("Parquet file size is 0 bytes");
- } else if (source_size_ < kFooterSize) {
- throw ParquetInvalidOrCorruptedFileException(
- "Parquet file size is ", source_size_,
- " bytes, smaller than the minimum file footer (", kFooterSize, " bytes)");
- }
- return std::min(source_size_, kDefaultFooterReadSize);
- }
-
- // Validate the magic bytes and get the length of the full footer.
- uint32_t ParseFooterLength(const std::shared_ptr<::arrow::Buffer>& footer_buffer,
- const int64_t footer_read_size) {
- // Check if all bytes are read. Check if last 4 bytes read have the magic bits
- if (footer_buffer->size() != footer_read_size ||
- (memcmp(footer_buffer->data() + footer_read_size - 4, kParquetMagic, 4) != 0 &&
- memcmp(footer_buffer->data() + footer_read_size - 4, kParquetEMagic, 4) != 0)) {
- throw ParquetInvalidOrCorruptedFileException(
- "Parquet magic bytes not found in footer. Either the file is corrupted or this "
- "is not a parquet file.");
- }
- // Both encrypted/unencrypted footers have the same footer length check.
- uint32_t metadata_len = ::arrow::util::SafeLoadAs<uint32_t>(
- reinterpret_cast<const uint8_t*>(footer_buffer->data()) + footer_read_size -
- kFooterSize);
- if (metadata_len > source_size_ - kFooterSize) {
- throw ParquetInvalidOrCorruptedFileException(
- "Parquet file size is ", source_size_,
- " bytes, smaller than the size reported by footer's (", metadata_len, "bytes)");
- }
- return metadata_len;
- }
-
- // Does not throw.
- ::arrow::Future<> ParseMetaDataAsync() {
- int64_t footer_read_size;
- BEGIN_PARQUET_CATCH_EXCEPTIONS
- footer_read_size = GetFooterReadSize();
- END_PARQUET_CATCH_EXCEPTIONS
- // Assumes this is kept alive externally
- return source_->ReadAsync(source_size_ - footer_read_size, footer_read_size)
- .Then([=](const std::shared_ptr<::arrow::Buffer>& footer_buffer)
- -> ::arrow::Future<> {
- uint32_t metadata_len;
- BEGIN_PARQUET_CATCH_EXCEPTIONS
- metadata_len = ParseFooterLength(footer_buffer, footer_read_size);
- END_PARQUET_CATCH_EXCEPTIONS
- int64_t metadata_start = source_size_ - kFooterSize - metadata_len;
-
- std::shared_ptr<::arrow::Buffer> metadata_buffer;
- if (footer_read_size >= (metadata_len + kFooterSize)) {
- metadata_buffer =
- SliceBuffer(footer_buffer, footer_read_size - metadata_len - kFooterSize,
- metadata_len);
- return ParseMaybeEncryptedMetaDataAsync(footer_buffer,
- std::move(metadata_buffer),
- footer_read_size, metadata_len);
- }
- return source_->ReadAsync(metadata_start, metadata_len)
- .Then([=](const std::shared_ptr<::arrow::Buffer>& metadata_buffer) {
- return ParseMaybeEncryptedMetaDataAsync(footer_buffer, metadata_buffer,
- footer_read_size, metadata_len);
- });
- });
- }
-
- // Continuation
- ::arrow::Future<> ParseMaybeEncryptedMetaDataAsync(
- std::shared_ptr<::arrow::Buffer> footer_buffer,
- std::shared_ptr<::arrow::Buffer> metadata_buffer, int64_t footer_read_size,
- uint32_t metadata_len) {
- // Parse the footer depending on encryption type
- const bool is_encrypted_footer =
- memcmp(footer_buffer->data() + footer_read_size - 4, kParquetEMagic, 4) == 0;
- if (is_encrypted_footer) {
- // Encrypted file with Encrypted footer.
- std::pair<int64_t, uint32_t> read_size;
- BEGIN_PARQUET_CATCH_EXCEPTIONS
- read_size =
- ParseMetaDataOfEncryptedFileWithEncryptedFooter(metadata_buffer, metadata_len);
- END_PARQUET_CATCH_EXCEPTIONS
- // Read the actual footer
- int64_t metadata_start = read_size.first;
- metadata_len = read_size.second;
- return source_->ReadAsync(metadata_start, metadata_len)
- .Then([=](const std::shared_ptr<::arrow::Buffer>& metadata_buffer) {
- // Continue and read the file footer
- return ParseMetaDataFinal(metadata_buffer, metadata_len, is_encrypted_footer);
- });
- }
- return ParseMetaDataFinal(std::move(metadata_buffer), metadata_len,
- is_encrypted_footer);
- }
-
- // Continuation
- ::arrow::Status ParseMetaDataFinal(std::shared_ptr<::arrow::Buffer> metadata_buffer,
- uint32_t metadata_len,
- const bool is_encrypted_footer) {
- BEGIN_PARQUET_CATCH_EXCEPTIONS
- const uint32_t read_metadata_len =
- ParseUnencryptedFileMetadata(metadata_buffer, metadata_len);
- auto file_decryption_properties = properties_.file_decryption_properties().get();
- if (is_encrypted_footer) {
- // Nothing else to do here.
- return ::arrow::Status::OK();
-    } else if (!file_metadata_->is_encryption_algorithm_set()) {  // Unencrypted file.
- if (file_decryption_properties != nullptr) {
- if (!file_decryption_properties->plaintext_files_allowed()) {
- throw ParquetException("Applying decryption properties on plaintext file");
- }
- }
- } else {
- // Encrypted file with plaintext footer mode.
- ParseMetaDataOfEncryptedFileWithPlaintextFooter(
- file_decryption_properties, metadata_buffer, metadata_len, read_metadata_len);
- }
- END_PARQUET_CATCH_EXCEPTIONS
- return ::arrow::Status::OK();
- }
-
- private:
- std::shared_ptr<ArrowInputFile> source_;
- std::shared_ptr<::arrow::io::internal::ReadRangeCache> cached_source_;
- int64_t source_size_;
- std::shared_ptr<FileMetaData> file_metadata_;
- ReaderProperties properties_;
-
- std::shared_ptr<InternalFileDecryptor> file_decryptor_;
-
- // \return The true length of the metadata in bytes
- uint32_t ParseUnencryptedFileMetadata(const std::shared_ptr<Buffer>& footer_buffer,
- const uint32_t metadata_len);
-
- std::string HandleAadPrefix(FileDecryptionProperties* file_decryption_properties,
- EncryptionAlgorithm& algo);
-
- void ParseMetaDataOfEncryptedFileWithPlaintextFooter(
- FileDecryptionProperties* file_decryption_properties,
- const std::shared_ptr<Buffer>& metadata_buffer, uint32_t metadata_len,
- uint32_t read_metadata_len);
-
- // \return The position and size of the actual footer
- std::pair<int64_t, uint32_t> ParseMetaDataOfEncryptedFileWithEncryptedFooter(
- const std::shared_ptr<Buffer>& crypto_metadata_buffer, uint32_t footer_len);
-};
-
-uint32_t SerializedFile::ParseUnencryptedFileMetadata(
- const std::shared_ptr<Buffer>& metadata_buffer, const uint32_t metadata_len) {
- if (metadata_buffer->size() != metadata_len) {
- throw ParquetException("Failed reading metadata buffer (requested " +
- std::to_string(metadata_len) + " bytes but got " +
- std::to_string(metadata_buffer->size()) + " bytes)");
- }
- uint32_t read_metadata_len = metadata_len;
- // The encrypted read path falls through to here, so pass in the decryptor
- file_metadata_ =
- FileMetaData::Make(metadata_buffer->data(), &read_metadata_len, file_decryptor_);
- return read_metadata_len;
-}
-
-std::pair<int64_t, uint32_t>
-SerializedFile::ParseMetaDataOfEncryptedFileWithEncryptedFooter(
- const std::shared_ptr<::arrow::Buffer>& crypto_metadata_buffer,
- // both metadata & crypto metadata length
- const uint32_t footer_len) {
- // encryption with encrypted footer
- // Check if the footer_buffer contains the entire metadata
- if (crypto_metadata_buffer->size() != footer_len) {
- throw ParquetException("Failed reading encrypted metadata buffer (requested " +
- std::to_string(footer_len) + " bytes but got " +
- std::to_string(crypto_metadata_buffer->size()) + " bytes)");
- }
- auto file_decryption_properties = properties_.file_decryption_properties().get();
- if (file_decryption_properties == nullptr) {
- throw ParquetException(
- "Could not read encrypted metadata, no decryption found in reader's properties");
- }
- uint32_t crypto_metadata_len = footer_len;
- std::shared_ptr<FileCryptoMetaData> file_crypto_metadata =
- FileCryptoMetaData::Make(crypto_metadata_buffer->data(), &crypto_metadata_len);
- // Handle AAD prefix
- EncryptionAlgorithm algo = file_crypto_metadata->encryption_algorithm();
- std::string file_aad = HandleAadPrefix(file_decryption_properties, algo);
- file_decryptor_ = std::make_shared<InternalFileDecryptor>(
- file_decryption_properties, file_aad, algo.algorithm,
- file_crypto_metadata->key_metadata(), properties_.memory_pool());
-
- int64_t metadata_offset = source_size_ - kFooterSize - footer_len + crypto_metadata_len;
- uint32_t metadata_len = footer_len - crypto_metadata_len;
- return std::make_pair(metadata_offset, metadata_len);
-}
-
-void SerializedFile::ParseMetaDataOfEncryptedFileWithPlaintextFooter(
- FileDecryptionProperties* file_decryption_properties,
- const std::shared_ptr<Buffer>& metadata_buffer, uint32_t metadata_len,
- uint32_t read_metadata_len) {
-  // Providing decryption properties in plaintext footer mode is not mandatory,
-  // for example when reading with a legacy reader.
- if (file_decryption_properties != nullptr) {
- EncryptionAlgorithm algo = file_metadata_->encryption_algorithm();
- // Handle AAD prefix
- std::string file_aad = HandleAadPrefix(file_decryption_properties, algo);
- file_decryptor_ = std::make_shared<InternalFileDecryptor>(
- file_decryption_properties, file_aad, algo.algorithm,
- file_metadata_->footer_signing_key_metadata(), properties_.memory_pool());
- // set the InternalFileDecryptor in the metadata as well, as it's used
- // for signature verification and for ColumnChunkMetaData creation.
- file_metadata_->set_file_decryptor(file_decryptor_);
-
- if (file_decryption_properties->check_plaintext_footer_integrity()) {
- if (metadata_len - read_metadata_len !=
- (parquet::encryption::kGcmTagLength + parquet::encryption::kNonceLength)) {
- throw ParquetInvalidOrCorruptedFileException(
- "Failed reading metadata for encryption signature (requested ",
- parquet::encryption::kGcmTagLength + parquet::encryption::kNonceLength,
- " bytes but have ", metadata_len - read_metadata_len, " bytes)");
- }
-
- if (!file_metadata_->VerifySignature(metadata_buffer->data() + read_metadata_len)) {
- throw ParquetInvalidOrCorruptedFileException(
- "Parquet crypto signature verification failed");
- }
- }
- }
-}
-
-std::string SerializedFile::HandleAadPrefix(
- FileDecryptionProperties* file_decryption_properties, EncryptionAlgorithm& algo) {
- std::string aad_prefix_in_properties = file_decryption_properties->aad_prefix();
- std::string aad_prefix = aad_prefix_in_properties;
-  bool file_has_aad_prefix = !algo.aad.aad_prefix.empty();
- std::string aad_prefix_in_file = algo.aad.aad_prefix;
-
- if (algo.aad.supply_aad_prefix && aad_prefix_in_properties.empty()) {
- throw ParquetException(
- "AAD prefix used for file encryption, "
- "but not stored in file and not supplied "
- "in decryption properties");
- }
-
- if (file_has_aad_prefix) {
- if (!aad_prefix_in_properties.empty()) {
- if (aad_prefix_in_properties.compare(aad_prefix_in_file) != 0) {
- throw ParquetException(
- "AAD Prefix in file and in properties "
- "is not the same");
- }
- }
- aad_prefix = aad_prefix_in_file;
- std::shared_ptr<AADPrefixVerifier> aad_prefix_verifier =
- file_decryption_properties->aad_prefix_verifier();
- if (aad_prefix_verifier != nullptr) aad_prefix_verifier->Verify(aad_prefix);
- } else {
- if (!algo.aad.supply_aad_prefix && !aad_prefix_in_properties.empty()) {
- throw ParquetException(
- "AAD Prefix set in decryption properties, but was not used "
- "for file encryption");
- }
- std::shared_ptr<AADPrefixVerifier> aad_prefix_verifier =
- file_decryption_properties->aad_prefix_verifier();
- if (aad_prefix_verifier != nullptr) {
- throw ParquetException(
- "AAD Prefix Verifier is set, but AAD Prefix not found in file");
- }
- }
- return aad_prefix + algo.aad.aad_file_unique;
-}
-
-// ----------------------------------------------------------------------
-// ParquetFileReader public API
-
-ParquetFileReader::ParquetFileReader() {}
-
-ParquetFileReader::~ParquetFileReader() {
- try {
- Close();
- } catch (...) {
- }
-}
-
-// Open the file. If no metadata is passed, it is parsed from the footer of
-// the file
-std::unique_ptr<ParquetFileReader::Contents> ParquetFileReader::Contents::Open(
- std::shared_ptr<ArrowInputFile> source, const ReaderProperties& props,
- std::shared_ptr<FileMetaData> metadata) {
- std::unique_ptr<ParquetFileReader::Contents> result(
- new SerializedFile(std::move(source), props));
-
-  // Access private methods that are otherwise unavailable.
- SerializedFile* file = static_cast<SerializedFile*>(result.get());
-
- if (metadata == nullptr) {
- // Validates magic bytes, parses metadata, and initializes the SchemaDescriptor
- file->ParseMetaData();
- } else {
- file->set_metadata(std::move(metadata));
- }
-
- return result;
-}
-
-::arrow::Future<std::unique_ptr<ParquetFileReader::Contents>>
-ParquetFileReader::Contents::OpenAsync(std::shared_ptr<ArrowInputFile> source,
- const ReaderProperties& props,
- std::shared_ptr<FileMetaData> metadata) {
- BEGIN_PARQUET_CATCH_EXCEPTIONS
- std::unique_ptr<ParquetFileReader::Contents> result(
- new SerializedFile(std::move(source), props));
- SerializedFile* file = static_cast<SerializedFile*>(result.get());
- if (metadata == nullptr) {
- // TODO(ARROW-12259): workaround since we have Future<(move-only type)>
- struct {
- ::arrow::Result<std::unique_ptr<ParquetFileReader::Contents>> operator()() {
- return std::move(result);
- }
-
- std::unique_ptr<ParquetFileReader::Contents> result;
- } Continuation;
- Continuation.result = std::move(result);
- return file->ParseMetaDataAsync().Then(std::move(Continuation));
- } else {
- file->set_metadata(std::move(metadata));
- return ::arrow::Future<std::unique_ptr<ParquetFileReader::Contents>>::MakeFinished(
- std::move(result));
- }
- END_PARQUET_CATCH_EXCEPTIONS
-}
-
-std::unique_ptr<ParquetFileReader> ParquetFileReader::Open(
- std::shared_ptr<::arrow::io::RandomAccessFile> source, const ReaderProperties& props,
- std::shared_ptr<FileMetaData> metadata) {
- auto contents = SerializedFile::Open(std::move(source), props, std::move(metadata));
- std::unique_ptr<ParquetFileReader> result(new ParquetFileReader());
- result->Open(std::move(contents));
- return result;
-}
-
-std::unique_ptr<ParquetFileReader> ParquetFileReader::OpenFile(
- const std::string& path, bool memory_map, const ReaderProperties& props,
- std::shared_ptr<FileMetaData> metadata) {
- std::shared_ptr<::arrow::io::RandomAccessFile> source;
- if (memory_map) {
- PARQUET_ASSIGN_OR_THROW(
- source, ::arrow::io::MemoryMappedFile::Open(path, ::arrow::io::FileMode::READ));
- } else {
- PARQUET_ASSIGN_OR_THROW(source,
- ::arrow::io::ReadableFile::Open(path, props.memory_pool()));
- }
-
- return Open(std::move(source), props, std::move(metadata));
-}
-
-::arrow::Future<std::unique_ptr<ParquetFileReader>> ParquetFileReader::OpenAsync(
- std::shared_ptr<::arrow::io::RandomAccessFile> source, const ReaderProperties& props,
- std::shared_ptr<FileMetaData> metadata) {
- BEGIN_PARQUET_CATCH_EXCEPTIONS
- auto fut = SerializedFile::OpenAsync(std::move(source), props, std::move(metadata));
- // TODO(ARROW-12259): workaround since we have Future<(move-only type)>
- auto completed = ::arrow::Future<std::unique_ptr<ParquetFileReader>>::Make();
- fut.AddCallback([fut, completed](
- const ::arrow::Result<std::unique_ptr<ParquetFileReader::Contents>>&
- contents) mutable {
- if (!contents.ok()) {
- completed.MarkFinished(contents.status());
- return;
- }
- std::unique_ptr<ParquetFileReader> result(new ParquetFileReader());
- result->Open(fut.MoveResult().MoveValueUnsafe());
- completed.MarkFinished(std::move(result));
- });
- return completed;
- END_PARQUET_CATCH_EXCEPTIONS
-}
-
-void ParquetFileReader::Open(std::unique_ptr<ParquetFileReader::Contents> contents) {
- contents_ = std::move(contents);
-}
-
-void ParquetFileReader::Close() {
- if (contents_) {
- contents_->Close();
- }
-}
-
-std::shared_ptr<FileMetaData> ParquetFileReader::metadata() const {
- return contents_->metadata();
-}
-
-std::shared_ptr<RowGroupReader> ParquetFileReader::RowGroup(int i) {
- if (i >= metadata()->num_row_groups()) {
- std::stringstream ss;
- ss << "Trying to read row group " << i << " but file only has "
- << metadata()->num_row_groups() << " row groups";
- throw ParquetException(ss.str());
- }
- return contents_->GetRowGroup(i);
-}
-
-void ParquetFileReader::PreBuffer(const std::vector<int>& row_groups,
- const std::vector<int>& column_indices,
- const ::arrow::io::IOContext& ctx,
- const ::arrow::io::CacheOptions& options) {
- // Access private methods here
- SerializedFile* file =
- ::arrow::internal::checked_cast<SerializedFile*>(contents_.get());
- file->PreBuffer(row_groups, column_indices, ctx, options);
-}
-
-::arrow::Future<> ParquetFileReader::WhenBuffered(
- const std::vector<int>& row_groups, const std::vector<int>& column_indices) const {
- // Access private methods here
- SerializedFile* file =
- ::arrow::internal::checked_cast<SerializedFile*>(contents_.get());
- return file->WhenBuffered(row_groups, column_indices);
-}
-
-// ----------------------------------------------------------------------
-// File metadata helpers
-
-std::shared_ptr<FileMetaData> ReadMetaData(
- const std::shared_ptr<::arrow::io::RandomAccessFile>& source) {
- return ParquetFileReader::Open(source)->metadata();
-}
-
-// ----------------------------------------------------------------------
-// File scanner for performance testing
-
-int64_t ScanFileContents(std::vector<int> columns, const int32_t column_batch_size,
- ParquetFileReader* reader) {
- std::vector<int16_t> rep_levels(column_batch_size);
- std::vector<int16_t> def_levels(column_batch_size);
-
- int num_columns = static_cast<int>(columns.size());
-
-  // If columns are not specified explicitly, add all columns.
-  if (columns.empty()) {
- num_columns = reader->metadata()->num_columns();
- columns.resize(num_columns);
- for (int i = 0; i < num_columns; i++) {
- columns[i] = i;
- }
- }
-
- std::vector<int64_t> total_rows(num_columns, 0);
-
- for (int r = 0; r < reader->metadata()->num_row_groups(); ++r) {
- auto group_reader = reader->RowGroup(r);
- int col = 0;
- for (auto i : columns) {
- std::shared_ptr<ColumnReader> col_reader = group_reader->Column(i);
- size_t value_byte_size = GetTypeByteSize(col_reader->descr()->physical_type());
- std::vector<uint8_t> values(column_batch_size * value_byte_size);
-
- int64_t values_read = 0;
- while (col_reader->HasNext()) {
- int64_t levels_read =
- ScanAllValues(column_batch_size, def_levels.data(), rep_levels.data(),
- values.data(), &values_read, col_reader.get());
- if (col_reader->descr()->max_repetition_level() > 0) {
- for (int64_t i = 0; i < levels_read; i++) {
- if (rep_levels[i] == 0) {
- total_rows[col]++;
- }
- }
- } else {
- total_rows[col] += levels_read;
- }
- }
- col++;
- }
- }
-
- for (int i = 1; i < num_columns; ++i) {
- if (total_rows[0] != total_rows[i]) {
- throw ParquetException("Parquet error: Total rows among columns do not match");
- }
- }
-
- return total_rows[0];
-}
-
-} // namespace parquet
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "parquet/file_reader.h"
+
+#include <algorithm>
+#include <cstdint>
+#include <cstring>
+#include <memory>
+#include <ostream>
+#include <string>
+#include <utility>
+
+#include "arrow/io/caching.h"
+#include "arrow/io/file.h"
+#include "arrow/io/memory.h"
+#include "arrow/util/checked_cast.h"
+#include "arrow/util/future.h"
+#include "arrow/util/int_util_internal.h"
+#include "arrow/util/logging.h"
+#include "arrow/util/ubsan.h"
+#include "parquet/column_reader.h"
+#include "parquet/column_scanner.h"
+#include "parquet/encryption/encryption_internal.h"
+#include "parquet/encryption/internal_file_decryptor.h"
+#include "parquet/exception.h"
+#include "parquet/file_writer.h"
+#include "parquet/metadata.h"
+#include "parquet/platform.h"
+#include "parquet/properties.h"
+#include "parquet/schema.h"
+#include "parquet/types.h"
+
+using arrow::internal::AddWithOverflow;
+
+namespace parquet {
+
+// PARQUET-978: Minimize footer reads by reading 64 KB from the end of the file
+static constexpr int64_t kDefaultFooterReadSize = 64 * 1024;
+static constexpr uint32_t kFooterSize = 8;
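+// (The file tail is laid out as [FileMetaData (Thrift)] [4-byte little-endian
+// metadata length] [4-byte magic "PAR1", or "PARE" for an encrypted footer],
+// so kFooterSize covers the trailing length word plus the magic.)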
+
+// For PARQUET-816
+static constexpr int64_t kMaxDictHeaderSize = 100;
+
+// ----------------------------------------------------------------------
+// RowGroupReader public API
+
+RowGroupReader::RowGroupReader(std::unique_ptr<Contents> contents)
+ : contents_(std::move(contents)) {}
+
+std::shared_ptr<ColumnReader> RowGroupReader::Column(int i) {
+ if (i >= metadata()->num_columns()) {
+ std::stringstream ss;
+ ss << "Trying to read column index " << i << " but row group metadata has only "
+ << metadata()->num_columns() << " columns";
+ throw ParquetException(ss.str());
+ }
+ const ColumnDescriptor* descr = metadata()->schema()->Column(i);
+
+ std::unique_ptr<PageReader> page_reader = contents_->GetColumnPageReader(i);
+ return ColumnReader::Make(
+ descr, std::move(page_reader),
+ const_cast<ReaderProperties*>(contents_->properties())->memory_pool());
+}
+
+std::shared_ptr<ColumnReader> RowGroupReader::ColumnWithExposeEncoding(
+ int i, ExposedEncoding encoding_to_expose) {
+ std::shared_ptr<ColumnReader> reader = Column(i);
+
+ if (encoding_to_expose == ExposedEncoding::DICTIONARY) {
+ // Check the encoding_stats to see if all data pages are dictionary encoded.
+ std::unique_ptr<ColumnChunkMetaData> col = metadata()->ColumnChunk(i);
+ const std::vector<PageEncodingStats>& encoding_stats = col->encoding_stats();
+ if (encoding_stats.empty()) {
+ // Some Parquet files have empty encoding_stats; in that case we cannot
+ // tell whether all data pages are dictionary encoded, so we do not
+ // enable exposing the dictionary.
+ return reader;
+ }
+ // The 1st page should be the dictionary page.
+ if (encoding_stats[0].page_type != PageType::DICTIONARY_PAGE ||
+ (encoding_stats[0].encoding != Encoding::PLAIN &&
+ encoding_stats[0].encoding != Encoding::PLAIN_DICTIONARY)) {
+ return reader;
+ }
+ // The following pages should be dictionary encoded data pages.
+ for (size_t idx = 1; idx < encoding_stats.size(); ++idx) {
+ if ((encoding_stats[idx].encoding != Encoding::RLE_DICTIONARY &&
+ encoding_stats[idx].encoding != Encoding::PLAIN_DICTIONARY) ||
+ (encoding_stats[idx].page_type != PageType::DATA_PAGE &&
+ encoding_stats[idx].page_type != PageType::DATA_PAGE_V2)) {
+ return reader;
+ }
+ }
+ } else {
+ // Exposing other encodings is not supported for now.
+ return reader;
+ }
+
+ // Set exposed encoding.
+ reader->SetExposedEncoding(encoding_to_expose);
+ return reader;
+}
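+
+// Example (hypothetical usage): callers should check which encoding was
+// actually exposed before deciding how to read batches, e.g.:
+//
+// auto col = row_group->ColumnWithExposeEncoding(0, ExposedEncoding::DICTIONARY);
+// if (col->GetExposedEncoding() == ExposedEncoding::DICTIONARY) {
+// // read dictionary indices and the dictionary itself
+// }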
+
+std::unique_ptr<PageReader> RowGroupReader::GetColumnPageReader(int i) {
+ if (i >= metadata()->num_columns()) {
+ std::stringstream ss;
+ ss << "Trying to read column index " << i << " but row group metadata has only "
+ << metadata()->num_columns() << " columns";
+ throw ParquetException(ss.str());
+ }
+ return contents_->GetColumnPageReader(i);
+}
+
+// Returns the rowgroup metadata
+const RowGroupMetaData* RowGroupReader::metadata() const { return contents_->metadata(); }
+
+/// Compute the section of the file that should be read for the given
+/// row group and column chunk.
+::arrow::io::ReadRange ComputeColumnChunkRange(FileMetaData* file_metadata,
+ int64_t source_size, int row_group_index,
+ int column_index) {
+ auto row_group_metadata = file_metadata->RowGroup(row_group_index);
+ auto column_metadata = row_group_metadata->ColumnChunk(column_index);
+
+ int64_t col_start = column_metadata->data_page_offset();
+ if (column_metadata->has_dictionary_page() &&
+ column_metadata->dictionary_page_offset() > 0 &&
+ col_start > column_metadata->dictionary_page_offset()) {
+ col_start = column_metadata->dictionary_page_offset();
+ }
+
+ int64_t col_length = column_metadata->total_compressed_size();
+ int64_t col_end;
+ if (AddWithOverflow(col_start, col_length, &col_end) || col_end > source_size) {
+ throw ParquetException("Invalid column metadata (corrupt file?)");
+ }
+
+ // PARQUET-816 workaround for old files created by older parquet-mr
+ const ApplicationVersion& version = file_metadata->writer_version();
+ if (version.VersionLt(ApplicationVersion::PARQUET_816_FIXED_VERSION())) {
+ // The Parquet MR writer had a bug in 1.2.8 and below where it didn't include the
+ // dictionary page header size in total_compressed_size and total_uncompressed_size
+ // (see IMPALA-694). We add padding to compensate.
+ int64_t bytes_remaining = source_size - col_end;
+ int64_t padding = std::min<int64_t>(kMaxDictHeaderSize, bytes_remaining);
+ col_length += padding;
+ }
+
+ return {col_start, col_length};
+}
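+
+// For example, a chunk with dictionary_page_offset 4, data_page_offset 100 and
+// total_compressed_size 900 yields the range {4, 900}; for files written by
+// parquet-mr 1.2.8 or older, up to kMaxDictHeaderSize extra bytes are included.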
+
+// RowGroupReader::Contents implementation for the Parquet file specification
+class SerializedRowGroup : public RowGroupReader::Contents {
+ public:
+ SerializedRowGroup(std::shared_ptr<ArrowInputFile> source,
+ std::shared_ptr<::arrow::io::internal::ReadRangeCache> cached_source,
+ int64_t source_size, FileMetaData* file_metadata,
+ int row_group_number, const ReaderProperties& props,
+ std::shared_ptr<InternalFileDecryptor> file_decryptor = nullptr)
+ : source_(std::move(source)),
+ cached_source_(std::move(cached_source)),
+ source_size_(source_size),
+ file_metadata_(file_metadata),
+ properties_(props),
+ row_group_ordinal_(row_group_number),
+ file_decryptor_(file_decryptor) {
+ row_group_metadata_ = file_metadata->RowGroup(row_group_number);
+ }
+
+ const RowGroupMetaData* metadata() const override { return row_group_metadata_.get(); }
+
+ const ReaderProperties* properties() const override { return &properties_; }
+
+ std::unique_ptr<PageReader> GetColumnPageReader(int i) override {
+ // Read column chunk from the file
+ auto col = row_group_metadata_->ColumnChunk(i);
+
+ ::arrow::io::ReadRange col_range =
+ ComputeColumnChunkRange(file_metadata_, source_size_, row_group_ordinal_, i);
+ std::shared_ptr<ArrowInputStream> stream;
+ if (cached_source_) {
+ // PARQUET-1698: if read coalescing is enabled, read from pre-buffered
+ // segments.
+ PARQUET_ASSIGN_OR_THROW(auto buffer, cached_source_->Read(col_range));
+ stream = std::make_shared<::arrow::io::BufferReader>(buffer);
+ } else {
+ stream = properties_.GetStream(source_, col_range.offset, col_range.length);
+ }
+
+ std::unique_ptr<ColumnCryptoMetaData> crypto_metadata = col->crypto_metadata();
+
+ // Column is encrypted only if crypto_metadata exists.
+ if (!crypto_metadata) {
+ return PageReader::Open(stream, col->num_values(), col->compression(),
+ properties_.memory_pool());
+ }
+
+ if (file_decryptor_ == nullptr) {
+ throw ParquetException("RowGroup is noted as encrypted but no file decryptor");
+ }
+
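+ // Note: this bound also guards the static_cast<int16_t>(i) ordinal casts
+ // below; encrypted files keep ordinals within int16 range.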
+ constexpr auto kEncryptedRowGroupsLimit = 32767;
+ if (i > kEncryptedRowGroupsLimit) {
+ throw ParquetException("Encrypted files cannot contain more than 32767 row groups");
+ }
+
+ // The column is encrypted
+ std::shared_ptr<Decryptor> meta_decryptor;
+ std::shared_ptr<Decryptor> data_decryptor;
+ // The column is encrypted with footer key
+ if (crypto_metadata->encrypted_with_footer_key()) {
+ meta_decryptor = file_decryptor_->GetFooterDecryptorForColumnMeta();
+ data_decryptor = file_decryptor_->GetFooterDecryptorForColumnData();
+ CryptoContext ctx(col->has_dictionary_page(), row_group_ordinal_,
+ static_cast<int16_t>(i), meta_decryptor, data_decryptor);
+ return PageReader::Open(stream, col->num_values(), col->compression(),
+ properties_.memory_pool(), &ctx);
+ }
+
+ // The column is encrypted with its own key
+ std::string column_key_metadata = crypto_metadata->key_metadata();
+ const std::string column_path = crypto_metadata->path_in_schema()->ToDotString();
+
+ meta_decryptor =
+ file_decryptor_->GetColumnMetaDecryptor(column_path, column_key_metadata);
+ data_decryptor =
+ file_decryptor_->GetColumnDataDecryptor(column_path, column_key_metadata);
+
+ CryptoContext ctx(col->has_dictionary_page(), row_group_ordinal_,
+ static_cast<int16_t>(i), meta_decryptor, data_decryptor);
+ return PageReader::Open(stream, col->num_values(), col->compression(),
+ properties_.memory_pool(), &ctx);
+ }
+
+ private:
+ std::shared_ptr<ArrowInputFile> source_;
+ // Will be nullptr if PreBuffer() is not called.
+ std::shared_ptr<::arrow::io::internal::ReadRangeCache> cached_source_;
+ int64_t source_size_;
+ FileMetaData* file_metadata_;
+ std::unique_ptr<RowGroupMetaData> row_group_metadata_;
+ ReaderProperties properties_;
+ int row_group_ordinal_;
+ std::shared_ptr<InternalFileDecryptor> file_decryptor_;
+};
+
+// ----------------------------------------------------------------------
+// SerializedFile: An implementation of ParquetFileReader::Contents that deals
+// with the Parquet file structure, Thrift deserialization, and other internal
+// matters
+
+// This class takes ownership of the provided data source
+class SerializedFile : public ParquetFileReader::Contents {
+ public:
+ SerializedFile(std::shared_ptr<ArrowInputFile> source,
+ const ReaderProperties& props = default_reader_properties())
+ : source_(std::move(source)), properties_(props) {
+ PARQUET_ASSIGN_OR_THROW(source_size_, source_->GetSize());
+ }
+
+ ~SerializedFile() override {
+ try {
+ Close();
+ } catch (...) {
+ }
+ }
+
+ void Close() override {
+ if (file_decryptor_) file_decryptor_->WipeOutDecryptionKeys();
+ }
+
+ std::shared_ptr<RowGroupReader> GetRowGroup(int i) override {
+ std::unique_ptr<SerializedRowGroup> contents(
+ new SerializedRowGroup(source_, cached_source_, source_size_,
+ file_metadata_.get(), i, properties_, file_decryptor_));
+ return std::make_shared<RowGroupReader>(std::move(contents));
+ }
+
+ std::shared_ptr<FileMetaData> metadata() const override { return file_metadata_; }
+
+ void set_metadata(std::shared_ptr<FileMetaData> metadata) {
+ file_metadata_ = std::move(metadata);
+ }
+
+ void PreBuffer(const std::vector<int>& row_groups,
+ const std::vector<int>& column_indices,
+ const ::arrow::io::IOContext& ctx,
+ const ::arrow::io::CacheOptions& options) {
+ cached_source_ =
+ std::make_shared<::arrow::io::internal::ReadRangeCache>(source_, ctx, options);
+ std::vector<::arrow::io::ReadRange> ranges;
+ for (int row : row_groups) {
+ for (int col : column_indices) {
+ ranges.push_back(
+ ComputeColumnChunkRange(file_metadata_.get(), source_size_, row, col));
+ }
+ }
+ PARQUET_THROW_NOT_OK(cached_source_->Cache(ranges));
+ }
+
+ ::arrow::Future<> WhenBuffered(const std::vector<int>& row_groups,
+ const std::vector<int>& column_indices) const {
+ if (!cached_source_) {
+ return ::arrow::Status::Invalid("Must call PreBuffer before WhenBuffered");
+ }
+ std::vector<::arrow::io::ReadRange> ranges;
+ for (int row : row_groups) {
+ for (int col : column_indices) {
+ ranges.push_back(
+ ComputeColumnChunkRange(file_metadata_.get(), source_size_, row, col));
+ }
+ }
+ return cached_source_->WaitFor(ranges);
+ }
+
+ // Metadata/footer parsing. Divided up to separate sync/async paths, and to use
+ // exceptions for error handling (with the async path converting to Future/Status).
+
+ void ParseMetaData() {
+ int64_t footer_read_size = GetFooterReadSize();
+ PARQUET_ASSIGN_OR_THROW(
+ auto footer_buffer,
+ source_->ReadAt(source_size_ - footer_read_size, footer_read_size));
+ uint32_t metadata_len = ParseFooterLength(footer_buffer, footer_read_size);
+ int64_t metadata_start = source_size_ - kFooterSize - metadata_len;
+
+ std::shared_ptr<::arrow::Buffer> metadata_buffer;
+ if (footer_read_size >= (metadata_len + kFooterSize)) {
+ metadata_buffer = SliceBuffer(
+ footer_buffer, footer_read_size - metadata_len - kFooterSize, metadata_len);
+ } else {
+ PARQUET_ASSIGN_OR_THROW(metadata_buffer,
+ source_->ReadAt(metadata_start, metadata_len));
+ }
+
+ // Parse the footer depending on encryption type
+ const bool is_encrypted_footer =
+ memcmp(footer_buffer->data() + footer_read_size - 4, kParquetEMagic, 4) == 0;
+ if (is_encrypted_footer) {
+ // Encrypted file with encrypted footer.
+ const std::pair<int64_t, uint32_t> read_size =
+ ParseMetaDataOfEncryptedFileWithEncryptedFooter(metadata_buffer, metadata_len);
+ // Read the actual footer
+ metadata_start = read_size.first;
+ metadata_len = read_size.second;
+ PARQUET_ASSIGN_OR_THROW(metadata_buffer,
+ source_->ReadAt(metadata_start, metadata_len));
+ // Fall through
+ }
+
+ const uint32_t read_metadata_len =
+ ParseUnencryptedFileMetadata(metadata_buffer, metadata_len);
+ auto file_decryption_properties = properties_.file_decryption_properties().get();
+ if (is_encrypted_footer) {
+ // Nothing else to do here.
+ return;
+ } else if (!file_metadata_->is_encryption_algorithm_set()) { // Non-encrypted file.
+ if (file_decryption_properties != nullptr) {
+ if (!file_decryption_properties->plaintext_files_allowed()) {
+ throw ParquetException("Applying decryption properties on plaintext file");
+ }
+ }
+ } else {
+ // Encrypted file with plaintext footer mode.
+ ParseMetaDataOfEncryptedFileWithPlaintextFooter(
+ file_decryption_properties, metadata_buffer, metadata_len, read_metadata_len);
+ }
+ }
+
+ // Validate the source size and get the initial read size.
+ int64_t GetFooterReadSize() {
+ if (source_size_ == 0) {
+ throw ParquetInvalidOrCorruptedFileException("Parquet file size is 0 bytes");
+ } else if (source_size_ < kFooterSize) {
+ throw ParquetInvalidOrCorruptedFileException(
+ "Parquet file size is ", source_size_,
+ " bytes, smaller than the minimum file footer (", kFooterSize, " bytes)");
+ }
+ return std::min(source_size_, kDefaultFooterReadSize);
+ }
+
+ // Validate the magic bytes and get the length of the full footer.
+ uint32_t ParseFooterLength(const std::shared_ptr<::arrow::Buffer>& footer_buffer,
+ const int64_t footer_read_size) {
+ // Check that all requested bytes were read and that the last 4 bytes are the magic
+ if (footer_buffer->size() != footer_read_size ||
+ (memcmp(footer_buffer->data() + footer_read_size - 4, kParquetMagic, 4) != 0 &&
+ memcmp(footer_buffer->data() + footer_read_size - 4, kParquetEMagic, 4) != 0)) {
+ throw ParquetInvalidOrCorruptedFileException(
+ "Parquet magic bytes not found in footer. Either the file is corrupted or this "
+ "is not a parquet file.");
+ }
+ // Both encrypted/unencrypted footers have the same footer length check.
+ uint32_t metadata_len = ::arrow::util::SafeLoadAs<uint32_t>(
+ reinterpret_cast<const uint8_t*>(footer_buffer->data()) + footer_read_size -
+ kFooterSize);
+ if (metadata_len > source_size_ - kFooterSize) {
+ throw ParquetInvalidOrCorruptedFileException(
+ "Parquet file size is ", source_size_,
+ " bytes, smaller than the size reported by footer's (", metadata_len, "bytes)");
+ }
+ return metadata_len;
+ }
+
+ // Does not throw.
+ ::arrow::Future<> ParseMetaDataAsync() {
+ int64_t footer_read_size;
+ BEGIN_PARQUET_CATCH_EXCEPTIONS
+ footer_read_size = GetFooterReadSize();
+ END_PARQUET_CATCH_EXCEPTIONS
+ // Assumes this is kept alive externally
+ return source_->ReadAsync(source_size_ - footer_read_size, footer_read_size)
+ .Then([=](const std::shared_ptr<::arrow::Buffer>& footer_buffer)
+ -> ::arrow::Future<> {
+ uint32_t metadata_len;
+ BEGIN_PARQUET_CATCH_EXCEPTIONS
+ metadata_len = ParseFooterLength(footer_buffer, footer_read_size);
+ END_PARQUET_CATCH_EXCEPTIONS
+ int64_t metadata_start = source_size_ - kFooterSize - metadata_len;
+
+ std::shared_ptr<::arrow::Buffer> metadata_buffer;
+ if (footer_read_size >= (metadata_len + kFooterSize)) {
+ metadata_buffer =
+ SliceBuffer(footer_buffer, footer_read_size - metadata_len - kFooterSize,
+ metadata_len);
+ return ParseMaybeEncryptedMetaDataAsync(footer_buffer,
+ std::move(metadata_buffer),
+ footer_read_size, metadata_len);
+ }
+ return source_->ReadAsync(metadata_start, metadata_len)
+ .Then([=](const std::shared_ptr<::arrow::Buffer>& metadata_buffer) {
+ return ParseMaybeEncryptedMetaDataAsync(footer_buffer, metadata_buffer,
+ footer_read_size, metadata_len);
+ });
+ });
+ }
+
+ // Continuation
+ ::arrow::Future<> ParseMaybeEncryptedMetaDataAsync(
+ std::shared_ptr<::arrow::Buffer> footer_buffer,
+ std::shared_ptr<::arrow::Buffer> metadata_buffer, int64_t footer_read_size,
+ uint32_t metadata_len) {
+ // Parse the footer depending on encryption type
+ const bool is_encrypted_footer =
+ memcmp(footer_buffer->data() + footer_read_size - 4, kParquetEMagic, 4) == 0;
+ if (is_encrypted_footer) {
+ // Encrypted file with encrypted footer.
+ std::pair<int64_t, uint32_t> read_size;
+ BEGIN_PARQUET_CATCH_EXCEPTIONS
+ read_size =
+ ParseMetaDataOfEncryptedFileWithEncryptedFooter(metadata_buffer, metadata_len);
+ END_PARQUET_CATCH_EXCEPTIONS
+ // Read the actual footer
+ int64_t metadata_start = read_size.first;
+ metadata_len = read_size.second;
+ return source_->ReadAsync(metadata_start, metadata_len)
+ .Then([=](const std::shared_ptr<::arrow::Buffer>& metadata_buffer) {
+ // Continue and read the file footer
+ return ParseMetaDataFinal(metadata_buffer, metadata_len, is_encrypted_footer);
+ });
+ }
+ return ParseMetaDataFinal(std::move(metadata_buffer), metadata_len,
+ is_encrypted_footer);
+ }
+
+ // Continuation
+ ::arrow::Status ParseMetaDataFinal(std::shared_ptr<::arrow::Buffer> metadata_buffer,
+ uint32_t metadata_len,
+ const bool is_encrypted_footer) {
+ BEGIN_PARQUET_CATCH_EXCEPTIONS
+ const uint32_t read_metadata_len =
+ ParseUnencryptedFileMetadata(metadata_buffer, metadata_len);
+ auto file_decryption_properties = properties_.file_decryption_properties().get();
+ if (is_encrypted_footer) {
+ // Nothing else to do here.
+ return ::arrow::Status::OK();
+ } else if (!file_metadata_->is_encryption_algorithm_set()) { // Non-encrypted file.
+ if (file_decryption_properties != nullptr) {
+ if (!file_decryption_properties->plaintext_files_allowed()) {
+ throw ParquetException("Applying decryption properties on plaintext file");
+ }
+ }
+ } else {
+ // Encrypted file with plaintext footer mode.
+ ParseMetaDataOfEncryptedFileWithPlaintextFooter(
+ file_decryption_properties, metadata_buffer, metadata_len, read_metadata_len);
+ }
+ END_PARQUET_CATCH_EXCEPTIONS
+ return ::arrow::Status::OK();
+ }
+
+ private:
+ std::shared_ptr<ArrowInputFile> source_;
+ std::shared_ptr<::arrow::io::internal::ReadRangeCache> cached_source_;
+ int64_t source_size_;
+ std::shared_ptr<FileMetaData> file_metadata_;
+ ReaderProperties properties_;
+
+ std::shared_ptr<InternalFileDecryptor> file_decryptor_;
+
+ // \return The true length of the metadata in bytes
+ uint32_t ParseUnencryptedFileMetadata(const std::shared_ptr<Buffer>& footer_buffer,
+ const uint32_t metadata_len);
+
+ std::string HandleAadPrefix(FileDecryptionProperties* file_decryption_properties,
+ EncryptionAlgorithm& algo);
+
+ void ParseMetaDataOfEncryptedFileWithPlaintextFooter(
+ FileDecryptionProperties* file_decryption_properties,
+ const std::shared_ptr<Buffer>& metadata_buffer, uint32_t metadata_len,
+ uint32_t read_metadata_len);
+
+ // \return The position and size of the actual footer
+ std::pair<int64_t, uint32_t> ParseMetaDataOfEncryptedFileWithEncryptedFooter(
+ const std::shared_ptr<Buffer>& crypto_metadata_buffer, uint32_t footer_len);
+};
+
+uint32_t SerializedFile::ParseUnencryptedFileMetadata(
+ const std::shared_ptr<Buffer>& metadata_buffer, const uint32_t metadata_len) {
+ if (metadata_buffer->size() != metadata_len) {
+ throw ParquetException("Failed reading metadata buffer (requested " +
+ std::to_string(metadata_len) + " bytes but got " +
+ std::to_string(metadata_buffer->size()) + " bytes)");
+ }
+ uint32_t read_metadata_len = metadata_len;
+ // The encrypted read path falls through to here, so pass in the decryptor
+ file_metadata_ =
+ FileMetaData::Make(metadata_buffer->data(), &read_metadata_len, file_decryptor_);
+ return read_metadata_len;
+}
+
+std::pair<int64_t, uint32_t>
+SerializedFile::ParseMetaDataOfEncryptedFileWithEncryptedFooter(
+ const std::shared_ptr<::arrow::Buffer>& crypto_metadata_buffer,
+ // both metadata & crypto metadata length
+ const uint32_t footer_len) {
+ // encryption with encrypted footer
+ // Check if the footer_buffer contains the entire metadata
+ if (crypto_metadata_buffer->size() != footer_len) {
+ throw ParquetException("Failed reading encrypted metadata buffer (requested " +
+ std::to_string(footer_len) + " bytes but got " +
+ std::to_string(crypto_metadata_buffer->size()) + " bytes)");
+ }
+ auto file_decryption_properties = properties_.file_decryption_properties().get();
+ if (file_decryption_properties == nullptr) {
+ throw ParquetException(
+ "Could not read encrypted metadata, no decryption found in reader's properties");
+ }
+ uint32_t crypto_metadata_len = footer_len;
+ std::shared_ptr<FileCryptoMetaData> file_crypto_metadata =
+ FileCryptoMetaData::Make(crypto_metadata_buffer->data(), &crypto_metadata_len);
+ // Handle AAD prefix
+ EncryptionAlgorithm algo = file_crypto_metadata->encryption_algorithm();
+ std::string file_aad = HandleAadPrefix(file_decryption_properties, algo);
+ file_decryptor_ = std::make_shared<InternalFileDecryptor>(
+ file_decryption_properties, file_aad, algo.algorithm,
+ file_crypto_metadata->key_metadata(), properties_.memory_pool());
+
+ int64_t metadata_offset = source_size_ - kFooterSize - footer_len + crypto_metadata_len;
+ uint32_t metadata_len = footer_len - crypto_metadata_len;
+ return std::make_pair(metadata_offset, metadata_len);
+}
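+
+// With an encrypted footer, the last footer_len bytes before the 8-byte tail
+// hold the FileCryptoMetaData followed by the encrypted FileMetaData; the
+// offset arithmetic above recovers the location of the latter.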
+
+void SerializedFile::ParseMetaDataOfEncryptedFileWithPlaintextFooter(
+ FileDecryptionProperties* file_decryption_properties,
+ const std::shared_ptr<Buffer>& metadata_buffer, uint32_t metadata_len,
+ uint32_t read_metadata_len) {
+ // Providing decryption properties in plaintext footer mode is not mandatory,
+ // for example when the file is read by a legacy reader.
+ if (file_decryption_properties != nullptr) {
+ EncryptionAlgorithm algo = file_metadata_->encryption_algorithm();
+ // Handle AAD prefix
+ std::string file_aad = HandleAadPrefix(file_decryption_properties, algo);
+ file_decryptor_ = std::make_shared<InternalFileDecryptor>(
+ file_decryption_properties, file_aad, algo.algorithm,
+ file_metadata_->footer_signing_key_metadata(), properties_.memory_pool());
+ // set the InternalFileDecryptor in the metadata as well, as it's used
+ // for signature verification and for ColumnChunkMetaData creation.
+ file_metadata_->set_file_decryptor(file_decryptor_);
+
+ if (file_decryption_properties->check_plaintext_footer_integrity()) {
+ if (metadata_len - read_metadata_len !=
+ (parquet::encryption::kGcmTagLength + parquet::encryption::kNonceLength)) {
+ throw ParquetInvalidOrCorruptedFileException(
+ "Failed reading metadata for encryption signature (requested ",
+ parquet::encryption::kGcmTagLength + parquet::encryption::kNonceLength,
+ " bytes but have ", metadata_len - read_metadata_len, " bytes)");
+ }
+
+ if (!file_metadata_->VerifySignature(metadata_buffer->data() + read_metadata_len)) {
+ throw ParquetInvalidOrCorruptedFileException(
+ "Parquet crypto signature verification failed");
+ }
+ }
+ }
+}
+
+std::string SerializedFile::HandleAadPrefix(
+ FileDecryptionProperties* file_decryption_properties, EncryptionAlgorithm& algo) {
+ std::string aad_prefix_in_properties = file_decryption_properties->aad_prefix();
+ std::string aad_prefix = aad_prefix_in_properties;
+ bool file_has_aad_prefix = !algo.aad.aad_prefix.empty();
+ std::string aad_prefix_in_file = algo.aad.aad_prefix;
+
+ if (algo.aad.supply_aad_prefix && aad_prefix_in_properties.empty()) {
+ throw ParquetException(
+ "AAD prefix used for file encryption, "
+ "but not stored in file and not supplied "
+ "in decryption properties");
+ }
+
+ if (file_has_aad_prefix) {
+ if (!aad_prefix_in_properties.empty()) {
+ if (aad_prefix_in_properties != aad_prefix_in_file) {
+ throw ParquetException(
+ "AAD Prefix in file and in properties "
+ "is not the same");
+ }
+ }
+ aad_prefix = aad_prefix_in_file;
+ std::shared_ptr<AADPrefixVerifier> aad_prefix_verifier =
+ file_decryption_properties->aad_prefix_verifier();
+ if (aad_prefix_verifier != nullptr) aad_prefix_verifier->Verify(aad_prefix);
+ } else {
+ if (!algo.aad.supply_aad_prefix && !aad_prefix_in_properties.empty()) {
+ throw ParquetException(
+ "AAD Prefix set in decryption properties, but was not used "
+ "for file encryption");
+ }
+ std::shared_ptr<AADPrefixVerifier> aad_prefix_verifier =
+ file_decryption_properties->aad_prefix_verifier();
+ if (aad_prefix_verifier != nullptr) {
+ throw ParquetException(
+ "AAD Prefix Verifier is set, but AAD Prefix not found in file");
+ }
+ }
+ return aad_prefix + algo.aad.aad_file_unique;
+}
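+
+// Summary of the cases above: an AAD prefix stored in the file wins and must
+// match any prefix supplied in the properties; if the file stores none, the
+// prefix must come from the properties when supply_aad_prefix is set, while a
+// supplied-but-unused prefix or a verifier with nothing to verify is an error.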
+
+// ----------------------------------------------------------------------
+// ParquetFileReader public API
+
+ParquetFileReader::ParquetFileReader() {}
+
+ParquetFileReader::~ParquetFileReader() {
+ try {
+ Close();
+ } catch (...) {
+ }
+}
+
+// Open the file. If no metadata is passed, it is parsed from the footer of
+// the file
+std::unique_ptr<ParquetFileReader::Contents> ParquetFileReader::Contents::Open(
+ std::shared_ptr<ArrowInputFile> source, const ReaderProperties& props,
+ std::shared_ptr<FileMetaData> metadata) {
+ std::unique_ptr<ParquetFileReader::Contents> result(
+ new SerializedFile(std::move(source), props));
+
+ // Access implementation-private methods that the Contents interface does not expose
+ SerializedFile* file = static_cast<SerializedFile*>(result.get());
+
+ if (metadata == nullptr) {
+ // Validates magic bytes, parses metadata, and initializes the SchemaDescriptor
+ file->ParseMetaData();
+ } else {
+ file->set_metadata(std::move(metadata));
+ }
+
+ return result;
+}
+
+::arrow::Future<std::unique_ptr<ParquetFileReader::Contents>>
+ParquetFileReader::Contents::OpenAsync(std::shared_ptr<ArrowInputFile> source,
+ const ReaderProperties& props,
+ std::shared_ptr<FileMetaData> metadata) {
+ BEGIN_PARQUET_CATCH_EXCEPTIONS
+ std::unique_ptr<ParquetFileReader::Contents> result(
+ new SerializedFile(std::move(source), props));
+ SerializedFile* file = static_cast<SerializedFile*>(result.get());
+ if (metadata == nullptr) {
+ // TODO(ARROW-12259): workaround since we have Future<(move-only type)>
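+ // (A lambda would have to copy-capture the move-only unique_ptr, so a
+ // hand-rolled callable that owns it as a member is used instead.)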
+ struct {
+ ::arrow::Result<std::unique_ptr<ParquetFileReader::Contents>> operator()() {
+ return std::move(result);
+ }
+
+ std::unique_ptr<ParquetFileReader::Contents> result;
+ } continuation;
+ continuation.result = std::move(result);
+ return file->ParseMetaDataAsync().Then(std::move(continuation));
+ } else {
+ file->set_metadata(std::move(metadata));
+ return ::arrow::Future<std::unique_ptr<ParquetFileReader::Contents>>::MakeFinished(
+ std::move(result));
+ }
+ END_PARQUET_CATCH_EXCEPTIONS
+}
+
+std::unique_ptr<ParquetFileReader> ParquetFileReader::Open(
+ std::shared_ptr<::arrow::io::RandomAccessFile> source, const ReaderProperties& props,
+ std::shared_ptr<FileMetaData> metadata) {
+ auto contents = SerializedFile::Open(std::move(source), props, std::move(metadata));
+ std::unique_ptr<ParquetFileReader> result(new ParquetFileReader());
+ result->Open(std::move(contents));
+ return result;
+}
+
+std::unique_ptr<ParquetFileReader> ParquetFileReader::OpenFile(
+ const std::string& path, bool memory_map, const ReaderProperties& props,
+ std::shared_ptr<FileMetaData> metadata) {
+ std::shared_ptr<::arrow::io::RandomAccessFile> source;
+ if (memory_map) {
+ PARQUET_ASSIGN_OR_THROW(
+ source, ::arrow::io::MemoryMappedFile::Open(path, ::arrow::io::FileMode::READ));
+ } else {
+ PARQUET_ASSIGN_OR_THROW(source,
+ ::arrow::io::ReadableFile::Open(path, props.memory_pool()));
+ }
+
+ return Open(std::move(source), props, std::move(metadata));
+}
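+
+// Example (hypothetical usage):
+//
+// auto reader = parquet::ParquetFileReader::OpenFile("/path/to/file.parquet",
+// /*memory_map=*/true);
+// std::shared_ptr<parquet::FileMetaData> md = reader->metadata();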
+
+::arrow::Future<std::unique_ptr<ParquetFileReader>> ParquetFileReader::OpenAsync(
+ std::shared_ptr<::arrow::io::RandomAccessFile> source, const ReaderProperties& props,
+ std::shared_ptr<FileMetaData> metadata) {
+ BEGIN_PARQUET_CATCH_EXCEPTIONS
+ auto fut = SerializedFile::OpenAsync(std::move(source), props, std::move(metadata));
+ // TODO(ARROW-12259): workaround since we have Future<(move-only type)>
+ auto completed = ::arrow::Future<std::unique_ptr<ParquetFileReader>>::Make();
+ fut.AddCallback([fut, completed](
+ const ::arrow::Result<std::unique_ptr<ParquetFileReader::Contents>>&
+ contents) mutable {
+ if (!contents.ok()) {
+ completed.MarkFinished(contents.status());
+ return;
+ }
+ std::unique_ptr<ParquetFileReader> result(new ParquetFileReader());
+ result->Open(fut.MoveResult().MoveValueUnsafe());
+ completed.MarkFinished(std::move(result));
+ });
+ return completed;
+ END_PARQUET_CATCH_EXCEPTIONS
+}
+
+void ParquetFileReader::Open(std::unique_ptr<ParquetFileReader::Contents> contents) {
+ contents_ = std::move(contents);
+}
+
+void ParquetFileReader::Close() {
+ if (contents_) {
+ contents_->Close();
+ }
+}
+
+std::shared_ptr<FileMetaData> ParquetFileReader::metadata() const {
+ return contents_->metadata();
+}
+
+std::shared_ptr<RowGroupReader> ParquetFileReader::RowGroup(int i) {
+ if (i >= metadata()->num_row_groups()) {
+ std::stringstream ss;
+ ss << "Trying to read row group " << i << " but file only has "
+ << metadata()->num_row_groups() << " row groups";
+ throw ParquetException(ss.str());
+ }
+ return contents_->GetRowGroup(i);
+}
+
+void ParquetFileReader::PreBuffer(const std::vector<int>& row_groups,
+ const std::vector<int>& column_indices,
+ const ::arrow::io::IOContext& ctx,
+ const ::arrow::io::CacheOptions& options) {
+ // Access private methods here
+ SerializedFile* file =
+ ::arrow::internal::checked_cast<SerializedFile*>(contents_.get());
+ file->PreBuffer(row_groups, column_indices, ctx, options);
+}
+
+::arrow::Future<> ParquetFileReader::WhenBuffered(
+ const std::vector<int>& row_groups, const std::vector<int>& column_indices) const {
+ // Access private methods here
+ SerializedFile* file =
+ ::arrow::internal::checked_cast<SerializedFile*>(contents_.get());
+ return file->WhenBuffered(row_groups, column_indices);
+}
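+
+// Example (hypothetical usage): pre-buffer row group 0, columns 0 and 1, then
+// block until the reads complete before creating column readers:
+//
+// reader->PreBuffer({0}, {0, 1}, ::arrow::io::IOContext(),
+// ::arrow::io::CacheOptions::Defaults());
+// reader->WhenBuffered({0}, {0, 1}).Wait();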
+
+// ----------------------------------------------------------------------
+// File metadata helpers
+
+std::shared_ptr<FileMetaData> ReadMetaData(
+ const std::shared_ptr<::arrow::io::RandomAccessFile>& source) {
+ return ParquetFileReader::Open(source)->metadata();
+}
+
+// ----------------------------------------------------------------------
+// File scanner for performance testing
+
+int64_t ScanFileContents(std::vector<int> columns, const int32_t column_batch_size,
+ ParquetFileReader* reader) {
+ std::vector<int16_t> rep_levels(column_batch_size);
+ std::vector<int16_t> def_levels(column_batch_size);
+
+ int num_columns = static_cast<int>(columns.size());
+
+ // If no columns were specified explicitly, scan all columns
+ if (columns.size() == 0) {
+ num_columns = reader->metadata()->num_columns();
+ columns.resize(num_columns);
+ for (int i = 0; i < num_columns; i++) {
+ columns[i] = i;
+ }
+ }
+
+ std::vector<int64_t> total_rows(num_columns, 0);
+
+ for (int r = 0; r < reader->metadata()->num_row_groups(); ++r) {
+ auto group_reader = reader->RowGroup(r);
+ int col = 0;
+ for (auto i : columns) {
+ std::shared_ptr<ColumnReader> col_reader = group_reader->Column(i);
+ size_t value_byte_size = GetTypeByteSize(col_reader->descr()->physical_type());
+ std::vector<uint8_t> values(column_batch_size * value_byte_size);
+
+ int64_t values_read = 0;
+ while (col_reader->HasNext()) {
+ int64_t levels_read =
+ ScanAllValues(column_batch_size, def_levels.data(), rep_levels.data(),
+ values.data(), &values_read, col_reader.get());
+ if (col_reader->descr()->max_repetition_level() > 0) {
+ for (int64_t i = 0; i < levels_read; i++) {
+ if (rep_levels[i] == 0) {
+ total_rows[col]++;
+ }
+ }
+ } else {
+ total_rows[col] += levels_read;
+ }
+ }
+ col++;
+ }
+ }
+
+ for (int i = 1; i < num_columns; ++i) {
+ if (total_rows[0] != total_rows[i]) {
+ throw ParquetException("Parquet error: Total rows among columns do not match");
+ }
+ }
+
+ return total_rows[0];
+}
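+
+// Example (hypothetical usage): scan every column in 256-value batches and
+// return the total number of rows:
+//
+// int64_t rows = parquet::ScanFileContents({}, 256, reader.get());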
+
+} // namespace parquet
diff --git a/contrib/libs/apache/arrow/cpp/src/parquet/file_reader.h b/contrib/libs/apache/arrow/cpp/src/parquet/file_reader.h
index a6358684250..0fc84054939 100644
--- a/contrib/libs/apache/arrow/cpp/src/parquet/file_reader.h
+++ b/contrib/libs/apache/arrow/cpp/src/parquet/file_reader.h
@@ -1,188 +1,188 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-#pragma once
-
-#include <cstdint>
-#include <memory>
-#include <string>
-#include <vector>
-
-#include "arrow/io/caching.h"
-#include "arrow/util/type_fwd.h"
-#include "parquet/metadata.h" // IWYU pragma: keep
-#include "parquet/platform.h"
-#include "parquet/properties.h"
-
-namespace parquet {
-
-class ColumnReader;
-class FileMetaData;
-class PageReader;
-class RowGroupMetaData;
-
-class PARQUET_EXPORT RowGroupReader {
- public:
- // Forward declare a virtual class 'Contents' to aid dependency injection and more
- // easily create test fixtures
- // An implementation of the Contents class is defined in the .cc file
- struct Contents {
- virtual ~Contents() {}
- virtual std::unique_ptr<PageReader> GetColumnPageReader(int i) = 0;
- virtual const RowGroupMetaData* metadata() const = 0;
- virtual const ReaderProperties* properties() const = 0;
- };
-
- explicit RowGroupReader(std::unique_ptr<Contents> contents);
-
- // Returns the rowgroup metadata
- const RowGroupMetaData* metadata() const;
-
- // Construct a ColumnReader for the indicated row group-relative
- // column. Ownership is shared with the RowGroupReader.
- std::shared_ptr<ColumnReader> Column(int i);
-
- // Construct a ColumnReader, trying to enable exposed encoding.
- //
- // For dictionary encoding, currently we only support column chunks that are fully
- // dictionary encoded, i.e., all data pages in the column chunk are dictionary encoded.
- // If a column chunk uses dictionary encoding but then falls back to plain encoding, the
- // encoding will not be exposed.
- //
- // The returned column reader provides an API GetExposedEncoding() for the
- // users to check the exposed encoding and determine how to read the batches.
- //
- // \note API EXPERIMENTAL
- std::shared_ptr<ColumnReader> ColumnWithExposeEncoding(
- int i, ExposedEncoding encoding_to_expose);
-
- std::unique_ptr<PageReader> GetColumnPageReader(int i);
-
- private:
- // Holds a pointer to an instance of Contents implementation
- std::unique_ptr<Contents> contents_;
-};
-
-class PARQUET_EXPORT ParquetFileReader {
- public:
- // Declare a virtual class 'Contents' to aid dependency injection and more
- // easily create test fixtures
- // An implementation of the Contents class is defined in the .cc file
- struct PARQUET_EXPORT Contents {
- static std::unique_ptr<Contents> Open(
- std::shared_ptr<::arrow::io::RandomAccessFile> source,
- const ReaderProperties& props = default_reader_properties(),
- std::shared_ptr<FileMetaData> metadata = NULLPTR);
-
- static ::arrow::Future<std::unique_ptr<Contents>> OpenAsync(
- std::shared_ptr<::arrow::io::RandomAccessFile> source,
- const ReaderProperties& props = default_reader_properties(),
- std::shared_ptr<FileMetaData> metadata = NULLPTR);
-
- virtual ~Contents() = default;
- // Perform any cleanup associated with the file contents
- virtual void Close() = 0;
- virtual std::shared_ptr<RowGroupReader> GetRowGroup(int i) = 0;
- virtual std::shared_ptr<FileMetaData> metadata() const = 0;
- };
-
- ParquetFileReader();
- ~ParquetFileReader();
-
- // Create a file reader instance from an Arrow file object. Thread-safety is
- // the responsibility of the file implementation
- static std::unique_ptr<ParquetFileReader> Open(
- std::shared_ptr<::arrow::io::RandomAccessFile> source,
- const ReaderProperties& props = default_reader_properties(),
- std::shared_ptr<FileMetaData> metadata = NULLPTR);
-
- // Convenience API to open a serialized Parquet file on disk, using Arrow IO
- // interfaces.
- static std::unique_ptr<ParquetFileReader> OpenFile(
- const std::string& path, bool memory_map = true,
- const ReaderProperties& props = default_reader_properties(),
- std::shared_ptr<FileMetaData> metadata = NULLPTR);
-
- // Asynchronously open a file reader from an Arrow file object.
- // Does not throw - all errors are reported through the Future.
- static ::arrow::Future<std::unique_ptr<ParquetFileReader>> OpenAsync(
- std::shared_ptr<::arrow::io::RandomAccessFile> source,
- const ReaderProperties& props = default_reader_properties(),
- std::shared_ptr<FileMetaData> metadata = NULLPTR);
-
- void Open(std::unique_ptr<Contents> contents);
- void Close();
-
- // The RowGroupReader is owned by the FileReader
- std::shared_ptr<RowGroupReader> RowGroup(int i);
-
- // Returns the file metadata. Only one instance is ever created
- std::shared_ptr<FileMetaData> metadata() const;
-
- /// Pre-buffer the specified column indices in all row groups.
- ///
- /// Readers can optionally call this to cache the necessary slices
- /// of the file in-memory before deserialization. Arrow readers can
- /// automatically do this via an option. This is intended to
- /// increase performance when reading from high-latency filesystems
- /// (e.g. Amazon S3).
- ///
- /// After calling this, creating readers for row groups/column
- /// indices that were not buffered may fail. Creating multiple
- /// readers for a subset of the buffered regions is
- /// acceptable. This may be called again to buffer a different set
- /// of row groups/columns.
- ///
- /// If memory usage is a concern, note that data will remain
- /// buffered in memory until either \a PreBuffer() is called again,
- /// or the reader itself is destructed. Reading - and buffering -
- /// only one row group at a time may be useful.
- ///
- /// This method may throw.
- void PreBuffer(const std::vector<int>& row_groups,
- const std::vector<int>& column_indices,
- const ::arrow::io::IOContext& ctx,
- const ::arrow::io::CacheOptions& options);
-
- /// Wait for the specified row groups and column indices to be pre-buffered.
- ///
- /// After the returned Future completes, reading the specified row
- /// groups/columns will not block.
- ///
- /// PreBuffer must be called first. This method does not throw.
- ::arrow::Future<> WhenBuffered(const std::vector<int>& row_groups,
- const std::vector<int>& column_indices) const;
-
- private:
- // Holds a pointer to an instance of Contents implementation
- std::unique_ptr<Contents> contents_;
-};
-
-// Read only Parquet file metadata
-std::shared_ptr<FileMetaData> PARQUET_EXPORT
-ReadMetaData(const std::shared_ptr<::arrow::io::RandomAccessFile>& source);
-
-/// \brief Scan all values in file. Useful for performance testing
-/// \param[in] columns the column numbers to scan. If empty scans all
-/// \param[in] column_batch_size number of values to read at a time when scanning column
-/// \param[in] reader a ParquetFileReader instance
-/// \return number of semantic rows in file
-PARQUET_EXPORT
-int64_t ScanFileContents(std::vector<int> columns, const int32_t column_batch_size,
- ParquetFileReader* reader);
-
-} // namespace parquet
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+
+#include <cstdint>
+#include <memory>
+#include <string>
+#include <vector>
+
+#include "arrow/io/caching.h"
+#include "arrow/util/type_fwd.h"
+#include "parquet/metadata.h" // IWYU pragma: keep
+#include "parquet/platform.h"
+#include "parquet/properties.h"
+
+namespace parquet {
+
+class ColumnReader;
+class FileMetaData;
+class PageReader;
+class RowGroupMetaData;
+
+class PARQUET_EXPORT RowGroupReader {
+ public:
+ // Forward declare a virtual class 'Contents' to aid dependency injection and more
+ // easily create test fixtures
+ // An implementation of the Contents class is defined in the .cc file
+ struct Contents {
+ virtual ~Contents() {}
+ virtual std::unique_ptr<PageReader> GetColumnPageReader(int i) = 0;
+ virtual const RowGroupMetaData* metadata() const = 0;
+ virtual const ReaderProperties* properties() const = 0;
+ };
+
+ explicit RowGroupReader(std::unique_ptr<Contents> contents);
+
+ // Returns the rowgroup metadata
+ const RowGroupMetaData* metadata() const;
+
+ // Construct a ColumnReader for the indicated row group-relative
+ // column. Ownership is shared with the RowGroupReader.
+ std::shared_ptr<ColumnReader> Column(int i);
+
+ // Construct a ColumnReader, trying to enable exposed encoding.
+ //
+ // For dictionary encoding, currently we only support column chunks that are fully
+ // dictionary encoded, i.e., all data pages in the column chunk are dictionary encoded.
+ // If a column chunk uses dictionary encoding but then falls back to plain encoding, the
+ // encoding will not be exposed.
+ //
+ // The returned column reader provides an API GetExposedEncoding() for the
+ // users to check the exposed encoding and determine how to read the batches.
+ //
+ // \note API EXPERIMENTAL
+ std::shared_ptr<ColumnReader> ColumnWithExposeEncoding(
+ int i, ExposedEncoding encoding_to_expose);
+
+ std::unique_ptr<PageReader> GetColumnPageReader(int i);
+
+ private:
+ // Holds a pointer to an instance of Contents implementation
+ std::unique_ptr<Contents> contents_;
+};
+
+class PARQUET_EXPORT ParquetFileReader {
+ public:
+ // Declare a virtual class 'Contents' to aid dependency injection and more
+ // easily create test fixtures
+ // An implementation of the Contents class is defined in the .cc file
+ struct PARQUET_EXPORT Contents {
+ static std::unique_ptr<Contents> Open(
+ std::shared_ptr<::arrow::io::RandomAccessFile> source,
+ const ReaderProperties& props = default_reader_properties(),
+ std::shared_ptr<FileMetaData> metadata = NULLPTR);
+
+ static ::arrow::Future<std::unique_ptr<Contents>> OpenAsync(
+ std::shared_ptr<::arrow::io::RandomAccessFile> source,
+ const ReaderProperties& props = default_reader_properties(),
+ std::shared_ptr<FileMetaData> metadata = NULLPTR);
+
+ virtual ~Contents() = default;
+ // Perform any cleanup associated with the file contents
+ virtual void Close() = 0;
+ virtual std::shared_ptr<RowGroupReader> GetRowGroup(int i) = 0;
+ virtual std::shared_ptr<FileMetaData> metadata() const = 0;
+ };
+
+ ParquetFileReader();
+ ~ParquetFileReader();
+
+ // Create a file reader instance from an Arrow file object. Thread-safety is
+ // the responsibility of the file implementation
+ static std::unique_ptr<ParquetFileReader> Open(
+ std::shared_ptr<::arrow::io::RandomAccessFile> source,
+ const ReaderProperties& props = default_reader_properties(),
+ std::shared_ptr<FileMetaData> metadata = NULLPTR);
+
+ // Convenience API to open a serialized Parquet file on disk, using Arrow IO
+ // interfaces.
+ static std::unique_ptr<ParquetFileReader> OpenFile(
+ const std::string& path, bool memory_map = true,
+ const ReaderProperties& props = default_reader_properties(),
+ std::shared_ptr<FileMetaData> metadata = NULLPTR);
+
+ // Asynchronously open a file reader from an Arrow file object.
+ // Does not throw - all errors are reported through the Future.
+ static ::arrow::Future<std::unique_ptr<ParquetFileReader>> OpenAsync(
+ std::shared_ptr<::arrow::io::RandomAccessFile> source,
+ const ReaderProperties& props = default_reader_properties(),
+ std::shared_ptr<FileMetaData> metadata = NULLPTR);
+
+ void Open(std::unique_ptr<Contents> contents);
+ void Close();
+
+ // The RowGroupReader is owned by the FileReader
+ std::shared_ptr<RowGroupReader> RowGroup(int i);
+
+ // Returns the file metadata. Only one instance is ever created
+ std::shared_ptr<FileMetaData> metadata() const;
+
+ /// Pre-buffer the specified column indices in all row groups.
+ ///
+ /// Readers can optionally call this to cache the necessary slices
+ /// of the file in-memory before deserialization. Arrow readers can
+ /// automatically do this via an option. This is intended to
+ /// increase performance when reading from high-latency filesystems
+ /// (e.g. Amazon S3).
+ ///
+ /// After calling this, creating readers for row groups/column
+ /// indices that were not buffered may fail. Creating multiple
+ /// readers for a subset of the buffered regions is
+ /// acceptable. This may be called again to buffer a different set
+ /// of row groups/columns.
+ ///
+ /// If memory usage is a concern, note that data will remain
+ /// buffered in memory until either \a PreBuffer() is called again,
+ /// or the reader itself is destructed. Reading - and buffering -
+ /// only one row group at a time may be useful.
+ ///
+ /// This method may throw.
+ void PreBuffer(const std::vector<int>& row_groups,
+ const std::vector<int>& column_indices,
+ const ::arrow::io::IOContext& ctx,
+ const ::arrow::io::CacheOptions& options);
+
+ /// Wait for the specified row groups and column indices to be pre-buffered.
+ ///
+ /// After the returned Future completes, reading the specified row
+ /// groups/columns will not block.
+ ///
+ /// PreBuffer must be called first. This method does not throw.
+ ::arrow::Future<> WhenBuffered(const std::vector<int>& row_groups,
+ const std::vector<int>& column_indices) const;
+
+ private:
+ // Holds a pointer to an instance of Contents implementation
+ std::unique_ptr<Contents> contents_;
+};
+
+// Read only Parquet file metadata
+std::shared_ptr<FileMetaData> PARQUET_EXPORT
+ReadMetaData(const std::shared_ptr<::arrow::io::RandomAccessFile>& source);
+
+/// \brief Scan all values in file. Useful for performance testing
+/// \param[in] columns the column numbers to scan. If empty scans all
+/// \param[in] column_batch_size number of values to read at a time when scanning column
+/// \param[in] reader a ParquetFileReader instance
+/// \return number of semantic rows in file
+PARQUET_EXPORT
+int64_t ScanFileContents(std::vector<int> columns, const int32_t column_batch_size,
+ ParquetFileReader* reader);
+
+} // namespace parquet
diff --git a/contrib/libs/apache/arrow/cpp/src/parquet/file_writer.cc b/contrib/libs/apache/arrow/cpp/src/parquet/file_writer.cc
index a4c824c423b..deac9586e5a 100644
--- a/contrib/libs/apache/arrow/cpp/src/parquet/file_writer.cc
+++ b/contrib/libs/apache/arrow/cpp/src/parquet/file_writer.cc
@@ -1,547 +1,547 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-#include "parquet/file_writer.h"
-
-#include <cstddef>
-#include <ostream>
-#include <string>
-#include <utility>
-#include <vector>
-
-#include "parquet/column_writer.h"
-#include "parquet/encryption/encryption_internal.h"
-#include "parquet/encryption/internal_file_encryptor.h"
-#include "parquet/exception.h"
-#include "parquet/platform.h"
-#include "parquet/schema.h"
-#include "parquet/types.h"
-
-using arrow::MemoryPool;
-
-using parquet::schema::GroupNode;
-
-namespace parquet {
-
-// ----------------------------------------------------------------------
-// RowGroupWriter public API
-
-RowGroupWriter::RowGroupWriter(std::unique_ptr<Contents> contents)
- : contents_(std::move(contents)) {}
-
-void RowGroupWriter::Close() {
- if (contents_) {
- contents_->Close();
- }
-}
-
-ColumnWriter* RowGroupWriter::NextColumn() { return contents_->NextColumn(); }
-
-ColumnWriter* RowGroupWriter::column(int i) { return contents_->column(i); }
-
-int64_t RowGroupWriter::total_compressed_bytes() const {
- return contents_->total_compressed_bytes();
-}
-
-int64_t RowGroupWriter::total_bytes_written() const {
- return contents_->total_bytes_written();
-}
-
-int RowGroupWriter::current_column() { return contents_->current_column(); }
-
-int RowGroupWriter::num_columns() const { return contents_->num_columns(); }
-
-int64_t RowGroupWriter::num_rows() const { return contents_->num_rows(); }
-
-inline void ThrowRowsMisMatchError(int col, int64_t prev, int64_t curr) {
- std::stringstream ss;
- ss << "Column " << col << " had " << curr << " while previous column had " << prev;
- throw ParquetException(ss.str());
-}
-
-// ----------------------------------------------------------------------
-// RowGroupSerializer
-
-// RowGroupWriter::Contents implementation for the Parquet file specification
-class RowGroupSerializer : public RowGroupWriter::Contents {
- public:
- RowGroupSerializer(std::shared_ptr<ArrowOutputStream> sink,
- RowGroupMetaDataBuilder* metadata, int16_t row_group_ordinal,
- const WriterProperties* properties, bool buffered_row_group = false,
- InternalFileEncryptor* file_encryptor = nullptr)
- : sink_(std::move(sink)),
- metadata_(metadata),
- properties_(properties),
- total_bytes_written_(0),
- closed_(false),
- row_group_ordinal_(row_group_ordinal),
- next_column_index_(0),
- num_rows_(0),
- buffered_row_group_(buffered_row_group),
- file_encryptor_(file_encryptor) {
- if (buffered_row_group) {
- InitColumns();
- } else {
- column_writers_.push_back(nullptr);
- }
- }
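-
- // In buffered mode all column writers are created up front and accessed via
- // column(i); otherwise a single writer slot is reused through NextColumn().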
-
- int num_columns() const override { return metadata_->num_columns(); }
-
- int64_t num_rows() const override {
- CheckRowsWritten();
- // CheckRowsWritten ensures num_rows_ is set correctly
- return num_rows_;
- }
-
- ColumnWriter* NextColumn() override {
- if (buffered_row_group_) {
- throw ParquetException(
- "NextColumn() is not supported when a RowGroup is written by size");
- }
-
- if (column_writers_[0]) {
- CheckRowsWritten();
- }
-
- // Throws an error if more columns are being written
- auto col_meta = metadata_->NextColumnChunk();
-
- if (column_writers_[0]) {
- total_bytes_written_ += column_writers_[0]->Close();
- }
-
- ++next_column_index_;
-
- const auto& path = col_meta->descr()->path();
- auto meta_encryptor =
- file_encryptor_ ? file_encryptor_->GetColumnMetaEncryptor(path->ToDotString())
- : nullptr;
- auto data_encryptor =
- file_encryptor_ ? file_encryptor_->GetColumnDataEncryptor(path->ToDotString())
- : nullptr;
- std::unique_ptr<PageWriter> pager = PageWriter::Open(
- sink_, properties_->compression(path), properties_->compression_level(path),
- col_meta, row_group_ordinal_, static_cast<int16_t>(next_column_index_ - 1),
- properties_->memory_pool(), false, meta_encryptor, data_encryptor);
- column_writers_[0] = ColumnWriter::Make(col_meta, std::move(pager), properties_);
- return column_writers_[0].get();
- }
-
- ColumnWriter* column(int i) override {
- if (!buffered_row_group_) {
- throw ParquetException(
- "column() is only supported when a BufferedRowGroup is being written");
- }
-
- if (i >= 0 && i < static_cast<int>(column_writers_.size())) {
- return column_writers_[i].get();
- }
- return nullptr;
- }
-
- int current_column() const override { return metadata_->current_column(); }
-
- int64_t total_compressed_bytes() const override {
- int64_t total_compressed_bytes = 0;
- for (size_t i = 0; i < column_writers_.size(); i++) {
- if (column_writers_[i]) {
- total_compressed_bytes += column_writers_[i]->total_compressed_bytes();
- }
- }
- return total_compressed_bytes;
- }
-
- int64_t total_bytes_written() const override {
- int64_t total_bytes_written = 0;
- for (size_t i = 0; i < column_writers_.size(); i++) {
- if (column_writers_[i]) {
- total_bytes_written += column_writers_[i]->total_bytes_written();
- }
- }
- return total_bytes_written;
- }
-
- void Close() override {
- if (!closed_) {
- closed_ = true;
- CheckRowsWritten();
-
- for (size_t i = 0; i < column_writers_.size(); i++) {
- if (column_writers_[i]) {
- total_bytes_written_ += column_writers_[i]->Close();
- column_writers_[i].reset();
- }
- }
-
- column_writers_.clear();
-
- // Ensures all columns have been written
- metadata_->set_num_rows(num_rows_);
- metadata_->Finish(total_bytes_written_, row_group_ordinal_);
- }
- }
-
- private:
- std::shared_ptr<ArrowOutputStream> sink_;
- mutable RowGroupMetaDataBuilder* metadata_;
- const WriterProperties* properties_;
- int64_t total_bytes_written_;
- bool closed_;
- int16_t row_group_ordinal_;
- int next_column_index_;
- mutable int64_t num_rows_;
- bool buffered_row_group_;
- InternalFileEncryptor* file_encryptor_;
-
- void CheckRowsWritten() const {
- // verify when only one column is written at a time
- if (!buffered_row_group_ && column_writers_.size() > 0 && column_writers_[0]) {
- int64_t current_col_rows = column_writers_[0]->rows_written();
- if (num_rows_ == 0) {
- num_rows_ = current_col_rows;
- } else if (num_rows_ != current_col_rows) {
- ThrowRowsMisMatchError(next_column_index_, current_col_rows, num_rows_);
- }
- } else if (buffered_row_group_ &&
- column_writers_.size() > 0) { // when buffered_row_group = true
- int64_t current_col_rows = column_writers_[0]->rows_written();
- for (int i = 1; i < static_cast<int>(column_writers_.size()); i++) {
- int64_t current_col_rows_i = column_writers_[i]->rows_written();
- if (current_col_rows != current_col_rows_i) {
- ThrowRowsMisMatchError(i, current_col_rows_i, current_col_rows);
- }
- }
- num_rows_ = current_col_rows;
- }
- }
-
- void InitColumns() {
- for (int i = 0; i < num_columns(); i++) {
- auto col_meta = metadata_->NextColumnChunk();
- const auto& path = col_meta->descr()->path();
- auto meta_encryptor =
- file_encryptor_ ? file_encryptor_->GetColumnMetaEncryptor(path->ToDotString())
- : nullptr;
- auto data_encryptor =
- file_encryptor_ ? file_encryptor_->GetColumnDataEncryptor(path->ToDotString())
- : nullptr;
- std::unique_ptr<PageWriter> pager = PageWriter::Open(
- sink_, properties_->compression(path), properties_->compression_level(path),
- col_meta, static_cast<int16_t>(row_group_ordinal_),
- static_cast<int16_t>(next_column_index_++), properties_->memory_pool(),
- buffered_row_group_, meta_encryptor, data_encryptor);
- column_writers_.push_back(
- ColumnWriter::Make(col_meta, std::move(pager), properties_));
- }
- }
-
- std::vector<std::shared_ptr<ColumnWriter>> column_writers_;
-};
-
-// ----------------------------------------------------------------------
-// FileSerializer
-
-// An implementation of ParquetFileWriter::Contents that deals with the Parquet
-// file structure, Thrift serialization, and other internal matters
-
-class FileSerializer : public ParquetFileWriter::Contents {
- public:
- static std::unique_ptr<ParquetFileWriter::Contents> Open(
- std::shared_ptr<ArrowOutputStream> sink, std::shared_ptr<GroupNode> schema,
- std::shared_ptr<WriterProperties> properties,
- std::shared_ptr<const KeyValueMetadata> key_value_metadata) {
- std::unique_ptr<ParquetFileWriter::Contents> result(
- new FileSerializer(std::move(sink), std::move(schema), std::move(properties),
- std::move(key_value_metadata)));
-
- return result;
- }
-
- void Close() override {
- if (is_open_) {
- // If any function here raises an exception, we set is_open_ to false first
- // so that Close() is not called again (which could cause a segfault)
- is_open_ = false;
- if (row_group_writer_) {
- num_rows_ += row_group_writer_->num_rows();
- row_group_writer_->Close();
- }
- row_group_writer_.reset();
-
- // Write magic bytes and metadata
- auto file_encryption_properties = properties_->file_encryption_properties();
-
- if (file_encryption_properties == nullptr) { // Non-encrypted file.
- file_metadata_ = metadata_->Finish();
- WriteFileMetaData(*file_metadata_, sink_.get());
- } else { // Encrypted file
- CloseEncryptedFile(file_encryption_properties);
- }
- }
- }
-
- int num_columns() const override { return schema_.num_columns(); }
-
- int num_row_groups() const override { return num_row_groups_; }
-
- int64_t num_rows() const override { return num_rows_; }
-
- const std::shared_ptr<WriterProperties>& properties() const override {
- return properties_;
- }
-
- RowGroupWriter* AppendRowGroup(bool buffered_row_group) {
- if (row_group_writer_) {
- row_group_writer_->Close();
- }
- num_row_groups_++;
- auto rg_metadata = metadata_->AppendRowGroup();
- std::unique_ptr<RowGroupWriter::Contents> contents(new RowGroupSerializer(
- sink_, rg_metadata, static_cast<int16_t>(num_row_groups_ - 1), properties_.get(),
- buffered_row_group, file_encryptor_.get()));
- row_group_writer_.reset(new RowGroupWriter(std::move(contents)));
- return row_group_writer_.get();
- }
-
- RowGroupWriter* AppendRowGroup() override { return AppendRowGroup(false); }
-
- RowGroupWriter* AppendBufferedRowGroup() override { return AppendRowGroup(true); }
-
- ~FileSerializer() override {
- try {
- Close();
- } catch (...) {
- }
- }
-
- private:
- FileSerializer(std::shared_ptr<ArrowOutputStream> sink,
- std::shared_ptr<GroupNode> schema,
- std::shared_ptr<WriterProperties> properties,
- std::shared_ptr<const KeyValueMetadata> key_value_metadata)
- : ParquetFileWriter::Contents(std::move(schema), std::move(key_value_metadata)),
- sink_(std::move(sink)),
- is_open_(true),
- properties_(std::move(properties)),
- num_row_groups_(0),
- num_rows_(0),
- metadata_(FileMetaDataBuilder::Make(&schema_, properties_, key_value_metadata_)) {
- PARQUET_ASSIGN_OR_THROW(int64_t position, sink_->Tell());
- if (position == 0) {
- StartFile();
- } else {
- throw ParquetException("Appending to file not implemented.");
- }
- }
-
- void CloseEncryptedFile(FileEncryptionProperties* file_encryption_properties) {
- // Encrypted file with encrypted footer
- if (file_encryption_properties->encrypted_footer()) {
- // encrypted footer
- file_metadata_ = metadata_->Finish();
-
- PARQUET_ASSIGN_OR_THROW(int64_t position, sink_->Tell());
- uint64_t metadata_start = static_cast<uint64_t>(position);
- auto crypto_metadata = metadata_->GetCryptoMetaData();
- WriteFileCryptoMetaData(*crypto_metadata, sink_.get());
-
- auto footer_encryptor = file_encryptor_->GetFooterEncryptor();
- WriteEncryptedFileMetadata(*file_metadata_, sink_.get(), footer_encryptor, true);
- PARQUET_ASSIGN_OR_THROW(position, sink_->Tell());
- uint32_t footer_and_crypto_len = static_cast<uint32_t>(position - metadata_start);
- PARQUET_THROW_NOT_OK(
- sink_->Write(reinterpret_cast<uint8_t*>(&footer_and_crypto_len), 4));
- PARQUET_THROW_NOT_OK(sink_->Write(kParquetEMagic, 4));
- } else { // Encrypted file with plaintext footer
- file_metadata_ = metadata_->Finish();
- auto footer_signing_encryptor = file_encryptor_->GetFooterSigningEncryptor();
- WriteEncryptedFileMetadata(*file_metadata_, sink_.get(), footer_signing_encryptor,
- false);
- }
- if (file_encryptor_) {
- file_encryptor_->WipeOutEncryptionKeys();
- }
- }
-
- std::shared_ptr<ArrowOutputStream> sink_;
- bool is_open_;
- const std::shared_ptr<WriterProperties> properties_;
- int num_row_groups_;
- int64_t num_rows_;
- std::unique_ptr<FileMetaDataBuilder> metadata_;
- // Only one of the row group writers is active at a time
- std::unique_ptr<RowGroupWriter> row_group_writer_;
-
- std::unique_ptr<InternalFileEncryptor> file_encryptor_;
-
- void StartFile() {
- auto file_encryption_properties = properties_->file_encryption_properties();
- if (file_encryption_properties == nullptr) {
- // Unencrypted parquet files always start with PAR1
- PARQUET_THROW_NOT_OK(sink_->Write(kParquetMagic, 4));
- } else {
- // Check that all columns in columnEncryptionProperties exist in the schema.
- auto encrypted_columns = file_encryption_properties->encrypted_columns();
- // If columnEncryptionProperties is empty, every column in the file schema
- // will be encrypted with the footer key.
- if (encrypted_columns.size() != 0) {
- std::vector<std::string> column_path_vec;
- // First, save all column paths in schema.
- for (int i = 0; i < num_columns(); i++) {
- column_path_vec.push_back(schema_.Column(i)->path()->ToDotString());
- }
- // Check if column exists in schema.
- for (const auto& elem : encrypted_columns) {
- auto it = std::find(column_path_vec.begin(), column_path_vec.end(), elem.first);
- if (it == column_path_vec.end()) {
- std::stringstream ss;
- ss << "Encrypted column " + elem.first + " not in file schema";
- throw ParquetException(ss.str());
- }
- }
- }
-
- file_encryptor_.reset(new InternalFileEncryptor(file_encryption_properties,
- properties_->memory_pool()));
- if (file_encryption_properties->encrypted_footer()) {
- PARQUET_THROW_NOT_OK(sink_->Write(kParquetEMagic, 4));
- } else {
- // Encrypted file with plaintext footer mode.
- PARQUET_THROW_NOT_OK(sink_->Write(kParquetMagic, 4));
- }
- }
- }
-};
-
-// ----------------------------------------------------------------------
-// ParquetFileWriter public API
-
-ParquetFileWriter::ParquetFileWriter() {}
-
-ParquetFileWriter::~ParquetFileWriter() {
- try {
- Close();
- } catch (...) {
- }
-}
-
-std::unique_ptr<ParquetFileWriter> ParquetFileWriter::Open(
- std::shared_ptr<::arrow::io::OutputStream> sink, std::shared_ptr<GroupNode> schema,
- std::shared_ptr<WriterProperties> properties,
- std::shared_ptr<const KeyValueMetadata> key_value_metadata) {
- auto contents =
- FileSerializer::Open(std::move(sink), std::move(schema), std::move(properties),
- std::move(key_value_metadata));
- std::unique_ptr<ParquetFileWriter> result(new ParquetFileWriter());
- result->Open(std::move(contents));
- return result;
-}
-
-void WriteFileMetaData(const FileMetaData& file_metadata, ArrowOutputStream* sink) {
- // Write MetaData
- PARQUET_ASSIGN_OR_THROW(int64_t position, sink->Tell());
- uint32_t metadata_len = static_cast<uint32_t>(position);
-
- file_metadata.WriteTo(sink);
- PARQUET_ASSIGN_OR_THROW(position, sink->Tell());
- metadata_len = static_cast<uint32_t>(position) - metadata_len;
-
- // Write Footer
- PARQUET_THROW_NOT_OK(sink->Write(reinterpret_cast<uint8_t*>(&metadata_len), 4));
- PARQUET_THROW_NOT_OK(sink->Write(kParquetMagic, 4));
-}
-
-void WriteMetaDataFile(const FileMetaData& file_metadata, ArrowOutputStream* sink) {
- PARQUET_THROW_NOT_OK(sink->Write(kParquetMagic, 4));
- return WriteFileMetaData(file_metadata, sink);
-}
-
-void WriteEncryptedFileMetadata(const FileMetaData& file_metadata,
- ArrowOutputStream* sink,
- const std::shared_ptr<Encryptor>& encryptor,
- bool encrypt_footer) {
- if (encrypt_footer) { // Encrypted file with encrypted footer
- // encrypt and write to sink
- file_metadata.WriteTo(sink, encryptor);
- } else { // Encrypted file with plaintext footer mode.
- PARQUET_ASSIGN_OR_THROW(int64_t position, sink->Tell());
- uint32_t metadata_len = static_cast<uint32_t>(position);
- file_metadata.WriteTo(sink, encryptor);
- PARQUET_ASSIGN_OR_THROW(position, sink->Tell());
- metadata_len = static_cast<uint32_t>(position) - metadata_len;
-
- PARQUET_THROW_NOT_OK(sink->Write(reinterpret_cast<uint8_t*>(&metadata_len), 4));
- PARQUET_THROW_NOT_OK(sink->Write(kParquetMagic, 4));
- }
-}
-
-void WriteFileCryptoMetaData(const FileCryptoMetaData& crypto_metadata,
- ArrowOutputStream* sink) {
- crypto_metadata.WriteTo(sink);
-}
-
-const SchemaDescriptor* ParquetFileWriter::schema() const { return contents_->schema(); }
-
-const ColumnDescriptor* ParquetFileWriter::descr(int i) const {
- return contents_->schema()->Column(i);
-}
-
-int ParquetFileWriter::num_columns() const { return contents_->num_columns(); }
-
-int64_t ParquetFileWriter::num_rows() const { return contents_->num_rows(); }
-
-int ParquetFileWriter::num_row_groups() const { return contents_->num_row_groups(); }
-
-const std::shared_ptr<const KeyValueMetadata>& ParquetFileWriter::key_value_metadata()
- const {
- return contents_->key_value_metadata();
-}
-
-const std::shared_ptr<FileMetaData> ParquetFileWriter::metadata() const {
- return file_metadata_;
-}
-
-void ParquetFileWriter::Open(std::unique_ptr<ParquetFileWriter::Contents> contents) {
- contents_ = std::move(contents);
-}
-
-void ParquetFileWriter::Close() {
- if (contents_) {
- contents_->Close();
- file_metadata_ = contents_->metadata();
- contents_.reset();
- }
-}
-
-RowGroupWriter* ParquetFileWriter::AppendRowGroup() {
- return contents_->AppendRowGroup();
-}
-
-RowGroupWriter* ParquetFileWriter::AppendBufferedRowGroup() {
- return contents_->AppendBufferedRowGroup();
-}
-
-RowGroupWriter* ParquetFileWriter::AppendRowGroup(int64_t num_rows) {
- return AppendRowGroup();
-}
-
-const std::shared_ptr<WriterProperties>& ParquetFileWriter::properties() const {
- return contents_->properties();
-}
-
-} // namespace parquet
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "parquet/file_writer.h"
+
+#include <cstddef>
+#include <ostream>
+#include <string>
+#include <utility>
+#include <vector>
+
+#include "parquet/column_writer.h"
+#include "parquet/encryption/encryption_internal.h"
+#include "parquet/encryption/internal_file_encryptor.h"
+#include "parquet/exception.h"
+#include "parquet/platform.h"
+#include "parquet/schema.h"
+#include "parquet/types.h"
+
+using arrow::MemoryPool;
+
+using parquet::schema::GroupNode;
+
+namespace parquet {
+
+// ----------------------------------------------------------------------
+// RowGroupWriter public API
+
+RowGroupWriter::RowGroupWriter(std::unique_ptr<Contents> contents)
+ : contents_(std::move(contents)) {}
+
+void RowGroupWriter::Close() {
+ if (contents_) {
+ contents_->Close();
+ }
+}
+
+ColumnWriter* RowGroupWriter::NextColumn() { return contents_->NextColumn(); }
+
+ColumnWriter* RowGroupWriter::column(int i) { return contents_->column(i); }
+
+int64_t RowGroupWriter::total_compressed_bytes() const {
+ return contents_->total_compressed_bytes();
+}
+
+int64_t RowGroupWriter::total_bytes_written() const {
+ return contents_->total_bytes_written();
+}
+
+int RowGroupWriter::current_column() { return contents_->current_column(); }
+
+int RowGroupWriter::num_columns() const { return contents_->num_columns(); }
+
+int64_t RowGroupWriter::num_rows() const { return contents_->num_rows(); }
+
+inline void ThrowRowsMisMatchError(int col, int64_t prev, int64_t curr) {
+ std::stringstream ss;
+ ss << "Column " << col << " had " << curr << " while previous column had " << prev;
+ throw ParquetException(ss.str());
+}
+
+// ----------------------------------------------------------------------
+// RowGroupSerializer
+
+// RowGroupWriter::Contents implementation for the Parquet file specification
+class RowGroupSerializer : public RowGroupWriter::Contents {
+ public:
+ RowGroupSerializer(std::shared_ptr<ArrowOutputStream> sink,
+ RowGroupMetaDataBuilder* metadata, int16_t row_group_ordinal,
+ const WriterProperties* properties, bool buffered_row_group = false,
+ InternalFileEncryptor* file_encryptor = nullptr)
+ : sink_(std::move(sink)),
+ metadata_(metadata),
+ properties_(properties),
+ total_bytes_written_(0),
+ closed_(false),
+ row_group_ordinal_(row_group_ordinal),
+ next_column_index_(0),
+ num_rows_(0),
+ buffered_row_group_(buffered_row_group),
+ file_encryptor_(file_encryptor) {
+ if (buffered_row_group) {
+ InitColumns();
+ } else {
+ column_writers_.push_back(nullptr);
+ }
+ }
+
+ int num_columns() const override { return metadata_->num_columns(); }
+
+ int64_t num_rows() const override {
+ CheckRowsWritten();
+ // CheckRowsWritten ensures num_rows_ is set correctly
+ return num_rows_;
+ }
+
+ ColumnWriter* NextColumn() override {
+ if (buffered_row_group_) {
+ throw ParquetException(
+ "NextColumn() is not supported when a RowGroup is written by size");
+ }
+
+ if (column_writers_[0]) {
+ CheckRowsWritten();
+ }
+
+ // Throws an error if more column chunks are requested than the schema contains
+ auto col_meta = metadata_->NextColumnChunk();
+
+ if (column_writers_[0]) {
+ total_bytes_written_ += column_writers_[0]->Close();
+ }
+
+ ++next_column_index_;
+
+ const auto& path = col_meta->descr()->path();
+ auto meta_encryptor =
+ file_encryptor_ ? file_encryptor_->GetColumnMetaEncryptor(path->ToDotString())
+ : nullptr;
+ auto data_encryptor =
+ file_encryptor_ ? file_encryptor_->GetColumnDataEncryptor(path->ToDotString())
+ : nullptr;
+ std::unique_ptr<PageWriter> pager = PageWriter::Open(
+ sink_, properties_->compression(path), properties_->compression_level(path),
+ col_meta, row_group_ordinal_, static_cast<int16_t>(next_column_index_ - 1),
+ properties_->memory_pool(), false, meta_encryptor, data_encryptor);
+ column_writers_[0] = ColumnWriter::Make(col_meta, std::move(pager), properties_);
+ return column_writers_[0].get();
+ }
+
+ ColumnWriter* column(int i) override {
+ if (!buffered_row_group_) {
+ throw ParquetException(
+ "column() is only supported when a BufferedRowGroup is being written");
+ }
+
+ if (i >= 0 && i < static_cast<int>(column_writers_.size())) {
+ return column_writers_[i].get();
+ }
+ return nullptr;
+ }
+
+ int current_column() const override { return metadata_->current_column(); }
+
+ int64_t total_compressed_bytes() const override {
+ int64_t total_compressed_bytes = 0;
+ for (size_t i = 0; i < column_writers_.size(); i++) {
+ if (column_writers_[i]) {
+ total_compressed_bytes += column_writers_[i]->total_compressed_bytes();
+ }
+ }
+ return total_compressed_bytes;
+ }
+
+ int64_t total_bytes_written() const override {
+ int64_t total_bytes_written = 0;
+ for (size_t i = 0; i < column_writers_.size(); i++) {
+ if (column_writers_[i]) {
+ total_bytes_written += column_writers_[i]->total_bytes_written();
+ }
+ }
+ return total_bytes_written;
+ }
+
+ void Close() override {
+ if (!closed_) {
+ closed_ = true;
+ CheckRowsWritten();
+
+ for (size_t i = 0; i < column_writers_.size(); i++) {
+ if (column_writers_[i]) {
+ total_bytes_written_ += column_writers_[i]->Close();
+ column_writers_[i].reset();
+ }
+ }
+
+ column_writers_.clear();
+
+ // Finish() verifies that all column chunks have been written
+ metadata_->set_num_rows(num_rows_);
+ metadata_->Finish(total_bytes_written_, row_group_ordinal_);
+ }
+ }
+
+ private:
+ std::shared_ptr<ArrowOutputStream> sink_;
+ mutable RowGroupMetaDataBuilder* metadata_;
+ const WriterProperties* properties_;
+ int64_t total_bytes_written_;
+ bool closed_;
+ int16_t row_group_ordinal_;
+ int next_column_index_;
+ mutable int64_t num_rows_;
+ bool buffered_row_group_;
+ InternalFileEncryptor* file_encryptor_;
+
+ void CheckRowsWritten() const {
+ // Verify row counts in the non-buffered case (one column written at a time)
+ if (!buffered_row_group_ && column_writers_.size() > 0 && column_writers_[0]) {
+ int64_t current_col_rows = column_writers_[0]->rows_written();
+ if (num_rows_ == 0) {
+ num_rows_ = current_col_rows;
+ } else if (num_rows_ != current_col_rows) {
+ ThrowRowsMisMatchError(next_column_index_, current_col_rows, num_rows_);
+ }
+ } else if (buffered_row_group_ &&
+ column_writers_.size() > 0) { // when buffered_row_group = true
+ int64_t current_col_rows = column_writers_[0]->rows_written();
+ for (int i = 1; i < static_cast<int>(column_writers_.size()); i++) {
+ int64_t current_col_rows_i = column_writers_[i]->rows_written();
+ if (current_col_rows != current_col_rows_i) {
+ ThrowRowsMisMatchError(i, current_col_rows_i, current_col_rows);
+ }
+ }
+ num_rows_ = current_col_rows;
+ }
+ }
+
+ void InitColumns() {
+ for (int i = 0; i < num_columns(); i++) {
+ auto col_meta = metadata_->NextColumnChunk();
+ const auto& path = col_meta->descr()->path();
+ auto meta_encryptor =
+ file_encryptor_ ? file_encryptor_->GetColumnMetaEncryptor(path->ToDotString())
+ : nullptr;
+ auto data_encryptor =
+ file_encryptor_ ? file_encryptor_->GetColumnDataEncryptor(path->ToDotString())
+ : nullptr;
+ std::unique_ptr<PageWriter> pager = PageWriter::Open(
+ sink_, properties_->compression(path), properties_->compression_level(path),
+ col_meta, static_cast<int16_t>(row_group_ordinal_),
+ static_cast<int16_t>(next_column_index_++), properties_->memory_pool(),
+ buffered_row_group_, meta_encryptor, data_encryptor);
+ column_writers_.push_back(
+ ColumnWriter::Make(col_meta, std::move(pager), properties_));
+ }
+ }
+
+ std::vector<std::shared_ptr<ColumnWriter>> column_writers_;
+};
+
+// ----------------------------------------------------------------------
+// FileSerializer
+
+// An implementation of ParquetFileWriter::Contents that deals with the Parquet
+// file structure, Thrift serialization, and other internal matters
+
+class FileSerializer : public ParquetFileWriter::Contents {
+ public:
+ static std::unique_ptr<ParquetFileWriter::Contents> Open(
+ std::shared_ptr<ArrowOutputStream> sink, std::shared_ptr<GroupNode> schema,
+ std::shared_ptr<WriterProperties> properties,
+ std::shared_ptr<const KeyValueMetadata> key_value_metadata) {
+ std::unique_ptr<ParquetFileWriter::Contents> result(
+ new FileSerializer(std::move(sink), std::move(schema), std::move(properties),
+ std::move(key_value_metadata)));
+
+ return result;
+ }
+
+ void Close() override {
+ if (is_open_) {
+ // If any function here raises an exception, we set is_open_ to false first
+ // so that Close() is not called again (which could cause a segfault)
+ is_open_ = false;
+ if (row_group_writer_) {
+ num_rows_ += row_group_writer_->num_rows();
+ row_group_writer_->Close();
+ }
+ row_group_writer_.reset();
+
+ // Write magic bytes and metadata
+ auto file_encryption_properties = properties_->file_encryption_properties();
+
+ if (file_encryption_properties == nullptr) { // Non-encrypted file.
+ file_metadata_ = metadata_->Finish();
+ WriteFileMetaData(*file_metadata_, sink_.get());
+ } else { // Encrypted file
+ CloseEncryptedFile(file_encryption_properties);
+ }
+ }
+ }
+
+ int num_columns() const override { return schema_.num_columns(); }
+
+ int num_row_groups() const override { return num_row_groups_; }
+
+ int64_t num_rows() const override { return num_rows_; }
+
+ const std::shared_ptr<WriterProperties>& properties() const override {
+ return properties_;
+ }
+
+ RowGroupWriter* AppendRowGroup(bool buffered_row_group) {
+ if (row_group_writer_) {
+ row_group_writer_->Close();
+ }
+ num_row_groups_++;
+ auto rg_metadata = metadata_->AppendRowGroup();
+ std::unique_ptr<RowGroupWriter::Contents> contents(new RowGroupSerializer(
+ sink_, rg_metadata, static_cast<int16_t>(num_row_groups_ - 1), properties_.get(),
+ buffered_row_group, file_encryptor_.get()));
+ row_group_writer_.reset(new RowGroupWriter(std::move(contents)));
+ return row_group_writer_.get();
+ }
+
+ RowGroupWriter* AppendRowGroup() override { return AppendRowGroup(false); }
+
+ RowGroupWriter* AppendBufferedRowGroup() override { return AppendRowGroup(true); }
+
+ ~FileSerializer() override {
+ try {
+ Close();
+ } catch (...) {
+ }
+ }
+
+ private:
+ FileSerializer(std::shared_ptr<ArrowOutputStream> sink,
+ std::shared_ptr<GroupNode> schema,
+ std::shared_ptr<WriterProperties> properties,
+ std::shared_ptr<const KeyValueMetadata> key_value_metadata)
+ : ParquetFileWriter::Contents(std::move(schema), std::move(key_value_metadata)),
+ sink_(std::move(sink)),
+ is_open_(true),
+ properties_(std::move(properties)),
+ num_row_groups_(0),
+ num_rows_(0),
+ metadata_(FileMetaDataBuilder::Make(&schema_, properties_, key_value_metadata_)) {
+ PARQUET_ASSIGN_OR_THROW(int64_t position, sink_->Tell());
+ if (position == 0) {
+ StartFile();
+ } else {
+ throw ParquetException("Appending to file not implemented.");
+ }
+ }
+
+ void CloseEncryptedFile(FileEncryptionProperties* file_encryption_properties) {
+ // Encrypted file with encrypted footer
+ if (file_encryption_properties->encrypted_footer()) {
+ // encrypted footer
+ file_metadata_ = metadata_->Finish();
+
+ PARQUET_ASSIGN_OR_THROW(int64_t position, sink_->Tell());
+ uint64_t metadata_start = static_cast<uint64_t>(position);
+ auto crypto_metadata = metadata_->GetCryptoMetaData();
+ WriteFileCryptoMetaData(*crypto_metadata, sink_.get());
+
+ auto footer_encryptor = file_encryptor_->GetFooterEncryptor();
+ WriteEncryptedFileMetadata(*file_metadata_, sink_.get(), footer_encryptor, true);
+ PARQUET_ASSIGN_OR_THROW(position, sink_->Tell());
+ uint32_t footer_and_crypto_len = static_cast<uint32_t>(position - metadata_start);
+ PARQUET_THROW_NOT_OK(
+ sink_->Write(reinterpret_cast<uint8_t*>(&footer_and_crypto_len), 4));
+ PARQUET_THROW_NOT_OK(sink_->Write(kParquetEMagic, 4));
+ } else { // Encrypted file with plaintext footer
+ file_metadata_ = metadata_->Finish();
+ auto footer_signing_encryptor = file_encryptor_->GetFooterSigningEncryptor();
+ WriteEncryptedFileMetadata(*file_metadata_, sink_.get(), footer_signing_encryptor,
+ false);
+ }
+ if (file_encryptor_) {
+ file_encryptor_->WipeOutEncryptionKeys();
+ }
+ }
+
+ std::shared_ptr<ArrowOutputStream> sink_;
+ bool is_open_;
+ const std::shared_ptr<WriterProperties> properties_;
+ int num_row_groups_;
+ int64_t num_rows_;
+ std::unique_ptr<FileMetaDataBuilder> metadata_;
+ // Only one of the row group writers is active at a time
+ std::unique_ptr<RowGroupWriter> row_group_writer_;
+
+ std::unique_ptr<InternalFileEncryptor> file_encryptor_;
+
+ void StartFile() {
+ auto file_encryption_properties = properties_->file_encryption_properties();
+ if (file_encryption_properties == nullptr) {
+ // Unencrypted parquet files always start with PAR1
+ PARQUET_THROW_NOT_OK(sink_->Write(kParquetMagic, 4));
+ } else {
+ // Check that all columns in columnEncryptionProperties exist in the schema.
+ auto encrypted_columns = file_encryption_properties->encrypted_columns();
+ // If columnEncryptionProperties is empty, every column in the file schema
+ // will be encrypted with the footer key.
+ if (encrypted_columns.size() != 0) {
+ std::vector<std::string> column_path_vec;
+ // First, save all column paths in schema.
+ for (int i = 0; i < num_columns(); i++) {
+ column_path_vec.push_back(schema_.Column(i)->path()->ToDotString());
+ }
+ // Check if column exists in schema.
+ for (const auto& elem : encrypted_columns) {
+ auto it = std::find(column_path_vec.begin(), column_path_vec.end(), elem.first);
+ if (it == column_path_vec.end()) {
+ std::stringstream ss;
+ ss << "Encrypted column " + elem.first + " not in file schema";
+ throw ParquetException(ss.str());
+ }
+ }
+ }
+
+ file_encryptor_.reset(new InternalFileEncryptor(file_encryption_properties,
+ properties_->memory_pool()));
+ if (file_encryption_properties->encrypted_footer()) {
+ PARQUET_THROW_NOT_OK(sink_->Write(kParquetEMagic, 4));
+ } else {
+ // Encrypted file with plaintext footer mode.
+ PARQUET_THROW_NOT_OK(sink_->Write(kParquetMagic, 4));
+ }
+ }
+ }
+};
+
+// ----------------------------------------------------------------------
+// ParquetFileWriter public API
+
+ParquetFileWriter::ParquetFileWriter() {}
+
+ParquetFileWriter::~ParquetFileWriter() {
+ try {
+ Close();
+ } catch (...) {
+ }
+}
+
+std::unique_ptr<ParquetFileWriter> ParquetFileWriter::Open(
+ std::shared_ptr<::arrow::io::OutputStream> sink, std::shared_ptr<GroupNode> schema,
+ std::shared_ptr<WriterProperties> properties,
+ std::shared_ptr<const KeyValueMetadata> key_value_metadata) {
+ auto contents =
+ FileSerializer::Open(std::move(sink), std::move(schema), std::move(properties),
+ std::move(key_value_metadata));
+ std::unique_ptr<ParquetFileWriter> result(new ParquetFileWriter());
+ result->Open(std::move(contents));
+ return result;
+}
+
+void WriteFileMetaData(const FileMetaData& file_metadata, ArrowOutputStream* sink) {
+ // Write MetaData
+ PARQUET_ASSIGN_OR_THROW(int64_t position, sink->Tell());
+ uint32_t metadata_len = static_cast<uint32_t>(position);
+
+ file_metadata.WriteTo(sink);
+ PARQUET_ASSIGN_OR_THROW(position, sink->Tell());
+ metadata_len = static_cast<uint32_t>(position) - metadata_len;
+
+ // Write Footer
+ PARQUET_THROW_NOT_OK(sink->Write(reinterpret_cast<uint8_t*>(&metadata_len), 4));
+ PARQUET_THROW_NOT_OK(sink->Write(kParquetMagic, 4));
+}
+
+void WriteMetaDataFile(const FileMetaData& file_metadata, ArrowOutputStream* sink) {
+ PARQUET_THROW_NOT_OK(sink->Write(kParquetMagic, 4));
+ return WriteFileMetaData(file_metadata, sink);
+}
+
+void WriteEncryptedFileMetadata(const FileMetaData& file_metadata,
+ ArrowOutputStream* sink,
+ const std::shared_ptr<Encryptor>& encryptor,
+ bool encrypt_footer) {
+ if (encrypt_footer) { // Encrypted file with encrypted footer
+ // encrypt and write to sink
+ file_metadata.WriteTo(sink, encryptor);
+ } else { // Encrypted file with plaintext footer mode.
+ PARQUET_ASSIGN_OR_THROW(int64_t position, sink->Tell());
+ uint32_t metadata_len = static_cast<uint32_t>(position);
+ file_metadata.WriteTo(sink, encryptor);
+ PARQUET_ASSIGN_OR_THROW(position, sink->Tell());
+ metadata_len = static_cast<uint32_t>(position) - metadata_len;
+
+ PARQUET_THROW_NOT_OK(sink->Write(reinterpret_cast<uint8_t*>(&metadata_len), 4));
+ PARQUET_THROW_NOT_OK(sink->Write(kParquetMagic, 4));
+ }
+}
+
+void WriteFileCryptoMetaData(const FileCryptoMetaData& crypto_metadata,
+ ArrowOutputStream* sink) {
+ crypto_metadata.WriteTo(sink);
+}
+
+const SchemaDescriptor* ParquetFileWriter::schema() const { return contents_->schema(); }
+
+const ColumnDescriptor* ParquetFileWriter::descr(int i) const {
+ return contents_->schema()->Column(i);
+}
+
+int ParquetFileWriter::num_columns() const { return contents_->num_columns(); }
+
+int64_t ParquetFileWriter::num_rows() const { return contents_->num_rows(); }
+
+int ParquetFileWriter::num_row_groups() const { return contents_->num_row_groups(); }
+
+const std::shared_ptr<const KeyValueMetadata>& ParquetFileWriter::key_value_metadata()
+ const {
+ return contents_->key_value_metadata();
+}
+
+const std::shared_ptr<FileMetaData> ParquetFileWriter::metadata() const {
+ return file_metadata_;
+}
+
+void ParquetFileWriter::Open(std::unique_ptr<ParquetFileWriter::Contents> contents) {
+ contents_ = std::move(contents);
+}
+
+void ParquetFileWriter::Close() {
+ if (contents_) {
+ contents_->Close();
+ file_metadata_ = contents_->metadata();
+ contents_.reset();
+ }
+}
+
+RowGroupWriter* ParquetFileWriter::AppendRowGroup() {
+ return contents_->AppendRowGroup();
+}
+
+RowGroupWriter* ParquetFileWriter::AppendBufferedRowGroup() {
+ return contents_->AppendBufferedRowGroup();
+}
+
+RowGroupWriter* ParquetFileWriter::AppendRowGroup(int64_t num_rows) {
+ return AppendRowGroup();
+}
+
+const std::shared_ptr<WriterProperties>& ParquetFileWriter::properties() const {
+ return contents_->properties();
+}
+
+} // namespace parquet
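
The serialized write path above is easiest to see end to end. The following sketch is illustrative only: the single INT32 column, its name "x", and the in-memory sink are assumptions, not part of this diff. It opens a writer, appends one row group, writes one column chunk through NextColumn(), and closes the file so the footer is serialized.

#include "arrow/io/memory.h"
#include "parquet/column_writer.h"
#include "parquet/file_writer.h"
#include "parquet/schema.h"

int main() {
  using parquet::schema::GroupNode;
  using parquet::schema::PrimitiveNode;

  // Hypothetical schema: one required INT32 column named "x".
  parquet::schema::NodeVector fields = {PrimitiveNode::Make(
      "x", parquet::Repetition::REQUIRED, parquet::Type::INT32)};
  auto schema = std::static_pointer_cast<GroupNode>(
      GroupNode::Make("schema", parquet::Repetition::REQUIRED, fields));

  auto sink = arrow::io::BufferOutputStream::Create().ValueOrDie();
  auto writer = parquet::ParquetFileWriter::Open(sink, schema);

  // In the unbuffered mode, NextColumn() closes the previous column chunk
  // before starting the next one (see RowGroupSerializer::NextColumn()).
  auto* rg = writer->AppendRowGroup();
  auto* col = static_cast<parquet::Int32Writer*>(rg->NextColumn());
  int32_t values[] = {1, 2, 3};
  col->WriteBatch(3, nullptr, nullptr, values);

  writer->Close();  // Finishes the metadata and writes the footer.
  return 0;
}
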
diff --git a/contrib/libs/apache/arrow/cpp/src/parquet/file_writer.h b/contrib/libs/apache/arrow/cpp/src/parquet/file_writer.h
index dafb2573b2c..4cfc24719a3 100644
--- a/contrib/libs/apache/arrow/cpp/src/parquet/file_writer.h
+++ b/contrib/libs/apache/arrow/cpp/src/parquet/file_writer.h
@@ -1,234 +1,234 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-#pragma once
-
-#include <cstdint>
-#include <memory>
-#include <utility>
-
-#include "parquet/metadata.h"
-#include "parquet/platform.h"
-#include "parquet/properties.h"
-#include "parquet/schema.h"
-
-namespace parquet {
-
-class ColumnWriter;
-
-// FIXME: copied from reader-internal.cc
-static constexpr uint8_t kParquetMagic[4] = {'P', 'A', 'R', '1'};
-static constexpr uint8_t kParquetEMagic[4] = {'P', 'A', 'R', 'E'};
-
-class PARQUET_EXPORT RowGroupWriter {
- public:
- // 'Contents' is a virtual inner class that aids dependency injection and
- // makes it easier to create test fixtures.
- // An implementation of the Contents class is defined in the .cc file
- struct Contents {
- virtual ~Contents() = default;
- virtual int num_columns() const = 0;
- virtual int64_t num_rows() const = 0;
-
- // to be used only with ParquetFileWriter::AppendRowGroup
- virtual ColumnWriter* NextColumn() = 0;
- // to be used only with ParquetFileWriter::AppendBufferedRowGroup
- virtual ColumnWriter* column(int i) = 0;
-
- virtual int current_column() const = 0;
- virtual void Close() = 0;
-
- // total bytes written by the page writer
- virtual int64_t total_bytes_written() const = 0;
- // total bytes still compressed but not written
- virtual int64_t total_compressed_bytes() const = 0;
- };
-
- explicit RowGroupWriter(std::unique_ptr<Contents> contents);
-
- /// Construct a ColumnWriter for the indicated row group-relative column.
- ///
- /// To be used only with ParquetFileWriter::AppendRowGroup
- /// Ownership is solely within the RowGroupWriter. The ColumnWriter is only
- /// valid until the next call to NextColumn or Close. As the contents are
- /// directly written to the sink, once a new column is started, the contents
- /// of the previous one cannot be modified anymore.
- ColumnWriter* NextColumn();
- /// Index of currently written column. Equal to -1 if NextColumn()
- /// has not been called yet.
- int current_column();
- void Close();
-
- int num_columns() const;
-
- /// Construct a ColumnWriter for the indicated row group column.
- ///
- /// To be used only with ParquetFileWriter::AppendBufferedRowGroup
- /// Ownership is solely within the RowGroupWriter. The ColumnWriter is
- /// valid until Close(). The contents are buffered in memory and written to
- /// the sink on Close().
- ColumnWriter* column(int i);
-
- /// Number of rows that shall be written as part of this RowGroup.
- int64_t num_rows() const;
-
- int64_t total_bytes_written() const;
- int64_t total_compressed_bytes() const;
-
- private:
- // Holds a pointer to an instance of Contents implementation
- std::unique_ptr<Contents> contents_;
-};
-
-PARQUET_EXPORT
-void WriteFileMetaData(const FileMetaData& file_metadata,
- ::arrow::io::OutputStream* sink);
-
-PARQUET_EXPORT
-void WriteMetaDataFile(const FileMetaData& file_metadata,
- ::arrow::io::OutputStream* sink);
-
-PARQUET_EXPORT
-void WriteEncryptedFileMetadata(const FileMetaData& file_metadata,
- ArrowOutputStream* sink,
- const std::shared_ptr<Encryptor>& encryptor,
- bool encrypt_footer);
-
-PARQUET_EXPORT
-void WriteEncryptedFileMetadata(const FileMetaData& file_metadata,
- ::arrow::io::OutputStream* sink,
- const std::shared_ptr<Encryptor>& encryptor = NULLPTR,
- bool encrypt_footer = false);
-PARQUET_EXPORT
-void WriteFileCryptoMetaData(const FileCryptoMetaData& crypto_metadata,
- ::arrow::io::OutputStream* sink);
-
-class PARQUET_EXPORT ParquetFileWriter {
- public:
- // 'Contents' is a virtual inner class that aids dependency injection and
- // makes it easier to create test fixtures.
- // An implementation of the Contents class is defined in the .cc file
- struct Contents {
- Contents(std::shared_ptr<::parquet::schema::GroupNode> schema,
- std::shared_ptr<const KeyValueMetadata> key_value_metadata)
- : schema_(), key_value_metadata_(std::move(key_value_metadata)) {
- schema_.Init(std::move(schema));
- }
- virtual ~Contents() {}
- // Perform any cleanup associated with the file contents
- virtual void Close() = 0;
-
- /// \note Deprecated since 1.3.0
- RowGroupWriter* AppendRowGroup(int64_t num_rows);
-
- virtual RowGroupWriter* AppendRowGroup() = 0;
- virtual RowGroupWriter* AppendBufferedRowGroup() = 0;
-
- virtual int64_t num_rows() const = 0;
- virtual int num_columns() const = 0;
- virtual int num_row_groups() const = 0;
-
- virtual const std::shared_ptr<WriterProperties>& properties() const = 0;
-
- const std::shared_ptr<const KeyValueMetadata>& key_value_metadata() const {
- return key_value_metadata_;
- }
-
- // Return const-pointer to make it clear that this object is not to be copied
- const SchemaDescriptor* schema() const { return &schema_; }
-
- SchemaDescriptor schema_;
-
- /// This should be the only place this is stored. Everything else is a const reference
- std::shared_ptr<const KeyValueMetadata> key_value_metadata_;
-
- const std::shared_ptr<FileMetaData>& metadata() const { return file_metadata_; }
- std::shared_ptr<FileMetaData> file_metadata_;
- };
-
- ParquetFileWriter();
- ~ParquetFileWriter();
-
- static std::unique_ptr<ParquetFileWriter> Open(
- std::shared_ptr<::arrow::io::OutputStream> sink,
- std::shared_ptr<schema::GroupNode> schema,
- std::shared_ptr<WriterProperties> properties = default_writer_properties(),
- std::shared_ptr<const KeyValueMetadata> key_value_metadata = NULLPTR);
-
- void Open(std::unique_ptr<Contents> contents);
- void Close();
-
- // Construct a RowGroupWriter for the indicated number of rows.
- //
- // Ownership is solely within the ParquetFileWriter. The RowGroupWriter is only valid
- // until the next call to AppendRowGroup or AppendBufferedRowGroup or Close.
- // @param num_rows The number of rows that are stored in the new RowGroup
- //
- // \deprecated Since 1.3.0
- RowGroupWriter* AppendRowGroup(int64_t num_rows);
-
- /// Construct a RowGroupWriter with an arbitrary number of rows.
- ///
- /// Ownership is solely within the ParquetFileWriter. The RowGroupWriter is only valid
- /// until the next call to AppendRowGroup or AppendBufferedRowGroup or Close.
- RowGroupWriter* AppendRowGroup();
-
- /// Construct a RowGroupWriter that buffers all the values until the RowGroup is ready.
- /// Use this if you want to write a RowGroup based on a certain size
- ///
- /// Ownership is solely within the ParquetFileWriter. The RowGroupWriter is only valid
- /// until the next call to AppendRowGroup or AppendBufferedRowGroup or Close.
- RowGroupWriter* AppendBufferedRowGroup();
-
- /// Number of columns.
- ///
- /// This number is fixed during the lifetime of the writer as it is determined via
- /// the schema.
- int num_columns() const;
-
- /// Number of rows written in the RowGroups started so far.
- ///
- /// Changes when a new RowGroup is appended.
- int64_t num_rows() const;
-
- /// Number of started RowGroups.
- int num_row_groups() const;
-
- /// Configuration passed to the writer, e.g. the used Parquet format version.
- const std::shared_ptr<WriterProperties>& properties() const;
-
- /// Returns the file schema descriptor
- const SchemaDescriptor* schema() const;
-
- /// Returns a column descriptor in schema
- const ColumnDescriptor* descr(int i) const;
-
- /// Returns the file custom metadata
- const std::shared_ptr<const KeyValueMetadata>& key_value_metadata() const;
-
- /// Returns the file metadata, only available after calling Close().
- const std::shared_ptr<FileMetaData> metadata() const;
-
- private:
- // Holds a pointer to an instance of Contents implementation
- std::unique_ptr<Contents> contents_;
- std::shared_ptr<FileMetaData> file_metadata_;
-};
-
-} // namespace parquet
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+
+#include <cstdint>
+#include <memory>
+#include <utility>
+
+#include "parquet/metadata.h"
+#include "parquet/platform.h"
+#include "parquet/properties.h"
+#include "parquet/schema.h"
+
+namespace parquet {
+
+class ColumnWriter;
+
+// FIXME: copied from reader-internal.cc
+static constexpr uint8_t kParquetMagic[4] = {'P', 'A', 'R', '1'};
+static constexpr uint8_t kParquetEMagic[4] = {'P', 'A', 'R', 'E'};
+
+class PARQUET_EXPORT RowGroupWriter {
+ public:
+ // 'Contents' is a virtual inner class that aids dependency injection and
+ // makes it easier to create test fixtures.
+ // An implementation of the Contents class is defined in the .cc file
+ struct Contents {
+ virtual ~Contents() = default;
+ virtual int num_columns() const = 0;
+ virtual int64_t num_rows() const = 0;
+
+ // to be used only with ParquetFileWriter::AppendRowGroup
+ virtual ColumnWriter* NextColumn() = 0;
+ // to be used only with ParquetFileWriter::AppendBufferedRowGroup
+ virtual ColumnWriter* column(int i) = 0;
+
+ virtual int current_column() const = 0;
+ virtual void Close() = 0;
+
+ // total bytes written by the page writer
+ virtual int64_t total_bytes_written() const = 0;
+ // total bytes still compressed but not written
+ virtual int64_t total_compressed_bytes() const = 0;
+ };
+
+ explicit RowGroupWriter(std::unique_ptr<Contents> contents);
+
+ /// Construct a ColumnWriter for the indicated row group-relative column.
+ ///
+ /// To be used only with ParquetFileWriter::AppendRowGroup
+ /// Ownership is solely within the RowGroupWriter. The ColumnWriter is only
+ /// valid until the next call to NextColumn or Close. As the contents are
+ /// directly written to the sink, once a new column is started, the contents
+ /// of the previous one cannot be modified anymore.
+ ColumnWriter* NextColumn();
+ /// Index of currently written column. Equal to -1 if NextColumn()
+ /// has not been called yet.
+ int current_column();
+ void Close();
+
+ int num_columns() const;
+
+ /// Construct a ColumnWriter for the indicated row group column.
+ ///
+ /// To be used only with ParquetFileWriter::AppendBufferedRowGroup
+ /// Ownership is solely within the RowGroupWriter. The ColumnWriter is
+ /// valid until Close(). The contents are buffered in memory and written to
+ /// the sink on Close().
+ ColumnWriter* column(int i);
+
+ /// Number of rows that shall be written as part of this RowGroup.
+ int64_t num_rows() const;
+
+ int64_t total_bytes_written() const;
+ int64_t total_compressed_bytes() const;
+
+ private:
+ // Holds a pointer to an instance of Contents implementation
+ std::unique_ptr<Contents> contents_;
+};
+
+PARQUET_EXPORT
+void WriteFileMetaData(const FileMetaData& file_metadata,
+ ::arrow::io::OutputStream* sink);
+
+PARQUET_EXPORT
+void WriteMetaDataFile(const FileMetaData& file_metadata,
+ ::arrow::io::OutputStream* sink);
+
+PARQUET_EXPORT
+void WriteEncryptedFileMetadata(const FileMetaData& file_metadata,
+ ArrowOutputStream* sink,
+ const std::shared_ptr<Encryptor>& encryptor,
+ bool encrypt_footer);
+
+PARQUET_EXPORT
+void WriteEncryptedFileMetadata(const FileMetaData& file_metadata,
+ ::arrow::io::OutputStream* sink,
+ const std::shared_ptr<Encryptor>& encryptor = NULLPTR,
+ bool encrypt_footer = false);
+PARQUET_EXPORT
+void WriteFileCryptoMetaData(const FileCryptoMetaData& crypto_metadata,
+ ::arrow::io::OutputStream* sink);
+
+class PARQUET_EXPORT ParquetFileWriter {
+ public:
+ // 'Contents' is a virtual inner class that aids dependency injection and
+ // makes it easier to create test fixtures.
+ // An implementation of the Contents class is defined in the .cc file
+ struct Contents {
+ Contents(std::shared_ptr<::parquet::schema::GroupNode> schema,
+ std::shared_ptr<const KeyValueMetadata> key_value_metadata)
+ : schema_(), key_value_metadata_(std::move(key_value_metadata)) {
+ schema_.Init(std::move(schema));
+ }
+ virtual ~Contents() {}
+ // Perform any cleanup associated with the file contents
+ virtual void Close() = 0;
+
+ /// \note Deprecated since 1.3.0
+ RowGroupWriter* AppendRowGroup(int64_t num_rows);
+
+ virtual RowGroupWriter* AppendRowGroup() = 0;
+ virtual RowGroupWriter* AppendBufferedRowGroup() = 0;
+
+ virtual int64_t num_rows() const = 0;
+ virtual int num_columns() const = 0;
+ virtual int num_row_groups() const = 0;
+
+ virtual const std::shared_ptr<WriterProperties>& properties() const = 0;
+
+ const std::shared_ptr<const KeyValueMetadata>& key_value_metadata() const {
+ return key_value_metadata_;
+ }
+
+ // Return const-pointer to make it clear that this object is not to be copied
+ const SchemaDescriptor* schema() const { return &schema_; }
+
+ SchemaDescriptor schema_;
+
+ /// This should be the only place this is stored. Everything else is a const reference
+ std::shared_ptr<const KeyValueMetadata> key_value_metadata_;
+
+ const std::shared_ptr<FileMetaData>& metadata() const { return file_metadata_; }
+ std::shared_ptr<FileMetaData> file_metadata_;
+ };
+
+ ParquetFileWriter();
+ ~ParquetFileWriter();
+
+ static std::unique_ptr<ParquetFileWriter> Open(
+ std::shared_ptr<::arrow::io::OutputStream> sink,
+ std::shared_ptr<schema::GroupNode> schema,
+ std::shared_ptr<WriterProperties> properties = default_writer_properties(),
+ std::shared_ptr<const KeyValueMetadata> key_value_metadata = NULLPTR);
+
+ void Open(std::unique_ptr<Contents> contents);
+ void Close();
+
+ // Construct a RowGroupWriter for the indicated number of rows.
+ //
+ // Ownership is solely within the ParquetFileWriter. The RowGroupWriter is only valid
+ // until the next call to AppendRowGroup or AppendBufferedRowGroup or Close.
+ // @param num_rows The number of rows that are stored in the new RowGroup
+ //
+ // \deprecated Since 1.3.0
+ RowGroupWriter* AppendRowGroup(int64_t num_rows);
+
+ /// Construct a RowGroupWriter with an arbitrary number of rows.
+ ///
+ /// Ownership is solely within the ParquetFileWriter. The RowGroupWriter is only valid
+ /// until the next call to AppendRowGroup or AppendBufferedRowGroup or Close.
+ RowGroupWriter* AppendRowGroup();
+
+ /// Construct a RowGroupWriter that buffers all the values until the RowGroup is ready.
+ /// Use this if you want to write a RowGroup based on a certain size
+ ///
+ /// Ownership is solely within the ParquetFileWriter. The RowGroupWriter is only valid
+ /// until the next call to AppendRowGroup or AppendBufferedRowGroup or Close.
+ RowGroupWriter* AppendBufferedRowGroup();
+
+ /// Number of columns.
+ ///
+ /// This number is fixed during the lifetime of the writer as it is determined via
+ /// the schema.
+ int num_columns() const;
+
+ /// Number of rows written in the RowGroups started so far.
+ ///
+ /// Changes when a new RowGroup is appended.
+ int64_t num_rows() const;
+
+ /// Number of started RowGroups.
+ int num_row_groups() const;
+
+ /// Configuration passed to the writer, e.g. the used Parquet format version.
+ const std::shared_ptr<WriterProperties>& properties() const;
+
+ /// Returns the file schema descriptor
+ const SchemaDescriptor* schema() const;
+
+ /// Returns a column descriptor in schema
+ const ColumnDescriptor* descr(int i) const;
+
+ /// Returns the file custom metadata
+ const std::shared_ptr<const KeyValueMetadata>& key_value_metadata() const;
+
+ /// Returns the file metadata, only available after calling Close().
+ const std::shared_ptr<FileMetaData> metadata() const;
+
+ private:
+ // Holds a pointer to an instance of Contents implementation
+ std::unique_ptr<Contents> contents_;
+ std::shared_ptr<FileMetaData> file_metadata_;
+};
+
+} // namespace parquet
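
The header above exposes two row-group modes: AppendRowGroup() with strictly sequential NextColumn() calls, and AppendBufferedRowGroup() with random-access column(i). A minimal sketch of the buffered mode follows; it assumes a writer opened as in the previous example over a hypothetical two-column INT32 schema.

#include "parquet/column_writer.h"
#include "parquet/file_writer.h"

void WriteBufferedRowGroup(parquet::ParquetFileWriter* writer) {
  parquet::RowGroupWriter* rg = writer->AppendBufferedRowGroup();
  auto* a = static_cast<parquet::Int32Writer*>(rg->column(0));
  auto* b = static_cast<parquet::Int32Writer*>(rg->column(1));
  int32_t va[] = {1, 2};
  int32_t vb[] = {3, 4};
  // Unlike NextColumn(), columns may be filled in any order; pages stay
  // buffered in memory and are flushed to the sink when the group closes.
  b->WriteBatch(2, nullptr, nullptr, vb);
  a->WriteBatch(2, nullptr, nullptr, va);
  rg->Close();
}
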
diff --git a/contrib/libs/apache/arrow/cpp/src/parquet/hasher.h b/contrib/libs/apache/arrow/cpp/src/parquet/hasher.h
index 7452e39190f..d699356a6c4 100644
--- a/contrib/libs/apache/arrow/cpp/src/parquet/hasher.h
+++ b/contrib/libs/apache/arrow/cpp/src/parquet/hasher.h
@@ -1,72 +1,72 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-#pragma once
-
-#include <cstdint>
-#include "parquet/types.h"
-
-namespace parquet {
-// Abstract base class for hashing values via their plain encoding
-class Hasher {
- public:
- /// Compute hash for a 32-bit value by using its plain encoding result.
- ///
- /// @param value the value to hash.
- /// @return hash result.
- virtual uint64_t Hash(int32_t value) const = 0;
-
- /// Compute hash for a 64-bit value by using its plain encoding result.
- ///
- /// @param value the value to hash.
- /// @return hash result.
- virtual uint64_t Hash(int64_t value) const = 0;
-
- /// Compute hash for float value by using its plain encoding result.
- ///
- /// @param value the value to hash.
- /// @return hash result.
- virtual uint64_t Hash(float value) const = 0;
-
- /// Compute hash for double value by using its plain encoding result.
- ///
- /// @param value the value to hash.
- /// @return hash result.
- virtual uint64_t Hash(double value) const = 0;
-
- /// Compute hash for Int96 value by using its plain encoding result.
- ///
- /// @param value the value to hash.
- /// @return hash result.
- virtual uint64_t Hash(const Int96* value) const = 0;
-
- /// Compute hash for ByteArray value by using its plain encoding result.
- ///
- /// @param value the value to hash.
- /// @return hash result.
- virtual uint64_t Hash(const ByteArray* value) const = 0;
-
- /// Compute hash for fixed byte array value by using its plain encoding result.
- ///
- /// @param value the value address.
- /// @param len the value length.
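- /// @return hash result.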
- virtual uint64_t Hash(const FLBA* value, uint32_t len) const = 0;
-
- virtual ~Hasher() = default;
-};
-
-} // namespace parquet
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+
+#include <cstdint>
+#include "parquet/types.h"
+
+namespace parquet {
+// Abstract base class for hashing values via their plain encoding
+class Hasher {
+ public:
+ /// Compute hash for a 32-bit value by using its plain encoding result.
+ ///
+ /// @param value the value to hash.
+ /// @return hash result.
+ virtual uint64_t Hash(int32_t value) const = 0;
+
+ /// Compute hash for a 64-bit value by using its plain encoding result.
+ ///
+ /// @param value the value to hash.
+ /// @return hash result.
+ virtual uint64_t Hash(int64_t value) const = 0;
+
+ /// Compute hash for float value by using its plain encoding result.
+ ///
+ /// @param value the value to hash.
+ /// @return hash result.
+ virtual uint64_t Hash(float value) const = 0;
+
+ /// Compute hash for double value by using its plain encoding result.
+ ///
+ /// @param value the value to hash.
+ /// @return hash result.
+ virtual uint64_t Hash(double value) const = 0;
+
+ /// Compute hash for Int96 value by using its plain encoding result.
+ ///
+ /// @param value the value to hash.
+ /// @return hash result.
+ virtual uint64_t Hash(const Int96* value) const = 0;
+
+ /// Compute hash for ByteArray value by using its plain encoding result.
+ ///
+ /// @param value the value to hash.
+ /// @return hash result.
+ virtual uint64_t Hash(const ByteArray* value) const = 0;
+
+ /// Compute hash for fixed byte array value by using its plain encoding result.
+ ///
+ /// @param value the value address.
+ /// @param len the value length.
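+ /// @return hash result.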
+ virtual uint64_t Hash(const FLBA* value, uint32_t len) const = 0;
+
+ virtual ~Hasher() = default;
+};
+
+} // namespace parquet
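
The Hasher contract above is that every overload hashes the value's plain encoding. Below is a toy FNV-1a sketch for illustration only; it is not the library's production hasher, and it hashes native byte order, which matches the plain encoding only on little-endian machines.

#include <cstdint>

#include "parquet/hasher.h"
#include "parquet/types.h"

class Fnv1aHasher : public parquet::Hasher {
 public:
  uint64_t Hash(int32_t value) const override { return Bytes(&value, 4); }
  uint64_t Hash(int64_t value) const override { return Bytes(&value, 8); }
  uint64_t Hash(float value) const override { return Bytes(&value, 4); }
  uint64_t Hash(double value) const override { return Bytes(&value, 8); }
  uint64_t Hash(const parquet::Int96* value) const override {
    return Bytes(value->value, 12);  // Int96 is three 32-bit words.
  }
  uint64_t Hash(const parquet::ByteArray* value) const override {
    return Bytes(value->ptr, value->len);
  }
  uint64_t Hash(const parquet::FLBA* value, uint32_t len) const override {
    return Bytes(value->ptr, len);
  }

 private:
  static uint64_t Bytes(const void* data, uint32_t len) {
    const auto* p = static_cast<const uint8_t*>(data);
    uint64_t h = 14695981039346656037ULL;  // FNV offset basis.
    for (uint32_t i = 0; i < len; ++i) {
      h = (h ^ p[i]) * 1099511628211ULL;  // FNV prime.
    }
    return h;
  }
};
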
diff --git a/contrib/libs/apache/arrow/cpp/src/parquet/level_comparison.cc b/contrib/libs/apache/arrow/cpp/src/parquet/level_comparison.cc
index b0851f5cf1f..30614ae61fb 100644
--- a/contrib/libs/apache/arrow/cpp/src/parquet/level_comparison.cc
+++ b/contrib/libs/apache/arrow/cpp/src/parquet/level_comparison.cc
@@ -1,82 +1,82 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-#include "parquet/level_comparison.h"
-
-#define PARQUET_IMPL_NAMESPACE standard
-#include "parquet/level_comparison_inc.h"
-#undef PARQUET_IMPL_NAMESPACE
-
-#include <vector>
-
-#include "arrow/util/dispatch.h"
-
-namespace parquet {
-namespace internal {
-
-#if defined(ARROW_HAVE_RUNTIME_AVX2)
-MinMax FindMinMaxAvx2(const int16_t* levels, int64_t num_levels);
-uint64_t GreaterThanBitmapAvx2(const int16_t* levels, int64_t num_levels, int16_t rhs);
-#endif
-
-namespace {
-
-using ::arrow::internal::DispatchLevel;
-using ::arrow::internal::DynamicDispatch;
-
-// The AVX2 variants declared above are defined in level_comparison_avx2.cc
-
-struct GreaterThanDynamicFunction {
- using FunctionType = decltype(&GreaterThanBitmap);
-
- static std::vector<std::pair<DispatchLevel, FunctionType>> implementations() {
- return {
- { DispatchLevel::NONE, standard::GreaterThanBitmapImpl }
-#if defined(ARROW_HAVE_RUNTIME_AVX2)
- , { DispatchLevel::AVX2, GreaterThanBitmapAvx2 }
-#endif
- };
- }
-};
-
-struct MinMaxDynamicFunction {
- using FunctionType = decltype(&FindMinMax);
-
- static std::vector<std::pair<DispatchLevel, FunctionType>> implementations() {
- return {
- { DispatchLevel::NONE, standard::FindMinMaxImpl }
-#if defined(ARROW_HAVE_RUNTIME_AVX2)
- , { DispatchLevel::AVX2, FindMinMaxAvx2 }
-#endif
- };
- }
-};
-
-} // namespace
-
-uint64_t GreaterThanBitmap(const int16_t* levels, int64_t num_levels, int16_t rhs) {
- static DynamicDispatch<GreaterThanDynamicFunction> dispatch;
- return dispatch.func(levels, num_levels, rhs);
-}
-
-MinMax FindMinMax(const int16_t* levels, int64_t num_levels) {
- static DynamicDispatch<MinMaxDynamicFunction> dispatch;
- return dispatch.func(levels, num_levels);
-}
-
-} // namespace internal
-} // namespace parquet
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "parquet/level_comparison.h"
+
+#define PARQUET_IMPL_NAMESPACE standard
+#include "parquet/level_comparison_inc.h"
+#undef PARQUET_IMPL_NAMESPACE
+
+#include <vector>
+
+#include "arrow/util/dispatch.h"
+
+namespace parquet {
+namespace internal {
+
+#if defined(ARROW_HAVE_RUNTIME_AVX2)
+MinMax FindMinMaxAvx2(const int16_t* levels, int64_t num_levels);
+uint64_t GreaterThanBitmapAvx2(const int16_t* levels, int64_t num_levels, int16_t rhs);
+#endif
+
+namespace {
+
+using ::arrow::internal::DispatchLevel;
+using ::arrow::internal::DynamicDispatch;
+
+// The AVX2 variants declared above are defined in level_comparison_avx2.cc
+
+struct GreaterThanDynamicFunction {
+ using FunctionType = decltype(&GreaterThanBitmap);
+
+ static std::vector<std::pair<DispatchLevel, FunctionType>> implementations() {
+ return {
+ { DispatchLevel::NONE, standard::GreaterThanBitmapImpl }
+#if defined(ARROW_HAVE_RUNTIME_AVX2)
+ , { DispatchLevel::AVX2, GreaterThanBitmapAvx2 }
+#endif
+ };
+ }
+};
+
+struct MinMaxDynamicFunction {
+ using FunctionType = decltype(&FindMinMax);
+
+ static std::vector<std::pair<DispatchLevel, FunctionType>> implementations() {
+ return {
+ { DispatchLevel::NONE, standard::FindMinMaxImpl }
+#if defined(ARROW_HAVE_RUNTIME_AVX2)
+ , { DispatchLevel::AVX2, FindMinMaxAvx2 }
+#endif
+ };
+ }
+};
+
+} // namespace
+
+uint64_t GreaterThanBitmap(const int16_t* levels, int64_t num_levels, int16_t rhs) {
+ static DynamicDispatch<GreaterThanDynamicFunction> dispatch;
+ return dispatch.func(levels, num_levels, rhs);
+}
+
+MinMax FindMinMax(const int16_t* levels, int64_t num_levels) {
+ static DynamicDispatch<MinMaxDynamicFunction> dispatch;
+ return dispatch.func(levels, num_levels);
+}
+
+} // namespace internal
+} // namespace parquet
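
The exported functions above resolve their implementation exactly once: a function-local static DynamicDispatch walks the candidate list and keeps the highest DispatchLevel the running CPU supports, so every later call is a plain indirect call. The selection machinery itself lives in arrow/util/dispatch.h, which is not part of this diff; the following is a minimal self-contained sketch of the same pattern, with a simplified DispatchLevel enum and a placeholder CPU probe standing in for Arrow's real API.

#include <cstdint>
#include <utility>
#include <vector>

// Stand-in for the dispatch levels in arrow/util/dispatch.h.
enum class DispatchLevel { NONE = 0, AVX2 = 1 };

// Placeholder CPU probe; a real build would consult CpuInfo/CPUID.
inline bool CpuHasAvx2() { return false; }

// Picks the best supported implementation once, at construction.
template <typename DynamicFunction>
struct DynamicDispatchSketch {
  using FunctionType = typename DynamicFunction::FunctionType;
  FunctionType func = nullptr;

  DynamicDispatchSketch() {
    for (const auto& impl : DynamicFunction::implementations()) {
      if (impl.first == DispatchLevel::NONE ||
          (impl.first == DispatchLevel::AVX2 && CpuHasAvx2())) {
        func = impl.second;  // later entries are higher dispatch levels
      }
    }
  }
};

// Scalar fallback with the same shape as standard::GreaterThanBitmapImpl.
inline uint64_t GreaterThanScalar(const int16_t* levels, int64_t n, int16_t rhs) {
  uint64_t mask = 0;
  for (int64_t i = 0; i < n; ++i) {
    mask |= static_cast<uint64_t>(levels[i] > rhs) << i;
  }
  return mask;
}

struct GreaterThanSketch {
  using FunctionType = decltype(&GreaterThanScalar);
  static std::vector<std::pair<DispatchLevel, FunctionType>> implementations() {
    return {{DispatchLevel::NONE, GreaterThanScalar}};
  }
};

// Mirrors the exported functions above: resolve once, then call through.
uint64_t GreaterThanDispatched(const int16_t* levels, int64_t n, int16_t rhs) {
  static DynamicDispatchSketch<GreaterThanSketch> dispatch;
  return dispatch.func(levels, n, rhs);
}
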
diff --git a/contrib/libs/apache/arrow/cpp/src/parquet/level_comparison.h b/contrib/libs/apache/arrow/cpp/src/parquet/level_comparison.h
index 2097e4db8a0..38e7ef8e2ec 100644
--- a/contrib/libs/apache/arrow/cpp/src/parquet/level_comparison.h
+++ b/contrib/libs/apache/arrow/cpp/src/parquet/level_comparison.h
@@ -1,40 +1,40 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations
-// under the License.
-#pragma once
-
-#include <algorithm>
-#include <cstdint>
-
-#include "parquet/platform.h"
-
-namespace parquet {
-namespace internal {
-
-/// Builds a bitmap where each set bit indicates the corresponding level is greater
-/// than rhs.
-uint64_t PARQUET_EXPORT GreaterThanBitmap(const int16_t* levels, int64_t num_levels,
- int16_t rhs);
-
-struct MinMax {
- int16_t min;
- int16_t max;
-};
-
-MinMax FindMinMax(const int16_t* levels, int64_t num_levels);
-
-} // namespace internal
-} // namespace parquet
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+#pragma once
+
+#include <algorithm>
+#include <cstdint>
+
+#include "parquet/platform.h"
+
+namespace parquet {
+namespace internal {
+
+/// Builds a bitmap where each set bit indicates the corresponding level is greater
+/// than rhs.
+uint64_t PARQUET_EXPORT GreaterThanBitmap(const int16_t* levels, int64_t num_levels,
+ int16_t rhs);
+
+struct MinMax {
+ int16_t min;
+ int16_t max;
+};
+
+MinMax FindMinMax(const int16_t* levels, int64_t num_levels);
+
+} // namespace internal
+} // namespace parquet
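
A usage note for this header: the returned bitmap uses least-significant-bit ordering, so bit i of the result corresponds to levels[i]. A small illustrative check follows; the values are hypothetical, assume linking against the parquet library, and are not taken from any test in this change.

#include <cassert>
#include <cstdint>

#include "parquet/level_comparison.h"

int main() {
  const int16_t levels[] = {0, 2, 1, 3};
  // Bit i is set iff levels[i] > rhs, least-significant bit first:
  // {0, 2, 1, 3} with rhs = 1 gives bits {0, 1, 0, 1}, i.e. 0b1010.
  assert(parquet::internal::GreaterThanBitmap(levels, 4, 1) == 0b1010);

  parquet::internal::MinMax mm = parquet::internal::FindMinMax(levels, 4);
  assert(mm.min == 0 && mm.max == 3);
  return 0;
}
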
diff --git a/contrib/libs/apache/arrow/cpp/src/parquet/level_comparison_inc.h b/contrib/libs/apache/arrow/cpp/src/parquet/level_comparison_inc.h
index cc6bf382a50..e21c3e5824d 100644
--- a/contrib/libs/apache/arrow/cpp/src/parquet/level_comparison_inc.h
+++ b/contrib/libs/apache/arrow/cpp/src/parquet/level_comparison_inc.h
@@ -1,65 +1,65 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations
-// under the License.
-#pragma once
-
-#include "arrow/util/bit_util.h"
-#include "arrow/util/endian.h"
-#include "parquet/level_comparison.h"
-
-// Used to make sure ODR rule isn't violated.
-#ifndef PARQUET_IMPL_NAMESPACE
-#error "PARQUET_IMPL_NAMESPACE must be defined"
-#endif
-namespace parquet {
-namespace internal {
-namespace PARQUET_IMPL_NAMESPACE {
-/// Builds a bitmap by applying predicate to the level vector provided.
-///
-/// \param[in] levels Rep or def level array.
-/// \param[in] num_levels The number of levels to process (must be [0, 64])
-/// \param[in] predicate The predicate to apply (must have the signature `bool
-/// predicate(int16_t)`.
-/// \returns The bitmap using least significant "bit" ordering.
-///
-template <typename Predicate>
-inline uint64_t LevelsToBitmap(const int16_t* levels, int64_t num_levels,
- Predicate predicate) {
- // Both clang and GCC can vectorize this automatically with SSE4/AVX2.
- uint64_t mask = 0;
- for (int x = 0; x < num_levels; x++) {
- mask |= static_cast<uint64_t>(predicate(levels[x]) ? 1 : 0) << x;
- }
- return ::arrow::BitUtil::ToLittleEndian(mask);
-}
-
-inline MinMax FindMinMaxImpl(const int16_t* levels, int64_t num_levels) {
- MinMax out{std::numeric_limits<int16_t>::max(), std::numeric_limits<int16_t>::min()};
- for (int x = 0; x < num_levels; x++) {
- out.min = std::min(levels[x], out.min);
- out.max = std::max(levels[x], out.max);
- }
- return out;
-}
-
-inline uint64_t GreaterThanBitmapImpl(const int16_t* levels, int64_t num_levels,
- int16_t rhs) {
- return LevelsToBitmap(levels, num_levels, [rhs](int16_t value) { return value > rhs; });
-}
-
-} // namespace PARQUET_IMPL_NAMESPACE
-} // namespace internal
-} // namespace parquet
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+#pragma once
+
+#include "arrow/util/bit_util.h"
+#include "arrow/util/endian.h"
+#include "parquet/level_comparison.h"
+
+// Used to make sure the One Definition Rule (ODR) isn't violated.
+#ifndef PARQUET_IMPL_NAMESPACE
+#error "PARQUET_IMPL_NAMESPACE must be defined"
+#endif
+namespace parquet {
+namespace internal {
+namespace PARQUET_IMPL_NAMESPACE {
+/// Builds a bitmap by applying the predicate to the level vector provided.
+///
+/// \param[in] levels Rep or def level array.
+/// \param[in] num_levels The number of levels to process (must be in [0, 64]).
+/// \param[in] predicate The predicate to apply (must have the signature `bool
+/// predicate(int16_t)`).
+/// \returns The bitmap using least-significant-bit ordering.
+///
+template <typename Predicate>
+inline uint64_t LevelsToBitmap(const int16_t* levels, int64_t num_levels,
+ Predicate predicate) {
+ // Both clang and GCC can vectorize this automatically with SSE4/AVX2.
+ uint64_t mask = 0;
+ for (int x = 0; x < num_levels; x++) {
+ mask |= static_cast<uint64_t>(predicate(levels[x]) ? 1 : 0) << x;
+ }
+ return ::arrow::BitUtil::ToLittleEndian(mask);
+}
+
+inline MinMax FindMinMaxImpl(const int16_t* levels, int64_t num_levels) {
+ MinMax out{std::numeric_limits<int16_t>::max(), std::numeric_limits<int16_t>::min()};
+ for (int x = 0; x < num_levels; x++) {
+ out.min = std::min(levels[x], out.min);
+ out.max = std::max(levels[x], out.max);
+ }
+ return out;
+}
+
+inline uint64_t GreaterThanBitmapImpl(const int16_t* levels, int64_t num_levels,
+ int16_t rhs) {
+ return LevelsToBitmap(levels, num_levels, [rhs](int16_t value) { return value > rhs; });
+}
+
+} // namespace PARQUET_IMPL_NAMESPACE
+} // namespace internal
+} // namespace parquet
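
LevelsToBitmap is the scalar core that each SIMD translation unit recompiles under its own PARQUET_IMPL_NAMESPACE (hence the ODR guard above). Below is a standalone restatement of its contract, for illustration only; the ::arrow::BitUtil::ToLittleEndian step is omitted because it is a no-op on little-endian hosts.

#include <cassert>
#include <cstdint>

// Same loop shape as the template above; the ToLittleEndian call is dropped,
// which only matters on big-endian hosts.
template <typename Predicate>
uint64_t LevelsToBitmapSketch(const int16_t* levels, int64_t num_levels,
                              Predicate predicate) {
  uint64_t mask = 0;
  for (int64_t x = 0; x < num_levels; x++) {
    mask |= static_cast<uint64_t>(predicate(levels[x]) ? 1 : 0) << x;
  }
  return mask;
}

int main() {
  const int16_t levels[] = {1, 0, 2};
  // The predicate "level == 2" holds only at index 2, so only bit 2 is set.
  assert(LevelsToBitmapSketch(levels, 3, [](int16_t v) { return v == 2; }) == 0b100);
  return 0;
}
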
diff --git a/contrib/libs/apache/arrow/cpp/src/parquet/level_conversion.cc b/contrib/libs/apache/arrow/cpp/src/parquet/level_conversion.cc
index 998fd982fd7..ffdca476ddd 100644
--- a/contrib/libs/apache/arrow/cpp/src/parquet/level_conversion.cc
+++ b/contrib/libs/apache/arrow/cpp/src/parquet/level_conversion.cc
@@ -1,183 +1,183 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations
-// under the License.
-#include "parquet/level_conversion.h"
-
-#include <algorithm>
-#include <limits>
-
-#include "arrow/util/bit_run_reader.h"
-#include "arrow/util/bit_util.h"
-#include "arrow/util/cpu_info.h"
-#include "arrow/util/logging.h"
-#include "arrow/util/optional.h"
-#include "parquet/exception.h"
-
-#include "parquet/level_comparison.h"
-#define PARQUET_IMPL_NAMESPACE standard
-#include "parquet/level_conversion_inc.h"
-#undef PARQUET_IMPL_NAMESPACE
-
-namespace parquet {
-namespace internal {
-namespace {
-
-using ::arrow::internal::CpuInfo;
-using ::arrow::util::optional;
-
-template <typename OffsetType>
-void DefRepLevelsToListInfo(const int16_t* def_levels, const int16_t* rep_levels,
- int64_t num_def_levels, LevelInfo level_info,
- ValidityBitmapInputOutput* output, OffsetType* offsets) {
- OffsetType* orig_pos = offsets;
- optional<::arrow::internal::FirstTimeBitmapWriter> valid_bits_writer;
- if (output->valid_bits) {
- valid_bits_writer.emplace(output->valid_bits, output->valid_bits_offset,
- output->values_read_upper_bound);
- }
- for (int x = 0; x < num_def_levels; x++) {
- // Skip items that belong to empty or null ancestor lists and further nested lists.
- if (def_levels[x] < level_info.repeated_ancestor_def_level ||
- rep_levels[x] > level_info.rep_level) {
- continue;
- }
-
- if (rep_levels[x] == level_info.rep_level) {
- // A continuation of an existing list.
- // offsets can be null for structs with repeated children (we don't need to know
- // offsets until we get to the children).
- if (offsets != nullptr) {
- if (ARROW_PREDICT_FALSE(*offsets == std::numeric_limits<OffsetType>::max())) {
- throw ParquetException("List index overflow.");
- }
- *offsets += 1;
- }
- } else {
- if (ARROW_PREDICT_FALSE(
- (valid_bits_writer.has_value() &&
- valid_bits_writer->position() >= output->values_read_upper_bound) ||
- (offsets - orig_pos) >= output->values_read_upper_bound)) {
- std::stringstream ss;
- ss << "Definition levels exceeded upper bound: "
- << output->values_read_upper_bound;
- throw ParquetException(ss.str());
- }
-
- // current_rep < list rep_level i.e. start of a list (ancestor empty lists are
- // filtered out above).
- // offsets can be null for structs with repeated children (we don't need to know
- // offsets until we get to the children).
- if (offsets != nullptr) {
- ++offsets;
- // Use cumulative offsets because variable size lists are more common then
- // fixed size lists so it should be cheaper to make these cumulative and
- // subtract when validating fixed size lists.
- *offsets = *(offsets - 1);
- if (def_levels[x] >= level_info.def_level) {
- if (ARROW_PREDICT_FALSE(*offsets == std::numeric_limits<OffsetType>::max())) {
- throw ParquetException("List index overflow.");
- }
- *offsets += 1;
- }
- }
-
- if (valid_bits_writer.has_value()) {
- // the level_info def level for lists reflects element present level.
- // the prior level distinguishes between empty lists.
- if (def_levels[x] >= level_info.def_level - 1) {
- valid_bits_writer->Set();
- } else {
- output->null_count++;
- valid_bits_writer->Clear();
- }
- valid_bits_writer->Next();
- }
- }
- }
- if (valid_bits_writer.has_value()) {
- valid_bits_writer->Finish();
- }
- if (offsets != nullptr) {
- output->values_read = offsets - orig_pos;
- } else if (valid_bits_writer.has_value()) {
- output->values_read = valid_bits_writer->position();
- }
- if (output->null_count > 0 && level_info.null_slot_usage > 1) {
- throw ParquetException(
- "Null values with null_slot_usage > 1 not supported."
- "(i.e. FixedSizeLists with null values are not supported)");
- }
-}
-
-} // namespace
-
-#if defined(ARROW_HAVE_RUNTIME_BMI2)
-// defined in level_conversion_bmi2.cc for dynamic dispatch.
-void DefLevelsToBitmapBmi2WithRepeatedParent(const int16_t* def_levels,
- int64_t num_def_levels, LevelInfo level_info,
- ValidityBitmapInputOutput* output);
-#endif
-
-void DefLevelsToBitmap(const int16_t* def_levels, int64_t num_def_levels,
- LevelInfo level_info, ValidityBitmapInputOutput* output) {
- // It is simpler to rely on rep_level here until PARQUET-1899 is done and the code
- // is deleted in a follow-up release.
- if (level_info.rep_level > 0) {
-#if defined(ARROW_HAVE_RUNTIME_BMI2)
- if (CpuInfo::GetInstance()->HasEfficientBmi2()) {
- return DefLevelsToBitmapBmi2WithRepeatedParent(def_levels, num_def_levels,
- level_info, output);
- }
-#endif
- standard::DefLevelsToBitmapSimd</*has_repeated_parent=*/true>(
- def_levels, num_def_levels, level_info, output);
- } else {
- standard::DefLevelsToBitmapSimd</*has_repeated_parent=*/false>(
- def_levels, num_def_levels, level_info, output);
- }
-}
-
-uint64_t TestOnlyExtractBitsSoftware(uint64_t bitmap, uint64_t select_bitmap) {
- return standard::ExtractBitsSoftware(bitmap, select_bitmap);
-}
-
-void DefRepLevelsToList(const int16_t* def_levels, const int16_t* rep_levels,
- int64_t num_def_levels, LevelInfo level_info,
- ValidityBitmapInputOutput* output, int32_t* offsets) {
- DefRepLevelsToListInfo<int32_t>(def_levels, rep_levels, num_def_levels, level_info,
- output, offsets);
-}
-
-void DefRepLevelsToList(const int16_t* def_levels, const int16_t* rep_levels,
- int64_t num_def_levels, LevelInfo level_info,
- ValidityBitmapInputOutput* output, int64_t* offsets) {
- DefRepLevelsToListInfo<int64_t>(def_levels, rep_levels, num_def_levels, level_info,
- output, offsets);
-}
-
-void DefRepLevelsToBitmap(const int16_t* def_levels, const int16_t* rep_levels,
- int64_t num_def_levels, LevelInfo level_info,
- ValidityBitmapInputOutput* output) {
- // DefReplevelsToListInfo assumes it for the actual list method and this
- // method is for parent structs, so we need to bump def and ref level.
- level_info.rep_level += 1;
- level_info.def_level += 1;
- DefRepLevelsToListInfo<int32_t>(def_levels, rep_levels, num_def_levels, level_info,
- output, /*offsets=*/nullptr);
-}
-
-} // namespace internal
-} // namespace parquet
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+#include "parquet/level_conversion.h"
+
+#include <algorithm>
+#include <limits>
+
+#include "arrow/util/bit_run_reader.h"
+#include "arrow/util/bit_util.h"
+#include "arrow/util/cpu_info.h"
+#include "arrow/util/logging.h"
+#include "arrow/util/optional.h"
+#include "parquet/exception.h"
+
+#include "parquet/level_comparison.h"
+#define PARQUET_IMPL_NAMESPACE standard
+#include "parquet/level_conversion_inc.h"
+#undef PARQUET_IMPL_NAMESPACE
+
+namespace parquet {
+namespace internal {
+namespace {
+
+using ::arrow::internal::CpuInfo;
+using ::arrow::util::optional;
+
+template <typename OffsetType>
+void DefRepLevelsToListInfo(const int16_t* def_levels, const int16_t* rep_levels,
+ int64_t num_def_levels, LevelInfo level_info,
+ ValidityBitmapInputOutput* output, OffsetType* offsets) {
+ OffsetType* orig_pos = offsets;
+ optional<::arrow::internal::FirstTimeBitmapWriter> valid_bits_writer;
+ if (output->valid_bits) {
+ valid_bits_writer.emplace(output->valid_bits, output->valid_bits_offset,
+ output->values_read_upper_bound);
+ }
+ for (int x = 0; x < num_def_levels; x++) {
+ // Skip items that belong to empty or null ancestor lists and further nested lists.
+ if (def_levels[x] < level_info.repeated_ancestor_def_level ||
+ rep_levels[x] > level_info.rep_level) {
+ continue;
+ }
+
+ if (rep_levels[x] == level_info.rep_level) {
+ // A continuation of an existing list.
+ // offsets can be null for structs with repeated children (we don't need to know
+ // offsets until we get to the children).
+ if (offsets != nullptr) {
+ if (ARROW_PREDICT_FALSE(*offsets == std::numeric_limits<OffsetType>::max())) {
+ throw ParquetException("List index overflow.");
+ }
+ *offsets += 1;
+ }
+ } else {
+ if (ARROW_PREDICT_FALSE(
+ (valid_bits_writer.has_value() &&
+ valid_bits_writer->position() >= output->values_read_upper_bound) ||
+ (offsets - orig_pos) >= output->values_read_upper_bound)) {
+ std::stringstream ss;
+ ss << "Definition levels exceeded upper bound: "
+ << output->values_read_upper_bound;
+ throw ParquetException(ss.str());
+ }
+
+ // current_rep < list rep_level, i.e. the start of a list (ancestor empty
+ // lists are filtered out above).
+ // offsets can be null for structs with repeated children (we don't need to know
+ // offsets until we get to the children).
+ if (offsets != nullptr) {
+ ++offsets;
+ // Use cumulative offsets because variable size lists are more common than
+ // fixed size lists, so it should be cheaper to make these cumulative and
+ // subtract when validating fixed size lists.
+ *offsets = *(offsets - 1);
+ if (def_levels[x] >= level_info.def_level) {
+ if (ARROW_PREDICT_FALSE(*offsets == std::numeric_limits<OffsetType>::max())) {
+ throw ParquetException("List index overflow.");
+ }
+ *offsets += 1;
+ }
+ }
+
+ if (valid_bits_writer.has_value()) {
+ // The level_info def level for lists reflects the element-present level;
+ // the prior level distinguishes empty from non-empty lists.
+ if (def_levels[x] >= level_info.def_level - 1) {
+ valid_bits_writer->Set();
+ } else {
+ output->null_count++;
+ valid_bits_writer->Clear();
+ }
+ valid_bits_writer->Next();
+ }
+ }
+ }
+ if (valid_bits_writer.has_value()) {
+ valid_bits_writer->Finish();
+ }
+ if (offsets != nullptr) {
+ output->values_read = offsets - orig_pos;
+ } else if (valid_bits_writer.has_value()) {
+ output->values_read = valid_bits_writer->position();
+ }
+ if (output->null_count > 0 && level_info.null_slot_usage > 1) {
+ throw ParquetException(
+ "Null values with null_slot_usage > 1 not supported. "
+ "(i.e. FixedSizeLists with null values are not supported)");
+ }
+}
+
+} // namespace
+
+#if defined(ARROW_HAVE_RUNTIME_BMI2)
+// defined in level_conversion_bmi2.cc for dynamic dispatch.
+void DefLevelsToBitmapBmi2WithRepeatedParent(const int16_t* def_levels,
+ int64_t num_def_levels, LevelInfo level_info,
+ ValidityBitmapInputOutput* output);
+#endif
+
+void DefLevelsToBitmap(const int16_t* def_levels, int64_t num_def_levels,
+ LevelInfo level_info, ValidityBitmapInputOutput* output) {
+ // It is simpler to rely on rep_level here until PARQUET-1899 is done and the code
+ // is deleted in a follow-up release.
+ if (level_info.rep_level > 0) {
+#if defined(ARROW_HAVE_RUNTIME_BMI2)
+ if (CpuInfo::GetInstance()->HasEfficientBmi2()) {
+ return DefLevelsToBitmapBmi2WithRepeatedParent(def_levels, num_def_levels,
+ level_info, output);
+ }
+#endif
+ standard::DefLevelsToBitmapSimd</*has_repeated_parent=*/true>(
+ def_levels, num_def_levels, level_info, output);
+ } else {
+ standard::DefLevelsToBitmapSimd</*has_repeated_parent=*/false>(
+ def_levels, num_def_levels, level_info, output);
+ }
+}
+
+uint64_t TestOnlyExtractBitsSoftware(uint64_t bitmap, uint64_t select_bitmap) {
+ return standard::ExtractBitsSoftware(bitmap, select_bitmap);
+}
+
+void DefRepLevelsToList(const int16_t* def_levels, const int16_t* rep_levels,
+ int64_t num_def_levels, LevelInfo level_info,
+ ValidityBitmapInputOutput* output, int32_t* offsets) {
+ DefRepLevelsToListInfo<int32_t>(def_levels, rep_levels, num_def_levels, level_info,
+ output, offsets);
+}
+
+void DefRepLevelsToList(const int16_t* def_levels, const int16_t* rep_levels,
+ int64_t num_def_levels, LevelInfo level_info,
+ ValidityBitmapInputOutput* output, int64_t* offsets) {
+ DefRepLevelsToListInfo<int64_t>(def_levels, rep_levels, num_def_levels, level_info,
+ output, offsets);
+}
+
+void DefRepLevelsToBitmap(const int16_t* def_levels, const int16_t* rep_levels,
+ int64_t num_def_levels, LevelInfo level_info,
+ ValidityBitmapInputOutput* output) {
+ // DefRepLevelsToListInfo assumes the levels are for an actual list, while
+ // this method is for parent structs, so we need to bump the def and rep levels.
+ level_info.rep_level += 1;
+ level_info.def_level += 1;
+ DefRepLevelsToListInfo<int32_t>(def_levels, rep_levels, num_def_levels, level_info,
+ output, /*offsets=*/nullptr);
+}
+
+} // namespace internal
+} // namespace parquet
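
A sketch of the calling convention for DefRepLevelsToList, which is easy to get wrong: the caller sizes offsets to 1 + values_read_upper_bound, seeds offsets[0], and provides the validity bitmap. The schema and level values below are an assumed example (an optional list of optional int32 in the usual 3-level encoding: def 0 = null list, 1 = empty list, 2 = null element, 3 = present element), not something exercised by this diff.

#include <cassert>
#include <cstdint>

#include "parquet/level_conversion.h"

int main() {
  using parquet::internal::DefRepLevelsToList;
  using parquet::internal::LevelInfo;
  using parquet::internal::ValidityBitmapInputOutput;

  // Four records, one top-level list each: null, [], [null], [42].
  const int16_t def_levels[] = {0, 1, 2, 3};
  const int16_t rep_levels[] = {0, 0, 0, 0};

  LevelInfo level_info;
  level_info.rep_level = 1;                    // the list's own repetition level
  level_info.def_level = 2;                    // "element present" level for the list
  level_info.repeated_ancestor_def_level = 0;  // top-level field

  uint8_t valid_bits[1] = {0};
  int32_t offsets[5] = {0};  // 1 + values_read_upper_bound; offsets[0] seeded

  ValidityBitmapInputOutput out;
  out.values_read_upper_bound = 4;
  out.valid_bits = valid_bits;

  DefRepLevelsToList(def_levels, rep_levels, /*num_def_levels=*/4, level_info,
                     &out, offsets);

  assert(out.values_read == 4);
  assert(out.null_count == 1);                 // only the def == 0 record is null
  assert(offsets[1] == 0 && offsets[2] == 0);  // null and empty lists add nothing
  assert(offsets[3] == 1 && offsets[4] == 2);  // the two one-element lists
  assert((valid_bits[0] & 0xF) == 0b1110);     // [null, valid, valid, valid]
  return 0;
}
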
diff --git a/contrib/libs/apache/arrow/cpp/src/parquet/level_conversion.h b/contrib/libs/apache/arrow/cpp/src/parquet/level_conversion.h
index fa6a23c1d9b..e45a288e8c0 100644
--- a/contrib/libs/apache/arrow/cpp/src/parquet/level_conversion.h
+++ b/contrib/libs/apache/arrow/cpp/src/parquet/level_conversion.h
@@ -1,199 +1,199 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-#pragma once
-
-#include <cstdint>
-
-#include "arrow/util/endian.h"
-#include "parquet/platform.h"
-#include "parquet/schema.h"
-
-namespace parquet {
-namespace internal {
-
-struct PARQUET_EXPORT LevelInfo {
- LevelInfo()
- : null_slot_usage(1), def_level(0), rep_level(0), repeated_ancestor_def_level(0) {}
- LevelInfo(int32_t null_slots, int32_t definition_level, int32_t repetition_level,
- int32_t repeated_ancestor_definition_level)
- : null_slot_usage(null_slots),
- def_level(definition_level),
- rep_level(repetition_level),
- repeated_ancestor_def_level(repeated_ancestor_definition_level) {}
-
- bool operator==(const LevelInfo& b) const {
- return null_slot_usage == b.null_slot_usage && def_level == b.def_level &&
- rep_level == b.rep_level &&
- repeated_ancestor_def_level == b.repeated_ancestor_def_level;
- }
-
- bool HasNullableValues() const { return repeated_ancestor_def_level < def_level; }
-
- // How many slots an undefined but present (i.e. null) element in
- // parquet consumes when decoding to Arrow.
- // "Slot" is used in the same context as the Arrow specification
- // (i.e. a value holder).
- // This is only ever >1 for descendents of FixedSizeList.
- int32_t null_slot_usage = 1;
-
- // The definition level at which the value for the field
- // is considered not null (definition levels greater than
- // or equal to this value indicate a not-null
- // value for the field). For list fields definition levels
- // greater than or equal to this field indicate a present,
- // possibly null, child value.
- int16_t def_level = 0;
-
- // The repetition level corresponding to this element
- // or the closest repeated ancestor. Any repetition
- // level less than this indicates either a new list OR
- // an empty list (which is determined in conjunction
- // with definition levels).
- int16_t rep_level = 0;
-
- // The definition level indicating the level at which the closest
- // repeated ancestor is not empty. This is used to discriminate
- // between a value less than |def_level| being null or excluded entirely.
- // For instance if we have an arrow schema like:
- // list(struct(f0: int)). Then then there are the following
- // definition levels:
- // 0 = null list
- // 1 = present but empty list.
- // 2 = a null value in the list
- // 3 = a non null struct but null integer.
- // 4 = a present integer.
- // When reconstructing, the struct and integer arrays'
- // repeated_ancestor_def_level would be 2. Any
- // def_level < 2 indicates that there isn't a corresponding
- // child value in the list.
- // i.e. [null, [], [null], [{f0: null}], [{f0: 1}]]
- // has the def levels [0, 1, 2, 3, 4]. The actual
- // struct array is only of length 3: [not-set, set, set] and
- // the int array is also of length 3: [N/A, null, 1].
- //
- int16_t repeated_ancestor_def_level = 0;
-
- /// Increments levels according to the cardinality of node.
- void Increment(const schema::Node& node) {
- if (node.is_repeated()) {
- IncrementRepeated();
- return;
- }
- if (node.is_optional()) {
- IncrementOptional();
- return;
- }
- }
-
- /// Incremetns level for a optional node.
- void IncrementOptional() { def_level++; }
-
- /// Increments levels for the repeated node. Returns
- /// the previous ancestor_list_def_level.
- int16_t IncrementRepeated() {
- int16_t last_repeated_ancestor = repeated_ancestor_def_level;
-
- // Repeated fields add both a repetition and definition level. This is used
- // to distinguish between an empty list and a list with an item in it.
- ++rep_level;
- ++def_level;
- // For levels >= repeated_ancenstor_def_level it indicates the list was
- // non-null and had at least one element. This is important
- // for later decoding because we need to add a slot for these
- // values. for levels < current_def_level no slots are added
- // to arrays.
- repeated_ancestor_def_level = def_level;
- return last_repeated_ancestor;
- }
-
- friend std::ostream& operator<<(std::ostream& os, const LevelInfo& levels) {
- // This print method is to silence valgrind issues. What's printed
- // is not important because all asserts happen directly on
- // members.
- os << "{def=" << levels.def_level << ", rep=" << levels.rep_level
- << ", repeated_ancestor_def=" << levels.repeated_ancestor_def_level;
- if (levels.null_slot_usage > 1) {
- os << ", null_slot_usage=" << levels.null_slot_usage;
- }
- os << "}";
- return os;
- }
-};
-
-// Input/Output structure for reconstructed validity bitmaps.
-struct PARQUET_EXPORT ValidityBitmapInputOutput {
- // Input only.
- // The maximum number of values_read expected (actual
- // values read must be less than or equal to this value).
- // If this number is exceeded methods will throw a
- // ParquetException. Exceeding this limit indicates
- // either a corrupt or incorrectly written file.
- int64_t values_read_upper_bound = 0;
- // Output only. The number of values added to the encountered
- // (this is logically the count of the number of elements
- // for an Arrow array).
- int64_t values_read = 0;
- // Input/Output. The number of nulls encountered.
- int64_t null_count = 0;
- // Output only. The validity bitmap to populate. May be be null only
- // for DefRepLevelsToListInfo (if all that is needed is list offsets).
- uint8_t* valid_bits = NULLPTR;
- // Input only, offset into valid_bits to start at.
- int64_t valid_bits_offset = 0;
-};
-
-// Converts def_levels to validity bitmaps for non-list arrays and structs that have
-// at least one member that is not a list and has no list descendents.
-// For lists use DefRepLevelsToList and structs where all descendants contain
-// a list use DefRepLevelsToBitmap.
-void PARQUET_EXPORT DefLevelsToBitmap(const int16_t* def_levels, int64_t num_def_levels,
- LevelInfo level_info,
- ValidityBitmapInputOutput* output);
-
-// Reconstructs a validity bitmap and list offsets for a list arrays based on
-// def/rep levels. The first element of offsets will not be modified if rep_levels
-// starts with a new list. The first element of offsets will be used when calculating
-// the next offset. See documentation onf DefLevelsToBitmap for when to use this
-// method vs the other ones in this file for reconstruction.
-//
-// Offsets must be sized to 1 + values_read_upper_bound.
-void PARQUET_EXPORT DefRepLevelsToList(const int16_t* def_levels,
- const int16_t* rep_levels, int64_t num_def_levels,
- LevelInfo level_info,
- ValidityBitmapInputOutput* output,
- int32_t* offsets);
-void PARQUET_EXPORT DefRepLevelsToList(const int16_t* def_levels,
- const int16_t* rep_levels, int64_t num_def_levels,
- LevelInfo level_info,
- ValidityBitmapInputOutput* output,
- int64_t* offsets);
-
-// Reconstructs a validity bitmap for a struct every member is a list or has
-// a list descendant. See documentation on DefLevelsToBitmap for when more
-// details on this method compared to the other ones defined above.
-void PARQUET_EXPORT DefRepLevelsToBitmap(const int16_t* def_levels,
- const int16_t* rep_levels,
- int64_t num_def_levels, LevelInfo level_info,
- ValidityBitmapInputOutput* output);
-
-// This is exposed to ensure we can properly test a software simulated pext function
-// (i.e. it isn't hidden by runtime dispatch).
-uint64_t PARQUET_EXPORT TestOnlyExtractBitsSoftware(uint64_t bitmap, uint64_t selection);
-
-} // namespace internal
-} // namespace parquet
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+
+#include <cstdint>
+
+#include "arrow/util/endian.h"
+#include "parquet/platform.h"
+#include "parquet/schema.h"
+
+namespace parquet {
+namespace internal {
+
+struct PARQUET_EXPORT LevelInfo {
+ LevelInfo()
+ : null_slot_usage(1), def_level(0), rep_level(0), repeated_ancestor_def_level(0) {}
+ LevelInfo(int32_t null_slots, int32_t definition_level, int32_t repetition_level,
+ int32_t repeated_ancestor_definition_level)
+ : null_slot_usage(null_slots),
+ def_level(definition_level),
+ rep_level(repetition_level),
+ repeated_ancestor_def_level(repeated_ancestor_definition_level) {}
+
+ bool operator==(const LevelInfo& b) const {
+ return null_slot_usage == b.null_slot_usage && def_level == b.def_level &&
+ rep_level == b.rep_level &&
+ repeated_ancestor_def_level == b.repeated_ancestor_def_level;
+ }
+
+ bool HasNullableValues() const { return repeated_ancestor_def_level < def_level; }
+
+ // How many slots an undefined but present (i.e. null) element in
+ // parquet consumes when decoding to Arrow.
+ // "Slot" is used in the same context as the Arrow specification
+ // (i.e. a value holder).
+ // This is only ever >1 for descendants of FixedSizeList.
+ int32_t null_slot_usage = 1;
+
+ // The definition level at which the value for the field
+ // is considered not null (definition levels greater than
+ // or equal to this value indicate a not-null
+ // value for the field). For list fields definition levels
+ // greater than or equal to this value indicate a present,
+ // possibly null, child value.
+ int16_t def_level = 0;
+
+ // The repetition level corresponding to this element
+ // or the closest repeated ancestor. Any repetition
+ // level less than this indicates either a new list OR
+ // an empty list (which is determined in conjunction
+ // with definition levels).
+ int16_t rep_level = 0;
+
+ // The definition level indicating the level at which the closest
+ // repeated ancestor is not empty. This is used to discriminate
+ // between a value less than |def_level| being null or excluded entirely.
+ // For instance, if we have an arrow schema like:
+ // list(struct(f0: int)), then there are the following
+ // definition levels:
+ //  0 = a null list.
+ //  1 = a present but empty list.
+ //  2 = a null value in the list.
+ //  3 = a non-null struct but a null integer.
+ //  4 = a present integer.
+ // When reconstructing, the struct and integer arrays'
+ // repeated_ancestor_def_level would be 2. Any
+ // def_level < 2 indicates that there isn't a corresponding
+ // child value in the list.
+ // i.e. [null, [], [null], [{f0: null}], [{f0: 1}]]
+ // has the def levels [0, 1, 2, 3, 4]. The actual
+ // struct array is only of length 3: [not-set, set, set] and
+ // the int array is also of length 3: [N/A, null, 1].
+ //
+ int16_t repeated_ancestor_def_level = 0;
+
+ /// Increments levels according to the cardinality of the node.
+ void Increment(const schema::Node& node) {
+ if (node.is_repeated()) {
+ IncrementRepeated();
+ return;
+ }
+ if (node.is_optional()) {
+ IncrementOptional();
+ return;
+ }
+ }
+
+ /// Increments the definition level for an optional node.
+ void IncrementOptional() { def_level++; }
+
+ /// Increments levels for the repeated node. Returns
+ /// the previous repeated_ancestor_def_level.
+ int16_t IncrementRepeated() {
+ int16_t last_repeated_ancestor = repeated_ancestor_def_level;
+
+ // Repeated fields add both a repetition and definition level. This is used
+ // to distinguish between an empty list and a list with an item in it.
+ ++rep_level;
+ ++def_level;
+ // Levels >= repeated_ancestor_def_level indicate the list was
+ // non-null and had at least one element. This is important
+ // for later decoding because we need to add a slot for these
+ // values. For levels < the current def_level no slots are added
+ // to arrays.
+ repeated_ancestor_def_level = def_level;
+ return last_repeated_ancestor;
+ }
+
+ friend std::ostream& operator<<(std::ostream& os, const LevelInfo& levels) {
+ // This print method is to silence valgrind issues. What's printed
+ // is not important because all asserts happen directly on
+ // members.
+ os << "{def=" << levels.def_level << ", rep=" << levels.rep_level
+ << ", repeated_ancestor_def=" << levels.repeated_ancestor_def_level;
+ if (levels.null_slot_usage > 1) {
+ os << ", null_slot_usage=" << levels.null_slot_usage;
+ }
+ os << "}";
+ return os;
+ }
+};
+
+// Input/Output structure for reconstructed validity bitmaps.
+struct PARQUET_EXPORT ValidityBitmapInputOutput {
+ // Input only.
+ // The maximum number of values_read expected (actual
+ // values read must be less than or equal to this value).
+ // If this number is exceeded methods will throw a
+ // ParquetException. Exceeding this limit indicates
+ // either a corrupt or incorrectly written file.
+ int64_t values_read_upper_bound = 0;
+ // Output only. The number of values encountered (this is
+ // logically the element count of the corresponding
+ // Arrow array).
+ int64_t values_read = 0;
+ // Input/Output. The number of nulls encountered.
+ int64_t null_count = 0;
+ // Output only. The validity bitmap to populate. May be null only
+ // for DefRepLevelsToListInfo (if all that is needed is list offsets).
+ uint8_t* valid_bits = NULLPTR;
+ // Input only, offset into valid_bits to start at.
+ int64_t valid_bits_offset = 0;
+};
+
+// Converts def_levels to validity bitmaps for non-list arrays and for structs
+// that have at least one member that is not a list and has no list descendants.
+// For lists use DefRepLevelsToList; for structs where all descendants contain
+// a list use DefRepLevelsToBitmap.
+void PARQUET_EXPORT DefLevelsToBitmap(const int16_t* def_levels, int64_t num_def_levels,
+ LevelInfo level_info,
+ ValidityBitmapInputOutput* output);
+
+// Reconstructs a validity bitmap and list offsets for a list array based on
+// def/rep levels. The first element of offsets will not be modified if rep_levels
+// starts with a new list. The first element of offsets will be used when calculating
+// the next offset. See the documentation of DefLevelsToBitmap for when to use this
+// method vs the other ones in this file for reconstruction.
+//
+// Offsets must be sized to 1 + values_read_upper_bound.
+void PARQUET_EXPORT DefRepLevelsToList(const int16_t* def_levels,
+ const int16_t* rep_levels, int64_t num_def_levels,
+ LevelInfo level_info,
+ ValidityBitmapInputOutput* output,
+ int32_t* offsets);
+void PARQUET_EXPORT DefRepLevelsToList(const int16_t* def_levels,
+ const int16_t* rep_levels, int64_t num_def_levels,
+ LevelInfo level_info,
+ ValidityBitmapInputOutput* output,
+ int64_t* offsets);
+
+// Reconstructs a validity bitmap for a struct whose every member is a list or
+// has a list descendant. See the documentation of DefLevelsToBitmap for more
+// details on this method compared to the other ones defined above.
+void PARQUET_EXPORT DefRepLevelsToBitmap(const int16_t* def_levels,
+ const int16_t* rep_levels,
+ int64_t num_def_levels, LevelInfo level_info,
+ ValidityBitmapInputOutput* output);
+
+// This is exposed to ensure we can properly test a software simulated pext function
+// (i.e. it isn't hidden by runtime dispatch).
+uint64_t PARQUET_EXPORT TestOnlyExtractBitsSoftware(uint64_t bitmap, uint64_t selection);
+
+} // namespace internal
+} // namespace parquet
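
The list(struct(f0: int)) example in the LevelInfo comments can be checked mechanically through DefRepLevelsToBitmap. The LevelInfo values below are the ones that example implies for the struct child; treat this as an illustrative harness under those assumptions, not a test from this change.

#include <cassert>
#include <cstdint>

#include "parquet/level_conversion.h"

int main() {
  using parquet::internal::DefRepLevelsToBitmap;
  using parquet::internal::LevelInfo;
  using parquet::internal::ValidityBitmapInputOutput;

  // [null, [], [null], [{f0: null}], [{f0: 1}]] -> def levels 0..4, no repeats.
  const int16_t def_levels[] = {0, 1, 2, 3, 4};
  const int16_t rep_levels[] = {0, 0, 0, 0, 0};

  LevelInfo struct_info;
  struct_info.rep_level = 1;                    // the enclosing list
  struct_info.def_level = 3;                    // struct present
  struct_info.repeated_ancestor_def_level = 2;  // list present and non-empty

  uint8_t valid_bits[1] = {0};
  ValidityBitmapInputOutput out;
  out.values_read_upper_bound = 5;
  out.valid_bits = valid_bits;

  DefRepLevelsToBitmap(def_levels, rep_levels, /*num_def_levels=*/5, struct_info,
                       &out);

  // Only three struct slots exist; def levels 0 and 1 produce no slot at all.
  assert(out.values_read == 3);
  assert(out.null_count == 1);             // the def == 2 entry: a null struct
  assert((valid_bits[0] & 0x7) == 0b110);  // [not-set, set, set]
  return 0;
}
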
diff --git a/contrib/libs/apache/arrow/cpp/src/parquet/level_conversion_inc.h b/contrib/libs/apache/arrow/cpp/src/parquet/level_conversion_inc.h
index fd06b7334dd..75c7716c483 100644
--- a/contrib/libs/apache/arrow/cpp/src/parquet/level_conversion_inc.h
+++ b/contrib/libs/apache/arrow/cpp/src/parquet/level_conversion_inc.h
@@ -1,357 +1,357 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations
-// under the License.
-#pragma once
-
-#include "parquet/level_conversion.h"
-
-#include <algorithm>
-#include <cstdint>
-#include <limits>
-
-#include "arrow/util/bit_run_reader.h"
-#include "arrow/util/bit_util.h"
-#include "arrow/util/bitmap_writer.h"
-#include "arrow/util/logging.h"
-#include "arrow/util/simd.h"
-#include "parquet/exception.h"
-#include "parquet/level_comparison.h"
-
-namespace parquet {
-namespace internal {
-#ifndef PARQUET_IMPL_NAMESPACE
-#error "PARQUET_IMPL_NAMESPACE must be defined"
-#endif
-namespace PARQUET_IMPL_NAMESPACE {
-
-// clang-format off
-/* Python code to generate lookup table:
-
-kLookupBits = 5
-count = 0
-print('constexpr int kLookupBits = {};'.format(kLookupBits))
-print('constexpr uint8_t kPextTable[1 << kLookupBits][1 << kLookupBits] = {')
-print(' ', end = '')
-for mask in range(1 << kLookupBits):
- for data in range(1 << kLookupBits):
- bit_value = 0
- bit_len = 0
- for i in range(kLookupBits):
- if mask & (1 << i):
- bit_value |= (((data >> i) & 1) << bit_len)
- bit_len += 1
- out = '0x{:02X},'.format(bit_value)
- count += 1
- if count % (1 << kLookupBits) == 1:
- print(' {')
- if count % 8 == 1:
- print(' ', end = '')
- if count % 8 == 0:
- print(out, end = '\n')
- else:
- print(out, end = ' ')
- if count % (1 << kLookupBits) == 0:
- print(' },', end = '')
-print('\n};')
-
-*/
-// clang-format on
-
-constexpr int kLookupBits = 5;
-constexpr uint8_t kPextTable[1 << kLookupBits][1 << kLookupBits] = {
- {
- 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
- 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
- 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
- },
- {
- 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00,
- 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01,
- 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01,
- },
- {
- 0x00, 0x00, 0x01, 0x01, 0x00, 0x00, 0x01, 0x01, 0x00, 0x00, 0x01,
- 0x01, 0x00, 0x00, 0x01, 0x01, 0x00, 0x00, 0x01, 0x01, 0x00, 0x00,
- 0x01, 0x01, 0x00, 0x00, 0x01, 0x01, 0x00, 0x00, 0x01, 0x01,
- },
- {
- 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02,
- 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01,
- 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03,
- },
- {
- 0x00, 0x00, 0x00, 0x00, 0x01, 0x01, 0x01, 0x01, 0x00, 0x00, 0x00,
- 0x00, 0x01, 0x01, 0x01, 0x01, 0x00, 0x00, 0x00, 0x00, 0x01, 0x01,
- 0x01, 0x01, 0x00, 0x00, 0x00, 0x00, 0x01, 0x01, 0x01, 0x01,
- },
- {
- 0x00, 0x01, 0x00, 0x01, 0x02, 0x03, 0x02, 0x03, 0x00, 0x01, 0x00,
- 0x01, 0x02, 0x03, 0x02, 0x03, 0x00, 0x01, 0x00, 0x01, 0x02, 0x03,
- 0x02, 0x03, 0x00, 0x01, 0x00, 0x01, 0x02, 0x03, 0x02, 0x03,
- },
- {
- 0x00, 0x00, 0x01, 0x01, 0x02, 0x02, 0x03, 0x03, 0x00, 0x00, 0x01,
- 0x01, 0x02, 0x02, 0x03, 0x03, 0x00, 0x00, 0x01, 0x01, 0x02, 0x02,
- 0x03, 0x03, 0x00, 0x00, 0x01, 0x01, 0x02, 0x02, 0x03, 0x03,
- },
- {
- 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x00, 0x01, 0x02,
- 0x03, 0x04, 0x05, 0x06, 0x07, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05,
- 0x06, 0x07, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
- },
- {
- 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x01, 0x01,
- 0x01, 0x01, 0x01, 0x01, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
- 0x00, 0x00, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
- },
- {
- 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x02, 0x03, 0x02,
- 0x03, 0x02, 0x03, 0x02, 0x03, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01,
- 0x00, 0x01, 0x02, 0x03, 0x02, 0x03, 0x02, 0x03, 0x02, 0x03,
- },
- {
- 0x00, 0x00, 0x01, 0x01, 0x00, 0x00, 0x01, 0x01, 0x02, 0x02, 0x03,
- 0x03, 0x02, 0x02, 0x03, 0x03, 0x00, 0x00, 0x01, 0x01, 0x00, 0x00,
- 0x01, 0x01, 0x02, 0x02, 0x03, 0x03, 0x02, 0x02, 0x03, 0x03,
- },
- {
- 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06,
- 0x07, 0x04, 0x05, 0x06, 0x07, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01,
- 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x04, 0x05, 0x06, 0x07,
- },
- {
- 0x00, 0x00, 0x00, 0x00, 0x01, 0x01, 0x01, 0x01, 0x02, 0x02, 0x02,
- 0x02, 0x03, 0x03, 0x03, 0x03, 0x00, 0x00, 0x00, 0x00, 0x01, 0x01,
- 0x01, 0x01, 0x02, 0x02, 0x02, 0x02, 0x03, 0x03, 0x03, 0x03,
- },
- {
- 0x00, 0x01, 0x00, 0x01, 0x02, 0x03, 0x02, 0x03, 0x04, 0x05, 0x04,
- 0x05, 0x06, 0x07, 0x06, 0x07, 0x00, 0x01, 0x00, 0x01, 0x02, 0x03,
- 0x02, 0x03, 0x04, 0x05, 0x04, 0x05, 0x06, 0x07, 0x06, 0x07,
- },
- {
- 0x00, 0x00, 0x01, 0x01, 0x02, 0x02, 0x03, 0x03, 0x04, 0x04, 0x05,
- 0x05, 0x06, 0x06, 0x07, 0x07, 0x00, 0x00, 0x01, 0x01, 0x02, 0x02,
- 0x03, 0x03, 0x04, 0x04, 0x05, 0x05, 0x06, 0x06, 0x07, 0x07,
- },
- {
- 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A,
- 0x0B, 0x0C, 0x0D, 0x0E, 0x0F, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05,
- 0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F,
- },
- {
- 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
- 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
- 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
- },
- {
- 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00,
- 0x01, 0x00, 0x01, 0x00, 0x01, 0x02, 0x03, 0x02, 0x03, 0x02, 0x03,
- 0x02, 0x03, 0x02, 0x03, 0x02, 0x03, 0x02, 0x03, 0x02, 0x03,
- },
- {
- 0x00, 0x00, 0x01, 0x01, 0x00, 0x00, 0x01, 0x01, 0x00, 0x00, 0x01,
- 0x01, 0x00, 0x00, 0x01, 0x01, 0x02, 0x02, 0x03, 0x03, 0x02, 0x02,
- 0x03, 0x03, 0x02, 0x02, 0x03, 0x03, 0x02, 0x02, 0x03, 0x03,
- },
- {
- 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02,
- 0x03, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x04, 0x05,
- 0x06, 0x07, 0x04, 0x05, 0x06, 0x07, 0x04, 0x05, 0x06, 0x07,
- },
- {
- 0x00, 0x00, 0x00, 0x00, 0x01, 0x01, 0x01, 0x01, 0x00, 0x00, 0x00,
- 0x00, 0x01, 0x01, 0x01, 0x01, 0x02, 0x02, 0x02, 0x02, 0x03, 0x03,
- 0x03, 0x03, 0x02, 0x02, 0x02, 0x02, 0x03, 0x03, 0x03, 0x03,
- },
- {
- 0x00, 0x01, 0x00, 0x01, 0x02, 0x03, 0x02, 0x03, 0x00, 0x01, 0x00,
- 0x01, 0x02, 0x03, 0x02, 0x03, 0x04, 0x05, 0x04, 0x05, 0x06, 0x07,
- 0x06, 0x07, 0x04, 0x05, 0x04, 0x05, 0x06, 0x07, 0x06, 0x07,
- },
- {
- 0x00, 0x00, 0x01, 0x01, 0x02, 0x02, 0x03, 0x03, 0x00, 0x00, 0x01,
- 0x01, 0x02, 0x02, 0x03, 0x03, 0x04, 0x04, 0x05, 0x05, 0x06, 0x06,
- 0x07, 0x07, 0x04, 0x04, 0x05, 0x05, 0x06, 0x06, 0x07, 0x07,
- },
- {
- 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x00, 0x01, 0x02,
- 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D,
- 0x0E, 0x0F, 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F,
- },
- {
- 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x01, 0x01,
- 0x01, 0x01, 0x01, 0x01, 0x01, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
- 0x02, 0x02, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03,
- },
- {
- 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x02, 0x03, 0x02,
- 0x03, 0x02, 0x03, 0x02, 0x03, 0x04, 0x05, 0x04, 0x05, 0x04, 0x05,
- 0x04, 0x05, 0x06, 0x07, 0x06, 0x07, 0x06, 0x07, 0x06, 0x07,
- },
- {
- 0x00, 0x00, 0x01, 0x01, 0x00, 0x00, 0x01, 0x01, 0x02, 0x02, 0x03,
- 0x03, 0x02, 0x02, 0x03, 0x03, 0x04, 0x04, 0x05, 0x05, 0x04, 0x04,
- 0x05, 0x05, 0x06, 0x06, 0x07, 0x07, 0x06, 0x06, 0x07, 0x07,
- },
- {
- 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06,
- 0x07, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B, 0x08, 0x09,
- 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F, 0x0C, 0x0D, 0x0E, 0x0F,
- },
- {
- 0x00, 0x00, 0x00, 0x00, 0x01, 0x01, 0x01, 0x01, 0x02, 0x02, 0x02,
- 0x02, 0x03, 0x03, 0x03, 0x03, 0x04, 0x04, 0x04, 0x04, 0x05, 0x05,
- 0x05, 0x05, 0x06, 0x06, 0x06, 0x06, 0x07, 0x07, 0x07, 0x07,
- },
- {
- 0x00, 0x01, 0x00, 0x01, 0x02, 0x03, 0x02, 0x03, 0x04, 0x05, 0x04,
- 0x05, 0x06, 0x07, 0x06, 0x07, 0x08, 0x09, 0x08, 0x09, 0x0A, 0x0B,
- 0x0A, 0x0B, 0x0C, 0x0D, 0x0C, 0x0D, 0x0E, 0x0F, 0x0E, 0x0F,
- },
- {
- 0x00, 0x00, 0x01, 0x01, 0x02, 0x02, 0x03, 0x03, 0x04, 0x04, 0x05,
- 0x05, 0x06, 0x06, 0x07, 0x07, 0x08, 0x08, 0x09, 0x09, 0x0A, 0x0A,
- 0x0B, 0x0B, 0x0C, 0x0C, 0x0D, 0x0D, 0x0E, 0x0E, 0x0F, 0x0F,
- },
- {
- 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A,
- 0x0B, 0x0C, 0x0D, 0x0E, 0x0F, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15,
- 0x16, 0x17, 0x18, 0x19, 0x1A, 0x1B, 0x1C, 0x1D, 0x1E, 0x1F,
- },
-};
-
-inline uint64_t ExtractBitsSoftware(uint64_t bitmap, uint64_t select_bitmap) {
- // A software emulation of _pext_u64
-
- // These checks should be inline and are likely to be common cases.
- if (select_bitmap == ~uint64_t{0}) {
- return bitmap;
- } else if (select_bitmap == 0) {
- return 0;
- }
-
- // Fallback to lookup table method
- uint64_t bit_value = 0;
- int bit_len = 0;
- constexpr uint8_t kLookupMask = (1U << kLookupBits) - 1;
- while (select_bitmap != 0) {
- const auto mask_len = ARROW_POPCOUNT32(select_bitmap & kLookupMask);
- const uint64_t value = kPextTable[select_bitmap & kLookupMask][bitmap & kLookupMask];
- bit_value |= (value << bit_len);
- bit_len += mask_len;
- bitmap >>= kLookupBits;
- select_bitmap >>= kLookupBits;
- }
- return bit_value;
-}
-
-#ifdef ARROW_HAVE_BMI2
-
-// Use _pext_u64 on 64-bit builds, _pext_u32 on 32-bit builds,
-#if UINTPTR_MAX == 0xFFFFFFFF
-
-using extract_bitmap_t = uint32_t;
-inline extract_bitmap_t ExtractBits(extract_bitmap_t bitmap,
- extract_bitmap_t select_bitmap) {
- return _pext_u32(bitmap, select_bitmap);
-}
-
-#else
-
-using extract_bitmap_t = uint64_t;
-inline extract_bitmap_t ExtractBits(extract_bitmap_t bitmap,
- extract_bitmap_t select_bitmap) {
- return _pext_u64(bitmap, select_bitmap);
-}
-
-#endif
-
-#else // !defined(ARROW_HAVE_BMI2)
-
-// Use 64-bit pext emulation when BMI2 isn't available.
-using extract_bitmap_t = uint64_t;
-inline extract_bitmap_t ExtractBits(extract_bitmap_t bitmap,
- extract_bitmap_t select_bitmap) {
- return ExtractBitsSoftware(bitmap, select_bitmap);
-}
-
-#endif
-
-static constexpr int64_t kExtractBitsSize = 8 * sizeof(extract_bitmap_t);
-
-template <bool has_repeated_parent>
-int64_t DefLevelsBatchToBitmap(const int16_t* def_levels, const int64_t batch_size,
- int64_t upper_bound_remaining, LevelInfo level_info,
- ::arrow::internal::FirstTimeBitmapWriter* writer) {
- DCHECK_LE(batch_size, kExtractBitsSize);
-
- // Greater than level_info.def_level - 1 implies >= the def_level
- auto defined_bitmap = static_cast<extract_bitmap_t>(
- internal::GreaterThanBitmap(def_levels, batch_size, level_info.def_level - 1));
-
- if (has_repeated_parent) {
- // Greater than level_info.repeated_ancestor_def_level - 1 implies >= the
- // repeated_ancestor_def_level
- auto present_bitmap = static_cast<extract_bitmap_t>(internal::GreaterThanBitmap(
- def_levels, batch_size, level_info.repeated_ancestor_def_level - 1));
- auto selected_bits = ExtractBits(defined_bitmap, present_bitmap);
- int64_t selected_count = ::arrow::BitUtil::PopCount(present_bitmap);
- if (ARROW_PREDICT_FALSE(selected_count > upper_bound_remaining)) {
- throw ParquetException("Values read exceeded upper bound");
- }
- writer->AppendWord(selected_bits, selected_count);
- return ::arrow::BitUtil::PopCount(selected_bits);
- } else {
- if (ARROW_PREDICT_FALSE(batch_size > upper_bound_remaining)) {
- std::stringstream ss;
- ss << "Values read exceeded upper bound";
- throw ParquetException(ss.str());
- }
-
- writer->AppendWord(defined_bitmap, batch_size);
- return ::arrow::BitUtil::PopCount(defined_bitmap);
- }
-}
-
-template <bool has_repeated_parent>
-void DefLevelsToBitmapSimd(const int16_t* def_levels, int64_t num_def_levels,
- LevelInfo level_info, ValidityBitmapInputOutput* output) {
- ::arrow::internal::FirstTimeBitmapWriter writer(
- output->valid_bits,
- /*start_offset=*/output->valid_bits_offset,
- /*length=*/num_def_levels);
- int64_t set_count = 0;
- output->values_read = 0;
- int64_t values_read_remaining = output->values_read_upper_bound;
- while (num_def_levels > kExtractBitsSize) {
- set_count += DefLevelsBatchToBitmap<has_repeated_parent>(
- def_levels, kExtractBitsSize, values_read_remaining, level_info, &writer);
- def_levels += kExtractBitsSize;
- num_def_levels -= kExtractBitsSize;
- values_read_remaining = output->values_read_upper_bound - writer.position();
- }
- set_count += DefLevelsBatchToBitmap<has_repeated_parent>(
- def_levels, num_def_levels, values_read_remaining, level_info, &writer);
-
- output->values_read = writer.position();
- output->null_count += output->values_read - set_count;
- writer.Finish();
-}
-
-} // namespace PARQUET_IMPL_NAMESPACE
-} // namespace internal
-} // namespace parquet
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+#pragma once
+
+#include "parquet/level_conversion.h"
+
+#include <algorithm>
+#include <cstdint>
+#include <limits>
+
+#include "arrow/util/bit_run_reader.h"
+#include "arrow/util/bit_util.h"
+#include "arrow/util/bitmap_writer.h"
+#include "arrow/util/logging.h"
+#include "arrow/util/simd.h"
+#include "parquet/exception.h"
+#include "parquet/level_comparison.h"
+
+namespace parquet {
+namespace internal {
+#ifndef PARQUET_IMPL_NAMESPACE
+#error "PARQUET_IMPL_NAMESPACE must be defined"
+#endif
+namespace PARQUET_IMPL_NAMESPACE {
+
+// clang-format off
+/* Python code to generate lookup table:
+
+kLookupBits = 5
+count = 0
+print('constexpr int kLookupBits = {};'.format(kLookupBits))
+print('constexpr uint8_t kPextTable[1 << kLookupBits][1 << kLookupBits] = {')
+print(' ', end = '')
+for mask in range(1 << kLookupBits):
+ for data in range(1 << kLookupBits):
+ bit_value = 0
+ bit_len = 0
+ for i in range(kLookupBits):
+ if mask & (1 << i):
+ bit_value |= (((data >> i) & 1) << bit_len)
+ bit_len += 1
+ out = '0x{:02X},'.format(bit_value)
+ count += 1
+ if count % (1 << kLookupBits) == 1:
+ print(' {')
+ if count % 8 == 1:
+ print(' ', end = '')
+ if count % 8 == 0:
+ print(out, end = '\n')
+ else:
+ print(out, end = ' ')
+ if count % (1 << kLookupBits) == 0:
+ print(' },', end = '')
+print('\n};')
+
+*/
+// clang-format on
+
+constexpr int kLookupBits = 5;
+constexpr uint8_t kPextTable[1 << kLookupBits][1 << kLookupBits] = {
+ {
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ },
+ {
+ 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00,
+ 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01,
+ 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01,
+ },
+ {
+ 0x00, 0x00, 0x01, 0x01, 0x00, 0x00, 0x01, 0x01, 0x00, 0x00, 0x01,
+ 0x01, 0x00, 0x00, 0x01, 0x01, 0x00, 0x00, 0x01, 0x01, 0x00, 0x00,
+ 0x01, 0x01, 0x00, 0x00, 0x01, 0x01, 0x00, 0x00, 0x01, 0x01,
+ },
+ {
+ 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02,
+ 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01,
+ 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03,
+ },
+ {
+ 0x00, 0x00, 0x00, 0x00, 0x01, 0x01, 0x01, 0x01, 0x00, 0x00, 0x00,
+ 0x00, 0x01, 0x01, 0x01, 0x01, 0x00, 0x00, 0x00, 0x00, 0x01, 0x01,
+ 0x01, 0x01, 0x00, 0x00, 0x00, 0x00, 0x01, 0x01, 0x01, 0x01,
+ },
+ {
+ 0x00, 0x01, 0x00, 0x01, 0x02, 0x03, 0x02, 0x03, 0x00, 0x01, 0x00,
+ 0x01, 0x02, 0x03, 0x02, 0x03, 0x00, 0x01, 0x00, 0x01, 0x02, 0x03,
+ 0x02, 0x03, 0x00, 0x01, 0x00, 0x01, 0x02, 0x03, 0x02, 0x03,
+ },
+ {
+ 0x00, 0x00, 0x01, 0x01, 0x02, 0x02, 0x03, 0x03, 0x00, 0x00, 0x01,
+ 0x01, 0x02, 0x02, 0x03, 0x03, 0x00, 0x00, 0x01, 0x01, 0x02, 0x02,
+ 0x03, 0x03, 0x00, 0x00, 0x01, 0x01, 0x02, 0x02, 0x03, 0x03,
+ },
+ {
+ 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x00, 0x01, 0x02,
+ 0x03, 0x04, 0x05, 0x06, 0x07, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05,
+ 0x06, 0x07, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
+ },
+ {
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x01, 0x01,
+ 0x01, 0x01, 0x01, 0x01, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
+ },
+ {
+ 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x02, 0x03, 0x02,
+ 0x03, 0x02, 0x03, 0x02, 0x03, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01,
+ 0x00, 0x01, 0x02, 0x03, 0x02, 0x03, 0x02, 0x03, 0x02, 0x03,
+ },
+ {
+ 0x00, 0x00, 0x01, 0x01, 0x00, 0x00, 0x01, 0x01, 0x02, 0x02, 0x03,
+ 0x03, 0x02, 0x02, 0x03, 0x03, 0x00, 0x00, 0x01, 0x01, 0x00, 0x00,
+ 0x01, 0x01, 0x02, 0x02, 0x03, 0x03, 0x02, 0x02, 0x03, 0x03,
+ },
+ {
+ 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06,
+ 0x07, 0x04, 0x05, 0x06, 0x07, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01,
+ 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x04, 0x05, 0x06, 0x07,
+ },
+ {
+ 0x00, 0x00, 0x00, 0x00, 0x01, 0x01, 0x01, 0x01, 0x02, 0x02, 0x02,
+ 0x02, 0x03, 0x03, 0x03, 0x03, 0x00, 0x00, 0x00, 0x00, 0x01, 0x01,
+ 0x01, 0x01, 0x02, 0x02, 0x02, 0x02, 0x03, 0x03, 0x03, 0x03,
+ },
+ {
+ 0x00, 0x01, 0x00, 0x01, 0x02, 0x03, 0x02, 0x03, 0x04, 0x05, 0x04,
+ 0x05, 0x06, 0x07, 0x06, 0x07, 0x00, 0x01, 0x00, 0x01, 0x02, 0x03,
+ 0x02, 0x03, 0x04, 0x05, 0x04, 0x05, 0x06, 0x07, 0x06, 0x07,
+ },
+ {
+ 0x00, 0x00, 0x01, 0x01, 0x02, 0x02, 0x03, 0x03, 0x04, 0x04, 0x05,
+ 0x05, 0x06, 0x06, 0x07, 0x07, 0x00, 0x00, 0x01, 0x01, 0x02, 0x02,
+ 0x03, 0x03, 0x04, 0x04, 0x05, 0x05, 0x06, 0x06, 0x07, 0x07,
+ },
+ {
+ 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A,
+ 0x0B, 0x0C, 0x0D, 0x0E, 0x0F, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05,
+ 0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F,
+ },
+ {
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
+ 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
+ },
+ {
+ 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00,
+ 0x01, 0x00, 0x01, 0x00, 0x01, 0x02, 0x03, 0x02, 0x03, 0x02, 0x03,
+ 0x02, 0x03, 0x02, 0x03, 0x02, 0x03, 0x02, 0x03, 0x02, 0x03,
+ },
+ {
+ 0x00, 0x00, 0x01, 0x01, 0x00, 0x00, 0x01, 0x01, 0x00, 0x00, 0x01,
+ 0x01, 0x00, 0x00, 0x01, 0x01, 0x02, 0x02, 0x03, 0x03, 0x02, 0x02,
+ 0x03, 0x03, 0x02, 0x02, 0x03, 0x03, 0x02, 0x02, 0x03, 0x03,
+ },
+ {
+ 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02,
+ 0x03, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x04, 0x05,
+ 0x06, 0x07, 0x04, 0x05, 0x06, 0x07, 0x04, 0x05, 0x06, 0x07,
+ },
+ {
+ 0x00, 0x00, 0x00, 0x00, 0x01, 0x01, 0x01, 0x01, 0x00, 0x00, 0x00,
+ 0x00, 0x01, 0x01, 0x01, 0x01, 0x02, 0x02, 0x02, 0x02, 0x03, 0x03,
+ 0x03, 0x03, 0x02, 0x02, 0x02, 0x02, 0x03, 0x03, 0x03, 0x03,
+ },
+ {
+ 0x00, 0x01, 0x00, 0x01, 0x02, 0x03, 0x02, 0x03, 0x00, 0x01, 0x00,
+ 0x01, 0x02, 0x03, 0x02, 0x03, 0x04, 0x05, 0x04, 0x05, 0x06, 0x07,
+ 0x06, 0x07, 0x04, 0x05, 0x04, 0x05, 0x06, 0x07, 0x06, 0x07,
+ },
+ {
+ 0x00, 0x00, 0x01, 0x01, 0x02, 0x02, 0x03, 0x03, 0x00, 0x00, 0x01,
+ 0x01, 0x02, 0x02, 0x03, 0x03, 0x04, 0x04, 0x05, 0x05, 0x06, 0x06,
+ 0x07, 0x07, 0x04, 0x04, 0x05, 0x05, 0x06, 0x06, 0x07, 0x07,
+ },
+ {
+ 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x00, 0x01, 0x02,
+ 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D,
+ 0x0E, 0x0F, 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F,
+ },
+ {
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x01, 0x01,
+ 0x01, 0x01, 0x01, 0x01, 0x01, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03,
+ },
+ {
+ 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x02, 0x03, 0x02,
+ 0x03, 0x02, 0x03, 0x02, 0x03, 0x04, 0x05, 0x04, 0x05, 0x04, 0x05,
+ 0x04, 0x05, 0x06, 0x07, 0x06, 0x07, 0x06, 0x07, 0x06, 0x07,
+ },
+ {
+ 0x00, 0x00, 0x01, 0x01, 0x00, 0x00, 0x01, 0x01, 0x02, 0x02, 0x03,
+ 0x03, 0x02, 0x02, 0x03, 0x03, 0x04, 0x04, 0x05, 0x05, 0x04, 0x04,
+ 0x05, 0x05, 0x06, 0x06, 0x07, 0x07, 0x06, 0x06, 0x07, 0x07,
+ },
+ {
+ 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06,
+ 0x07, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B, 0x08, 0x09,
+ 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F, 0x0C, 0x0D, 0x0E, 0x0F,
+ },
+ {
+ 0x00, 0x00, 0x00, 0x00, 0x01, 0x01, 0x01, 0x01, 0x02, 0x02, 0x02,
+ 0x02, 0x03, 0x03, 0x03, 0x03, 0x04, 0x04, 0x04, 0x04, 0x05, 0x05,
+ 0x05, 0x05, 0x06, 0x06, 0x06, 0x06, 0x07, 0x07, 0x07, 0x07,
+ },
+ {
+ 0x00, 0x01, 0x00, 0x01, 0x02, 0x03, 0x02, 0x03, 0x04, 0x05, 0x04,
+ 0x05, 0x06, 0x07, 0x06, 0x07, 0x08, 0x09, 0x08, 0x09, 0x0A, 0x0B,
+ 0x0A, 0x0B, 0x0C, 0x0D, 0x0C, 0x0D, 0x0E, 0x0F, 0x0E, 0x0F,
+ },
+ {
+ 0x00, 0x00, 0x01, 0x01, 0x02, 0x02, 0x03, 0x03, 0x04, 0x04, 0x05,
+ 0x05, 0x06, 0x06, 0x07, 0x07, 0x08, 0x08, 0x09, 0x09, 0x0A, 0x0A,
+ 0x0B, 0x0B, 0x0C, 0x0C, 0x0D, 0x0D, 0x0E, 0x0E, 0x0F, 0x0F,
+ },
+ {
+ 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A,
+ 0x0B, 0x0C, 0x0D, 0x0E, 0x0F, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15,
+ 0x16, 0x17, 0x18, 0x19, 0x1A, 0x1B, 0x1C, 0x1D, 0x1E, 0x1F,
+ },
+};
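+
+// A small worked example of the table's pext semantics (values taken from the
+// rows above): mask 0b00101 selects bits 0 and 2 of the data word 0b00111;
+// both are set, so they pack to 0b11.
+static_assert(kPextTable[0x05][0x07] == 0x03,
+              "pext of 0x07 under mask 0x05 packs to 0x3");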
+
+inline uint64_t ExtractBitsSoftware(uint64_t bitmap, uint64_t select_bitmap) {
+ // A software emulation of _pext_u64
+
+  // These checks should inline well and cover the likely common cases.
+ if (select_bitmap == ~uint64_t{0}) {
+ return bitmap;
+ } else if (select_bitmap == 0) {
+ return 0;
+ }
+
+  // Fall back to the lookup-table method
+ uint64_t bit_value = 0;
+ int bit_len = 0;
+ constexpr uint8_t kLookupMask = (1U << kLookupBits) - 1;
+ while (select_bitmap != 0) {
+ const auto mask_len = ARROW_POPCOUNT32(select_bitmap & kLookupMask);
+ const uint64_t value = kPextTable[select_bitmap & kLookupMask][bitmap & kLookupMask];
+ bit_value |= (value << bit_len);
+ bit_len += mask_len;
+ bitmap >>= kLookupBits;
+ select_bitmap >>= kLookupBits;
+ }
+ return bit_value;
+}
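+
+// A minimal sketch (illustrative values only) of the pext semantics that
+// ExtractBitsSoftware emulates: the bits of `bitmap` at the positions where
+// `select_bitmap` has a 1 are packed contiguously into the low bits of the
+// result.
+inline void ExtractBitsSoftwareExample() {
+  // bitmap = 0b10110010, select = 0b01010101 -> selected bits (0, 2, 4, 6)
+  // are 0, 0, 1, 0, which pack to 0b0100.
+  DCHECK_EQ(ExtractBitsSoftware(0xB2, 0x55), uint64_t{0x4});
+}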
+
+#ifdef ARROW_HAVE_BMI2
+
+// Use _pext_u32 on 32-bit builds and _pext_u64 on 64-bit builds.
+#if UINTPTR_MAX == 0xFFFFFFFF
+
+using extract_bitmap_t = uint32_t;
+inline extract_bitmap_t ExtractBits(extract_bitmap_t bitmap,
+ extract_bitmap_t select_bitmap) {
+ return _pext_u32(bitmap, select_bitmap);
+}
+
+#else
+
+using extract_bitmap_t = uint64_t;
+inline extract_bitmap_t ExtractBits(extract_bitmap_t bitmap,
+ extract_bitmap_t select_bitmap) {
+ return _pext_u64(bitmap, select_bitmap);
+}
+
+#endif
+
+#else // !defined(ARROW_HAVE_BMI2)
+
+// Use 64-bit pext emulation when BMI2 isn't available.
+using extract_bitmap_t = uint64_t;
+inline extract_bitmap_t ExtractBits(extract_bitmap_t bitmap,
+ extract_bitmap_t select_bitmap) {
+ return ExtractBitsSoftware(bitmap, select_bitmap);
+}
+
+#endif
+
+static constexpr int64_t kExtractBitsSize = 8 * sizeof(extract_bitmap_t);
+
+template <bool has_repeated_parent>
+int64_t DefLevelsBatchToBitmap(const int16_t* def_levels, const int64_t batch_size,
+ int64_t upper_bound_remaining, LevelInfo level_info,
+ ::arrow::internal::FirstTimeBitmapWriter* writer) {
+ DCHECK_LE(batch_size, kExtractBitsSize);
+
+ // Greater than level_info.def_level - 1 implies >= the def_level
+ auto defined_bitmap = static_cast<extract_bitmap_t>(
+ internal::GreaterThanBitmap(def_levels, batch_size, level_info.def_level - 1));
+
+ if (has_repeated_parent) {
+ // Greater than level_info.repeated_ancestor_def_level - 1 implies >= the
+ // repeated_ancestor_def_level
+ auto present_bitmap = static_cast<extract_bitmap_t>(internal::GreaterThanBitmap(
+ def_levels, batch_size, level_info.repeated_ancestor_def_level - 1));
+ auto selected_bits = ExtractBits(defined_bitmap, present_bitmap);
+ int64_t selected_count = ::arrow::BitUtil::PopCount(present_bitmap);
+ if (ARROW_PREDICT_FALSE(selected_count > upper_bound_remaining)) {
+ throw ParquetException("Values read exceeded upper bound");
+ }
+ writer->AppendWord(selected_bits, selected_count);
+ return ::arrow::BitUtil::PopCount(selected_bits);
+ } else {
+ if (ARROW_PREDICT_FALSE(batch_size > upper_bound_remaining)) {
+      throw ParquetException("Values read exceeded upper bound");
+ }
+
+ writer->AppendWord(defined_bitmap, batch_size);
+ return ::arrow::BitUtil::PopCount(defined_bitmap);
+ }
+}
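+
+// A short sketch (hand-picked levels) of the word built above:
+// GreaterThanBitmap packs one bit per def level, so levels {2, 1, 2, 0}
+// compared against def_level - 1 = 1 yield the word 0b0101.
+inline void DefLevelsBatchWordExample() {
+  const int16_t def_levels[] = {2, 1, 2, 0};
+  const auto defined_bitmap = static_cast<extract_bitmap_t>(
+      internal::GreaterThanBitmap(def_levels, /*num_levels=*/4, /*rhs=*/1));
+  DCHECK_EQ(defined_bitmap, extract_bitmap_t{0x5});
+}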
+
+template <bool has_repeated_parent>
+void DefLevelsToBitmapSimd(const int16_t* def_levels, int64_t num_def_levels,
+ LevelInfo level_info, ValidityBitmapInputOutput* output) {
+ ::arrow::internal::FirstTimeBitmapWriter writer(
+ output->valid_bits,
+ /*start_offset=*/output->valid_bits_offset,
+ /*length=*/num_def_levels);
+ int64_t set_count = 0;
+ output->values_read = 0;
+ int64_t values_read_remaining = output->values_read_upper_bound;
+ while (num_def_levels > kExtractBitsSize) {
+ set_count += DefLevelsBatchToBitmap<has_repeated_parent>(
+ def_levels, kExtractBitsSize, values_read_remaining, level_info, &writer);
+ def_levels += kExtractBitsSize;
+ num_def_levels -= kExtractBitsSize;
+ values_read_remaining = output->values_read_upper_bound - writer.position();
+ }
+ set_count += DefLevelsBatchToBitmap<has_repeated_parent>(
+ def_levels, num_def_levels, values_read_remaining, level_info, &writer);
+
+ output->values_read = writer.position();
+ output->null_count += output->values_read - set_count;
+ writer.Finish();
+}
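+
+// A minimal usage sketch (hypothetical inputs): with max def_level 1 and no
+// repeated parent, def levels {1, 0, 1, 1} produce the validity bits
+// 1, 0, 1, 1 and a single null.
+inline void DefLevelsToBitmapSimdExample() {
+  const int16_t def_levels[] = {1, 0, 1, 1};
+  uint8_t valid_bits[1] = {0};
+  LevelInfo level_info;
+  level_info.def_level = 1;
+  ValidityBitmapInputOutput output;
+  output.values_read_upper_bound = 4;
+  output.valid_bits = valid_bits;
+  DefLevelsToBitmapSimd</*has_repeated_parent=*/false>(
+      def_levels, /*num_def_levels=*/4, level_info, &output);
+  DCHECK_EQ(output.values_read, 4);
+  DCHECK_EQ(output.null_count, 1);
+}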
+
+} // namespace PARQUET_IMPL_NAMESPACE
+} // namespace internal
+} // namespace parquet
diff --git a/contrib/libs/apache/arrow/cpp/src/parquet/metadata.cc b/contrib/libs/apache/arrow/cpp/src/parquet/metadata.cc
index 1524333702f..bd9bf77c42d 100644
--- a/contrib/libs/apache/arrow/cpp/src/parquet/metadata.cc
+++ b/contrib/libs/apache/arrow/cpp/src/parquet/metadata.cc
@@ -1,1783 +1,1783 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-#include "parquet/metadata.h"
-
-#include <algorithm>
-#include <cinttypes>
-#include <ostream>
-#include <string>
-#include <utility>
-#include <vector>
-
-#include "arrow/io/memory.h"
-#include "arrow/util/key_value_metadata.h"
-#include "arrow/util/logging.h"
-#include "arrow/util/string_view.h"
-#include "parquet/encryption/encryption_internal.h"
-#include "parquet/encryption/internal_file_decryptor.h"
-#include "parquet/exception.h"
-#include "parquet/schema.h"
-#include "parquet/schema_internal.h"
-#include "parquet/statistics.h"
-#include "parquet/thrift_internal.h"
-
-namespace parquet {
-
-const ApplicationVersion& ApplicationVersion::PARQUET_251_FIXED_VERSION() {
- static ApplicationVersion version("parquet-mr", 1, 8, 0);
- return version;
-}
-
-const ApplicationVersion& ApplicationVersion::PARQUET_816_FIXED_VERSION() {
- static ApplicationVersion version("parquet-mr", 1, 2, 9);
- return version;
-}
-
-const ApplicationVersion& ApplicationVersion::PARQUET_CPP_FIXED_STATS_VERSION() {
- static ApplicationVersion version("parquet-cpp", 1, 3, 0);
- return version;
-}
-
-const ApplicationVersion& ApplicationVersion::PARQUET_MR_FIXED_STATS_VERSION() {
- static ApplicationVersion version("parquet-mr", 1, 10, 0);
- return version;
-}
-
-std::string ParquetVersionToString(ParquetVersion::type ver) {
- switch (ver) {
- case ParquetVersion::PARQUET_1_0:
- return "1.0";
- case ParquetVersion::PARQUET_2_0:
- return "2.0";
- }
-
- // This should be unreachable
- return "UNKNOWN";
-}
-
-template <typename DType>
-static std::shared_ptr<Statistics> MakeTypedColumnStats(
- const format::ColumnMetaData& metadata, const ColumnDescriptor* descr) {
- // If ColumnOrder is defined, return max_value and min_value
- if (descr->column_order().get_order() == ColumnOrder::TYPE_DEFINED_ORDER) {
- return MakeStatistics<DType>(
- descr, metadata.statistics.min_value, metadata.statistics.max_value,
- metadata.num_values - metadata.statistics.null_count,
- metadata.statistics.null_count, metadata.statistics.distinct_count,
- metadata.statistics.__isset.max_value || metadata.statistics.__isset.min_value,
- metadata.statistics.__isset.null_count,
- metadata.statistics.__isset.distinct_count);
- }
- // Default behavior
- return MakeStatistics<DType>(
- descr, metadata.statistics.min, metadata.statistics.max,
- metadata.num_values - metadata.statistics.null_count,
- metadata.statistics.null_count, metadata.statistics.distinct_count,
- metadata.statistics.__isset.max || metadata.statistics.__isset.min,
- metadata.statistics.__isset.null_count, metadata.statistics.__isset.distinct_count);
-}
-
-std::shared_ptr<Statistics> MakeColumnStats(const format::ColumnMetaData& meta_data,
- const ColumnDescriptor* descr) {
- switch (static_cast<Type::type>(meta_data.type)) {
- case Type::BOOLEAN:
- return MakeTypedColumnStats<BooleanType>(meta_data, descr);
- case Type::INT32:
- return MakeTypedColumnStats<Int32Type>(meta_data, descr);
- case Type::INT64:
- return MakeTypedColumnStats<Int64Type>(meta_data, descr);
- case Type::INT96:
- return MakeTypedColumnStats<Int96Type>(meta_data, descr);
- case Type::DOUBLE:
- return MakeTypedColumnStats<DoubleType>(meta_data, descr);
- case Type::FLOAT:
- return MakeTypedColumnStats<FloatType>(meta_data, descr);
- case Type::BYTE_ARRAY:
- return MakeTypedColumnStats<ByteArrayType>(meta_data, descr);
- case Type::FIXED_LEN_BYTE_ARRAY:
- return MakeTypedColumnStats<FLBAType>(meta_data, descr);
- case Type::UNDEFINED:
- break;
- }
- throw ParquetException("Can't decode page statistics for selected column type");
-}
-
-// MetaData Accessor
-
-// ColumnCryptoMetaData
-class ColumnCryptoMetaData::ColumnCryptoMetaDataImpl {
- public:
- explicit ColumnCryptoMetaDataImpl(const format::ColumnCryptoMetaData* crypto_metadata)
- : crypto_metadata_(crypto_metadata) {}
-
- bool encrypted_with_footer_key() const {
- return crypto_metadata_->__isset.ENCRYPTION_WITH_FOOTER_KEY;
- }
- bool encrypted_with_column_key() const {
- return crypto_metadata_->__isset.ENCRYPTION_WITH_COLUMN_KEY;
- }
- std::shared_ptr<schema::ColumnPath> path_in_schema() const {
- return std::make_shared<schema::ColumnPath>(
- crypto_metadata_->ENCRYPTION_WITH_COLUMN_KEY.path_in_schema);
- }
- const std::string& key_metadata() const {
- return crypto_metadata_->ENCRYPTION_WITH_COLUMN_KEY.key_metadata;
- }
-
- private:
- const format::ColumnCryptoMetaData* crypto_metadata_;
-};
-
-std::unique_ptr<ColumnCryptoMetaData> ColumnCryptoMetaData::Make(
- const uint8_t* metadata) {
- return std::unique_ptr<ColumnCryptoMetaData>(new ColumnCryptoMetaData(metadata));
-}
-
-ColumnCryptoMetaData::ColumnCryptoMetaData(const uint8_t* metadata)
- : impl_(new ColumnCryptoMetaDataImpl(
- reinterpret_cast<const format::ColumnCryptoMetaData*>(metadata))) {}
-
-ColumnCryptoMetaData::~ColumnCryptoMetaData() = default;
-
-std::shared_ptr<schema::ColumnPath> ColumnCryptoMetaData::path_in_schema() const {
- return impl_->path_in_schema();
-}
-bool ColumnCryptoMetaData::encrypted_with_footer_key() const {
- return impl_->encrypted_with_footer_key();
-}
-const std::string& ColumnCryptoMetaData::key_metadata() const {
- return impl_->key_metadata();
-}
-
-// ColumnChunk metadata
-class ColumnChunkMetaData::ColumnChunkMetaDataImpl {
- public:
- explicit ColumnChunkMetaDataImpl(const format::ColumnChunk* column,
- const ColumnDescriptor* descr,
- int16_t row_group_ordinal, int16_t column_ordinal,
- const ApplicationVersion* writer_version,
- std::shared_ptr<InternalFileDecryptor> file_decryptor)
- : column_(column), descr_(descr), writer_version_(writer_version) {
- column_metadata_ = &column->meta_data;
- if (column->__isset.crypto_metadata) { // column metadata is encrypted
- format::ColumnCryptoMetaData ccmd = column->crypto_metadata;
-
- if (ccmd.__isset.ENCRYPTION_WITH_COLUMN_KEY) {
- if (file_decryptor != nullptr && file_decryptor->properties() != nullptr) {
- // should decrypt metadata
- std::shared_ptr<schema::ColumnPath> path = std::make_shared<schema::ColumnPath>(
- ccmd.ENCRYPTION_WITH_COLUMN_KEY.path_in_schema);
- std::string key_metadata = ccmd.ENCRYPTION_WITH_COLUMN_KEY.key_metadata;
-
- std::string aad_column_metadata = encryption::CreateModuleAad(
- file_decryptor->file_aad(), encryption::kColumnMetaData, row_group_ordinal,
- column_ordinal, static_cast<int16_t>(-1));
- auto decryptor = file_decryptor->GetColumnMetaDecryptor(
- path->ToDotString(), key_metadata, aad_column_metadata);
- auto len = static_cast<uint32_t>(column->encrypted_column_metadata.size());
- DeserializeThriftMsg(
- reinterpret_cast<const uint8_t*>(column->encrypted_column_metadata.c_str()),
- &len, &decrypted_metadata_, decryptor);
- column_metadata_ = &decrypted_metadata_;
- } else {
- throw ParquetException(
- "Cannot decrypt ColumnMetadata."
- " FileDecryption is not setup correctly");
- }
- }
- }
- for (const auto& encoding : column_metadata_->encodings) {
- encodings_.push_back(LoadEnumSafe(&encoding));
- }
- for (const auto& encoding_stats : column_metadata_->encoding_stats) {
- encoding_stats_.push_back({LoadEnumSafe(&encoding_stats.page_type),
- LoadEnumSafe(&encoding_stats.encoding),
- encoding_stats.count});
- }
- possible_stats_ = nullptr;
- }
-
- bool Equals(const ColumnChunkMetaDataImpl& other) const {
- return *column_metadata_ == *other.column_metadata_;
- }
-
- // column chunk
- inline int64_t file_offset() const { return column_->file_offset; }
- inline const std::string& file_path() const { return column_->file_path; }
-
- inline Type::type type() const { return LoadEnumSafe(&column_metadata_->type); }
-
- inline int64_t num_values() const { return column_metadata_->num_values; }
-
- std::shared_ptr<schema::ColumnPath> path_in_schema() {
- return std::make_shared<schema::ColumnPath>(column_metadata_->path_in_schema);
- }
-
- // Check if statistics are set and are valid
- // 1) Must be set in the metadata
- // 2) Statistics must not be corrupted
- inline bool is_stats_set() const {
- DCHECK(writer_version_ != nullptr);
- // If the column statistics don't exist or column sort order is unknown
- // we cannot use the column stats
- if (!column_metadata_->__isset.statistics ||
- descr_->sort_order() == SortOrder::UNKNOWN) {
- return false;
- }
- if (possible_stats_ == nullptr) {
- possible_stats_ = MakeColumnStats(*column_metadata_, descr_);
- }
- EncodedStatistics encodedStatistics = possible_stats_->Encode();
- return writer_version_->HasCorrectStatistics(type(), encodedStatistics,
- descr_->sort_order());
- }
-
- inline std::shared_ptr<Statistics> statistics() const {
- return is_stats_set() ? possible_stats_ : nullptr;
- }
-
- inline Compression::type compression() const {
- return LoadEnumSafe(&column_metadata_->codec);
- }
-
- const std::vector<Encoding::type>& encodings() const { return encodings_; }
-
- const std::vector<PageEncodingStats>& encoding_stats() const { return encoding_stats_; }
-
- inline bool has_dictionary_page() const {
- return column_metadata_->__isset.dictionary_page_offset;
- }
-
- inline int64_t dictionary_page_offset() const {
- return column_metadata_->dictionary_page_offset;
- }
-
- inline int64_t data_page_offset() const { return column_metadata_->data_page_offset; }
-
- inline bool has_index_page() const {
- return column_metadata_->__isset.index_page_offset;
- }
-
- inline int64_t index_page_offset() const { return column_metadata_->index_page_offset; }
-
- inline int64_t total_compressed_size() const {
- return column_metadata_->total_compressed_size;
- }
-
- inline int64_t total_uncompressed_size() const {
- return column_metadata_->total_uncompressed_size;
- }
-
- inline std::unique_ptr<ColumnCryptoMetaData> crypto_metadata() const {
- if (column_->__isset.crypto_metadata) {
- return ColumnCryptoMetaData::Make(
- reinterpret_cast<const uint8_t*>(&column_->crypto_metadata));
- } else {
- return nullptr;
- }
- }
-
- private:
- mutable std::shared_ptr<Statistics> possible_stats_;
- std::vector<Encoding::type> encodings_;
- std::vector<PageEncodingStats> encoding_stats_;
- const format::ColumnChunk* column_;
- const format::ColumnMetaData* column_metadata_;
- format::ColumnMetaData decrypted_metadata_;
- const ColumnDescriptor* descr_;
- const ApplicationVersion* writer_version_;
-};
-
-std::unique_ptr<ColumnChunkMetaData> ColumnChunkMetaData::Make(
- const void* metadata, const ColumnDescriptor* descr,
- const ApplicationVersion* writer_version, int16_t row_group_ordinal,
- int16_t column_ordinal, std::shared_ptr<InternalFileDecryptor> file_decryptor) {
- return std::unique_ptr<ColumnChunkMetaData>(
- new ColumnChunkMetaData(metadata, descr, row_group_ordinal, column_ordinal,
- writer_version, std::move(file_decryptor)));
-}
-
-ColumnChunkMetaData::ColumnChunkMetaData(
- const void* metadata, const ColumnDescriptor* descr, int16_t row_group_ordinal,
- int16_t column_ordinal, const ApplicationVersion* writer_version,
- std::shared_ptr<InternalFileDecryptor> file_decryptor)
- : impl_{new ColumnChunkMetaDataImpl(
- reinterpret_cast<const format::ColumnChunk*>(metadata), descr,
- row_group_ordinal, column_ordinal, writer_version, std::move(file_decryptor))} {
-}
-
-ColumnChunkMetaData::~ColumnChunkMetaData() = default;
-
-// column chunk
-int64_t ColumnChunkMetaData::file_offset() const { return impl_->file_offset(); }
-
-const std::string& ColumnChunkMetaData::file_path() const { return impl_->file_path(); }
-
-Type::type ColumnChunkMetaData::type() const { return impl_->type(); }
-
-int64_t ColumnChunkMetaData::num_values() const { return impl_->num_values(); }
-
-std::shared_ptr<schema::ColumnPath> ColumnChunkMetaData::path_in_schema() const {
- return impl_->path_in_schema();
-}
-
-std::shared_ptr<Statistics> ColumnChunkMetaData::statistics() const {
- return impl_->statistics();
-}
-
-bool ColumnChunkMetaData::is_stats_set() const { return impl_->is_stats_set(); }
-
-bool ColumnChunkMetaData::has_dictionary_page() const {
- return impl_->has_dictionary_page();
-}
-
-int64_t ColumnChunkMetaData::dictionary_page_offset() const {
- return impl_->dictionary_page_offset();
-}
-
-int64_t ColumnChunkMetaData::data_page_offset() const {
- return impl_->data_page_offset();
-}
-
-bool ColumnChunkMetaData::has_index_page() const { return impl_->has_index_page(); }
-
-int64_t ColumnChunkMetaData::index_page_offset() const {
- return impl_->index_page_offset();
-}
-
-Compression::type ColumnChunkMetaData::compression() const {
- return impl_->compression();
-}
-
-bool ColumnChunkMetaData::can_decompress() const {
- return ::arrow::util::Codec::IsAvailable(compression());
-}
-
-const std::vector<Encoding::type>& ColumnChunkMetaData::encodings() const {
- return impl_->encodings();
-}
-
-const std::vector<PageEncodingStats>& ColumnChunkMetaData::encoding_stats() const {
- return impl_->encoding_stats();
-}
-
-int64_t ColumnChunkMetaData::total_uncompressed_size() const {
- return impl_->total_uncompressed_size();
-}
-
-int64_t ColumnChunkMetaData::total_compressed_size() const {
- return impl_->total_compressed_size();
-}
-
-std::unique_ptr<ColumnCryptoMetaData> ColumnChunkMetaData::crypto_metadata() const {
- return impl_->crypto_metadata();
-}
-
-bool ColumnChunkMetaData::Equals(const ColumnChunkMetaData& other) const {
- return impl_->Equals(*other.impl_);
-}
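-
-// A minimal usage sketch (the FileMetaData argument is hypothetical, and the
-// file is assumed to have at least one row group and one column) chaining the
-// accessors above: row group -> column chunk -> statistics.
-inline void InspectFirstColumnChunk(const FileMetaData& file_metadata) {
-  std::unique_ptr<RowGroupMetaData> row_group = file_metadata.RowGroup(0);
-  std::unique_ptr<ColumnChunkMetaData> chunk = row_group->ColumnChunk(0);
-  if (chunk->is_stats_set()) {
-    DCHECK(chunk->statistics() != nullptr);
-  }
-}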
-
-// row-group metadata
-class RowGroupMetaData::RowGroupMetaDataImpl {
- public:
- explicit RowGroupMetaDataImpl(const format::RowGroup* row_group,
- const SchemaDescriptor* schema,
- const ApplicationVersion* writer_version,
- std::shared_ptr<InternalFileDecryptor> file_decryptor)
- : row_group_(row_group),
- schema_(schema),
- writer_version_(writer_version),
- file_decryptor_(std::move(file_decryptor)) {}
-
- bool Equals(const RowGroupMetaDataImpl& other) const {
- return *row_group_ == *other.row_group_;
- }
-
- inline int num_columns() const { return static_cast<int>(row_group_->columns.size()); }
-
- inline int64_t num_rows() const { return row_group_->num_rows; }
-
- inline int64_t total_byte_size() const { return row_group_->total_byte_size; }
-
- inline int64_t total_compressed_size() const {
- return row_group_->total_compressed_size;
- }
-
- inline int64_t file_offset() const { return row_group_->file_offset; }
-
- inline const SchemaDescriptor* schema() const { return schema_; }
-
- std::unique_ptr<ColumnChunkMetaData> ColumnChunk(int i) {
- if (i < num_columns()) {
- return ColumnChunkMetaData::Make(&row_group_->columns[i], schema_->Column(i),
- writer_version_, row_group_->ordinal,
- static_cast<int16_t>(i), file_decryptor_);
- }
- throw ParquetException("The file only has ", num_columns(),
- " columns, requested metadata for column: ", i);
- }
-
- private:
- const format::RowGroup* row_group_;
- const SchemaDescriptor* schema_;
- const ApplicationVersion* writer_version_;
- std::shared_ptr<InternalFileDecryptor> file_decryptor_;
-};
-
-std::unique_ptr<RowGroupMetaData> RowGroupMetaData::Make(
- const void* metadata, const SchemaDescriptor* schema,
- const ApplicationVersion* writer_version,
- std::shared_ptr<InternalFileDecryptor> file_decryptor) {
- return std::unique_ptr<RowGroupMetaData>(
- new RowGroupMetaData(metadata, schema, writer_version, std::move(file_decryptor)));
-}
-
-RowGroupMetaData::RowGroupMetaData(const void* metadata, const SchemaDescriptor* schema,
- const ApplicationVersion* writer_version,
- std::shared_ptr<InternalFileDecryptor> file_decryptor)
- : impl_{new RowGroupMetaDataImpl(reinterpret_cast<const format::RowGroup*>(metadata),
- schema, writer_version, std::move(file_decryptor))} {
-}
-
-RowGroupMetaData::~RowGroupMetaData() = default;
-
-bool RowGroupMetaData::Equals(const RowGroupMetaData& other) const {
- return impl_->Equals(*other.impl_);
-}
-
-int RowGroupMetaData::num_columns() const { return impl_->num_columns(); }
-
-int64_t RowGroupMetaData::num_rows() const { return impl_->num_rows(); }
-
-int64_t RowGroupMetaData::total_byte_size() const { return impl_->total_byte_size(); }
-
-int64_t RowGroupMetaData::total_compressed_size() const {
- return impl_->total_compressed_size();
-}
-
-int64_t RowGroupMetaData::file_offset() const { return impl_->file_offset(); }
-
-const SchemaDescriptor* RowGroupMetaData::schema() const { return impl_->schema(); }
-
-std::unique_ptr<ColumnChunkMetaData> RowGroupMetaData::ColumnChunk(int i) const {
- return impl_->ColumnChunk(i);
-}
-
-bool RowGroupMetaData::can_decompress() const {
- int n_columns = num_columns();
- for (int i = 0; i < n_columns; i++) {
- if (!ColumnChunk(i)->can_decompress()) {
- return false;
- }
- }
- return true;
-}
-
-// file metadata
-class FileMetaData::FileMetaDataImpl {
- public:
- FileMetaDataImpl() = default;
-
- explicit FileMetaDataImpl(
- const void* metadata, uint32_t* metadata_len,
- std::shared_ptr<InternalFileDecryptor> file_decryptor = nullptr)
- : file_decryptor_(file_decryptor) {
- metadata_.reset(new format::FileMetaData);
-
- auto footer_decryptor =
- file_decryptor_ != nullptr ? file_decryptor->GetFooterDecryptor() : nullptr;
-
- DeserializeThriftMsg(reinterpret_cast<const uint8_t*>(metadata), metadata_len,
- metadata_.get(), footer_decryptor);
- metadata_len_ = *metadata_len;
-
- if (metadata_->__isset.created_by) {
- writer_version_ = ApplicationVersion(metadata_->created_by);
- } else {
- writer_version_ = ApplicationVersion("unknown 0.0.0");
- }
-
- InitSchema();
- InitColumnOrders();
- InitKeyValueMetadata();
- }
-
- bool VerifySignature(const void* signature) {
- // verify decryption properties are set
- if (file_decryptor_ == nullptr) {
- throw ParquetException("Decryption not set properly. cannot verify signature");
- }
- // serialize the footer
- uint8_t* serialized_data;
- uint32_t serialized_len = metadata_len_;
- ThriftSerializer serializer;
- serializer.SerializeToBuffer(metadata_.get(), &serialized_len, &serialized_data);
-
- // encrypt with nonce
- auto nonce = const_cast<uint8_t*>(reinterpret_cast<const uint8_t*>(signature));
- auto tag = const_cast<uint8_t*>(reinterpret_cast<const uint8_t*>(signature)) +
- encryption::kNonceLength;
-
- std::string key = file_decryptor_->GetFooterKey();
- std::string aad = encryption::CreateFooterAad(file_decryptor_->file_aad());
-
- auto aes_encryptor = encryption::AesEncryptor::Make(
- file_decryptor_->algorithm(), static_cast<int>(key.size()), true, nullptr);
-
- std::shared_ptr<Buffer> encrypted_buffer = std::static_pointer_cast<ResizableBuffer>(
- AllocateBuffer(file_decryptor_->pool(),
- aes_encryptor->CiphertextSizeDelta() + serialized_len));
- uint32_t encrypted_len = aes_encryptor->SignedFooterEncrypt(
- serialized_data, serialized_len, str2bytes(key), static_cast<int>(key.size()),
- str2bytes(aad), static_cast<int>(aad.size()), nonce,
- encrypted_buffer->mutable_data());
- // Delete AES encryptor object. It was created only to verify the footer signature.
- aes_encryptor->WipeOut();
- delete aes_encryptor;
- return 0 ==
- memcmp(encrypted_buffer->data() + encrypted_len - encryption::kGcmTagLength,
- tag, encryption::kGcmTagLength);
- }
-
- inline uint32_t size() const { return metadata_len_; }
- inline int num_columns() const { return schema_.num_columns(); }
- inline int64_t num_rows() const { return metadata_->num_rows; }
- inline int num_row_groups() const {
- return static_cast<int>(metadata_->row_groups.size());
- }
- inline int32_t version() const { return metadata_->version; }
- inline const std::string& created_by() const { return metadata_->created_by; }
- inline int num_schema_elements() const {
- return static_cast<int>(metadata_->schema.size());
- }
-
- inline bool is_encryption_algorithm_set() const {
- return metadata_->__isset.encryption_algorithm;
- }
- inline EncryptionAlgorithm encryption_algorithm() {
- return FromThrift(metadata_->encryption_algorithm);
- }
- inline const std::string& footer_signing_key_metadata() {
- return metadata_->footer_signing_key_metadata;
- }
-
- const ApplicationVersion& writer_version() const { return writer_version_; }
-
- void WriteTo(::arrow::io::OutputStream* dst,
- const std::shared_ptr<Encryptor>& encryptor) const {
- ThriftSerializer serializer;
-    // The encryption_algorithm is only set in the footer of encrypted
-    // files that have plaintext footers
- if (is_encryption_algorithm_set()) {
- uint8_t* serialized_data;
- uint32_t serialized_len;
- serializer.SerializeToBuffer(metadata_.get(), &serialized_len, &serialized_data);
-
-      // encrypt the serialized footer to produce the signature
- std::vector<uint8_t> encrypted_data(encryptor->CiphertextSizeDelta() +
- serialized_len);
- unsigned encrypted_len =
- encryptor->Encrypt(serialized_data, serialized_len, encrypted_data.data());
-
- // write unencrypted footer
- PARQUET_THROW_NOT_OK(dst->Write(serialized_data, serialized_len));
- // Write signature (nonce and tag)
- PARQUET_THROW_NOT_OK(
- dst->Write(encrypted_data.data() + 4, encryption::kNonceLength));
- PARQUET_THROW_NOT_OK(
- dst->Write(encrypted_data.data() + encrypted_len - encryption::kGcmTagLength,
- encryption::kGcmTagLength));
- } else { // either plaintext file (when encryptor is null)
- // or encrypted file with encrypted footer
- serializer.Serialize(metadata_.get(), dst, encryptor);
- }
- }
-
- std::unique_ptr<RowGroupMetaData> RowGroup(int i) {
- if (!(i < num_row_groups())) {
- std::stringstream ss;
- ss << "The file only has " << num_row_groups()
- << " row groups, requested metadata for row group: " << i;
- throw ParquetException(ss.str());
- }
- return RowGroupMetaData::Make(&metadata_->row_groups[i], &schema_, &writer_version_,
- file_decryptor_);
- }
-
- bool Equals(const FileMetaDataImpl& other) const {
- return *metadata_ == *other.metadata_;
- }
-
- const SchemaDescriptor* schema() const { return &schema_; }
-
- const std::shared_ptr<const KeyValueMetadata>& key_value_metadata() const {
- return key_value_metadata_;
- }
-
- void set_file_path(const std::string& path) {
- for (format::RowGroup& row_group : metadata_->row_groups) {
- for (format::ColumnChunk& chunk : row_group.columns) {
- chunk.__set_file_path(path);
- }
- }
- }
-
- format::RowGroup& row_group(int i) {
- DCHECK_LT(i, num_row_groups());
- return metadata_->row_groups[i];
- }
-
- void AppendRowGroups(const std::unique_ptr<FileMetaDataImpl>& other) {
- if (!schema()->Equals(*other->schema())) {
- throw ParquetException("AppendRowGroups requires equal schemas.");
- }
-
- format::RowGroup other_rg;
- for (int i = 0; i < other->num_row_groups(); i++) {
- other_rg = other->row_group(i);
- metadata_->row_groups.push_back(other_rg);
- metadata_->num_rows += other_rg.num_rows;
- }
- }
-
- std::shared_ptr<FileMetaData> Subset(const std::vector<int>& row_groups) {
- for (int i : row_groups) {
- if (i < num_row_groups()) continue;
-
- throw ParquetException(
- "The file only has ", num_row_groups(),
- " row groups, but requested a subset including row group: ", i);
- }
-
- std::shared_ptr<FileMetaData> out(new FileMetaData());
- out->impl_.reset(new FileMetaDataImpl());
- out->impl_->metadata_.reset(new format::FileMetaData());
-
- auto metadata = out->impl_->metadata_.get();
- metadata->version = metadata_->version;
- metadata->schema = metadata_->schema;
-
- metadata->row_groups.resize(row_groups.size());
- int i = 0;
- for (int selected_index : row_groups) {
- metadata->num_rows += row_group(selected_index).num_rows;
- metadata->row_groups[i++] = row_group(selected_index);
- }
-
- metadata->key_value_metadata = metadata_->key_value_metadata;
- metadata->created_by = metadata_->created_by;
- metadata->column_orders = metadata_->column_orders;
- metadata->encryption_algorithm = metadata_->encryption_algorithm;
- metadata->footer_signing_key_metadata = metadata_->footer_signing_key_metadata;
- metadata->__isset = metadata_->__isset;
-
- out->impl_->schema_ = schema_;
- out->impl_->writer_version_ = writer_version_;
- out->impl_->key_value_metadata_ = key_value_metadata_;
- out->impl_->file_decryptor_ = file_decryptor_;
-
- return out;
- }
-
- void set_file_decryptor(std::shared_ptr<InternalFileDecryptor> file_decryptor) {
- file_decryptor_ = file_decryptor;
- }
-
- private:
- friend FileMetaDataBuilder;
- uint32_t metadata_len_ = 0;
- std::unique_ptr<format::FileMetaData> metadata_;
- SchemaDescriptor schema_;
- ApplicationVersion writer_version_;
- std::shared_ptr<const KeyValueMetadata> key_value_metadata_;
- std::shared_ptr<InternalFileDecryptor> file_decryptor_;
-
- void InitSchema() {
- if (metadata_->schema.empty()) {
- throw ParquetException("Empty file schema (no root)");
- }
- schema_.Init(schema::Unflatten(&metadata_->schema[0],
- static_cast<int>(metadata_->schema.size())));
- }
-
- void InitColumnOrders() {
- // update ColumnOrder
- std::vector<parquet::ColumnOrder> column_orders;
- if (metadata_->__isset.column_orders) {
- for (auto column_order : metadata_->column_orders) {
- if (column_order.__isset.TYPE_ORDER) {
- column_orders.push_back(ColumnOrder::type_defined_);
- } else {
- column_orders.push_back(ColumnOrder::undefined_);
- }
- }
- } else {
- column_orders.resize(schema_.num_columns(), ColumnOrder::undefined_);
- }
-
- schema_.updateColumnOrders(column_orders);
- }
-
- void InitKeyValueMetadata() {
- std::shared_ptr<KeyValueMetadata> metadata = nullptr;
- if (metadata_->__isset.key_value_metadata) {
- metadata = std::make_shared<KeyValueMetadata>();
- for (const auto& it : metadata_->key_value_metadata) {
- metadata->Append(it.key, it.value);
- }
- }
- key_value_metadata_ = std::move(metadata);
- }
-};
-
-std::shared_ptr<FileMetaData> FileMetaData::Make(
- const void* metadata, uint32_t* metadata_len,
- std::shared_ptr<InternalFileDecryptor> file_decryptor) {
- // This FileMetaData ctor is private, not compatible with std::make_shared
- return std::shared_ptr<FileMetaData>(
- new FileMetaData(metadata, metadata_len, file_decryptor));
-}
-
-FileMetaData::FileMetaData(const void* metadata, uint32_t* metadata_len,
- std::shared_ptr<InternalFileDecryptor> file_decryptor)
- : impl_{std::unique_ptr<FileMetaDataImpl>(
- new FileMetaDataImpl(metadata, metadata_len, file_decryptor))} {}
-
-FileMetaData::FileMetaData()
- : impl_{std::unique_ptr<FileMetaDataImpl>(new FileMetaDataImpl())} {}
-
-FileMetaData::~FileMetaData() = default;
-
-bool FileMetaData::Equals(const FileMetaData& other) const {
- return impl_->Equals(*other.impl_);
-}
-
-std::unique_ptr<RowGroupMetaData> FileMetaData::RowGroup(int i) const {
- return impl_->RowGroup(i);
-}
-
-bool FileMetaData::VerifySignature(const void* signature) {
- return impl_->VerifySignature(signature);
-}
-
-uint32_t FileMetaData::size() const { return impl_->size(); }
-
-int FileMetaData::num_columns() const { return impl_->num_columns(); }
-
-int64_t FileMetaData::num_rows() const { return impl_->num_rows(); }
-
-int FileMetaData::num_row_groups() const { return impl_->num_row_groups(); }
-
-bool FileMetaData::can_decompress() const {
- int n_row_groups = num_row_groups();
- for (int i = 0; i < n_row_groups; i++) {
- if (!RowGroup(i)->can_decompress()) {
- return false;
- }
- }
- return true;
-}
-
-bool FileMetaData::is_encryption_algorithm_set() const {
- return impl_->is_encryption_algorithm_set();
-}
-
-EncryptionAlgorithm FileMetaData::encryption_algorithm() const {
- return impl_->encryption_algorithm();
-}
-
-const std::string& FileMetaData::footer_signing_key_metadata() const {
- return impl_->footer_signing_key_metadata();
-}
-
-void FileMetaData::set_file_decryptor(
- std::shared_ptr<InternalFileDecryptor> file_decryptor) {
- impl_->set_file_decryptor(file_decryptor);
-}
-
-ParquetVersion::type FileMetaData::version() const {
- switch (impl_->version()) {
- case 1:
- return ParquetVersion::PARQUET_1_0;
- case 2:
- return ParquetVersion::PARQUET_2_0;
- default:
- // Improperly set version, assuming Parquet 1.0
- break;
- }
- return ParquetVersion::PARQUET_1_0;
-}
-
-const ApplicationVersion& FileMetaData::writer_version() const {
- return impl_->writer_version();
-}
-
-const std::string& FileMetaData::created_by() const { return impl_->created_by(); }
-
-int FileMetaData::num_schema_elements() const { return impl_->num_schema_elements(); }
-
-const SchemaDescriptor* FileMetaData::schema() const { return impl_->schema(); }
-
-const std::shared_ptr<const KeyValueMetadata>& FileMetaData::key_value_metadata() const {
- return impl_->key_value_metadata();
-}
-
-void FileMetaData::set_file_path(const std::string& path) { impl_->set_file_path(path); }
-
-void FileMetaData::AppendRowGroups(const FileMetaData& other) {
- impl_->AppendRowGroups(other.impl_);
-}
-
-std::shared_ptr<FileMetaData> FileMetaData::Subset(
- const std::vector<int>& row_groups) const {
- return impl_->Subset(row_groups);
-}
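-
-// A short sketch (hypothetical metadata, assumed to have at least three row
-// groups) of the subset helper above: only the listed row groups are
-// retained, and num_rows is recomputed from them.
-inline std::shared_ptr<FileMetaData> SubsetExample(
-    const std::shared_ptr<FileMetaData>& metadata) {
-  std::shared_ptr<FileMetaData> subset = metadata->Subset({0, 2});
-  DCHECK_EQ(subset->num_row_groups(), 2);
-  return subset;
-}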
-
-void FileMetaData::WriteTo(::arrow::io::OutputStream* dst,
- const std::shared_ptr<Encryptor>& encryptor) const {
- return impl_->WriteTo(dst, encryptor);
-}
-
-class FileCryptoMetaData::FileCryptoMetaDataImpl {
- public:
- FileCryptoMetaDataImpl() = default;
-
- explicit FileCryptoMetaDataImpl(const uint8_t* metadata, uint32_t* metadata_len) {
- metadata_.reset(new format::FileCryptoMetaData);
- DeserializeThriftMsg(metadata, metadata_len, metadata_.get());
- metadata_len_ = *metadata_len;
- }
-
- EncryptionAlgorithm encryption_algorithm() {
- return FromThrift(metadata_->encryption_algorithm);
- }
- const std::string& key_metadata() { return metadata_->key_metadata; }
- void WriteTo(::arrow::io::OutputStream* dst) const {
- ThriftSerializer serializer;
- serializer.Serialize(metadata_.get(), dst);
- }
-
- private:
- friend FileMetaDataBuilder;
- std::unique_ptr<format::FileCryptoMetaData> metadata_;
- uint32_t metadata_len_;
-};
-
-EncryptionAlgorithm FileCryptoMetaData::encryption_algorithm() const {
- return impl_->encryption_algorithm();
-}
-
-const std::string& FileCryptoMetaData::key_metadata() const {
- return impl_->key_metadata();
-}
-
-std::shared_ptr<FileCryptoMetaData> FileCryptoMetaData::Make(
- const uint8_t* serialized_metadata, uint32_t* metadata_len) {
- return std::shared_ptr<FileCryptoMetaData>(
- new FileCryptoMetaData(serialized_metadata, metadata_len));
-}
-
-FileCryptoMetaData::FileCryptoMetaData(const uint8_t* serialized_metadata,
- uint32_t* metadata_len)
- : impl_(new FileCryptoMetaDataImpl(serialized_metadata, metadata_len)) {}
-
-FileCryptoMetaData::FileCryptoMetaData() : impl_(new FileCryptoMetaDataImpl()) {}
-
-FileCryptoMetaData::~FileCryptoMetaData() = default;
-
-void FileCryptoMetaData::WriteTo(::arrow::io::OutputStream* dst) const {
- impl_->WriteTo(dst);
-}
-
-std::string FileMetaData::SerializeToString() const {
-  // We need to pass an initial size to Create(); the buffer grows
-  // automatically to hold the metadata, so 0 suffices.
- PARQUET_ASSIGN_OR_THROW(auto serializer, ::arrow::io::BufferOutputStream::Create(0));
- WriteTo(serializer.get());
- PARQUET_ASSIGN_OR_THROW(auto metadata_buffer, serializer->Finish());
- return metadata_buffer->ToString();
-}
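-
-// A minimal round-trip sketch (illustrative only): the serialized footer
-// bytes produced above can be re-parsed with FileMetaData::Make.
-inline std::shared_ptr<FileMetaData> RoundTripExample(const FileMetaData& metadata) {
-  std::string serialized = metadata.SerializeToString();
-  uint32_t metadata_len = static_cast<uint32_t>(serialized.size());
-  return FileMetaData::Make(serialized.data(), &metadata_len,
-                            /*file_decryptor=*/nullptr);
-}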
-
-ApplicationVersion::ApplicationVersion(std::string application, int major, int minor,
- int patch)
- : application_(std::move(application)), version{major, minor, patch, "", "", ""} {}
-
-namespace {
-// Parse the application version format and set parsed values to
-// ApplicationVersion.
-//
-// The application version format must be compatible parquet-mr's
-// one. See also:
-// * https://github.com/apache/parquet-mr/blob/master/parquet-common/src/main/java/org/apache/parquet/VersionParser.java
-// * https://github.com/apache/parquet-mr/blob/master/parquet-common/src/main/java/org/apache/parquet/SemanticVersion.java
-//
-// The application version format:
-// "${APPLICATION_NAME}"
-// "${APPLICATION_NAME} version ${VERSION}"
-// "${APPLICATION_NAME} version ${VERSION} (build ${BUILD_NAME})"
-//
-// Eg:
-// parquet-cpp
-// parquet-cpp version 1.5.0ab-xyz5.5.0+cd
-// parquet-cpp version 1.5.0ab-xyz5.5.0+cd (build abcd)
-//
-// The VERSION format:
-// "${MAJOR}"
-// "${MAJOR}.${MINOR}"
-// "${MAJOR}.${MINOR}.${PATCH}"
-// "${MAJOR}.${MINOR}.${PATCH}${UNKNOWN}"
-// "${MAJOR}.${MINOR}.${PATCH}${UNKNOWN}-${PRE_RELEASE}"
-// "${MAJOR}.${MINOR}.${PATCH}${UNKNOWN}-${PRE_RELEASE}+${BUILD_INFO}"
-// "${MAJOR}.${MINOR}.${PATCH}${UNKNOWN}+${BUILD_INFO}"
-// "${MAJOR}.${MINOR}.${PATCH}-${PRE_RELEASE}"
-// "${MAJOR}.${MINOR}.${PATCH}-${PRE_RELEASE}+${BUILD_INFO}"
-// "${MAJOR}.${MINOR}.${PATCH}+${BUILD_INFO}"
-//
-// Eg:
-// 1
-// 1.5
-// 1.5.0
-// 1.5.0ab
-// 1.5.0ab-cdh5.5.0
-// 1.5.0ab-cdh5.5.0+cd
-// 1.5.0ab+cd
-// 1.5.0-cdh5.5.0
-// 1.5.0-cdh5.5.0+cd
-// 1.5.0+cd
-class ApplicationVersionParser {
- public:
- ApplicationVersionParser(const std::string& created_by,
- ApplicationVersion& application_version)
- : created_by_(created_by),
- application_version_(application_version),
- spaces_(" \t\v\r\n\f"),
- digits_("0123456789") {}
-
- void Parse() {
- application_version_.application_ = "unknown";
- application_version_.version = {0, 0, 0, "", "", ""};
-
- if (!ParseApplicationName()) {
- return;
- }
- if (!ParseVersion()) {
- return;
- }
- if (!ParseBuildName()) {
- return;
- }
- }
-
- private:
- bool IsSpace(const std::string& string, const size_t& offset) {
- auto target = ::arrow::util::string_view(string).substr(offset, 1);
- return target.find_first_of(spaces_) != ::arrow::util::string_view::npos;
- }
-
- void RemovePrecedingSpaces(const std::string& string, size_t& start,
- const size_t& end) {
- while (start < end && IsSpace(string, start)) {
- ++start;
- }
- }
-
- void RemoveTrailingSpaces(const std::string& string, const size_t& start, size_t& end) {
- while (start < (end - 1) && (end - 1) < string.size() && IsSpace(string, end - 1)) {
- --end;
- }
- }
-
- bool ParseApplicationName() {
- std::string version_mark(" version ");
- auto version_mark_position = created_by_.find(version_mark);
- size_t application_name_end;
- // No VERSION and BUILD_NAME.
- if (version_mark_position == std::string::npos) {
- version_start_ = std::string::npos;
- application_name_end = created_by_.size();
- } else {
- version_start_ = version_mark_position + version_mark.size();
- application_name_end = version_mark_position;
- }
-
- size_t application_name_start = 0;
- RemovePrecedingSpaces(created_by_, application_name_start, application_name_end);
- RemoveTrailingSpaces(created_by_, application_name_start, application_name_end);
- application_version_.application_ = created_by_.substr(
- application_name_start, application_name_end - application_name_start);
-
- return true;
- }
-
- bool ParseVersion() {
- // No VERSION.
- if (version_start_ == std::string::npos) {
- return false;
- }
-
- RemovePrecedingSpaces(created_by_, version_start_, created_by_.size());
- version_end_ = created_by_.find(" (", version_start_);
- // No BUILD_NAME.
- if (version_end_ == std::string::npos) {
- version_end_ = created_by_.size();
- }
- RemoveTrailingSpaces(created_by_, version_start_, version_end_);
- // No VERSION.
- if (version_start_ == version_end_) {
- return false;
- }
- version_string_ = created_by_.substr(version_start_, version_end_ - version_start_);
-
- if (!ParseVersionMajor()) {
- return false;
- }
- if (!ParseVersionMinor()) {
- return false;
- }
- if (!ParseVersionPatch()) {
- return false;
- }
- if (!ParseVersionUnknown()) {
- return false;
- }
- if (!ParseVersionPreRelease()) {
- return false;
- }
- if (!ParseVersionBuildInfo()) {
- return false;
- }
-
- return true;
- }
-
- bool ParseVersionMajor() {
- size_t version_major_start = 0;
- auto version_major_end = version_string_.find_first_not_of(digits_);
- // MAJOR only.
- if (version_major_end == std::string::npos) {
- version_major_end = version_string_.size();
- version_parsing_position_ = version_major_end;
- } else {
- // No ".".
- if (version_string_[version_major_end] != '.') {
- return false;
- }
- // No MAJOR.
- if (version_major_end == version_major_start) {
- return false;
- }
- version_parsing_position_ = version_major_end + 1; // +1 is for '.'.
- }
- auto version_major_string = version_string_.substr(
- version_major_start, version_major_end - version_major_start);
- application_version_.version.major = atoi(version_major_string.c_str());
- return true;
- }
-
- bool ParseVersionMinor() {
- auto version_minor_start = version_parsing_position_;
- auto version_minor_end =
- version_string_.find_first_not_of(digits_, version_minor_start);
- // MAJOR.MINOR only.
- if (version_minor_end == std::string::npos) {
- version_minor_end = version_string_.size();
- version_parsing_position_ = version_minor_end;
- } else {
- // No ".".
- if (version_string_[version_minor_end] != '.') {
- return false;
- }
- // No MINOR.
- if (version_minor_end == version_minor_start) {
- return false;
- }
- version_parsing_position_ = version_minor_end + 1; // +1 is for '.'.
- }
- auto version_minor_string = version_string_.substr(
- version_minor_start, version_minor_end - version_minor_start);
- application_version_.version.minor = atoi(version_minor_string.c_str());
- return true;
- }
-
- bool ParseVersionPatch() {
- auto version_patch_start = version_parsing_position_;
- auto version_patch_end =
- version_string_.find_first_not_of(digits_, version_patch_start);
- // No UNKNOWN, PRE_RELEASE and BUILD_INFO.
- if (version_patch_end == std::string::npos) {
- version_patch_end = version_string_.size();
- }
- // No PATCH.
- if (version_patch_end == version_patch_start) {
- return false;
- }
- auto version_patch_string = version_string_.substr(
- version_patch_start, version_patch_end - version_patch_start);
- application_version_.version.patch = atoi(version_patch_string.c_str());
- version_parsing_position_ = version_patch_end;
- return true;
- }
-
- bool ParseVersionUnknown() {
- // No UNKNOWN.
- if (version_parsing_position_ == version_string_.size()) {
- return true;
- }
- auto version_unknown_start = version_parsing_position_;
- auto version_unknown_end = version_string_.find_first_of("-+", version_unknown_start);
- // No PRE_RELEASE and BUILD_INFO
- if (version_unknown_end == std::string::npos) {
- version_unknown_end = version_string_.size();
- }
- application_version_.version.unknown = version_string_.substr(
- version_unknown_start, version_unknown_end - version_unknown_start);
- version_parsing_position_ = version_unknown_end;
- return true;
- }
-
- bool ParseVersionPreRelease() {
- // No PRE_RELEASE.
- if (version_parsing_position_ == version_string_.size() ||
- version_string_[version_parsing_position_] != '-') {
- return true;
- }
-
- auto version_pre_release_start = version_parsing_position_ + 1; // +1 is for '-'.
- auto version_pre_release_end =
- version_string_.find_first_of("+", version_pre_release_start);
- // No BUILD_INFO
- if (version_pre_release_end == std::string::npos) {
- version_pre_release_end = version_string_.size();
- }
- application_version_.version.pre_release = version_string_.substr(
- version_pre_release_start, version_pre_release_end - version_pre_release_start);
- version_parsing_position_ = version_pre_release_end;
- return true;
- }
-
- bool ParseVersionBuildInfo() {
- // No BUILD_INFO.
- if (version_parsing_position_ == version_string_.size() ||
- version_string_[version_parsing_position_] != '+') {
- return true;
- }
-
- auto version_build_info_start = version_parsing_position_ + 1; // +1 is for '+'.
- application_version_.version.build_info =
- version_string_.substr(version_build_info_start);
- return true;
- }
-
- bool ParseBuildName() {
- std::string build_mark(" (build ");
- auto build_mark_position = created_by_.find(build_mark, version_end_);
- // No BUILD_NAME.
- if (build_mark_position == std::string::npos) {
- return false;
- }
- auto build_name_start = build_mark_position + build_mark.size();
- RemovePrecedingSpaces(created_by_, build_name_start, created_by_.size());
- auto build_name_end = created_by_.find_first_of(")", build_name_start);
- // No end ")".
- if (build_name_end == std::string::npos) {
- return false;
- }
- RemoveTrailingSpaces(created_by_, build_name_start, build_name_end);
- application_version_.build_ =
- created_by_.substr(build_name_start, build_name_end - build_name_start);
-
- return true;
- }
-
- const std::string& created_by_;
- ApplicationVersion& application_version_;
-
- // For parsing.
- std::string spaces_;
- std::string digits_;
- size_t version_parsing_position_;
- size_t version_start_;
- size_t version_end_;
- std::string version_string_;
-};
-} // namespace
-
-ApplicationVersion::ApplicationVersion(const std::string& created_by) {
- ApplicationVersionParser parser(created_by, *this);
- parser.Parse();
-}
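-
-// A worked example (input taken from the format comment above): parsing
-// "parquet-cpp version 1.5.0ab-cdh5.5.0+cd (build abcd)" yields application
-// "parquet-cpp", version 1.5.0, unknown "ab", pre_release "cdh5.5.0",
-// build_info "cd", and build name "abcd".
-inline void ApplicationVersionParseExample() {
-  ApplicationVersion version("parquet-cpp version 1.5.0ab-cdh5.5.0+cd (build abcd)");
-  DCHECK_EQ(version.version.major, 1);
-  DCHECK_EQ(version.version.minor, 5);
-  DCHECK_EQ(version.version.patch, 0);
-}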
-
-bool ApplicationVersion::VersionLt(const ApplicationVersion& other_version) const {
- if (application_ != other_version.application_) return false;
-
- if (version.major < other_version.version.major) return true;
- if (version.major > other_version.version.major) return false;
- DCHECK_EQ(version.major, other_version.version.major);
- if (version.minor < other_version.version.minor) return true;
- if (version.minor > other_version.version.minor) return false;
- DCHECK_EQ(version.minor, other_version.version.minor);
- return version.patch < other_version.version.patch;
-}
-
-bool ApplicationVersion::VersionEq(const ApplicationVersion& other_version) const {
- return application_ == other_version.application_ &&
- version.major == other_version.version.major &&
- version.minor == other_version.version.minor &&
- version.patch == other_version.version.patch;
-}
-
-// Reference:
-// parquet-mr/parquet-column/src/main/java/org/apache/parquet/CorruptStatistics.java
-// PARQUET-686 has more discussion on statistics
-bool ApplicationVersion::HasCorrectStatistics(Type::type col_type,
- EncodedStatistics& statistics,
- SortOrder::type sort_order) const {
- // parquet-cpp version 1.3.0 and parquet-mr 1.10.0 onwards stats are computed
- // correctly for all types
- if ((application_ == "parquet-cpp" && VersionLt(PARQUET_CPP_FIXED_STATS_VERSION())) ||
- (application_ == "parquet-mr" && VersionLt(PARQUET_MR_FIXED_STATS_VERSION()))) {
-    // Only the SIGNED sort order is valid, unless max and min are equal
-    // (in which case the sort order does not matter)
- bool max_equals_min = statistics.has_min && statistics.has_max
- ? statistics.min() == statistics.max()
- : false;
- if (SortOrder::SIGNED != sort_order && !max_equals_min) {
- return false;
- }
-
- // Statistics of other types are OK
- if (col_type != Type::FIXED_LEN_BYTE_ARRAY && col_type != Type::BYTE_ARRAY) {
- return true;
- }
- }
-  // created_by may be unpopulated; parquet-mr produced such files around
-  // the time of PARQUET-251, see PARQUET-297
- if (application_ == "unknown") {
- return true;
- }
-
- // Unknown sort order has incorrect stats
- if (SortOrder::UNKNOWN == sort_order) {
- return false;
- }
-
- // PARQUET-251
- if (VersionLt(PARQUET_251_FIXED_VERSION())) {
- return false;
- }
-
- return true;
-}
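-
-// A concrete illustration of the rules above (hypothetical writer string):
-// parquet-mr 1.7.0 predates both the PARQUET-251 fix (1.8.0) and the stats
-// fix (1.10.0), so its BYTE_ARRAY statistics are rejected while its INT32
-// statistics with SIGNED sort order remain usable.
-inline void HasCorrectStatisticsExample() {
-  ApplicationVersion writer("parquet-mr version 1.7.0");
-  EncodedStatistics stats;
-  DCHECK(writer.HasCorrectStatistics(Type::INT32, stats, SortOrder::SIGNED));
-  DCHECK(!writer.HasCorrectStatistics(Type::BYTE_ARRAY, stats, SortOrder::SIGNED));
-}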
-
-// MetaData Builders
-// row-group metadata
-class ColumnChunkMetaDataBuilder::ColumnChunkMetaDataBuilderImpl {
- public:
- explicit ColumnChunkMetaDataBuilderImpl(std::shared_ptr<WriterProperties> props,
- const ColumnDescriptor* column)
- : owned_column_chunk_(new format::ColumnChunk),
- properties_(std::move(props)),
- column_(column) {
- Init(owned_column_chunk_.get());
- }
-
- explicit ColumnChunkMetaDataBuilderImpl(std::shared_ptr<WriterProperties> props,
- const ColumnDescriptor* column,
- format::ColumnChunk* column_chunk)
- : properties_(std::move(props)), column_(column) {
- Init(column_chunk);
- }
-
- const void* contents() const { return column_chunk_; }
-
- // column chunk
- void set_file_path(const std::string& val) { column_chunk_->__set_file_path(val); }
-
- // column metadata
- void SetStatistics(const EncodedStatistics& val) {
- column_chunk_->meta_data.__set_statistics(ToThrift(val));
- }
-
- void Finish(int64_t num_values, int64_t dictionary_page_offset,
- int64_t index_page_offset, int64_t data_page_offset,
- int64_t compressed_size, int64_t uncompressed_size, bool has_dictionary,
- bool dictionary_fallback,
- const std::map<Encoding::type, int32_t>& dict_encoding_stats,
- const std::map<Encoding::type, int32_t>& data_encoding_stats,
- const std::shared_ptr<Encryptor>& encryptor) {
- if (dictionary_page_offset > 0) {
- column_chunk_->meta_data.__set_dictionary_page_offset(dictionary_page_offset);
- column_chunk_->__set_file_offset(dictionary_page_offset + compressed_size);
- } else {
- column_chunk_->__set_file_offset(data_page_offset + compressed_size);
- }
- column_chunk_->__isset.meta_data = true;
- column_chunk_->meta_data.__set_num_values(num_values);
- if (index_page_offset >= 0) {
- column_chunk_->meta_data.__set_index_page_offset(index_page_offset);
- }
- column_chunk_->meta_data.__set_data_page_offset(data_page_offset);
- column_chunk_->meta_data.__set_total_uncompressed_size(uncompressed_size);
- column_chunk_->meta_data.__set_total_compressed_size(compressed_size);
-
- std::vector<format::Encoding::type> thrift_encodings;
- if (has_dictionary) {
- thrift_encodings.push_back(ToThrift(properties_->dictionary_index_encoding()));
- if (properties_->version() == ParquetVersion::PARQUET_1_0) {
- thrift_encodings.push_back(ToThrift(Encoding::PLAIN));
- } else {
- thrift_encodings.push_back(ToThrift(properties_->dictionary_page_encoding()));
- }
- } else { // Dictionary not enabled
- thrift_encodings.push_back(ToThrift(properties_->encoding(column_->path())));
- }
- thrift_encodings.push_back(ToThrift(Encoding::RLE));
-    // Only PLAIN encoding is supported for fallback in V1
-    // TODO(majetideepak): Use the user-specified encoding for V2
- if (dictionary_fallback) {
- thrift_encodings.push_back(ToThrift(Encoding::PLAIN));
- }
- column_chunk_->meta_data.__set_encodings(thrift_encodings);
- std::vector<format::PageEncodingStats> thrift_encoding_stats;
- // Add dictionary page encoding stats
- for (const auto& entry : dict_encoding_stats) {
- format::PageEncodingStats dict_enc_stat;
- dict_enc_stat.__set_page_type(format::PageType::DICTIONARY_PAGE);
- dict_enc_stat.__set_encoding(ToThrift(entry.first));
- dict_enc_stat.__set_count(entry.second);
- thrift_encoding_stats.push_back(dict_enc_stat);
- }
- // Add data page encoding stats
- for (const auto& entry : data_encoding_stats) {
- format::PageEncodingStats data_enc_stat;
- data_enc_stat.__set_page_type(format::PageType::DATA_PAGE);
- data_enc_stat.__set_encoding(ToThrift(entry.first));
- data_enc_stat.__set_count(entry.second);
- thrift_encoding_stats.push_back(data_enc_stat);
- }
- column_chunk_->meta_data.__set_encoding_stats(thrift_encoding_stats);
-
- const auto& encrypt_md =
- properties_->column_encryption_properties(column_->path()->ToDotString());
- // column is encrypted
- if (encrypt_md != nullptr && encrypt_md->is_encrypted()) {
- column_chunk_->__isset.crypto_metadata = true;
- format::ColumnCryptoMetaData ccmd;
- if (encrypt_md->is_encrypted_with_footer_key()) {
- // encrypted with footer key
- ccmd.__isset.ENCRYPTION_WITH_FOOTER_KEY = true;
- ccmd.__set_ENCRYPTION_WITH_FOOTER_KEY(format::EncryptionWithFooterKey());
- } else { // encrypted with column key
- format::EncryptionWithColumnKey eck;
- eck.__set_key_metadata(encrypt_md->key_metadata());
- eck.__set_path_in_schema(column_->path()->ToDotVector());
- ccmd.__isset.ENCRYPTION_WITH_COLUMN_KEY = true;
- ccmd.__set_ENCRYPTION_WITH_COLUMN_KEY(eck);
- }
- column_chunk_->__set_crypto_metadata(ccmd);
-
- bool encrypted_footer =
- properties_->file_encryption_properties()->encrypted_footer();
- bool encrypt_metadata =
- !encrypted_footer || !encrypt_md->is_encrypted_with_footer_key();
- if (encrypt_metadata) {
- ThriftSerializer serializer;
-        // Serialize and encrypt the ColumnMetaData structure separately:
-        // Thrift-serialize it, encrypt it with the column key, and write the
-        // result to encrypted_column_metadata
- uint8_t* serialized_data;
- uint32_t serialized_len;
-
- serializer.SerializeToBuffer(&column_chunk_->meta_data, &serialized_len,
- &serialized_data);
-
- std::vector<uint8_t> encrypted_data(encryptor->CiphertextSizeDelta() +
- serialized_len);
- unsigned encrypted_len =
- encryptor->Encrypt(serialized_data, serialized_len, encrypted_data.data());
-
- const char* temp =
- const_cast<const char*>(reinterpret_cast<char*>(encrypted_data.data()));
- std::string encrypted_column_metadata(temp, encrypted_len);
- column_chunk_->__set_encrypted_column_metadata(encrypted_column_metadata);
-
- if (encrypted_footer) {
- column_chunk_->__isset.meta_data = false;
- } else {
- // Keep redacted metadata version for old readers
- column_chunk_->__isset.meta_data = true;
- column_chunk_->meta_data.__isset.statistics = false;
- column_chunk_->meta_data.__isset.encoding_stats = false;
- }
- }
- }
- }
-
- void WriteTo(::arrow::io::OutputStream* sink) {
- ThriftSerializer serializer;
- serializer.Serialize(column_chunk_, sink);
- }
-
- const ColumnDescriptor* descr() const { return column_; }
- int64_t total_compressed_size() const {
- return column_chunk_->meta_data.total_compressed_size;
- }
-
- private:
- void Init(format::ColumnChunk* column_chunk) {
- column_chunk_ = column_chunk;
-
- column_chunk_->meta_data.__set_type(ToThrift(column_->physical_type()));
- column_chunk_->meta_data.__set_path_in_schema(column_->path()->ToDotVector());
- column_chunk_->meta_data.__set_codec(
- ToThrift(properties_->compression(column_->path())));
- }
-
- format::ColumnChunk* column_chunk_;
- std::unique_ptr<format::ColumnChunk> owned_column_chunk_;
- const std::shared_ptr<WriterProperties> properties_;
- const ColumnDescriptor* column_;
-};
-
-std::unique_ptr<ColumnChunkMetaDataBuilder> ColumnChunkMetaDataBuilder::Make(
- std::shared_ptr<WriterProperties> props, const ColumnDescriptor* column,
- void* contents) {
- return std::unique_ptr<ColumnChunkMetaDataBuilder>(
- new ColumnChunkMetaDataBuilder(std::move(props), column, contents));
-}
-
-std::unique_ptr<ColumnChunkMetaDataBuilder> ColumnChunkMetaDataBuilder::Make(
- std::shared_ptr<WriterProperties> props, const ColumnDescriptor* column) {
- return std::unique_ptr<ColumnChunkMetaDataBuilder>(
- new ColumnChunkMetaDataBuilder(std::move(props), column));
-}
-
-ColumnChunkMetaDataBuilder::ColumnChunkMetaDataBuilder(
- std::shared_ptr<WriterProperties> props, const ColumnDescriptor* column)
- : impl_{std::unique_ptr<ColumnChunkMetaDataBuilderImpl>(
- new ColumnChunkMetaDataBuilderImpl(std::move(props), column))} {}
-
-ColumnChunkMetaDataBuilder::ColumnChunkMetaDataBuilder(
- std::shared_ptr<WriterProperties> props, const ColumnDescriptor* column,
- void* contents)
- : impl_{std::unique_ptr<ColumnChunkMetaDataBuilderImpl>(
- new ColumnChunkMetaDataBuilderImpl(
- std::move(props), column,
- reinterpret_cast<format::ColumnChunk*>(contents)))} {}
-
-ColumnChunkMetaDataBuilder::~ColumnChunkMetaDataBuilder() = default;
-
-const void* ColumnChunkMetaDataBuilder::contents() const { return impl_->contents(); }
-
-void ColumnChunkMetaDataBuilder::set_file_path(const std::string& path) {
- impl_->set_file_path(path);
-}
-
-void ColumnChunkMetaDataBuilder::Finish(
- int64_t num_values, int64_t dictionary_page_offset, int64_t index_page_offset,
- int64_t data_page_offset, int64_t compressed_size, int64_t uncompressed_size,
- bool has_dictionary, bool dictionary_fallback,
- const std::map<Encoding::type, int32_t>& dict_encoding_stats,
- const std::map<Encoding::type, int32_t>& data_encoding_stats,
- const std::shared_ptr<Encryptor>& encryptor) {
- impl_->Finish(num_values, dictionary_page_offset, index_page_offset, data_page_offset,
- compressed_size, uncompressed_size, has_dictionary, dictionary_fallback,
- dict_encoding_stats, data_encoding_stats, encryptor);
-}
-
-void ColumnChunkMetaDataBuilder::WriteTo(::arrow::io::OutputStream* sink) {
- impl_->WriteTo(sink);
-}
-
-const ColumnDescriptor* ColumnChunkMetaDataBuilder::descr() const {
- return impl_->descr();
-}
-
-void ColumnChunkMetaDataBuilder::SetStatistics(const EncodedStatistics& result) {
- impl_->SetStatistics(result);
-}
-
-int64_t ColumnChunkMetaDataBuilder::total_compressed_size() const {
- return impl_->total_compressed_size();
-}
-
-class RowGroupMetaDataBuilder::RowGroupMetaDataBuilderImpl {
- public:
- explicit RowGroupMetaDataBuilderImpl(std::shared_ptr<WriterProperties> props,
- const SchemaDescriptor* schema, void* contents)
- : properties_(std::move(props)), schema_(schema), next_column_(0) {
- row_group_ = reinterpret_cast<format::RowGroup*>(contents);
- InitializeColumns(schema->num_columns());
- }
-
- ColumnChunkMetaDataBuilder* NextColumnChunk() {
- if (!(next_column_ < num_columns())) {
- std::stringstream ss;
- ss << "The schema only has " << num_columns()
- << " columns, requested metadata for column: " << next_column_;
- throw ParquetException(ss.str());
- }
- auto column = schema_->Column(next_column_);
- auto column_builder = ColumnChunkMetaDataBuilder::Make(
- properties_, column, &row_group_->columns[next_column_++]);
- auto column_builder_ptr = column_builder.get();
- column_builders_.push_back(std::move(column_builder));
- return column_builder_ptr;
- }
-
- int current_column() { return next_column_ - 1; }
-
- void Finish(int64_t total_bytes_written, int16_t row_group_ordinal) {
- if (!(next_column_ == schema_->num_columns())) {
- std::stringstream ss;
- ss << "Only " << next_column_ - 1 << " out of " << schema_->num_columns()
- << " columns are initialized";
- throw ParquetException(ss.str());
- }
-
- int64_t file_offset = 0;
- int64_t total_compressed_size = 0;
- for (int i = 0; i < schema_->num_columns(); i++) {
- if (!(row_group_->columns[i].file_offset >= 0)) {
- std::stringstream ss;
- ss << "Column " << i << " is not complete.";
- throw ParquetException(ss.str());
- }
- if (i == 0) {
- file_offset = row_group_->columns[0].file_offset;
- }
- // sometimes column metadata is encrypted and not available to read,
- // so we must get total_compressed_size from column builder
- total_compressed_size += column_builders_[i]->total_compressed_size();
- }
-
- row_group_->__set_file_offset(file_offset);
- row_group_->__set_total_compressed_size(total_compressed_size);
- row_group_->__set_total_byte_size(total_bytes_written);
- row_group_->__set_ordinal(row_group_ordinal);
- }
-
- void set_num_rows(int64_t num_rows) { row_group_->num_rows = num_rows; }
-
- int num_columns() { return static_cast<int>(row_group_->columns.size()); }
-
- int64_t num_rows() { return row_group_->num_rows; }
-
- private:
- void InitializeColumns(int ncols) { row_group_->columns.resize(ncols); }
-
- format::RowGroup* row_group_;
- const std::shared_ptr<WriterProperties> properties_;
- const SchemaDescriptor* schema_;
- std::vector<std::unique_ptr<ColumnChunkMetaDataBuilder>> column_builders_;
- int next_column_;
-};
-
-std::unique_ptr<RowGroupMetaDataBuilder> RowGroupMetaDataBuilder::Make(
- std::shared_ptr<WriterProperties> props, const SchemaDescriptor* schema_,
- void* contents) {
- return std::unique_ptr<RowGroupMetaDataBuilder>(
- new RowGroupMetaDataBuilder(std::move(props), schema_, contents));
-}
-
-RowGroupMetaDataBuilder::RowGroupMetaDataBuilder(std::shared_ptr<WriterProperties> props,
- const SchemaDescriptor* schema_,
- void* contents)
- : impl_{new RowGroupMetaDataBuilderImpl(std::move(props), schema_, contents)} {}
-
-RowGroupMetaDataBuilder::~RowGroupMetaDataBuilder() = default;
-
-ColumnChunkMetaDataBuilder* RowGroupMetaDataBuilder::NextColumnChunk() {
- return impl_->NextColumnChunk();
-}
-
-int RowGroupMetaDataBuilder::current_column() const { return impl_->current_column(); }
-
-int RowGroupMetaDataBuilder::num_columns() { return impl_->num_columns(); }
-
-int64_t RowGroupMetaDataBuilder::num_rows() { return impl_->num_rows(); }
-
-void RowGroupMetaDataBuilder::set_num_rows(int64_t num_rows) {
- impl_->set_num_rows(num_rows);
-}
-
-void RowGroupMetaDataBuilder::Finish(int64_t total_bytes_written,
- int16_t row_group_ordinal) {
- impl_->Finish(total_bytes_written, row_group_ordinal);
-}
-
-// file metadata
-// TODO(PARQUET-595) Support key_value_metadata
-class FileMetaDataBuilder::FileMetaDataBuilderImpl {
- public:
- explicit FileMetaDataBuilderImpl(
- const SchemaDescriptor* schema, std::shared_ptr<WriterProperties> props,
- std::shared_ptr<const KeyValueMetadata> key_value_metadata)
- : metadata_(new format::FileMetaData()),
- properties_(std::move(props)),
- schema_(schema),
- key_value_metadata_(std::move(key_value_metadata)) {
- if (properties_->file_encryption_properties() != nullptr &&
- properties_->file_encryption_properties()->encrypted_footer()) {
- crypto_metadata_.reset(new format::FileCryptoMetaData());
- }
- }
-
- RowGroupMetaDataBuilder* AppendRowGroup() {
- row_groups_.emplace_back();
- current_row_group_builder_ =
- RowGroupMetaDataBuilder::Make(properties_, schema_, &row_groups_.back());
- return current_row_group_builder_.get();
- }
-
- std::unique_ptr<FileMetaData> Finish() {
- int64_t total_rows = 0;
- for (auto row_group : row_groups_) {
- total_rows += row_group.num_rows;
- }
- metadata_->__set_num_rows(total_rows);
- metadata_->__set_row_groups(row_groups_);
-
- if (key_value_metadata_) {
- metadata_->key_value_metadata.clear();
- metadata_->key_value_metadata.reserve(key_value_metadata_->size());
- for (int64_t i = 0; i < key_value_metadata_->size(); ++i) {
- format::KeyValue kv_pair;
- kv_pair.__set_key(key_value_metadata_->key(i));
- kv_pair.__set_value(key_value_metadata_->value(i));
- metadata_->key_value_metadata.push_back(kv_pair);
- }
- metadata_->__isset.key_value_metadata = true;
- }
-
- int32_t file_version = 0;
- switch (properties_->version()) {
- case ParquetVersion::PARQUET_1_0:
- file_version = 1;
- break;
- case ParquetVersion::PARQUET_2_0:
- file_version = 2;
- break;
- default:
- break;
- }
- metadata_->__set_version(file_version);
- metadata_->__set_created_by(properties_->created_by());
-
-    // Users cannot set the `ColumnOrder` since we do not have user-defined
-    // sort orders in the spec yet.
-    // We always default to `TYPE_DEFINED_ORDER`; we can expose it in the API
-    // once user-defined sort orders land in the Parquet format.
-    // TypeDefinedOrder means the SortOrder is chosen based on ConvertedType/PhysicalType.
- format::TypeDefinedOrder type_defined_order;
- format::ColumnOrder column_order;
- column_order.__set_TYPE_ORDER(type_defined_order);
- column_order.__isset.TYPE_ORDER = true;
- metadata_->column_orders.resize(schema_->num_columns(), column_order);
- metadata_->__isset.column_orders = true;
-
- // if plaintext footer, set footer signing algorithm
- auto file_encryption_properties = properties_->file_encryption_properties();
- if (file_encryption_properties && !file_encryption_properties->encrypted_footer()) {
- EncryptionAlgorithm signing_algorithm;
- EncryptionAlgorithm algo = file_encryption_properties->algorithm();
- signing_algorithm.aad.aad_file_unique = algo.aad.aad_file_unique;
- signing_algorithm.aad.supply_aad_prefix = algo.aad.supply_aad_prefix;
- if (!algo.aad.supply_aad_prefix) {
- signing_algorithm.aad.aad_prefix = algo.aad.aad_prefix;
- }
- signing_algorithm.algorithm = ParquetCipher::AES_GCM_V1;
-
- metadata_->__set_encryption_algorithm(ToThrift(signing_algorithm));
- const std::string& footer_signing_key_metadata =
- file_encryption_properties->footer_key_metadata();
- if (footer_signing_key_metadata.size() > 0) {
- metadata_->__set_footer_signing_key_metadata(footer_signing_key_metadata);
- }
- }
-
- ToParquet(static_cast<parquet::schema::GroupNode*>(schema_->schema_root().get()),
- &metadata_->schema);
- auto file_meta_data = std::unique_ptr<FileMetaData>(new FileMetaData());
- file_meta_data->impl_->metadata_ = std::move(metadata_);
- file_meta_data->impl_->InitSchema();
- file_meta_data->impl_->InitKeyValueMetadata();
- return file_meta_data;
- }
-
- std::unique_ptr<FileCryptoMetaData> BuildFileCryptoMetaData() {
- if (crypto_metadata_ == nullptr) {
- return nullptr;
- }
-
- auto file_encryption_properties = properties_->file_encryption_properties();
-
- crypto_metadata_->__set_encryption_algorithm(
- ToThrift(file_encryption_properties->algorithm()));
- std::string key_metadata = file_encryption_properties->footer_key_metadata();
-
- if (!key_metadata.empty()) {
- crypto_metadata_->__set_key_metadata(key_metadata);
- }
-
- std::unique_ptr<FileCryptoMetaData> file_crypto_metadata =
- std::unique_ptr<FileCryptoMetaData>(new FileCryptoMetaData());
- file_crypto_metadata->impl_->metadata_ = std::move(crypto_metadata_);
-
- return file_crypto_metadata;
- }
-
- protected:
- std::unique_ptr<format::FileMetaData> metadata_;
- std::unique_ptr<format::FileCryptoMetaData> crypto_metadata_;
-
- private:
- const std::shared_ptr<WriterProperties> properties_;
- std::vector<format::RowGroup> row_groups_;
-
- std::unique_ptr<RowGroupMetaDataBuilder> current_row_group_builder_;
- const SchemaDescriptor* schema_;
- std::shared_ptr<const KeyValueMetadata> key_value_metadata_;
-};
-
-std::unique_ptr<FileMetaDataBuilder> FileMetaDataBuilder::Make(
- const SchemaDescriptor* schema, std::shared_ptr<WriterProperties> props,
- std::shared_ptr<const KeyValueMetadata> key_value_metadata) {
- return std::unique_ptr<FileMetaDataBuilder>(
- new FileMetaDataBuilder(schema, std::move(props), std::move(key_value_metadata)));
-}
-
-FileMetaDataBuilder::FileMetaDataBuilder(
- const SchemaDescriptor* schema, std::shared_ptr<WriterProperties> props,
- std::shared_ptr<const KeyValueMetadata> key_value_metadata)
- : impl_{std::unique_ptr<FileMetaDataBuilderImpl>(new FileMetaDataBuilderImpl(
- schema, std::move(props), std::move(key_value_metadata)))} {}
-
-FileMetaDataBuilder::~FileMetaDataBuilder() = default;
-
-RowGroupMetaDataBuilder* FileMetaDataBuilder::AppendRowGroup() {
- return impl_->AppendRowGroup();
-}
-
-std::unique_ptr<FileMetaData> FileMetaDataBuilder::Finish() { return impl_->Finish(); }
-
-std::unique_ptr<FileCryptoMetaData> FileMetaDataBuilder::GetCryptoMetaData() {
- return impl_->BuildFileCryptoMetaData();
-}
-
-} // namespace parquet
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "parquet/metadata.h"
+
+#include <algorithm>
+#include <cinttypes>
+#include <ostream>
+#include <string>
+#include <utility>
+#include <vector>
+
+#include "arrow/io/memory.h"
+#include "arrow/util/key_value_metadata.h"
+#include "arrow/util/logging.h"
+#include "arrow/util/string_view.h"
+#include "parquet/encryption/encryption_internal.h"
+#include "parquet/encryption/internal_file_decryptor.h"
+#include "parquet/exception.h"
+#include "parquet/schema.h"
+#include "parquet/schema_internal.h"
+#include "parquet/statistics.h"
+#include "parquet/thrift_internal.h"
+
+namespace parquet {
+
+const ApplicationVersion& ApplicationVersion::PARQUET_251_FIXED_VERSION() {
+ static ApplicationVersion version("parquet-mr", 1, 8, 0);
+ return version;
+}
+
+const ApplicationVersion& ApplicationVersion::PARQUET_816_FIXED_VERSION() {
+ static ApplicationVersion version("parquet-mr", 1, 2, 9);
+ return version;
+}
+
+const ApplicationVersion& ApplicationVersion::PARQUET_CPP_FIXED_STATS_VERSION() {
+ static ApplicationVersion version("parquet-cpp", 1, 3, 0);
+ return version;
+}
+
+const ApplicationVersion& ApplicationVersion::PARQUET_MR_FIXED_STATS_VERSION() {
+ static ApplicationVersion version("parquet-mr", 1, 10, 0);
+ return version;
+}
+
+std::string ParquetVersionToString(ParquetVersion::type ver) {
+ switch (ver) {
+ case ParquetVersion::PARQUET_1_0:
+ return "1.0";
+ case ParquetVersion::PARQUET_2_0:
+ return "2.0";
+ }
+
+ // This should be unreachable
+ return "UNKNOWN";
+}
+
+template <typename DType>
+static std::shared_ptr<Statistics> MakeTypedColumnStats(
+ const format::ColumnMetaData& metadata, const ColumnDescriptor* descr) {
+  // If the column order is TYPE_DEFINED_ORDER, use max_value and min_value
+ if (descr->column_order().get_order() == ColumnOrder::TYPE_DEFINED_ORDER) {
+ return MakeStatistics<DType>(
+ descr, metadata.statistics.min_value, metadata.statistics.max_value,
+ metadata.num_values - metadata.statistics.null_count,
+ metadata.statistics.null_count, metadata.statistics.distinct_count,
+ metadata.statistics.__isset.max_value || metadata.statistics.__isset.min_value,
+ metadata.statistics.__isset.null_count,
+ metadata.statistics.__isset.distinct_count);
+ }
+ // Default behavior
+ return MakeStatistics<DType>(
+ descr, metadata.statistics.min, metadata.statistics.max,
+ metadata.num_values - metadata.statistics.null_count,
+ metadata.statistics.null_count, metadata.statistics.distinct_count,
+ metadata.statistics.__isset.max || metadata.statistics.__isset.min,
+ metadata.statistics.__isset.null_count, metadata.statistics.__isset.distinct_count);
+}
+
+std::shared_ptr<Statistics> MakeColumnStats(const format::ColumnMetaData& meta_data,
+ const ColumnDescriptor* descr) {
+ switch (static_cast<Type::type>(meta_data.type)) {
+ case Type::BOOLEAN:
+ return MakeTypedColumnStats<BooleanType>(meta_data, descr);
+ case Type::INT32:
+ return MakeTypedColumnStats<Int32Type>(meta_data, descr);
+ case Type::INT64:
+ return MakeTypedColumnStats<Int64Type>(meta_data, descr);
+ case Type::INT96:
+ return MakeTypedColumnStats<Int96Type>(meta_data, descr);
+ case Type::DOUBLE:
+ return MakeTypedColumnStats<DoubleType>(meta_data, descr);
+ case Type::FLOAT:
+ return MakeTypedColumnStats<FloatType>(meta_data, descr);
+ case Type::BYTE_ARRAY:
+ return MakeTypedColumnStats<ByteArrayType>(meta_data, descr);
+ case Type::FIXED_LEN_BYTE_ARRAY:
+ return MakeTypedColumnStats<FLBAType>(meta_data, descr);
+ case Type::UNDEFINED:
+ break;
+ }
+ throw ParquetException("Can't decode page statistics for selected column type");
+}
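+
+// Illustrative sketch (not part of the original file; `chunk` and `descr` are
+// hypothetical variables): decoding statistics for a column chunk dispatches
+// on the physical type, e.g. Type::INT32 -> MakeTypedColumnStats<Int32Type>:
+//
+//   std::shared_ptr<Statistics> stats = MakeColumnStats(chunk.meta_data, descr);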
+
+// MetaData Accessor
+
+// ColumnCryptoMetaData
+class ColumnCryptoMetaData::ColumnCryptoMetaDataImpl {
+ public:
+ explicit ColumnCryptoMetaDataImpl(const format::ColumnCryptoMetaData* crypto_metadata)
+ : crypto_metadata_(crypto_metadata) {}
+
+ bool encrypted_with_footer_key() const {
+ return crypto_metadata_->__isset.ENCRYPTION_WITH_FOOTER_KEY;
+ }
+ bool encrypted_with_column_key() const {
+ return crypto_metadata_->__isset.ENCRYPTION_WITH_COLUMN_KEY;
+ }
+ std::shared_ptr<schema::ColumnPath> path_in_schema() const {
+ return std::make_shared<schema::ColumnPath>(
+ crypto_metadata_->ENCRYPTION_WITH_COLUMN_KEY.path_in_schema);
+ }
+ const std::string& key_metadata() const {
+ return crypto_metadata_->ENCRYPTION_WITH_COLUMN_KEY.key_metadata;
+ }
+
+ private:
+ const format::ColumnCryptoMetaData* crypto_metadata_;
+};
+
+std::unique_ptr<ColumnCryptoMetaData> ColumnCryptoMetaData::Make(
+ const uint8_t* metadata) {
+ return std::unique_ptr<ColumnCryptoMetaData>(new ColumnCryptoMetaData(metadata));
+}
+
+ColumnCryptoMetaData::ColumnCryptoMetaData(const uint8_t* metadata)
+ : impl_(new ColumnCryptoMetaDataImpl(
+ reinterpret_cast<const format::ColumnCryptoMetaData*>(metadata))) {}
+
+ColumnCryptoMetaData::~ColumnCryptoMetaData() = default;
+
+std::shared_ptr<schema::ColumnPath> ColumnCryptoMetaData::path_in_schema() const {
+ return impl_->path_in_schema();
+}
+bool ColumnCryptoMetaData::encrypted_with_footer_key() const {
+ return impl_->encrypted_with_footer_key();
+}
+const std::string& ColumnCryptoMetaData::key_metadata() const {
+ return impl_->key_metadata();
+}
+
+// ColumnChunk metadata
+class ColumnChunkMetaData::ColumnChunkMetaDataImpl {
+ public:
+ explicit ColumnChunkMetaDataImpl(const format::ColumnChunk* column,
+ const ColumnDescriptor* descr,
+ int16_t row_group_ordinal, int16_t column_ordinal,
+ const ApplicationVersion* writer_version,
+ std::shared_ptr<InternalFileDecryptor> file_decryptor)
+ : column_(column), descr_(descr), writer_version_(writer_version) {
+ column_metadata_ = &column->meta_data;
+ if (column->__isset.crypto_metadata) { // column metadata is encrypted
+ format::ColumnCryptoMetaData ccmd = column->crypto_metadata;
+
+ if (ccmd.__isset.ENCRYPTION_WITH_COLUMN_KEY) {
+ if (file_decryptor != nullptr && file_decryptor->properties() != nullptr) {
+ // should decrypt metadata
+ std::shared_ptr<schema::ColumnPath> path = std::make_shared<schema::ColumnPath>(
+ ccmd.ENCRYPTION_WITH_COLUMN_KEY.path_in_schema);
+ std::string key_metadata = ccmd.ENCRYPTION_WITH_COLUMN_KEY.key_metadata;
+
+ std::string aad_column_metadata = encryption::CreateModuleAad(
+ file_decryptor->file_aad(), encryption::kColumnMetaData, row_group_ordinal,
+ column_ordinal, static_cast<int16_t>(-1));
+ auto decryptor = file_decryptor->GetColumnMetaDecryptor(
+ path->ToDotString(), key_metadata, aad_column_metadata);
+ auto len = static_cast<uint32_t>(column->encrypted_column_metadata.size());
+ DeserializeThriftMsg(
+ reinterpret_cast<const uint8_t*>(column->encrypted_column_metadata.c_str()),
+ &len, &decrypted_metadata_, decryptor);
+ column_metadata_ = &decrypted_metadata_;
+ } else {
+ throw ParquetException(
+ "Cannot decrypt ColumnMetadata."
+              " FileDecryption is not set up correctly");
+ }
+ }
+ }
+ for (const auto& encoding : column_metadata_->encodings) {
+ encodings_.push_back(LoadEnumSafe(&encoding));
+ }
+ for (const auto& encoding_stats : column_metadata_->encoding_stats) {
+ encoding_stats_.push_back({LoadEnumSafe(&encoding_stats.page_type),
+ LoadEnumSafe(&encoding_stats.encoding),
+ encoding_stats.count});
+ }
+ possible_stats_ = nullptr;
+ }
+
+ bool Equals(const ColumnChunkMetaDataImpl& other) const {
+ return *column_metadata_ == *other.column_metadata_;
+ }
+
+ // column chunk
+ inline int64_t file_offset() const { return column_->file_offset; }
+ inline const std::string& file_path() const { return column_->file_path; }
+
+ inline Type::type type() const { return LoadEnumSafe(&column_metadata_->type); }
+
+ inline int64_t num_values() const { return column_metadata_->num_values; }
+
+ std::shared_ptr<schema::ColumnPath> path_in_schema() {
+ return std::make_shared<schema::ColumnPath>(column_metadata_->path_in_schema);
+ }
+
+ // Check if statistics are set and are valid
+ // 1) Must be set in the metadata
+ // 2) Statistics must not be corrupted
+ inline bool is_stats_set() const {
+ DCHECK(writer_version_ != nullptr);
+ // If the column statistics don't exist or column sort order is unknown
+ // we cannot use the column stats
+ if (!column_metadata_->__isset.statistics ||
+ descr_->sort_order() == SortOrder::UNKNOWN) {
+ return false;
+ }
+ if (possible_stats_ == nullptr) {
+ possible_stats_ = MakeColumnStats(*column_metadata_, descr_);
+ }
+ EncodedStatistics encodedStatistics = possible_stats_->Encode();
+ return writer_version_->HasCorrectStatistics(type(), encodedStatistics,
+ descr_->sort_order());
+ }
+
+ inline std::shared_ptr<Statistics> statistics() const {
+ return is_stats_set() ? possible_stats_ : nullptr;
+ }
+
+ inline Compression::type compression() const {
+ return LoadEnumSafe(&column_metadata_->codec);
+ }
+
+ const std::vector<Encoding::type>& encodings() const { return encodings_; }
+
+ const std::vector<PageEncodingStats>& encoding_stats() const { return encoding_stats_; }
+
+ inline bool has_dictionary_page() const {
+ return column_metadata_->__isset.dictionary_page_offset;
+ }
+
+ inline int64_t dictionary_page_offset() const {
+ return column_metadata_->dictionary_page_offset;
+ }
+
+ inline int64_t data_page_offset() const { return column_metadata_->data_page_offset; }
+
+ inline bool has_index_page() const {
+ return column_metadata_->__isset.index_page_offset;
+ }
+
+ inline int64_t index_page_offset() const { return column_metadata_->index_page_offset; }
+
+ inline int64_t total_compressed_size() const {
+ return column_metadata_->total_compressed_size;
+ }
+
+ inline int64_t total_uncompressed_size() const {
+ return column_metadata_->total_uncompressed_size;
+ }
+
+ inline std::unique_ptr<ColumnCryptoMetaData> crypto_metadata() const {
+ if (column_->__isset.crypto_metadata) {
+ return ColumnCryptoMetaData::Make(
+ reinterpret_cast<const uint8_t*>(&column_->crypto_metadata));
+ } else {
+ return nullptr;
+ }
+ }
+
+ private:
+ mutable std::shared_ptr<Statistics> possible_stats_;
+ std::vector<Encoding::type> encodings_;
+ std::vector<PageEncodingStats> encoding_stats_;
+ const format::ColumnChunk* column_;
+ const format::ColumnMetaData* column_metadata_;
+ format::ColumnMetaData decrypted_metadata_;
+ const ColumnDescriptor* descr_;
+ const ApplicationVersion* writer_version_;
+};
+
+std::unique_ptr<ColumnChunkMetaData> ColumnChunkMetaData::Make(
+ const void* metadata, const ColumnDescriptor* descr,
+ const ApplicationVersion* writer_version, int16_t row_group_ordinal,
+ int16_t column_ordinal, std::shared_ptr<InternalFileDecryptor> file_decryptor) {
+ return std::unique_ptr<ColumnChunkMetaData>(
+ new ColumnChunkMetaData(metadata, descr, row_group_ordinal, column_ordinal,
+ writer_version, std::move(file_decryptor)));
+}
+
+ColumnChunkMetaData::ColumnChunkMetaData(
+ const void* metadata, const ColumnDescriptor* descr, int16_t row_group_ordinal,
+ int16_t column_ordinal, const ApplicationVersion* writer_version,
+ std::shared_ptr<InternalFileDecryptor> file_decryptor)
+ : impl_{new ColumnChunkMetaDataImpl(
+ reinterpret_cast<const format::ColumnChunk*>(metadata), descr,
+ row_group_ordinal, column_ordinal, writer_version, std::move(file_decryptor))} {
+}
+
+ColumnChunkMetaData::~ColumnChunkMetaData() = default;
+
+// column chunk
+int64_t ColumnChunkMetaData::file_offset() const { return impl_->file_offset(); }
+
+const std::string& ColumnChunkMetaData::file_path() const { return impl_->file_path(); }
+
+Type::type ColumnChunkMetaData::type() const { return impl_->type(); }
+
+int64_t ColumnChunkMetaData::num_values() const { return impl_->num_values(); }
+
+std::shared_ptr<schema::ColumnPath> ColumnChunkMetaData::path_in_schema() const {
+ return impl_->path_in_schema();
+}
+
+std::shared_ptr<Statistics> ColumnChunkMetaData::statistics() const {
+ return impl_->statistics();
+}
+
+bool ColumnChunkMetaData::is_stats_set() const { return impl_->is_stats_set(); }
+
+bool ColumnChunkMetaData::has_dictionary_page() const {
+ return impl_->has_dictionary_page();
+}
+
+int64_t ColumnChunkMetaData::dictionary_page_offset() const {
+ return impl_->dictionary_page_offset();
+}
+
+int64_t ColumnChunkMetaData::data_page_offset() const {
+ return impl_->data_page_offset();
+}
+
+bool ColumnChunkMetaData::has_index_page() const { return impl_->has_index_page(); }
+
+int64_t ColumnChunkMetaData::index_page_offset() const {
+ return impl_->index_page_offset();
+}
+
+Compression::type ColumnChunkMetaData::compression() const {
+ return impl_->compression();
+}
+
+bool ColumnChunkMetaData::can_decompress() const {
+ return ::arrow::util::Codec::IsAvailable(compression());
+}
+
+const std::vector<Encoding::type>& ColumnChunkMetaData::encodings() const {
+ return impl_->encodings();
+}
+
+const std::vector<PageEncodingStats>& ColumnChunkMetaData::encoding_stats() const {
+ return impl_->encoding_stats();
+}
+
+int64_t ColumnChunkMetaData::total_uncompressed_size() const {
+ return impl_->total_uncompressed_size();
+}
+
+int64_t ColumnChunkMetaData::total_compressed_size() const {
+ return impl_->total_compressed_size();
+}
+
+std::unique_ptr<ColumnCryptoMetaData> ColumnChunkMetaData::crypto_metadata() const {
+ return impl_->crypto_metadata();
+}
+
+bool ColumnChunkMetaData::Equals(const ColumnChunkMetaData& other) const {
+ return impl_->Equals(*other.impl_);
+}
+
+// row-group metadata
+class RowGroupMetaData::RowGroupMetaDataImpl {
+ public:
+ explicit RowGroupMetaDataImpl(const format::RowGroup* row_group,
+ const SchemaDescriptor* schema,
+ const ApplicationVersion* writer_version,
+ std::shared_ptr<InternalFileDecryptor> file_decryptor)
+ : row_group_(row_group),
+ schema_(schema),
+ writer_version_(writer_version),
+ file_decryptor_(std::move(file_decryptor)) {}
+
+ bool Equals(const RowGroupMetaDataImpl& other) const {
+ return *row_group_ == *other.row_group_;
+ }
+
+ inline int num_columns() const { return static_cast<int>(row_group_->columns.size()); }
+
+ inline int64_t num_rows() const { return row_group_->num_rows; }
+
+ inline int64_t total_byte_size() const { return row_group_->total_byte_size; }
+
+ inline int64_t total_compressed_size() const {
+ return row_group_->total_compressed_size;
+ }
+
+ inline int64_t file_offset() const { return row_group_->file_offset; }
+
+ inline const SchemaDescriptor* schema() const { return schema_; }
+
+ std::unique_ptr<ColumnChunkMetaData> ColumnChunk(int i) {
+ if (i < num_columns()) {
+ return ColumnChunkMetaData::Make(&row_group_->columns[i], schema_->Column(i),
+ writer_version_, row_group_->ordinal,
+ static_cast<int16_t>(i), file_decryptor_);
+ }
+ throw ParquetException("The file only has ", num_columns(),
+ " columns, requested metadata for column: ", i);
+ }
+
+ private:
+ const format::RowGroup* row_group_;
+ const SchemaDescriptor* schema_;
+ const ApplicationVersion* writer_version_;
+ std::shared_ptr<InternalFileDecryptor> file_decryptor_;
+};
+
+std::unique_ptr<RowGroupMetaData> RowGroupMetaData::Make(
+ const void* metadata, const SchemaDescriptor* schema,
+ const ApplicationVersion* writer_version,
+ std::shared_ptr<InternalFileDecryptor> file_decryptor) {
+ return std::unique_ptr<RowGroupMetaData>(
+ new RowGroupMetaData(metadata, schema, writer_version, std::move(file_decryptor)));
+}
+
+RowGroupMetaData::RowGroupMetaData(const void* metadata, const SchemaDescriptor* schema,
+ const ApplicationVersion* writer_version,
+ std::shared_ptr<InternalFileDecryptor> file_decryptor)
+ : impl_{new RowGroupMetaDataImpl(reinterpret_cast<const format::RowGroup*>(metadata),
+ schema, writer_version, std::move(file_decryptor))} {
+}
+
+RowGroupMetaData::~RowGroupMetaData() = default;
+
+bool RowGroupMetaData::Equals(const RowGroupMetaData& other) const {
+ return impl_->Equals(*other.impl_);
+}
+
+int RowGroupMetaData::num_columns() const { return impl_->num_columns(); }
+
+int64_t RowGroupMetaData::num_rows() const { return impl_->num_rows(); }
+
+int64_t RowGroupMetaData::total_byte_size() const { return impl_->total_byte_size(); }
+
+int64_t RowGroupMetaData::total_compressed_size() const {
+ return impl_->total_compressed_size();
+}
+
+int64_t RowGroupMetaData::file_offset() const { return impl_->file_offset(); }
+
+const SchemaDescriptor* RowGroupMetaData::schema() const { return impl_->schema(); }
+
+std::unique_ptr<ColumnChunkMetaData> RowGroupMetaData::ColumnChunk(int i) const {
+ return impl_->ColumnChunk(i);
+}
+
+bool RowGroupMetaData::can_decompress() const {
+ int n_columns = num_columns();
+ for (int i = 0; i < n_columns; i++) {
+ if (!ColumnChunk(i)->can_decompress()) {
+ return false;
+ }
+ }
+ return true;
+}
+
+// file metadata
+class FileMetaData::FileMetaDataImpl {
+ public:
+ FileMetaDataImpl() = default;
+
+ explicit FileMetaDataImpl(
+ const void* metadata, uint32_t* metadata_len,
+ std::shared_ptr<InternalFileDecryptor> file_decryptor = nullptr)
+ : file_decryptor_(file_decryptor) {
+ metadata_.reset(new format::FileMetaData);
+
+ auto footer_decryptor =
+ file_decryptor_ != nullptr ? file_decryptor->GetFooterDecryptor() : nullptr;
+
+ DeserializeThriftMsg(reinterpret_cast<const uint8_t*>(metadata), metadata_len,
+ metadata_.get(), footer_decryptor);
+ metadata_len_ = *metadata_len;
+
+ if (metadata_->__isset.created_by) {
+ writer_version_ = ApplicationVersion(metadata_->created_by);
+ } else {
+ writer_version_ = ApplicationVersion("unknown 0.0.0");
+ }
+
+ InitSchema();
+ InitColumnOrders();
+ InitKeyValueMetadata();
+ }
+
+ bool VerifySignature(const void* signature) {
+ // verify decryption properties are set
+ if (file_decryptor_ == nullptr) {
+      throw ParquetException("Decryption not set properly. Cannot verify signature");
+ }
+ // serialize the footer
+ uint8_t* serialized_data;
+ uint32_t serialized_len = metadata_len_;
+ ThriftSerializer serializer;
+ serializer.SerializeToBuffer(metadata_.get(), &serialized_len, &serialized_data);
+
+ // encrypt with nonce
+ auto nonce = const_cast<uint8_t*>(reinterpret_cast<const uint8_t*>(signature));
+ auto tag = const_cast<uint8_t*>(reinterpret_cast<const uint8_t*>(signature)) +
+ encryption::kNonceLength;
+
+ std::string key = file_decryptor_->GetFooterKey();
+ std::string aad = encryption::CreateFooterAad(file_decryptor_->file_aad());
+
+ auto aes_encryptor = encryption::AesEncryptor::Make(
+ file_decryptor_->algorithm(), static_cast<int>(key.size()), true, nullptr);
+
+ std::shared_ptr<Buffer> encrypted_buffer = std::static_pointer_cast<ResizableBuffer>(
+ AllocateBuffer(file_decryptor_->pool(),
+ aes_encryptor->CiphertextSizeDelta() + serialized_len));
+ uint32_t encrypted_len = aes_encryptor->SignedFooterEncrypt(
+ serialized_data, serialized_len, str2bytes(key), static_cast<int>(key.size()),
+ str2bytes(aad), static_cast<int>(aad.size()), nonce,
+ encrypted_buffer->mutable_data());
+ // Delete AES encryptor object. It was created only to verify the footer signature.
+ aes_encryptor->WipeOut();
+ delete aes_encryptor;
+ return 0 ==
+ memcmp(encrypted_buffer->data() + encrypted_len - encryption::kGcmTagLength,
+ tag, encryption::kGcmTagLength);
+ }
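+
+  // Layout note (inferred from the code above, stated as an assumption): the
+  // `signature` buffer holds the AES-GCM nonce (encryption::kNonceLength
+  // bytes) immediately followed by the GCM tag (encryption::kGcmTagLength
+  // bytes); verification re-encrypts the serialized footer with that nonce
+  // and compares the freshly computed tag against the stored one.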
+
+ inline uint32_t size() const { return metadata_len_; }
+ inline int num_columns() const { return schema_.num_columns(); }
+ inline int64_t num_rows() const { return metadata_->num_rows; }
+ inline int num_row_groups() const {
+ return static_cast<int>(metadata_->row_groups.size());
+ }
+ inline int32_t version() const { return metadata_->version; }
+ inline const std::string& created_by() const { return metadata_->created_by; }
+ inline int num_schema_elements() const {
+ return static_cast<int>(metadata_->schema.size());
+ }
+
+ inline bool is_encryption_algorithm_set() const {
+ return metadata_->__isset.encryption_algorithm;
+ }
+ inline EncryptionAlgorithm encryption_algorithm() {
+ return FromThrift(metadata_->encryption_algorithm);
+ }
+ inline const std::string& footer_signing_key_metadata() {
+ return metadata_->footer_signing_key_metadata;
+ }
+
+ const ApplicationVersion& writer_version() const { return writer_version_; }
+
+ void WriteTo(::arrow::io::OutputStream* dst,
+ const std::shared_ptr<Encryptor>& encryptor) const {
+ ThriftSerializer serializer;
+    // The encryption_algorithm field is only set in the footer of encrypted
+    // files that use plaintext footers
+ if (is_encryption_algorithm_set()) {
+ uint8_t* serialized_data;
+ uint32_t serialized_len;
+ serializer.SerializeToBuffer(metadata_.get(), &serialized_len, &serialized_data);
+
+      // encrypt the serialized footer to produce the signature
+ std::vector<uint8_t> encrypted_data(encryptor->CiphertextSizeDelta() +
+ serialized_len);
+ unsigned encrypted_len =
+ encryptor->Encrypt(serialized_data, serialized_len, encrypted_data.data());
+
+ // write unencrypted footer
+ PARQUET_THROW_NOT_OK(dst->Write(serialized_data, serialized_len));
+ // Write signature (nonce and tag)
+ PARQUET_THROW_NOT_OK(
+ dst->Write(encrypted_data.data() + 4, encryption::kNonceLength));
+ PARQUET_THROW_NOT_OK(
+ dst->Write(encrypted_data.data() + encrypted_len - encryption::kGcmTagLength,
+ encryption::kGcmTagLength));
+ } else { // either plaintext file (when encryptor is null)
+ // or encrypted file with encrypted footer
+ serializer.Serialize(metadata_.get(), dst, encryptor);
+ }
+ }
+
+ std::unique_ptr<RowGroupMetaData> RowGroup(int i) {
+ if (!(i < num_row_groups())) {
+ std::stringstream ss;
+ ss << "The file only has " << num_row_groups()
+ << " row groups, requested metadata for row group: " << i;
+ throw ParquetException(ss.str());
+ }
+ return RowGroupMetaData::Make(&metadata_->row_groups[i], &schema_, &writer_version_,
+ file_decryptor_);
+ }
+
+ bool Equals(const FileMetaDataImpl& other) const {
+ return *metadata_ == *other.metadata_;
+ }
+
+ const SchemaDescriptor* schema() const { return &schema_; }
+
+ const std::shared_ptr<const KeyValueMetadata>& key_value_metadata() const {
+ return key_value_metadata_;
+ }
+
+ void set_file_path(const std::string& path) {
+ for (format::RowGroup& row_group : metadata_->row_groups) {
+ for (format::ColumnChunk& chunk : row_group.columns) {
+ chunk.__set_file_path(path);
+ }
+ }
+ }
+
+ format::RowGroup& row_group(int i) {
+ DCHECK_LT(i, num_row_groups());
+ return metadata_->row_groups[i];
+ }
+
+ void AppendRowGroups(const std::unique_ptr<FileMetaDataImpl>& other) {
+ if (!schema()->Equals(*other->schema())) {
+ throw ParquetException("AppendRowGroups requires equal schemas.");
+ }
+
+ format::RowGroup other_rg;
+ for (int i = 0; i < other->num_row_groups(); i++) {
+ other_rg = other->row_group(i);
+ metadata_->row_groups.push_back(other_rg);
+ metadata_->num_rows += other_rg.num_rows;
+ }
+ }
+
+ std::shared_ptr<FileMetaData> Subset(const std::vector<int>& row_groups) {
+ for (int i : row_groups) {
+ if (i < num_row_groups()) continue;
+
+ throw ParquetException(
+ "The file only has ", num_row_groups(),
+ " row groups, but requested a subset including row group: ", i);
+ }
+
+ std::shared_ptr<FileMetaData> out(new FileMetaData());
+ out->impl_.reset(new FileMetaDataImpl());
+ out->impl_->metadata_.reset(new format::FileMetaData());
+
+ auto metadata = out->impl_->metadata_.get();
+ metadata->version = metadata_->version;
+ metadata->schema = metadata_->schema;
+
+ metadata->row_groups.resize(row_groups.size());
+ int i = 0;
+ for (int selected_index : row_groups) {
+ metadata->num_rows += row_group(selected_index).num_rows;
+ metadata->row_groups[i++] = row_group(selected_index);
+ }
+
+ metadata->key_value_metadata = metadata_->key_value_metadata;
+ metadata->created_by = metadata_->created_by;
+ metadata->column_orders = metadata_->column_orders;
+ metadata->encryption_algorithm = metadata_->encryption_algorithm;
+ metadata->footer_signing_key_metadata = metadata_->footer_signing_key_metadata;
+ metadata->__isset = metadata_->__isset;
+
+ out->impl_->schema_ = schema_;
+ out->impl_->writer_version_ = writer_version_;
+ out->impl_->key_value_metadata_ = key_value_metadata_;
+ out->impl_->file_decryptor_ = file_decryptor_;
+
+ return out;
+ }
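+
+  // Usage sketch (illustrative assumption): for a file with three row groups,
+  // Subset({2, 0}) returns metadata containing row groups 2 and 0, in that
+  // order, with num_rows recomputed from the selected groups only.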
+
+ void set_file_decryptor(std::shared_ptr<InternalFileDecryptor> file_decryptor) {
+ file_decryptor_ = file_decryptor;
+ }
+
+ private:
+ friend FileMetaDataBuilder;
+ uint32_t metadata_len_ = 0;
+ std::unique_ptr<format::FileMetaData> metadata_;
+ SchemaDescriptor schema_;
+ ApplicationVersion writer_version_;
+ std::shared_ptr<const KeyValueMetadata> key_value_metadata_;
+ std::shared_ptr<InternalFileDecryptor> file_decryptor_;
+
+ void InitSchema() {
+ if (metadata_->schema.empty()) {
+ throw ParquetException("Empty file schema (no root)");
+ }
+ schema_.Init(schema::Unflatten(&metadata_->schema[0],
+ static_cast<int>(metadata_->schema.size())));
+ }
+
+ void InitColumnOrders() {
+ // update ColumnOrder
+ std::vector<parquet::ColumnOrder> column_orders;
+ if (metadata_->__isset.column_orders) {
+ for (auto column_order : metadata_->column_orders) {
+ if (column_order.__isset.TYPE_ORDER) {
+ column_orders.push_back(ColumnOrder::type_defined_);
+ } else {
+ column_orders.push_back(ColumnOrder::undefined_);
+ }
+ }
+ } else {
+ column_orders.resize(schema_.num_columns(), ColumnOrder::undefined_);
+ }
+
+ schema_.updateColumnOrders(column_orders);
+ }
+
+ void InitKeyValueMetadata() {
+ std::shared_ptr<KeyValueMetadata> metadata = nullptr;
+ if (metadata_->__isset.key_value_metadata) {
+ metadata = std::make_shared<KeyValueMetadata>();
+ for (const auto& it : metadata_->key_value_metadata) {
+ metadata->Append(it.key, it.value);
+ }
+ }
+ key_value_metadata_ = std::move(metadata);
+ }
+};
+
+std::shared_ptr<FileMetaData> FileMetaData::Make(
+ const void* metadata, uint32_t* metadata_len,
+ std::shared_ptr<InternalFileDecryptor> file_decryptor) {
+ // This FileMetaData ctor is private, not compatible with std::make_shared
+ return std::shared_ptr<FileMetaData>(
+ new FileMetaData(metadata, metadata_len, file_decryptor));
+}
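+
+// Usage sketch (illustrative; `buf` and `footer_len` are hypothetical): given
+// a Thrift-serialized footer, Make deserializes it and updates the length
+// argument to the number of bytes actually consumed:
+//
+//   uint32_t len = footer_len;
+//   std::shared_ptr<FileMetaData> md = FileMetaData::Make(buf, &len);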
+
+FileMetaData::FileMetaData(const void* metadata, uint32_t* metadata_len,
+ std::shared_ptr<InternalFileDecryptor> file_decryptor)
+ : impl_{std::unique_ptr<FileMetaDataImpl>(
+ new FileMetaDataImpl(metadata, metadata_len, file_decryptor))} {}
+
+FileMetaData::FileMetaData()
+ : impl_{std::unique_ptr<FileMetaDataImpl>(new FileMetaDataImpl())} {}
+
+FileMetaData::~FileMetaData() = default;
+
+bool FileMetaData::Equals(const FileMetaData& other) const {
+ return impl_->Equals(*other.impl_);
+}
+
+std::unique_ptr<RowGroupMetaData> FileMetaData::RowGroup(int i) const {
+ return impl_->RowGroup(i);
+}
+
+bool FileMetaData::VerifySignature(const void* signature) {
+ return impl_->VerifySignature(signature);
+}
+
+uint32_t FileMetaData::size() const { return impl_->size(); }
+
+int FileMetaData::num_columns() const { return impl_->num_columns(); }
+
+int64_t FileMetaData::num_rows() const { return impl_->num_rows(); }
+
+int FileMetaData::num_row_groups() const { return impl_->num_row_groups(); }
+
+bool FileMetaData::can_decompress() const {
+ int n_row_groups = num_row_groups();
+ for (int i = 0; i < n_row_groups; i++) {
+ if (!RowGroup(i)->can_decompress()) {
+ return false;
+ }
+ }
+ return true;
+}
+
+bool FileMetaData::is_encryption_algorithm_set() const {
+ return impl_->is_encryption_algorithm_set();
+}
+
+EncryptionAlgorithm FileMetaData::encryption_algorithm() const {
+ return impl_->encryption_algorithm();
+}
+
+const std::string& FileMetaData::footer_signing_key_metadata() const {
+ return impl_->footer_signing_key_metadata();
+}
+
+void FileMetaData::set_file_decryptor(
+ std::shared_ptr<InternalFileDecryptor> file_decryptor) {
+ impl_->set_file_decryptor(file_decryptor);
+}
+
+ParquetVersion::type FileMetaData::version() const {
+ switch (impl_->version()) {
+ case 1:
+ return ParquetVersion::PARQUET_1_0;
+ case 2:
+ return ParquetVersion::PARQUET_2_0;
+ default:
+ // Improperly set version, assuming Parquet 1.0
+ break;
+ }
+ return ParquetVersion::PARQUET_1_0;
+}
+
+const ApplicationVersion& FileMetaData::writer_version() const {
+ return impl_->writer_version();
+}
+
+const std::string& FileMetaData::created_by() const { return impl_->created_by(); }
+
+int FileMetaData::num_schema_elements() const { return impl_->num_schema_elements(); }
+
+const SchemaDescriptor* FileMetaData::schema() const { return impl_->schema(); }
+
+const std::shared_ptr<const KeyValueMetadata>& FileMetaData::key_value_metadata() const {
+ return impl_->key_value_metadata();
+}
+
+void FileMetaData::set_file_path(const std::string& path) { impl_->set_file_path(path); }
+
+void FileMetaData::AppendRowGroups(const FileMetaData& other) {
+ impl_->AppendRowGroups(other.impl_);
+}
+
+std::shared_ptr<FileMetaData> FileMetaData::Subset(
+ const std::vector<int>& row_groups) const {
+ return impl_->Subset(row_groups);
+}
+
+void FileMetaData::WriteTo(::arrow::io::OutputStream* dst,
+ const std::shared_ptr<Encryptor>& encryptor) const {
+ return impl_->WriteTo(dst, encryptor);
+}
+
+class FileCryptoMetaData::FileCryptoMetaDataImpl {
+ public:
+ FileCryptoMetaDataImpl() = default;
+
+ explicit FileCryptoMetaDataImpl(const uint8_t* metadata, uint32_t* metadata_len) {
+ metadata_.reset(new format::FileCryptoMetaData);
+ DeserializeThriftMsg(metadata, metadata_len, metadata_.get());
+ metadata_len_ = *metadata_len;
+ }
+
+ EncryptionAlgorithm encryption_algorithm() {
+ return FromThrift(metadata_->encryption_algorithm);
+ }
+ const std::string& key_metadata() { return metadata_->key_metadata; }
+ void WriteTo(::arrow::io::OutputStream* dst) const {
+ ThriftSerializer serializer;
+ serializer.Serialize(metadata_.get(), dst);
+ }
+
+ private:
+ friend FileMetaDataBuilder;
+ std::unique_ptr<format::FileCryptoMetaData> metadata_;
+ uint32_t metadata_len_;
+};
+
+EncryptionAlgorithm FileCryptoMetaData::encryption_algorithm() const {
+ return impl_->encryption_algorithm();
+}
+
+const std::string& FileCryptoMetaData::key_metadata() const {
+ return impl_->key_metadata();
+}
+
+std::shared_ptr<FileCryptoMetaData> FileCryptoMetaData::Make(
+ const uint8_t* serialized_metadata, uint32_t* metadata_len) {
+ return std::shared_ptr<FileCryptoMetaData>(
+ new FileCryptoMetaData(serialized_metadata, metadata_len));
+}
+
+FileCryptoMetaData::FileCryptoMetaData(const uint8_t* serialized_metadata,
+ uint32_t* metadata_len)
+ : impl_(new FileCryptoMetaDataImpl(serialized_metadata, metadata_len)) {}
+
+FileCryptoMetaData::FileCryptoMetaData() : impl_(new FileCryptoMetaDataImpl()) {}
+
+FileCryptoMetaData::~FileCryptoMetaData() = default;
+
+void FileCryptoMetaData::WriteTo(::arrow::io::OutputStream* dst) const {
+ impl_->WriteTo(dst);
+}
+
+std::string FileMetaData::SerializeToString() const {
+  // We need to pass in an initial size. Since the stream automatically grows
+  // its buffer to hold the metadata, we just leave it at 0.
+ PARQUET_ASSIGN_OR_THROW(auto serializer, ::arrow::io::BufferOutputStream::Create(0));
+ WriteTo(serializer.get());
+ PARQUET_ASSIGN_OR_THROW(auto metadata_buffer, serializer->Finish());
+ return metadata_buffer->ToString();
+}
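+
+// Round-trip sketch (illustrative assumption, using the accessors above):
+//
+//   std::string s = md->SerializeToString();
+//   uint32_t len = static_cast<uint32_t>(s.size());
+//   std::shared_ptr<FileMetaData> md2 = FileMetaData::Make(s.data(), &len);
+//   // md2->Equals(*md) is expected to hold for unencrypted metadata.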
+
+ApplicationVersion::ApplicationVersion(std::string application, int major, int minor,
+ int patch)
+ : application_(std::move(application)), version{major, minor, patch, "", "", ""} {}
+
+namespace {
+// Parse the application version format and set parsed values to
+// ApplicationVersion.
+//
+// The application version format must be compatible with
+// parquet-mr's. See also:
+// * https://github.com/apache/parquet-mr/blob/master/parquet-common/src/main/java/org/apache/parquet/VersionParser.java
+// * https://github.com/apache/parquet-mr/blob/master/parquet-common/src/main/java/org/apache/parquet/SemanticVersion.java
+//
+// The application version format:
+// "${APPLICATION_NAME}"
+// "${APPLICATION_NAME} version ${VERSION}"
+// "${APPLICATION_NAME} version ${VERSION} (build ${BUILD_NAME})"
+//
+// Eg:
+// parquet-cpp
+// parquet-cpp version 1.5.0ab-xyz5.5.0+cd
+// parquet-cpp version 1.5.0ab-xyz5.5.0+cd (build abcd)
+//
+// The VERSION format:
+// "${MAJOR}"
+// "${MAJOR}.${MINOR}"
+// "${MAJOR}.${MINOR}.${PATCH}"
+// "${MAJOR}.${MINOR}.${PATCH}${UNKNOWN}"
+// "${MAJOR}.${MINOR}.${PATCH}${UNKNOWN}-${PRE_RELEASE}"
+// "${MAJOR}.${MINOR}.${PATCH}${UNKNOWN}-${PRE_RELEASE}+${BUILD_INFO}"
+// "${MAJOR}.${MINOR}.${PATCH}${UNKNOWN}+${BUILD_INFO}"
+// "${MAJOR}.${MINOR}.${PATCH}-${PRE_RELEASE}"
+// "${MAJOR}.${MINOR}.${PATCH}-${PRE_RELEASE}+${BUILD_INFO}"
+// "${MAJOR}.${MINOR}.${PATCH}+${BUILD_INFO}"
+//
+// Eg:
+// 1
+// 1.5
+// 1.5.0
+// 1.5.0ab
+// 1.5.0ab-cdh5.5.0
+// 1.5.0ab-cdh5.5.0+cd
+// 1.5.0ab+cd
+// 1.5.0-cdh5.5.0
+// 1.5.0-cdh5.5.0+cd
+// 1.5.0+cd
+class ApplicationVersionParser {
+ public:
+ ApplicationVersionParser(const std::string& created_by,
+ ApplicationVersion& application_version)
+ : created_by_(created_by),
+ application_version_(application_version),
+ spaces_(" \t\v\r\n\f"),
+ digits_("0123456789") {}
+
+ void Parse() {
+ application_version_.application_ = "unknown";
+ application_version_.version = {0, 0, 0, "", "", ""};
+
+ if (!ParseApplicationName()) {
+ return;
+ }
+ if (!ParseVersion()) {
+ return;
+ }
+ if (!ParseBuildName()) {
+ return;
+ }
+ }
+
+ private:
+ bool IsSpace(const std::string& string, const size_t& offset) {
+ auto target = ::arrow::util::string_view(string).substr(offset, 1);
+ return target.find_first_of(spaces_) != ::arrow::util::string_view::npos;
+ }
+
+ void RemovePrecedingSpaces(const std::string& string, size_t& start,
+ const size_t& end) {
+ while (start < end && IsSpace(string, start)) {
+ ++start;
+ }
+ }
+
+ void RemoveTrailingSpaces(const std::string& string, const size_t& start, size_t& end) {
+ while (start < (end - 1) && (end - 1) < string.size() && IsSpace(string, end - 1)) {
+ --end;
+ }
+ }
+
+ bool ParseApplicationName() {
+ std::string version_mark(" version ");
+ auto version_mark_position = created_by_.find(version_mark);
+ size_t application_name_end;
+ // No VERSION and BUILD_NAME.
+ if (version_mark_position == std::string::npos) {
+ version_start_ = std::string::npos;
+ application_name_end = created_by_.size();
+ } else {
+ version_start_ = version_mark_position + version_mark.size();
+ application_name_end = version_mark_position;
+ }
+
+ size_t application_name_start = 0;
+ RemovePrecedingSpaces(created_by_, application_name_start, application_name_end);
+ RemoveTrailingSpaces(created_by_, application_name_start, application_name_end);
+ application_version_.application_ = created_by_.substr(
+ application_name_start, application_name_end - application_name_start);
+
+ return true;
+ }
+
+ bool ParseVersion() {
+ // No VERSION.
+ if (version_start_ == std::string::npos) {
+ return false;
+ }
+
+ RemovePrecedingSpaces(created_by_, version_start_, created_by_.size());
+ version_end_ = created_by_.find(" (", version_start_);
+ // No BUILD_NAME.
+ if (version_end_ == std::string::npos) {
+ version_end_ = created_by_.size();
+ }
+ RemoveTrailingSpaces(created_by_, version_start_, version_end_);
+ // No VERSION.
+ if (version_start_ == version_end_) {
+ return false;
+ }
+ version_string_ = created_by_.substr(version_start_, version_end_ - version_start_);
+
+ if (!ParseVersionMajor()) {
+ return false;
+ }
+ if (!ParseVersionMinor()) {
+ return false;
+ }
+ if (!ParseVersionPatch()) {
+ return false;
+ }
+ if (!ParseVersionUnknown()) {
+ return false;
+ }
+ if (!ParseVersionPreRelease()) {
+ return false;
+ }
+ if (!ParseVersionBuildInfo()) {
+ return false;
+ }
+
+ return true;
+ }
+
+ bool ParseVersionMajor() {
+ size_t version_major_start = 0;
+ auto version_major_end = version_string_.find_first_not_of(digits_);
+ // MAJOR only.
+ if (version_major_end == std::string::npos) {
+ version_major_end = version_string_.size();
+ version_parsing_position_ = version_major_end;
+ } else {
+ // No ".".
+ if (version_string_[version_major_end] != '.') {
+ return false;
+ }
+ // No MAJOR.
+ if (version_major_end == version_major_start) {
+ return false;
+ }
+ version_parsing_position_ = version_major_end + 1; // +1 is for '.'.
+ }
+ auto version_major_string = version_string_.substr(
+ version_major_start, version_major_end - version_major_start);
+ application_version_.version.major = atoi(version_major_string.c_str());
+ return true;
+ }
+
+ bool ParseVersionMinor() {
+ auto version_minor_start = version_parsing_position_;
+ auto version_minor_end =
+ version_string_.find_first_not_of(digits_, version_minor_start);
+ // MAJOR.MINOR only.
+ if (version_minor_end == std::string::npos) {
+ version_minor_end = version_string_.size();
+ version_parsing_position_ = version_minor_end;
+ } else {
+ // No ".".
+ if (version_string_[version_minor_end] != '.') {
+ return false;
+ }
+ // No MINOR.
+ if (version_minor_end == version_minor_start) {
+ return false;
+ }
+ version_parsing_position_ = version_minor_end + 1; // +1 is for '.'.
+ }
+ auto version_minor_string = version_string_.substr(
+ version_minor_start, version_minor_end - version_minor_start);
+ application_version_.version.minor = atoi(version_minor_string.c_str());
+ return true;
+ }
+
+ bool ParseVersionPatch() {
+ auto version_patch_start = version_parsing_position_;
+ auto version_patch_end =
+ version_string_.find_first_not_of(digits_, version_patch_start);
+ // No UNKNOWN, PRE_RELEASE and BUILD_INFO.
+ if (version_patch_end == std::string::npos) {
+ version_patch_end = version_string_.size();
+ }
+ // No PATCH.
+ if (version_patch_end == version_patch_start) {
+ return false;
+ }
+ auto version_patch_string = version_string_.substr(
+ version_patch_start, version_patch_end - version_patch_start);
+ application_version_.version.patch = atoi(version_patch_string.c_str());
+ version_parsing_position_ = version_patch_end;
+ return true;
+ }
+
+ bool ParseVersionUnknown() {
+ // No UNKNOWN.
+ if (version_parsing_position_ == version_string_.size()) {
+ return true;
+ }
+ auto version_unknown_start = version_parsing_position_;
+ auto version_unknown_end = version_string_.find_first_of("-+", version_unknown_start);
+ // No PRE_RELEASE and BUILD_INFO
+ if (version_unknown_end == std::string::npos) {
+ version_unknown_end = version_string_.size();
+ }
+ application_version_.version.unknown = version_string_.substr(
+ version_unknown_start, version_unknown_end - version_unknown_start);
+ version_parsing_position_ = version_unknown_end;
+ return true;
+ }
+
+ bool ParseVersionPreRelease() {
+ // No PRE_RELEASE.
+ if (version_parsing_position_ == version_string_.size() ||
+ version_string_[version_parsing_position_] != '-') {
+ return true;
+ }
+
+ auto version_pre_release_start = version_parsing_position_ + 1; // +1 is for '-'.
+ auto version_pre_release_end =
+ version_string_.find_first_of("+", version_pre_release_start);
+ // No BUILD_INFO
+ if (version_pre_release_end == std::string::npos) {
+ version_pre_release_end = version_string_.size();
+ }
+ application_version_.version.pre_release = version_string_.substr(
+ version_pre_release_start, version_pre_release_end - version_pre_release_start);
+ version_parsing_position_ = version_pre_release_end;
+ return true;
+ }
+
+ bool ParseVersionBuildInfo() {
+ // No BUILD_INFO.
+ if (version_parsing_position_ == version_string_.size() ||
+ version_string_[version_parsing_position_] != '+') {
+ return true;
+ }
+
+ auto version_build_info_start = version_parsing_position_ + 1; // +1 is for '+'.
+ application_version_.version.build_info =
+ version_string_.substr(version_build_info_start);
+ return true;
+ }
+
+ bool ParseBuildName() {
+ std::string build_mark(" (build ");
+ auto build_mark_position = created_by_.find(build_mark, version_end_);
+ // No BUILD_NAME.
+ if (build_mark_position == std::string::npos) {
+ return false;
+ }
+ auto build_name_start = build_mark_position + build_mark.size();
+ RemovePrecedingSpaces(created_by_, build_name_start, created_by_.size());
+ auto build_name_end = created_by_.find_first_of(")", build_name_start);
+ // No end ")".
+ if (build_name_end == std::string::npos) {
+ return false;
+ }
+ RemoveTrailingSpaces(created_by_, build_name_start, build_name_end);
+ application_version_.build_ =
+ created_by_.substr(build_name_start, build_name_end - build_name_start);
+
+ return true;
+ }
+
+ const std::string& created_by_;
+ ApplicationVersion& application_version_;
+
+ // For parsing.
+ std::string spaces_;
+ std::string digits_;
+ size_t version_parsing_position_;
+ size_t version_start_;
+ size_t version_end_;
+ std::string version_string_;
+};
+} // namespace
+
+ApplicationVersion::ApplicationVersion(const std::string& created_by) {
+ ApplicationVersionParser parser(created_by, *this);
+ parser.Parse();
+}
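+
+// Parsing sketch (illustrative; the values shown are what the parser above is
+// expected to produce for this input, per the format comment):
+//
+//   ApplicationVersion v("parquet-cpp version 1.5.0ab-xyz5.5.0+cd (build abcd)");
+//   // v.application_ == "parquet-cpp", v.version major/minor/patch == 1/5/0,
+//   // v.version.unknown == "ab", v.version.pre_release == "xyz5.5.0",
+//   // v.version.build_info == "cd", v.build_ == "abcd"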
+
+bool ApplicationVersion::VersionLt(const ApplicationVersion& other_version) const {
+ if (application_ != other_version.application_) return false;
+
+ if (version.major < other_version.version.major) return true;
+ if (version.major > other_version.version.major) return false;
+ DCHECK_EQ(version.major, other_version.version.major);
+ if (version.minor < other_version.version.minor) return true;
+ if (version.minor > other_version.version.minor) return false;
+ DCHECK_EQ(version.minor, other_version.version.minor);
+ return version.patch < other_version.version.patch;
+}
+
+bool ApplicationVersion::VersionEq(const ApplicationVersion& other_version) const {
+ return application_ == other_version.application_ &&
+ version.major == other_version.version.major &&
+ version.minor == other_version.version.minor &&
+ version.patch == other_version.version.patch;
+}
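+
+// Comparison sketch (illustrative): VersionLt only orders versions of the
+// same application string:
+//
+//   ApplicationVersion("parquet-mr", 1, 9, 0)
+//       .VersionLt(ApplicationVersion("parquet-mr", 1, 10, 0));   // true
+//   ApplicationVersion("parquet-cpp", 1, 9, 0)
+//       .VersionLt(ApplicationVersion("parquet-mr", 1, 10, 0));   // false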
+
+// Reference:
+// parquet-mr/parquet-column/src/main/java/org/apache/parquet/CorruptStatistics.java
+// PARQUET-686 has more discussion on statistics
+bool ApplicationVersion::HasCorrectStatistics(Type::type col_type,
+ EncodedStatistics& statistics,
+ SortOrder::type sort_order) const {
+  // From parquet-cpp 1.3.0 and parquet-mr 1.10.0 onwards, statistics are
+  // computed correctly for all types.
+ if ((application_ == "parquet-cpp" && VersionLt(PARQUET_CPP_FIXED_STATS_VERSION())) ||
+ (application_ == "parquet-mr" && VersionLt(PARQUET_MR_FIXED_STATS_VERSION()))) {
+    // Only SIGNED sort orders are valid, unless max and min are the same
+    // (in which case the sort order does not matter).
+ bool max_equals_min = statistics.has_min && statistics.has_max
+ ? statistics.min() == statistics.max()
+ : false;
+ if (SortOrder::SIGNED != sort_order && !max_equals_min) {
+ return false;
+ }
+
+ // Statistics of other types are OK
+ if (col_type != Type::FIXED_LEN_BYTE_ARRAY && col_type != Type::BYTE_ARRAY) {
+ return true;
+ }
+ }
+  // created_by may be unpopulated; this could have been caused by parquet-mr
+  // during the same period as PARQUET-251 (see PARQUET-297).
+ if (application_ == "unknown") {
+ return true;
+ }
+
+ // Unknown sort order has incorrect stats
+ if (SortOrder::UNKNOWN == sort_order) {
+ return false;
+ }
+
+ // PARQUET-251
+ if (VersionLt(PARQUET_251_FIXED_VERSION())) {
+ return false;
+ }
+
+ return true;
+}
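+
+// Illustrative outcome of the checks above (a sketch; the exact cutoffs live
+// in the PARQUET_*_FIXED_VERSION() constants): for a BYTE_ARRAY column
+// written by a parquet-mr release older than PARQUET_251_FIXED_VERSION(),
+// with SortOrder::SIGNED and distinct min/max, HasCorrectStatistics()
+// returns false, so readers should ignore the recorded min/max statistics.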
+
+// MetaData Builders
+// row-group metadata
+class ColumnChunkMetaDataBuilder::ColumnChunkMetaDataBuilderImpl {
+ public:
+ explicit ColumnChunkMetaDataBuilderImpl(std::shared_ptr<WriterProperties> props,
+ const ColumnDescriptor* column)
+ : owned_column_chunk_(new format::ColumnChunk),
+ properties_(std::move(props)),
+ column_(column) {
+ Init(owned_column_chunk_.get());
+ }
+
+ explicit ColumnChunkMetaDataBuilderImpl(std::shared_ptr<WriterProperties> props,
+ const ColumnDescriptor* column,
+ format::ColumnChunk* column_chunk)
+ : properties_(std::move(props)), column_(column) {
+ Init(column_chunk);
+ }
+
+ const void* contents() const { return column_chunk_; }
+
+ // column chunk
+ void set_file_path(const std::string& val) { column_chunk_->__set_file_path(val); }
+
+ // column metadata
+ void SetStatistics(const EncodedStatistics& val) {
+ column_chunk_->meta_data.__set_statistics(ToThrift(val));
+ }
+
+ void Finish(int64_t num_values, int64_t dictionary_page_offset,
+ int64_t index_page_offset, int64_t data_page_offset,
+ int64_t compressed_size, int64_t uncompressed_size, bool has_dictionary,
+ bool dictionary_fallback,
+ const std::map<Encoding::type, int32_t>& dict_encoding_stats,
+ const std::map<Encoding::type, int32_t>& data_encoding_stats,
+ const std::shared_ptr<Encryptor>& encryptor) {
+ if (dictionary_page_offset > 0) {
+ column_chunk_->meta_data.__set_dictionary_page_offset(dictionary_page_offset);
+ column_chunk_->__set_file_offset(dictionary_page_offset + compressed_size);
+ } else {
+ column_chunk_->__set_file_offset(data_page_offset + compressed_size);
+ }
+ column_chunk_->__isset.meta_data = true;
+ column_chunk_->meta_data.__set_num_values(num_values);
+ if (index_page_offset >= 0) {
+ column_chunk_->meta_data.__set_index_page_offset(index_page_offset);
+ }
+ column_chunk_->meta_data.__set_data_page_offset(data_page_offset);
+ column_chunk_->meta_data.__set_total_uncompressed_size(uncompressed_size);
+ column_chunk_->meta_data.__set_total_compressed_size(compressed_size);
+
+ std::vector<format::Encoding::type> thrift_encodings;
+ if (has_dictionary) {
+ thrift_encodings.push_back(ToThrift(properties_->dictionary_index_encoding()));
+ if (properties_->version() == ParquetVersion::PARQUET_1_0) {
+ thrift_encodings.push_back(ToThrift(Encoding::PLAIN));
+ } else {
+ thrift_encodings.push_back(ToThrift(properties_->dictionary_page_encoding()));
+ }
+ } else { // Dictionary not enabled
+ thrift_encodings.push_back(ToThrift(properties_->encoding(column_->path())));
+ }
+ thrift_encodings.push_back(ToThrift(Encoding::RLE));
+    // Only the PLAIN encoding is supported for fallback in V1.
+    // TODO(majetideepak): Use the user-specified encoding for V2.
+ if (dictionary_fallback) {
+ thrift_encodings.push_back(ToThrift(Encoding::PLAIN));
+ }
+ column_chunk_->meta_data.__set_encodings(thrift_encodings);
+ std::vector<format::PageEncodingStats> thrift_encoding_stats;
+ // Add dictionary page encoding stats
+ for (const auto& entry : dict_encoding_stats) {
+ format::PageEncodingStats dict_enc_stat;
+ dict_enc_stat.__set_page_type(format::PageType::DICTIONARY_PAGE);
+ dict_enc_stat.__set_encoding(ToThrift(entry.first));
+ dict_enc_stat.__set_count(entry.second);
+ thrift_encoding_stats.push_back(dict_enc_stat);
+ }
+ // Add data page encoding stats
+ for (const auto& entry : data_encoding_stats) {
+ format::PageEncodingStats data_enc_stat;
+ data_enc_stat.__set_page_type(format::PageType::DATA_PAGE);
+ data_enc_stat.__set_encoding(ToThrift(entry.first));
+ data_enc_stat.__set_count(entry.second);
+ thrift_encoding_stats.push_back(data_enc_stat);
+ }
+ column_chunk_->meta_data.__set_encoding_stats(thrift_encoding_stats);
+
+ const auto& encrypt_md =
+ properties_->column_encryption_properties(column_->path()->ToDotString());
+ // column is encrypted
+ if (encrypt_md != nullptr && encrypt_md->is_encrypted()) {
+ column_chunk_->__isset.crypto_metadata = true;
+ format::ColumnCryptoMetaData ccmd;
+ if (encrypt_md->is_encrypted_with_footer_key()) {
+ // encrypted with footer key
+ ccmd.__isset.ENCRYPTION_WITH_FOOTER_KEY = true;
+ ccmd.__set_ENCRYPTION_WITH_FOOTER_KEY(format::EncryptionWithFooterKey());
+ } else { // encrypted with column key
+ format::EncryptionWithColumnKey eck;
+ eck.__set_key_metadata(encrypt_md->key_metadata());
+ eck.__set_path_in_schema(column_->path()->ToDotVector());
+ ccmd.__isset.ENCRYPTION_WITH_COLUMN_KEY = true;
+ ccmd.__set_ENCRYPTION_WITH_COLUMN_KEY(eck);
+ }
+ column_chunk_->__set_crypto_metadata(ccmd);
+
+ bool encrypted_footer =
+ properties_->file_encryption_properties()->encrypted_footer();
+ bool encrypt_metadata =
+ !encrypted_footer || !encrypt_md->is_encrypted_with_footer_key();
+ if (encrypt_metadata) {
+ ThriftSerializer serializer;
+        // Serialize and encrypt the ColumnMetaData separately:
+        // Thrift-serialize the structure, encrypt it with the column key,
+        // and write the result to encrypted_column_metadata.
+ uint8_t* serialized_data;
+ uint32_t serialized_len;
+
+ serializer.SerializeToBuffer(&column_chunk_->meta_data, &serialized_len,
+ &serialized_data);
+
+ std::vector<uint8_t> encrypted_data(encryptor->CiphertextSizeDelta() +
+ serialized_len);
+ unsigned encrypted_len =
+ encryptor->Encrypt(serialized_data, serialized_len, encrypted_data.data());
+
+ const char* temp =
+ const_cast<const char*>(reinterpret_cast<char*>(encrypted_data.data()));
+ std::string encrypted_column_metadata(temp, encrypted_len);
+ column_chunk_->__set_encrypted_column_metadata(encrypted_column_metadata);
+
+ if (encrypted_footer) {
+ column_chunk_->__isset.meta_data = false;
+ } else {
+ // Keep redacted metadata version for old readers
+ column_chunk_->__isset.meta_data = true;
+ column_chunk_->meta_data.__isset.statistics = false;
+ column_chunk_->meta_data.__isset.encoding_stats = false;
+ }
+ }
+ }
+ }
+
+ void WriteTo(::arrow::io::OutputStream* sink) {
+ ThriftSerializer serializer;
+ serializer.Serialize(column_chunk_, sink);
+ }
+
+ const ColumnDescriptor* descr() const { return column_; }
+ int64_t total_compressed_size() const {
+ return column_chunk_->meta_data.total_compressed_size;
+ }
+
+ private:
+ void Init(format::ColumnChunk* column_chunk) {
+ column_chunk_ = column_chunk;
+
+ column_chunk_->meta_data.__set_type(ToThrift(column_->physical_type()));
+ column_chunk_->meta_data.__set_path_in_schema(column_->path()->ToDotVector());
+ column_chunk_->meta_data.__set_codec(
+ ToThrift(properties_->compression(column_->path())));
+ }
+
+ format::ColumnChunk* column_chunk_;
+ std::unique_ptr<format::ColumnChunk> owned_column_chunk_;
+ const std::shared_ptr<WriterProperties> properties_;
+ const ColumnDescriptor* column_;
+};
+
+std::unique_ptr<ColumnChunkMetaDataBuilder> ColumnChunkMetaDataBuilder::Make(
+ std::shared_ptr<WriterProperties> props, const ColumnDescriptor* column,
+ void* contents) {
+ return std::unique_ptr<ColumnChunkMetaDataBuilder>(
+ new ColumnChunkMetaDataBuilder(std::move(props), column, contents));
+}
+
+std::unique_ptr<ColumnChunkMetaDataBuilder> ColumnChunkMetaDataBuilder::Make(
+ std::shared_ptr<WriterProperties> props, const ColumnDescriptor* column) {
+ return std::unique_ptr<ColumnChunkMetaDataBuilder>(
+ new ColumnChunkMetaDataBuilder(std::move(props), column));
+}
+
+ColumnChunkMetaDataBuilder::ColumnChunkMetaDataBuilder(
+ std::shared_ptr<WriterProperties> props, const ColumnDescriptor* column)
+ : impl_{std::unique_ptr<ColumnChunkMetaDataBuilderImpl>(
+ new ColumnChunkMetaDataBuilderImpl(std::move(props), column))} {}
+
+ColumnChunkMetaDataBuilder::ColumnChunkMetaDataBuilder(
+ std::shared_ptr<WriterProperties> props, const ColumnDescriptor* column,
+ void* contents)
+ : impl_{std::unique_ptr<ColumnChunkMetaDataBuilderImpl>(
+ new ColumnChunkMetaDataBuilderImpl(
+ std::move(props), column,
+ reinterpret_cast<format::ColumnChunk*>(contents)))} {}
+
+ColumnChunkMetaDataBuilder::~ColumnChunkMetaDataBuilder() = default;
+
+const void* ColumnChunkMetaDataBuilder::contents() const { return impl_->contents(); }
+
+void ColumnChunkMetaDataBuilder::set_file_path(const std::string& path) {
+ impl_->set_file_path(path);
+}
+
+void ColumnChunkMetaDataBuilder::Finish(
+ int64_t num_values, int64_t dictionary_page_offset, int64_t index_page_offset,
+ int64_t data_page_offset, int64_t compressed_size, int64_t uncompressed_size,
+ bool has_dictionary, bool dictionary_fallback,
+ const std::map<Encoding::type, int32_t>& dict_encoding_stats,
+ const std::map<Encoding::type, int32_t>& data_encoding_stats,
+ const std::shared_ptr<Encryptor>& encryptor) {
+ impl_->Finish(num_values, dictionary_page_offset, index_page_offset, data_page_offset,
+ compressed_size, uncompressed_size, has_dictionary, dictionary_fallback,
+ dict_encoding_stats, data_encoding_stats, encryptor);
+}
+
+void ColumnChunkMetaDataBuilder::WriteTo(::arrow::io::OutputStream* sink) {
+ impl_->WriteTo(sink);
+}
+
+const ColumnDescriptor* ColumnChunkMetaDataBuilder::descr() const {
+ return impl_->descr();
+}
+
+void ColumnChunkMetaDataBuilder::SetStatistics(const EncodedStatistics& result) {
+ impl_->SetStatistics(result);
+}
+
+int64_t ColumnChunkMetaDataBuilder::total_compressed_size() const {
+ return impl_->total_compressed_size();
+}
+
+class RowGroupMetaDataBuilder::RowGroupMetaDataBuilderImpl {
+ public:
+ explicit RowGroupMetaDataBuilderImpl(std::shared_ptr<WriterProperties> props,
+ const SchemaDescriptor* schema, void* contents)
+ : properties_(std::move(props)), schema_(schema), next_column_(0) {
+ row_group_ = reinterpret_cast<format::RowGroup*>(contents);
+ InitializeColumns(schema->num_columns());
+ }
+
+ ColumnChunkMetaDataBuilder* NextColumnChunk() {
+ if (!(next_column_ < num_columns())) {
+ std::stringstream ss;
+ ss << "The schema only has " << num_columns()
+ << " columns, requested metadata for column: " << next_column_;
+ throw ParquetException(ss.str());
+ }
+ auto column = schema_->Column(next_column_);
+ auto column_builder = ColumnChunkMetaDataBuilder::Make(
+ properties_, column, &row_group_->columns[next_column_++]);
+ auto column_builder_ptr = column_builder.get();
+ column_builders_.push_back(std::move(column_builder));
+ return column_builder_ptr;
+ }
+
+ int current_column() { return next_column_ - 1; }
+
+ void Finish(int64_t total_bytes_written, int16_t row_group_ordinal) {
+ if (!(next_column_ == schema_->num_columns())) {
+ std::stringstream ss;
+ ss << "Only " << next_column_ - 1 << " out of " << schema_->num_columns()
+ << " columns are initialized";
+ throw ParquetException(ss.str());
+ }
+
+ int64_t file_offset = 0;
+ int64_t total_compressed_size = 0;
+ for (int i = 0; i < schema_->num_columns(); i++) {
+ if (!(row_group_->columns[i].file_offset >= 0)) {
+ std::stringstream ss;
+ ss << "Column " << i << " is not complete.";
+ throw ParquetException(ss.str());
+ }
+ if (i == 0) {
+ file_offset = row_group_->columns[0].file_offset;
+ }
+ // sometimes column metadata is encrypted and not available to read,
+ // so we must get total_compressed_size from column builder
+ total_compressed_size += column_builders_[i]->total_compressed_size();
+ }
+
+ row_group_->__set_file_offset(file_offset);
+ row_group_->__set_total_compressed_size(total_compressed_size);
+ row_group_->__set_total_byte_size(total_bytes_written);
+ row_group_->__set_ordinal(row_group_ordinal);
+ }
+
+ void set_num_rows(int64_t num_rows) { row_group_->num_rows = num_rows; }
+
+ int num_columns() { return static_cast<int>(row_group_->columns.size()); }
+
+ int64_t num_rows() { return row_group_->num_rows; }
+
+ private:
+ void InitializeColumns(int ncols) { row_group_->columns.resize(ncols); }
+
+ format::RowGroup* row_group_;
+ const std::shared_ptr<WriterProperties> properties_;
+ const SchemaDescriptor* schema_;
+ std::vector<std::unique_ptr<ColumnChunkMetaDataBuilder>> column_builders_;
+ int next_column_;
+};
+
+std::unique_ptr<RowGroupMetaDataBuilder> RowGroupMetaDataBuilder::Make(
+ std::shared_ptr<WriterProperties> props, const SchemaDescriptor* schema_,
+ void* contents) {
+ return std::unique_ptr<RowGroupMetaDataBuilder>(
+ new RowGroupMetaDataBuilder(std::move(props), schema_, contents));
+}
+
+RowGroupMetaDataBuilder::RowGroupMetaDataBuilder(std::shared_ptr<WriterProperties> props,
+ const SchemaDescriptor* schema_,
+ void* contents)
+ : impl_{new RowGroupMetaDataBuilderImpl(std::move(props), schema_, contents)} {}
+
+RowGroupMetaDataBuilder::~RowGroupMetaDataBuilder() = default;
+
+ColumnChunkMetaDataBuilder* RowGroupMetaDataBuilder::NextColumnChunk() {
+ return impl_->NextColumnChunk();
+}
+
+int RowGroupMetaDataBuilder::current_column() const { return impl_->current_column(); }
+
+int RowGroupMetaDataBuilder::num_columns() { return impl_->num_columns(); }
+
+int64_t RowGroupMetaDataBuilder::num_rows() { return impl_->num_rows(); }
+
+void RowGroupMetaDataBuilder::set_num_rows(int64_t num_rows) {
+ impl_->set_num_rows(num_rows);
+}
+
+void RowGroupMetaDataBuilder::Finish(int64_t total_bytes_written,
+ int16_t row_group_ordinal) {
+ impl_->Finish(total_bytes_written, row_group_ordinal);
+}
+
+// file metadata
+// TODO(PARQUET-595) Support key_value_metadata
+class FileMetaDataBuilder::FileMetaDataBuilderImpl {
+ public:
+ explicit FileMetaDataBuilderImpl(
+ const SchemaDescriptor* schema, std::shared_ptr<WriterProperties> props,
+ std::shared_ptr<const KeyValueMetadata> key_value_metadata)
+ : metadata_(new format::FileMetaData()),
+ properties_(std::move(props)),
+ schema_(schema),
+ key_value_metadata_(std::move(key_value_metadata)) {
+ if (properties_->file_encryption_properties() != nullptr &&
+ properties_->file_encryption_properties()->encrypted_footer()) {
+ crypto_metadata_.reset(new format::FileCryptoMetaData());
+ }
+ }
+
+ RowGroupMetaDataBuilder* AppendRowGroup() {
+ row_groups_.emplace_back();
+ current_row_group_builder_ =
+ RowGroupMetaDataBuilder::Make(properties_, schema_, &row_groups_.back());
+ return current_row_group_builder_.get();
+ }
+
+ std::unique_ptr<FileMetaData> Finish() {
+ int64_t total_rows = 0;
+ for (auto row_group : row_groups_) {
+ total_rows += row_group.num_rows;
+ }
+ metadata_->__set_num_rows(total_rows);
+ metadata_->__set_row_groups(row_groups_);
+
+ if (key_value_metadata_) {
+ metadata_->key_value_metadata.clear();
+ metadata_->key_value_metadata.reserve(key_value_metadata_->size());
+ for (int64_t i = 0; i < key_value_metadata_->size(); ++i) {
+ format::KeyValue kv_pair;
+ kv_pair.__set_key(key_value_metadata_->key(i));
+ kv_pair.__set_value(key_value_metadata_->value(i));
+ metadata_->key_value_metadata.push_back(kv_pair);
+ }
+ metadata_->__isset.key_value_metadata = true;
+ }
+
+ int32_t file_version = 0;
+ switch (properties_->version()) {
+ case ParquetVersion::PARQUET_1_0:
+ file_version = 1;
+ break;
+ case ParquetVersion::PARQUET_2_0:
+ file_version = 2;
+ break;
+ default:
+ break;
+ }
+ metadata_->__set_version(file_version);
+ metadata_->__set_created_by(properties_->created_by());
+
+    // Users cannot set the `ColumnOrder` since we do not have user-defined
+    // sort orders in the spec yet.
+    // We always default to `TYPE_DEFINED_ORDER`. We can expose it in the API
+    // once we have user-defined sort orders in the Parquet format.
+    // TypeDefinedOrder implies choosing the SortOrder based on
+    // ConvertedType/PhysicalType.
+ format::TypeDefinedOrder type_defined_order;
+ format::ColumnOrder column_order;
+ column_order.__set_TYPE_ORDER(type_defined_order);
+ column_order.__isset.TYPE_ORDER = true;
+ metadata_->column_orders.resize(schema_->num_columns(), column_order);
+ metadata_->__isset.column_orders = true;
+
+    // If the footer is in plaintext, set the footer signing algorithm.
+ auto file_encryption_properties = properties_->file_encryption_properties();
+ if (file_encryption_properties && !file_encryption_properties->encrypted_footer()) {
+ EncryptionAlgorithm signing_algorithm;
+ EncryptionAlgorithm algo = file_encryption_properties->algorithm();
+ signing_algorithm.aad.aad_file_unique = algo.aad.aad_file_unique;
+ signing_algorithm.aad.supply_aad_prefix = algo.aad.supply_aad_prefix;
+ if (!algo.aad.supply_aad_prefix) {
+ signing_algorithm.aad.aad_prefix = algo.aad.aad_prefix;
+ }
+ signing_algorithm.algorithm = ParquetCipher::AES_GCM_V1;
+
+ metadata_->__set_encryption_algorithm(ToThrift(signing_algorithm));
+ const std::string& footer_signing_key_metadata =
+ file_encryption_properties->footer_key_metadata();
+ if (footer_signing_key_metadata.size() > 0) {
+ metadata_->__set_footer_signing_key_metadata(footer_signing_key_metadata);
+ }
+ }
+
+ ToParquet(static_cast<parquet::schema::GroupNode*>(schema_->schema_root().get()),
+ &metadata_->schema);
+ auto file_meta_data = std::unique_ptr<FileMetaData>(new FileMetaData());
+ file_meta_data->impl_->metadata_ = std::move(metadata_);
+ file_meta_data->impl_->InitSchema();
+ file_meta_data->impl_->InitKeyValueMetadata();
+ return file_meta_data;
+ }
+
+ std::unique_ptr<FileCryptoMetaData> BuildFileCryptoMetaData() {
+ if (crypto_metadata_ == nullptr) {
+ return nullptr;
+ }
+
+ auto file_encryption_properties = properties_->file_encryption_properties();
+
+ crypto_metadata_->__set_encryption_algorithm(
+ ToThrift(file_encryption_properties->algorithm()));
+ std::string key_metadata = file_encryption_properties->footer_key_metadata();
+
+ if (!key_metadata.empty()) {
+ crypto_metadata_->__set_key_metadata(key_metadata);
+ }
+
+ std::unique_ptr<FileCryptoMetaData> file_crypto_metadata =
+ std::unique_ptr<FileCryptoMetaData>(new FileCryptoMetaData());
+ file_crypto_metadata->impl_->metadata_ = std::move(crypto_metadata_);
+
+ return file_crypto_metadata;
+ }
+
+ protected:
+ std::unique_ptr<format::FileMetaData> metadata_;
+ std::unique_ptr<format::FileCryptoMetaData> crypto_metadata_;
+
+ private:
+ const std::shared_ptr<WriterProperties> properties_;
+ std::vector<format::RowGroup> row_groups_;
+
+ std::unique_ptr<RowGroupMetaDataBuilder> current_row_group_builder_;
+ const SchemaDescriptor* schema_;
+ std::shared_ptr<const KeyValueMetadata> key_value_metadata_;
+};
+
+std::unique_ptr<FileMetaDataBuilder> FileMetaDataBuilder::Make(
+ const SchemaDescriptor* schema, std::shared_ptr<WriterProperties> props,
+ std::shared_ptr<const KeyValueMetadata> key_value_metadata) {
+ return std::unique_ptr<FileMetaDataBuilder>(
+ new FileMetaDataBuilder(schema, std::move(props), std::move(key_value_metadata)));
+}
+
+FileMetaDataBuilder::FileMetaDataBuilder(
+ const SchemaDescriptor* schema, std::shared_ptr<WriterProperties> props,
+ std::shared_ptr<const KeyValueMetadata> key_value_metadata)
+ : impl_{std::unique_ptr<FileMetaDataBuilderImpl>(new FileMetaDataBuilderImpl(
+ schema, std::move(props), std::move(key_value_metadata)))} {}
+
+FileMetaDataBuilder::~FileMetaDataBuilder() = default;
+
+RowGroupMetaDataBuilder* FileMetaDataBuilder::AppendRowGroup() {
+ return impl_->AppendRowGroup();
+}
+
+std::unique_ptr<FileMetaData> FileMetaDataBuilder::Finish() { return impl_->Finish(); }
+
+std::unique_ptr<FileCryptoMetaData> FileMetaDataBuilder::GetCryptoMetaData() {
+ return impl_->BuildFileCryptoMetaData();
+}
+
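+// A minimal sketch of the writer-side builder flow defined above (the names
+// schema, props, and the Finish() argument values are assumptions):
+//
+//   auto builder = FileMetaDataBuilder::Make(schema, props);
+//   RowGroupMetaDataBuilder* rg = builder->AppendRowGroup();
+//   ColumnChunkMetaDataBuilder* col = rg->NextColumnChunk();
+//   col->Finish(num_values, dict_page_offset, index_page_offset,
+//               data_page_offset, compressed_size, uncompressed_size,
+//               has_dictionary, dictionary_fallback, dict_stats, data_stats);
+//   rg->set_num_rows(num_rows);
+//   rg->Finish(total_bytes_written, /*row_group_ordinal=*/0);
+//   std::unique_ptr<FileMetaData> metadata = builder->Finish();
+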
+} // namespace parquet
diff --git a/contrib/libs/apache/arrow/cpp/src/parquet/metadata.h b/contrib/libs/apache/arrow/cpp/src/parquet/metadata.h
index b432c20cf64..1865115e423 100644
--- a/contrib/libs/apache/arrow/cpp/src/parquet/metadata.h
+++ b/contrib/libs/apache/arrow/cpp/src/parquet/metadata.h
@@ -1,484 +1,484 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-#pragma once
-
-#include <cstdint>
-#include <map>
-#include <memory>
-#include <string>
-#include <utility>
-#include <vector>
-
-#include "parquet/platform.h"
-#include "parquet/properties.h"
-#include "parquet/schema.h"
-#include "parquet/types.h"
-
-namespace parquet {
-
-class ColumnDescriptor;
-class EncodedStatistics;
-class Statistics;
-class SchemaDescriptor;
-
-class FileCryptoMetaData;
-class InternalFileDecryptor;
-class Decryptor;
-class Encryptor;
-class FooterSigningEncryptor;
-
-namespace schema {
-
-class ColumnPath;
-
-} // namespace schema
-
-using KeyValueMetadata = ::arrow::KeyValueMetadata;
-
-class PARQUET_EXPORT ApplicationVersion {
- public:
- // Known Versions with Issues
- static const ApplicationVersion& PARQUET_251_FIXED_VERSION();
- static const ApplicationVersion& PARQUET_816_FIXED_VERSION();
- static const ApplicationVersion& PARQUET_CPP_FIXED_STATS_VERSION();
- static const ApplicationVersion& PARQUET_MR_FIXED_STATS_VERSION();
-
- // Application that wrote the file. e.g. "IMPALA"
- std::string application_;
- // Build name
- std::string build_;
-
- // Version of the application that wrote the file, expressed as
- // (<major>.<minor>.<patch>). Unmatched parts default to 0.
- // "1.2.3" => {1, 2, 3}
- // "1.2" => {1, 2, 0}
- // "1.2-cdh5" => {1, 2, 0}
- struct {
- int major;
- int minor;
- int patch;
- std::string unknown;
- std::string pre_release;
- std::string build_info;
- } version;
-
- ApplicationVersion() = default;
- explicit ApplicationVersion(const std::string& created_by);
- ApplicationVersion(std::string application, int major, int minor, int patch);
-
- // Returns true if version is strictly less than other_version
- bool VersionLt(const ApplicationVersion& other_version) const;
-
-  // Returns true if version is equal to other_version
- bool VersionEq(const ApplicationVersion& other_version) const;
-
- // Checks if the Version has the correct statistics for a given column
- bool HasCorrectStatistics(Type::type primitive, EncodedStatistics& statistics,
- SortOrder::type sort_order = SortOrder::SIGNED) const;
-};
-
-class PARQUET_EXPORT ColumnCryptoMetaData {
- public:
- static std::unique_ptr<ColumnCryptoMetaData> Make(const uint8_t* metadata);
- ~ColumnCryptoMetaData();
-
- bool Equals(const ColumnCryptoMetaData& other) const;
-
- std::shared_ptr<schema::ColumnPath> path_in_schema() const;
- bool encrypted_with_footer_key() const;
- const std::string& key_metadata() const;
-
- private:
- explicit ColumnCryptoMetaData(const uint8_t* metadata);
-
- class ColumnCryptoMetaDataImpl;
- std::unique_ptr<ColumnCryptoMetaDataImpl> impl_;
-};
-
-/// \brief Public struct for Thrift PageEncodingStats in ColumnChunkMetaData
-struct PageEncodingStats {
- PageType::type page_type;
- Encoding::type encoding;
- int32_t count;
-};
-
-/// \brief ColumnChunkMetaData is a proxy around format::ColumnChunkMetaData.
-class PARQUET_EXPORT ColumnChunkMetaData {
- public:
- // API convenience to get a MetaData accessor
- static std::unique_ptr<ColumnChunkMetaData> Make(
- const void* metadata, const ColumnDescriptor* descr,
- const ApplicationVersion* writer_version = NULLPTR, int16_t row_group_ordinal = -1,
- int16_t column_ordinal = -1,
- std::shared_ptr<InternalFileDecryptor> file_decryptor = NULLPTR);
-
- ~ColumnChunkMetaData();
-
- bool Equals(const ColumnChunkMetaData& other) const;
-
- // column chunk
- int64_t file_offset() const;
-
- // parameter is only used when a dataset is spread across multiple files
- const std::string& file_path() const;
-
- // column metadata
- bool is_metadata_set() const;
- Type::type type() const;
- int64_t num_values() const;
- std::shared_ptr<schema::ColumnPath> path_in_schema() const;
- bool is_stats_set() const;
- std::shared_ptr<Statistics> statistics() const;
-
- Compression::type compression() const;
- // Indicate if the ColumnChunk compression is supported by the current
- // compiled parquet library.
- bool can_decompress() const;
-
- const std::vector<Encoding::type>& encodings() const;
- const std::vector<PageEncodingStats>& encoding_stats() const;
- bool has_dictionary_page() const;
- int64_t dictionary_page_offset() const;
- int64_t data_page_offset() const;
- bool has_index_page() const;
- int64_t index_page_offset() const;
- int64_t total_compressed_size() const;
- int64_t total_uncompressed_size() const;
- std::unique_ptr<ColumnCryptoMetaData> crypto_metadata() const;
-
- private:
- explicit ColumnChunkMetaData(
- const void* metadata, const ColumnDescriptor* descr, int16_t row_group_ordinal,
- int16_t column_ordinal, const ApplicationVersion* writer_version = NULLPTR,
- std::shared_ptr<InternalFileDecryptor> file_decryptor = NULLPTR);
- // PIMPL Idiom
- class ColumnChunkMetaDataImpl;
- std::unique_ptr<ColumnChunkMetaDataImpl> impl_;
-};
-
-/// \brief RowGroupMetaData is a proxy around format::RowGroupMetaData.
-class PARQUET_EXPORT RowGroupMetaData {
- public:
- /// \brief Create a RowGroupMetaData from a serialized thrift message.
- static std::unique_ptr<RowGroupMetaData> Make(
- const void* metadata, const SchemaDescriptor* schema,
- const ApplicationVersion* writer_version = NULLPTR,
- std::shared_ptr<InternalFileDecryptor> file_decryptor = NULLPTR);
-
- ~RowGroupMetaData();
-
- bool Equals(const RowGroupMetaData& other) const;
-
- /// \brief The number of columns in this row group. The order must match the
- /// parent's column ordering.
- int num_columns() const;
-
- /// \brief Return the ColumnChunkMetaData of the corresponding column ordinal.
- ///
-  /// WARNING: the returned object references a memory location in its parent
-  /// (RowGroupMetaData) object. Hence, the parent must outlive the returned
-  /// object.
- ///
- /// \param[in] index of the ColumnChunkMetaData to retrieve.
- ///
-  /// \throws ParquetException if the index is out of bounds.
- std::unique_ptr<ColumnChunkMetaData> ColumnChunk(int index) const;
-
- /// \brief Number of rows in this row group.
- int64_t num_rows() const;
-
- /// \brief Total byte size of all the uncompressed column data in this row group.
- int64_t total_byte_size() const;
-
- /// \brief Total byte size of all the compressed (and potentially encrypted)
- /// column data in this row group.
- ///
- /// This information is optional and may be 0 if omitted.
- int64_t total_compressed_size() const;
-
- /// \brief Byte offset from beginning of file to first page (data or
- /// dictionary) in this row group
- ///
- /// The file_offset field that this method exposes is optional. This method
- /// will return 0 if that field is not set to a meaningful value.
- int64_t file_offset() const;
- // Return const-pointer to make it clear that this object is not to be copied
- const SchemaDescriptor* schema() const;
- // Indicate if all of the RowGroup's ColumnChunks can be decompressed.
- bool can_decompress() const;
-
- private:
- explicit RowGroupMetaData(
- const void* metadata, const SchemaDescriptor* schema,
- const ApplicationVersion* writer_version = NULLPTR,
- std::shared_ptr<InternalFileDecryptor> file_decryptor = NULLPTR);
- // PIMPL Idiom
- class RowGroupMetaDataImpl;
- std::unique_ptr<RowGroupMetaDataImpl> impl_;
-};
-
-class FileMetaDataBuilder;
-
-/// \brief FileMetaData is a proxy around format::FileMetaData.
-class PARQUET_EXPORT FileMetaData {
- public:
- /// \brief Create a FileMetaData from a serialized thrift message.
- static std::shared_ptr<FileMetaData> Make(
- const void* serialized_metadata, uint32_t* inout_metadata_len,
- std::shared_ptr<InternalFileDecryptor> file_decryptor = NULLPTR);
-
- ~FileMetaData();
-
- bool Equals(const FileMetaData& other) const;
-
- /// \brief The number of top-level columns in the schema.
- ///
- /// Parquet thrift definition requires that nested schema elements are
- /// flattened. This method returns the number of columns in the un-flattened
- /// version.
- int num_columns() const;
-
- /// \brief The number of flattened schema elements.
- ///
- /// Parquet thrift definition requires that nested schema elements are
- /// flattened. This method returns the total number of elements in the
- /// flattened list.
- int num_schema_elements() const;
-
- /// \brief The total number of rows.
- int64_t num_rows() const;
-
- /// \brief The number of row groups in the file.
- int num_row_groups() const;
-
- /// \brief Return the RowGroupMetaData of the corresponding row group ordinal.
- ///
-  /// WARNING: the returned object references a memory location in its parent
-  /// (FileMetaData) object. Hence, the parent must outlive the returned object.
- ///
- /// \param[in] index of the RowGroup to retrieve.
- ///
-  /// \throws ParquetException if the index is out of bounds.
- std::unique_ptr<RowGroupMetaData> RowGroup(int index) const;
-
- /// \brief Return the version of the file.
- ParquetVersion::type version() const;
-
- /// \brief Return the application's user-agent string of the writer.
- const std::string& created_by() const;
-
- /// \brief Return the application's version of the writer.
- const ApplicationVersion& writer_version() const;
-
- /// \brief Size of the original thrift encoded metadata footer.
- uint32_t size() const;
-
- /// \brief Indicate if all of the FileMetadata's RowGroups can be decompressed.
- ///
- /// This will return false if any of the RowGroup's page is compressed with a
- /// compression format which is not compiled in the current parquet library.
- bool can_decompress() const;
-
- bool is_encryption_algorithm_set() const;
- EncryptionAlgorithm encryption_algorithm() const;
- const std::string& footer_signing_key_metadata() const;
-
- /// \brief Verify signature of FileMetaData when file is encrypted but footer
- /// is not encrypted (plaintext footer).
- bool VerifySignature(const void* signature);
-
- void WriteTo(::arrow::io::OutputStream* dst,
- const std::shared_ptr<Encryptor>& encryptor = NULLPTR) const;
-
- /// \brief Return Thrift-serialized representation of the metadata as a
- /// string
- std::string SerializeToString() const;
-
- // Return const-pointer to make it clear that this object is not to be copied
- const SchemaDescriptor* schema() const;
-
- const std::shared_ptr<const KeyValueMetadata>& key_value_metadata() const;
-
-  /// \brief Set the file path for all ColumnChunks in all RowGroups.
-  ///
-  /// Commonly used by systems (e.g. Dask, Spark) that generate a metadata-only
-  /// Parquet file. The path is usually relative to that index file.
- ///
- /// \param[in] path to set.
- void set_file_path(const std::string& path);
-
- /// \brief Merge row groups from another metadata file into this one.
- ///
- /// The schema of the input FileMetaData must be equal to the
- /// schema of this object.
- ///
-  /// This is used by systems that create an aggregate metadata-only file by
- /// concatenating the row groups of multiple files. This newly created
- /// metadata file acts as an index of all available row groups.
- ///
- /// \param[in] other FileMetaData to merge the row groups from.
- ///
- /// \throws ParquetException if schemas are not equal.
- void AppendRowGroups(const FileMetaData& other);
-
- /// \brief Return a FileMetaData containing a subset of the row groups in this
- /// FileMetaData.
- std::shared_ptr<FileMetaData> Subset(const std::vector<int>& row_groups) const;
-
- private:
- friend FileMetaDataBuilder;
- friend class SerializedFile;
-
- explicit FileMetaData(const void* serialized_metadata, uint32_t* metadata_len,
- std::shared_ptr<InternalFileDecryptor> file_decryptor = NULLPTR);
-
- void set_file_decryptor(std::shared_ptr<InternalFileDecryptor> file_decryptor);
-
- // PIMPL Idiom
- FileMetaData();
- class FileMetaDataImpl;
- std::unique_ptr<FileMetaDataImpl> impl_;
-};
-
-class PARQUET_EXPORT FileCryptoMetaData {
- public:
- // API convenience to get a MetaData accessor
- static std::shared_ptr<FileCryptoMetaData> Make(const uint8_t* serialized_metadata,
- uint32_t* metadata_len);
- ~FileCryptoMetaData();
-
- EncryptionAlgorithm encryption_algorithm() const;
- const std::string& key_metadata() const;
-
- void WriteTo(::arrow::io::OutputStream* dst) const;
-
- private:
- friend FileMetaDataBuilder;
- FileCryptoMetaData(const uint8_t* serialized_metadata, uint32_t* metadata_len);
-
- // PIMPL Idiom
- FileCryptoMetaData();
- class FileCryptoMetaDataImpl;
- std::unique_ptr<FileCryptoMetaDataImpl> impl_;
-};
-
-// Builder API
-class PARQUET_EXPORT ColumnChunkMetaDataBuilder {
- public:
-  // API convenience to get a MetaData builder
- static std::unique_ptr<ColumnChunkMetaDataBuilder> Make(
- std::shared_ptr<WriterProperties> props, const ColumnDescriptor* column);
-
- static std::unique_ptr<ColumnChunkMetaDataBuilder> Make(
- std::shared_ptr<WriterProperties> props, const ColumnDescriptor* column,
- void* contents);
-
- ~ColumnChunkMetaDataBuilder();
-
- // column chunk
- // Used when a dataset is spread across multiple files
- void set_file_path(const std::string& path);
- // column metadata
- void SetStatistics(const EncodedStatistics& stats);
- // get the column descriptor
- const ColumnDescriptor* descr() const;
-
- int64_t total_compressed_size() const;
- // commit the metadata
-
- void Finish(int64_t num_values, int64_t dictionary_page_offset,
- int64_t index_page_offset, int64_t data_page_offset,
- int64_t compressed_size, int64_t uncompressed_size, bool has_dictionary,
- bool dictionary_fallback,
- const std::map<Encoding::type, int32_t>& dict_encoding_stats_,
- const std::map<Encoding::type, int32_t>& data_encoding_stats_,
- const std::shared_ptr<Encryptor>& encryptor = NULLPTR);
-
- // The metadata contents, suitable for passing to ColumnChunkMetaData::Make
- const void* contents() const;
-
- // For writing metadata at end of column chunk
- void WriteTo(::arrow::io::OutputStream* sink);
-
- private:
- explicit ColumnChunkMetaDataBuilder(std::shared_ptr<WriterProperties> props,
- const ColumnDescriptor* column);
- explicit ColumnChunkMetaDataBuilder(std::shared_ptr<WriterProperties> props,
- const ColumnDescriptor* column, void* contents);
- // PIMPL Idiom
- class ColumnChunkMetaDataBuilderImpl;
- std::unique_ptr<ColumnChunkMetaDataBuilderImpl> impl_;
-};
-
-class PARQUET_EXPORT RowGroupMetaDataBuilder {
- public:
-  // API convenience to get a MetaData builder
- static std::unique_ptr<RowGroupMetaDataBuilder> Make(
- std::shared_ptr<WriterProperties> props, const SchemaDescriptor* schema_,
- void* contents);
-
- ~RowGroupMetaDataBuilder();
-
- ColumnChunkMetaDataBuilder* NextColumnChunk();
- int num_columns();
- int64_t num_rows();
- int current_column() const;
-
- void set_num_rows(int64_t num_rows);
-
- // commit the metadata
- void Finish(int64_t total_bytes_written, int16_t row_group_ordinal = -1);
-
- private:
- explicit RowGroupMetaDataBuilder(std::shared_ptr<WriterProperties> props,
- const SchemaDescriptor* schema_, void* contents);
- // PIMPL Idiom
- class RowGroupMetaDataBuilderImpl;
- std::unique_ptr<RowGroupMetaDataBuilderImpl> impl_;
-};
-
-class PARQUET_EXPORT FileMetaDataBuilder {
- public:
-  // API convenience to get a MetaData builder
- static std::unique_ptr<FileMetaDataBuilder> Make(
- const SchemaDescriptor* schema, std::shared_ptr<WriterProperties> props,
- std::shared_ptr<const KeyValueMetadata> key_value_metadata = NULLPTR);
-
- ~FileMetaDataBuilder();
-
- // The prior RowGroupMetaDataBuilder (if any) is destroyed
- RowGroupMetaDataBuilder* AppendRowGroup();
-
- // Complete the Thrift structure
- std::unique_ptr<FileMetaData> Finish();
-
- // crypto metadata
- std::unique_ptr<FileCryptoMetaData> GetCryptoMetaData();
-
- private:
- explicit FileMetaDataBuilder(
- const SchemaDescriptor* schema, std::shared_ptr<WriterProperties> props,
- std::shared_ptr<const KeyValueMetadata> key_value_metadata = NULLPTR);
- // PIMPL Idiom
- class FileMetaDataBuilderImpl;
- std::unique_ptr<FileMetaDataBuilderImpl> impl_;
-};
-
-PARQUET_EXPORT std::string ParquetVersionToString(ParquetVersion::type ver);
-
-} // namespace parquet
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+
+#include <cstdint>
+#include <map>
+#include <memory>
+#include <string>
+#include <utility>
+#include <vector>
+
+#include "parquet/platform.h"
+#include "parquet/properties.h"
+#include "parquet/schema.h"
+#include "parquet/types.h"
+
+namespace parquet {
+
+class ColumnDescriptor;
+class EncodedStatistics;
+class Statistics;
+class SchemaDescriptor;
+
+class FileCryptoMetaData;
+class InternalFileDecryptor;
+class Decryptor;
+class Encryptor;
+class FooterSigningEncryptor;
+
+namespace schema {
+
+class ColumnPath;
+
+} // namespace schema
+
+using KeyValueMetadata = ::arrow::KeyValueMetadata;
+
+class PARQUET_EXPORT ApplicationVersion {
+ public:
+ // Known Versions with Issues
+ static const ApplicationVersion& PARQUET_251_FIXED_VERSION();
+ static const ApplicationVersion& PARQUET_816_FIXED_VERSION();
+ static const ApplicationVersion& PARQUET_CPP_FIXED_STATS_VERSION();
+ static const ApplicationVersion& PARQUET_MR_FIXED_STATS_VERSION();
+
+ // Application that wrote the file. e.g. "IMPALA"
+ std::string application_;
+ // Build name
+ std::string build_;
+
+ // Version of the application that wrote the file, expressed as
+ // (<major>.<minor>.<patch>). Unmatched parts default to 0.
+ // "1.2.3" => {1, 2, 3}
+ // "1.2" => {1, 2, 0}
+ // "1.2-cdh5" => {1, 2, 0}
+ struct {
+ int major;
+ int minor;
+ int patch;
+ std::string unknown;
+ std::string pre_release;
+ std::string build_info;
+ } version;
+
+ ApplicationVersion() = default;
+ explicit ApplicationVersion(const std::string& created_by);
+ ApplicationVersion(std::string application, int major, int minor, int patch);
+
+ // Returns true if version is strictly less than other_version
+ bool VersionLt(const ApplicationVersion& other_version) const;
+
+  // Returns true if version is equal to other_version
+ bool VersionEq(const ApplicationVersion& other_version) const;
+
+ // Checks if the Version has the correct statistics for a given column
+ bool HasCorrectStatistics(Type::type primitive, EncodedStatistics& statistics,
+ SortOrder::type sort_order = SortOrder::SIGNED) const;
+};
+
+class PARQUET_EXPORT ColumnCryptoMetaData {
+ public:
+ static std::unique_ptr<ColumnCryptoMetaData> Make(const uint8_t* metadata);
+ ~ColumnCryptoMetaData();
+
+ bool Equals(const ColumnCryptoMetaData& other) const;
+
+ std::shared_ptr<schema::ColumnPath> path_in_schema() const;
+ bool encrypted_with_footer_key() const;
+ const std::string& key_metadata() const;
+
+ private:
+ explicit ColumnCryptoMetaData(const uint8_t* metadata);
+
+ class ColumnCryptoMetaDataImpl;
+ std::unique_ptr<ColumnCryptoMetaDataImpl> impl_;
+};
+
+/// \brief Public struct for Thrift PageEncodingStats in ColumnChunkMetaData
+struct PageEncodingStats {
+ PageType::type page_type;
+ Encoding::type encoding;
+ int32_t count;
+};
+
+/// \brief ColumnChunkMetaData is a proxy around format::ColumnChunkMetaData.
+class PARQUET_EXPORT ColumnChunkMetaData {
+ public:
+ // API convenience to get a MetaData accessor
+ static std::unique_ptr<ColumnChunkMetaData> Make(
+ const void* metadata, const ColumnDescriptor* descr,
+ const ApplicationVersion* writer_version = NULLPTR, int16_t row_group_ordinal = -1,
+ int16_t column_ordinal = -1,
+ std::shared_ptr<InternalFileDecryptor> file_decryptor = NULLPTR);
+
+ ~ColumnChunkMetaData();
+
+ bool Equals(const ColumnChunkMetaData& other) const;
+
+ // column chunk
+ int64_t file_offset() const;
+
+ // parameter is only used when a dataset is spread across multiple files
+ const std::string& file_path() const;
+
+ // column metadata
+ bool is_metadata_set() const;
+ Type::type type() const;
+ int64_t num_values() const;
+ std::shared_ptr<schema::ColumnPath> path_in_schema() const;
+ bool is_stats_set() const;
+ std::shared_ptr<Statistics> statistics() const;
+
+ Compression::type compression() const;
+ // Indicate if the ColumnChunk compression is supported by the current
+ // compiled parquet library.
+ bool can_decompress() const;
+
+ const std::vector<Encoding::type>& encodings() const;
+ const std::vector<PageEncodingStats>& encoding_stats() const;
+ bool has_dictionary_page() const;
+ int64_t dictionary_page_offset() const;
+ int64_t data_page_offset() const;
+ bool has_index_page() const;
+ int64_t index_page_offset() const;
+ int64_t total_compressed_size() const;
+ int64_t total_uncompressed_size() const;
+ std::unique_ptr<ColumnCryptoMetaData> crypto_metadata() const;
+
+ private:
+ explicit ColumnChunkMetaData(
+ const void* metadata, const ColumnDescriptor* descr, int16_t row_group_ordinal,
+ int16_t column_ordinal, const ApplicationVersion* writer_version = NULLPTR,
+ std::shared_ptr<InternalFileDecryptor> file_decryptor = NULLPTR);
+ // PIMPL Idiom
+ class ColumnChunkMetaDataImpl;
+ std::unique_ptr<ColumnChunkMetaDataImpl> impl_;
+};
+
+/// \brief RowGroupMetaData is a proxy around format::RowGroupMetaData.
+class PARQUET_EXPORT RowGroupMetaData {
+ public:
+ /// \brief Create a RowGroupMetaData from a serialized thrift message.
+ static std::unique_ptr<RowGroupMetaData> Make(
+ const void* metadata, const SchemaDescriptor* schema,
+ const ApplicationVersion* writer_version = NULLPTR,
+ std::shared_ptr<InternalFileDecryptor> file_decryptor = NULLPTR);
+
+ ~RowGroupMetaData();
+
+ bool Equals(const RowGroupMetaData& other) const;
+
+ /// \brief The number of columns in this row group. The order must match the
+ /// parent's column ordering.
+ int num_columns() const;
+
+ /// \brief Return the ColumnChunkMetaData of the corresponding column ordinal.
+ ///
+  /// WARNING: the returned object references a memory location in its parent
+  /// (RowGroupMetaData) object. Hence, the parent must outlive the returned
+  /// object.
+ ///
+ /// \param[in] index of the ColumnChunkMetaData to retrieve.
+ ///
+  /// \throws ParquetException if the index is out of bounds.
+ std::unique_ptr<ColumnChunkMetaData> ColumnChunk(int index) const;
+
+ /// \brief Number of rows in this row group.
+ int64_t num_rows() const;
+
+ /// \brief Total byte size of all the uncompressed column data in this row group.
+ int64_t total_byte_size() const;
+
+ /// \brief Total byte size of all the compressed (and potentially encrypted)
+ /// column data in this row group.
+ ///
+ /// This information is optional and may be 0 if omitted.
+ int64_t total_compressed_size() const;
+
+ /// \brief Byte offset from beginning of file to first page (data or
+ /// dictionary) in this row group
+ ///
+ /// The file_offset field that this method exposes is optional. This method
+ /// will return 0 if that field is not set to a meaningful value.
+ int64_t file_offset() const;
+ // Return const-pointer to make it clear that this object is not to be copied
+ const SchemaDescriptor* schema() const;
+ // Indicate if all of the RowGroup's ColumnChunks can be decompressed.
+ bool can_decompress() const;
+
+ private:
+ explicit RowGroupMetaData(
+ const void* metadata, const SchemaDescriptor* schema,
+ const ApplicationVersion* writer_version = NULLPTR,
+ std::shared_ptr<InternalFileDecryptor> file_decryptor = NULLPTR);
+ // PIMPL Idiom
+ class RowGroupMetaDataImpl;
+ std::unique_ptr<RowGroupMetaDataImpl> impl_;
+};
+
+class FileMetaDataBuilder;
+
+/// \brief FileMetaData is a proxy around format::FileMetaData.
+class PARQUET_EXPORT FileMetaData {
+ public:
+ /// \brief Create a FileMetaData from a serialized thrift message.
+ static std::shared_ptr<FileMetaData> Make(
+ const void* serialized_metadata, uint32_t* inout_metadata_len,
+ std::shared_ptr<InternalFileDecryptor> file_decryptor = NULLPTR);
+
+ ~FileMetaData();
+
+ bool Equals(const FileMetaData& other) const;
+
+ /// \brief The number of top-level columns in the schema.
+ ///
+ /// Parquet thrift definition requires that nested schema elements are
+ /// flattened. This method returns the number of columns in the un-flattened
+ /// version.
+ int num_columns() const;
+
+ /// \brief The number of flattened schema elements.
+ ///
+ /// Parquet thrift definition requires that nested schema elements are
+ /// flattened. This method returns the total number of elements in the
+ /// flattened list.
+ int num_schema_elements() const;
+
+ /// \brief The total number of rows.
+ int64_t num_rows() const;
+
+ /// \brief The number of row groups in the file.
+ int num_row_groups() const;
+
+ /// \brief Return the RowGroupMetaData of the corresponding row group ordinal.
+ ///
+  /// WARNING: the returned object references a memory location in its parent
+  /// (FileMetaData) object. Hence, the parent must outlive the returned object.
+ ///
+ /// \param[in] index of the RowGroup to retrieve.
+ ///
+  /// \throws ParquetException if the index is out of bounds.
+ std::unique_ptr<RowGroupMetaData> RowGroup(int index) const;
+
+ /// \brief Return the version of the file.
+ ParquetVersion::type version() const;
+
+ /// \brief Return the application's user-agent string of the writer.
+ const std::string& created_by() const;
+
+ /// \brief Return the application's version of the writer.
+ const ApplicationVersion& writer_version() const;
+
+ /// \brief Size of the original thrift encoded metadata footer.
+ uint32_t size() const;
+
+ /// \brief Indicate if all of the FileMetadata's RowGroups can be decompressed.
+ ///
+ /// This will return false if any of the RowGroup's page is compressed with a
+ /// compression format which is not compiled in the current parquet library.
+ bool can_decompress() const;
+
+ bool is_encryption_algorithm_set() const;
+ EncryptionAlgorithm encryption_algorithm() const;
+ const std::string& footer_signing_key_metadata() const;
+
+ /// \brief Verify signature of FileMetaData when file is encrypted but footer
+ /// is not encrypted (plaintext footer).
+ bool VerifySignature(const void* signature);
+
+ void WriteTo(::arrow::io::OutputStream* dst,
+ const std::shared_ptr<Encryptor>& encryptor = NULLPTR) const;
+
+ /// \brief Return Thrift-serialized representation of the metadata as a
+ /// string
+ std::string SerializeToString() const;
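+
+  // Illustrative round trip (a sketch): serialize, then re-parse.
+  //   std::string blob = md->SerializeToString();
+  //   uint32_t len = static_cast<uint32_t>(blob.size());
+  //   auto md2 = FileMetaData::Make(blob.data(), &len);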
+
+ // Return const-pointer to make it clear that this object is not to be copied
+ const SchemaDescriptor* schema() const;
+
+ const std::shared_ptr<const KeyValueMetadata>& key_value_metadata() const;
+
+  /// \brief Set the file path for all ColumnChunks in all RowGroups.
+  ///
+  /// Commonly used by systems (e.g. Dask, Spark) that generate a metadata-only
+  /// Parquet file. The path is usually relative to that index file.
+ ///
+ /// \param[in] path to set.
+ void set_file_path(const std::string& path);
+
+ /// \brief Merge row groups from another metadata file into this one.
+ ///
+ /// The schema of the input FileMetaData must be equal to the
+ /// schema of this object.
+ ///
+  /// This is used by systems that create an aggregate metadata-only file by
+ /// concatenating the row groups of multiple files. This newly created
+ /// metadata file acts as an index of all available row groups.
+ ///
+ /// \param[in] other FileMetaData to merge the row groups from.
+ ///
+ /// \throws ParquetException if schemas are not equal.
+ void AppendRowGroups(const FileMetaData& other);
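+
+  // A minimal sketch of building an aggregate metadata-only index (the file
+  // names are assumptions):
+  //   md_a->set_file_path("part-0.parquet");
+  //   md_b->set_file_path("part-1.parquet");
+  //   md_a->AppendRowGroups(*md_b);  // md_a now indexes both files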
+
+ /// \brief Return a FileMetaData containing a subset of the row groups in this
+ /// FileMetaData.
+ std::shared_ptr<FileMetaData> Subset(const std::vector<int>& row_groups) const;
+
+ private:
+ friend FileMetaDataBuilder;
+ friend class SerializedFile;
+
+ explicit FileMetaData(const void* serialized_metadata, uint32_t* metadata_len,
+ std::shared_ptr<InternalFileDecryptor> file_decryptor = NULLPTR);
+
+ void set_file_decryptor(std::shared_ptr<InternalFileDecryptor> file_decryptor);
+
+ // PIMPL Idiom
+ FileMetaData();
+ class FileMetaDataImpl;
+ std::unique_ptr<FileMetaDataImpl> impl_;
+};
+
+class PARQUET_EXPORT FileCryptoMetaData {
+ public:
+ // API convenience to get a MetaData accessor
+ static std::shared_ptr<FileCryptoMetaData> Make(const uint8_t* serialized_metadata,
+ uint32_t* metadata_len);
+ ~FileCryptoMetaData();
+
+ EncryptionAlgorithm encryption_algorithm() const;
+ const std::string& key_metadata() const;
+
+ void WriteTo(::arrow::io::OutputStream* dst) const;
+
+ private:
+ friend FileMetaDataBuilder;
+ FileCryptoMetaData(const uint8_t* serialized_metadata, uint32_t* metadata_len);
+
+ // PIMPL Idiom
+ FileCryptoMetaData();
+ class FileCryptoMetaDataImpl;
+ std::unique_ptr<FileCryptoMetaDataImpl> impl_;
+};
+
+// Builder API
+class PARQUET_EXPORT ColumnChunkMetaDataBuilder {
+ public:
+  // API convenience to get a MetaData builder
+ static std::unique_ptr<ColumnChunkMetaDataBuilder> Make(
+ std::shared_ptr<WriterProperties> props, const ColumnDescriptor* column);
+
+ static std::unique_ptr<ColumnChunkMetaDataBuilder> Make(
+ std::shared_ptr<WriterProperties> props, const ColumnDescriptor* column,
+ void* contents);
+
+ ~ColumnChunkMetaDataBuilder();
+
+ // column chunk
+ // Used when a dataset is spread across multiple files
+ void set_file_path(const std::string& path);
+ // column metadata
+ void SetStatistics(const EncodedStatistics& stats);
+ // get the column descriptor
+ const ColumnDescriptor* descr() const;
+
+ int64_t total_compressed_size() const;
+ // commit the metadata
+
+ void Finish(int64_t num_values, int64_t dictionary_page_offset,
+ int64_t index_page_offset, int64_t data_page_offset,
+ int64_t compressed_size, int64_t uncompressed_size, bool has_dictionary,
+ bool dictionary_fallback,
+ const std::map<Encoding::type, int32_t>& dict_encoding_stats_,
+ const std::map<Encoding::type, int32_t>& data_encoding_stats_,
+ const std::shared_ptr<Encryptor>& encryptor = NULLPTR);
+
+ // The metadata contents, suitable for passing to ColumnChunkMetaData::Make
+ const void* contents() const;
+
+ // For writing metadata at end of column chunk
+ void WriteTo(::arrow::io::OutputStream* sink);
+
+ private:
+ explicit ColumnChunkMetaDataBuilder(std::shared_ptr<WriterProperties> props,
+ const ColumnDescriptor* column);
+ explicit ColumnChunkMetaDataBuilder(std::shared_ptr<WriterProperties> props,
+ const ColumnDescriptor* column, void* contents);
+ // PIMPL Idiom
+ class ColumnChunkMetaDataBuilderImpl;
+ std::unique_ptr<ColumnChunkMetaDataBuilderImpl> impl_;
+};
+
+class PARQUET_EXPORT RowGroupMetaDataBuilder {
+ public:
+  // API convenience to get a MetaData builder
+ static std::unique_ptr<RowGroupMetaDataBuilder> Make(
+ std::shared_ptr<WriterProperties> props, const SchemaDescriptor* schema_,
+ void* contents);
+
+ ~RowGroupMetaDataBuilder();
+
+ ColumnChunkMetaDataBuilder* NextColumnChunk();
+ int num_columns();
+ int64_t num_rows();
+ int current_column() const;
+
+ void set_num_rows(int64_t num_rows);
+
+ // commit the metadata
+ void Finish(int64_t total_bytes_written, int16_t row_group_ordinal = -1);
+
+ private:
+ explicit RowGroupMetaDataBuilder(std::shared_ptr<WriterProperties> props,
+ const SchemaDescriptor* schema_, void* contents);
+ // PIMPL Idiom
+ class RowGroupMetaDataBuilderImpl;
+ std::unique_ptr<RowGroupMetaDataBuilderImpl> impl_;
+};
+
+class PARQUET_EXPORT FileMetaDataBuilder {
+ public:
+  // API convenience to get a MetaData builder
+ static std::unique_ptr<FileMetaDataBuilder> Make(
+ const SchemaDescriptor* schema, std::shared_ptr<WriterProperties> props,
+ std::shared_ptr<const KeyValueMetadata> key_value_metadata = NULLPTR);
+
+ ~FileMetaDataBuilder();
+
+ // The prior RowGroupMetaDataBuilder (if any) is destroyed
+ RowGroupMetaDataBuilder* AppendRowGroup();
+
+ // Complete the Thrift structure
+ std::unique_ptr<FileMetaData> Finish();
+
+ // crypto metadata
+ std::unique_ptr<FileCryptoMetaData> GetCryptoMetaData();
+
+ private:
+ explicit FileMetaDataBuilder(
+ const SchemaDescriptor* schema, std::shared_ptr<WriterProperties> props,
+ std::shared_ptr<const KeyValueMetadata> key_value_metadata = NULLPTR);
+ // PIMPL Idiom
+ class FileMetaDataBuilderImpl;
+ std::unique_ptr<FileMetaDataBuilderImpl> impl_;
+};
+
+PARQUET_EXPORT std::string ParquetVersionToString(ParquetVersion::type ver);
+
+} // namespace parquet
diff --git a/contrib/libs/apache/arrow/cpp/src/parquet/murmur3.cc b/contrib/libs/apache/arrow/cpp/src/parquet/murmur3.cc
index 69b38478172..07a936e0412 100644
--- a/contrib/libs/apache/arrow/cpp/src/parquet/murmur3.cc
+++ b/contrib/libs/apache/arrow/cpp/src/parquet/murmur3.cc
@@ -1,222 +1,222 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-//-----------------------------------------------------------------------------
-// MurmurHash3 was written by Austin Appleby, and is placed in the public
-// domain. The author hereby disclaims copyright to this source code.
-
-// Note - The x86 and x64 versions do _not_ produce the same results, as the
-// algorithms are optimized for their respective platforms. You can still
-// compile and run any of them on any platform, but your performance with the
-// non-native version will be less than optimal.
-
-#include "parquet/murmur3.h"
-
-namespace parquet {
-
-#if defined(_MSC_VER)
-
-#define FORCE_INLINE __forceinline
-#define ROTL64(x, y) _rotl64(x, y)
-
-#else // defined(_MSC_VER)
-
-#define FORCE_INLINE inline __attribute__((always_inline))
-inline uint64_t rotl64(uint64_t x, int8_t r) { return (x << r) | (x >> (64 - r)); }
-#define ROTL64(x, y) rotl64(x, y)
-
-#endif // !defined(_MSC_VER)
-
-#define BIG_CONSTANT(x) (x##LLU)
-
-//-----------------------------------------------------------------------------
-// Block read - if your platform needs to do endian-swapping or can only
-// handle aligned reads, do the conversion here
-
-FORCE_INLINE uint32_t getblock32(const uint32_t* p, int i) { return p[i]; }
-
-FORCE_INLINE uint64_t getblock64(const uint64_t* p, int i) { return p[i]; }
-
-//-----------------------------------------------------------------------------
-// Finalization mix - force all bits of a hash block to avalanche
-
-FORCE_INLINE uint32_t fmix32(uint32_t h) {
- h ^= h >> 16;
- h *= 0x85ebca6b;
- h ^= h >> 13;
- h *= 0xc2b2ae35;
- h ^= h >> 16;
-
- return h;
-}
-
-//----------
-
-FORCE_INLINE uint64_t fmix64(uint64_t k) {
- k ^= k >> 33;
- k *= BIG_CONSTANT(0xff51afd7ed558ccd);
- k ^= k >> 33;
- k *= BIG_CONSTANT(0xc4ceb9fe1a85ec53);
- k ^= k >> 33;
-
- return k;
-}
-
-//-----------------------------------------------------------------------------
-
-void Hash_x64_128(const void* key, const int len, const uint32_t seed, uint64_t out[2]) {
- const uint8_t* data = (const uint8_t*)key;
- const int nblocks = len / 16;
-
- uint64_t h1 = seed;
- uint64_t h2 = seed;
-
- const uint64_t c1 = BIG_CONSTANT(0x87c37b91114253d5);
- const uint64_t c2 = BIG_CONSTANT(0x4cf5ad432745937f);
-
- //----------
- // body
-
- const uint64_t* blocks = (const uint64_t*)(data);
-
- for (int i = 0; i < nblocks; i++) {
- uint64_t k1 = getblock64(blocks, i * 2 + 0);
- uint64_t k2 = getblock64(blocks, i * 2 + 1);
-
- k1 *= c1;
- k1 = ROTL64(k1, 31);
- k1 *= c2;
- h1 ^= k1;
-
- h1 = ROTL64(h1, 27);
- h1 += h2;
- h1 = h1 * 5 + 0x52dce729;
-
- k2 *= c2;
- k2 = ROTL64(k2, 33);
- k2 *= c1;
- h2 ^= k2;
-
- h2 = ROTL64(h2, 31);
- h2 += h1;
- h2 = h2 * 5 + 0x38495ab5;
- }
-
- //----------
- // tail
-
- const uint8_t* tail = (const uint8_t*)(data + nblocks * 16);
-
- uint64_t k1 = 0;
- uint64_t k2 = 0;
-
- switch (len & 15) {
- case 15:
- k2 ^= ((uint64_t)tail[14]) << 48; // fall through
- case 14:
- k2 ^= ((uint64_t)tail[13]) << 40; // fall through
- case 13:
- k2 ^= ((uint64_t)tail[12]) << 32; // fall through
- case 12:
- k2 ^= ((uint64_t)tail[11]) << 24; // fall through
- case 11:
- k2 ^= ((uint64_t)tail[10]) << 16; // fall through
- case 10:
- k2 ^= ((uint64_t)tail[9]) << 8; // fall through
- case 9:
- k2 ^= ((uint64_t)tail[8]) << 0;
- k2 *= c2;
- k2 = ROTL64(k2, 33);
- k2 *= c1;
- h2 ^= k2; // fall through
-
- case 8:
- k1 ^= ((uint64_t)tail[7]) << 56; // fall through
- case 7:
- k1 ^= ((uint64_t)tail[6]) << 48; // fall through
- case 6:
- k1 ^= ((uint64_t)tail[5]) << 40; // fall through
- case 5:
- k1 ^= ((uint64_t)tail[4]) << 32; // fall through
- case 4:
- k1 ^= ((uint64_t)tail[3]) << 24; // fall through
- case 3:
- k1 ^= ((uint64_t)tail[2]) << 16; // fall through
- case 2:
- k1 ^= ((uint64_t)tail[1]) << 8; // fall through
- case 1:
- k1 ^= ((uint64_t)tail[0]) << 0;
- k1 *= c1;
- k1 = ROTL64(k1, 31);
- k1 *= c2;
- h1 ^= k1;
- }
-
- //----------
- // finalization
-
- h1 ^= len;
- h2 ^= len;
-
- h1 += h2;
- h2 += h1;
-
- h1 = fmix64(h1);
- h2 = fmix64(h2);
-
- h1 += h2;
- h2 += h1;
-
- reinterpret_cast<uint64_t*>(out)[0] = h1;
- reinterpret_cast<uint64_t*>(out)[1] = h2;
-}
-
-template <typename T>
-uint64_t HashHelper(T value, uint32_t seed) {
- uint64_t output[2];
- Hash_x64_128(reinterpret_cast<void*>(&value), sizeof(T), seed, output);
- return output[0];
-}
-
-uint64_t MurmurHash3::Hash(int32_t value) const { return HashHelper(value, seed_); }
-
-uint64_t MurmurHash3::Hash(int64_t value) const { return HashHelper(value, seed_); }
-
-uint64_t MurmurHash3::Hash(float value) const { return HashHelper(value, seed_); }
-
-uint64_t MurmurHash3::Hash(double value) const { return HashHelper(value, seed_); }
-
-uint64_t MurmurHash3::Hash(const FLBA* value, uint32_t len) const {
- uint64_t out[2];
- Hash_x64_128(reinterpret_cast<const void*>(value->ptr), len, seed_, out);
- return out[0];
-}
-
-uint64_t MurmurHash3::Hash(const Int96* value) const {
- uint64_t out[2];
- Hash_x64_128(reinterpret_cast<const void*>(value->value), sizeof(value->value), seed_,
- out);
- return out[0];
-}
-
-uint64_t MurmurHash3::Hash(const ByteArray* value) const {
- uint64_t out[2];
- Hash_x64_128(reinterpret_cast<const void*>(value->ptr), value->len, seed_, out);
- return out[0];
-}
-
-} // namespace parquet
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+//-----------------------------------------------------------------------------
+// MurmurHash3 was written by Austin Appleby, and is placed in the public
+// domain. The author hereby disclaims copyright to this source code.
+
+// Note - The x86 and x64 versions do _not_ produce the same results, as the
+// algorithms are optimized for their respective platforms. You can still
+// compile and run any of them on any platform, but your performance with the
+// non-native version will be less than optimal.
+
+#include "parquet/murmur3.h"
+
+namespace parquet {
+
+#if defined(_MSC_VER)
+
+#define FORCE_INLINE __forceinline
+#define ROTL64(x, y) _rotl64(x, y)
+
+#else // defined(_MSC_VER)
+
+#define FORCE_INLINE inline __attribute__((always_inline))
+inline uint64_t rotl64(uint64_t x, int8_t r) { return (x << r) | (x >> (64 - r)); }
+#define ROTL64(x, y) rotl64(x, y)
+
+#endif // !defined(_MSC_VER)
+
+#define BIG_CONSTANT(x) (x##LLU)
+
+//-----------------------------------------------------------------------------
+// Block read - if your platform needs to do endian-swapping or can only
+// handle aligned reads, do the conversion here
+
+FORCE_INLINE uint32_t getblock32(const uint32_t* p, int i) { return p[i]; }
+
+FORCE_INLINE uint64_t getblock64(const uint64_t* p, int i) { return p[i]; }
+
+//-----------------------------------------------------------------------------
+// Finalization mix - force all bits of a hash block to avalanche
+
+FORCE_INLINE uint32_t fmix32(uint32_t h) {
+ h ^= h >> 16;
+ h *= 0x85ebca6b;
+ h ^= h >> 13;
+ h *= 0xc2b2ae35;
+ h ^= h >> 16;
+
+ return h;
+}
+
+//----------
+
+FORCE_INLINE uint64_t fmix64(uint64_t k) {
+ k ^= k >> 33;
+ k *= BIG_CONSTANT(0xff51afd7ed558ccd);
+ k ^= k >> 33;
+ k *= BIG_CONSTANT(0xc4ceb9fe1a85ec53);
+ k ^= k >> 33;
+
+ return k;
+}
+
+//-----------------------------------------------------------------------------
+
+void Hash_x64_128(const void* key, const int len, const uint32_t seed, uint64_t out[2]) {
+ const uint8_t* data = (const uint8_t*)key;
+ const int nblocks = len / 16;
+
+ uint64_t h1 = seed;
+ uint64_t h2 = seed;
+
+ const uint64_t c1 = BIG_CONSTANT(0x87c37b91114253d5);
+ const uint64_t c2 = BIG_CONSTANT(0x4cf5ad432745937f);
+
+ //----------
+ // body
+
+ const uint64_t* blocks = (const uint64_t*)(data);
+
+ for (int i = 0; i < nblocks; i++) {
+ uint64_t k1 = getblock64(blocks, i * 2 + 0);
+ uint64_t k2 = getblock64(blocks, i * 2 + 1);
+
+ k1 *= c1;
+ k1 = ROTL64(k1, 31);
+ k1 *= c2;
+ h1 ^= k1;
+
+ h1 = ROTL64(h1, 27);
+ h1 += h2;
+ h1 = h1 * 5 + 0x52dce729;
+
+ k2 *= c2;
+ k2 = ROTL64(k2, 33);
+ k2 *= c1;
+ h2 ^= k2;
+
+ h2 = ROTL64(h2, 31);
+ h2 += h1;
+ h2 = h2 * 5 + 0x38495ab5;
+ }
+
+ //----------
+ // tail
+
+ const uint8_t* tail = (const uint8_t*)(data + nblocks * 16);
+
+ uint64_t k1 = 0;
+ uint64_t k2 = 0;
+
+ switch (len & 15) {
+ case 15:
+ k2 ^= ((uint64_t)tail[14]) << 48; // fall through
+ case 14:
+ k2 ^= ((uint64_t)tail[13]) << 40; // fall through
+ case 13:
+ k2 ^= ((uint64_t)tail[12]) << 32; // fall through
+ case 12:
+ k2 ^= ((uint64_t)tail[11]) << 24; // fall through
+ case 11:
+ k2 ^= ((uint64_t)tail[10]) << 16; // fall through
+ case 10:
+ k2 ^= ((uint64_t)tail[9]) << 8; // fall through
+ case 9:
+ k2 ^= ((uint64_t)tail[8]) << 0;
+ k2 *= c2;
+ k2 = ROTL64(k2, 33);
+ k2 *= c1;
+ h2 ^= k2; // fall through
+
+ case 8:
+ k1 ^= ((uint64_t)tail[7]) << 56; // fall through
+ case 7:
+ k1 ^= ((uint64_t)tail[6]) << 48; // fall through
+ case 6:
+ k1 ^= ((uint64_t)tail[5]) << 40; // fall through
+ case 5:
+ k1 ^= ((uint64_t)tail[4]) << 32; // fall through
+ case 4:
+ k1 ^= ((uint64_t)tail[3]) << 24; // fall through
+ case 3:
+ k1 ^= ((uint64_t)tail[2]) << 16; // fall through
+ case 2:
+ k1 ^= ((uint64_t)tail[1]) << 8; // fall through
+ case 1:
+ k1 ^= ((uint64_t)tail[0]) << 0;
+ k1 *= c1;
+ k1 = ROTL64(k1, 31);
+ k1 *= c2;
+ h1 ^= k1;
+ }
+
+ //----------
+ // finalization
+
+ h1 ^= len;
+ h2 ^= len;
+
+ h1 += h2;
+ h2 += h1;
+
+ h1 = fmix64(h1);
+ h2 = fmix64(h2);
+
+ h1 += h2;
+ h2 += h1;
+
+ reinterpret_cast<uint64_t*>(out)[0] = h1;
+ reinterpret_cast<uint64_t*>(out)[1] = h2;
+}
+
+template <typename T>
+uint64_t HashHelper(T value, uint32_t seed) {
+ uint64_t output[2];
+ Hash_x64_128(reinterpret_cast<void*>(&value), sizeof(T), seed, output);
+ return output[0];
+}
+
+uint64_t MurmurHash3::Hash(int32_t value) const { return HashHelper(value, seed_); }
+
+uint64_t MurmurHash3::Hash(int64_t value) const { return HashHelper(value, seed_); }
+
+uint64_t MurmurHash3::Hash(float value) const { return HashHelper(value, seed_); }
+
+uint64_t MurmurHash3::Hash(double value) const { return HashHelper(value, seed_); }
+
+uint64_t MurmurHash3::Hash(const FLBA* value, uint32_t len) const {
+ uint64_t out[2];
+ Hash_x64_128(reinterpret_cast<const void*>(value->ptr), len, seed_, out);
+ return out[0];
+}
+
+uint64_t MurmurHash3::Hash(const Int96* value) const {
+ uint64_t out[2];
+ Hash_x64_128(reinterpret_cast<const void*>(value->value), sizeof(value->value), seed_,
+ out);
+ return out[0];
+}
+
+uint64_t MurmurHash3::Hash(const ByteArray* value) const {
+ uint64_t out[2];
+ Hash_x64_128(reinterpret_cast<const void*>(value->ptr), value->len, seed_, out);
+ return out[0];
+}
+
+} // namespace parquet
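
The fmix64 finalizer above is what forces every input bit to affect every output bit; a self-contained copy (same constants, outside the library) makes the avalanche behavior easy to poke at — an illustrative sketch, not part of the parquet API:

#include <cstdint>
#include <cstdio>

// Standalone copy of the fmix64 finalizer from murmur3.cc, for illustration.
static uint64_t fmix64_demo(uint64_t k) {
  k ^= k >> 33;
  k *= 0xff51afd7ed558ccdULL;
  k ^= k >> 33;
  k *= 0xc4ceb9fe1a85ec53ULL;
  k ^= k >> 33;
  return k;
}

int main() {
  // Inputs that differ in a single bit should produce unrelated outputs.
  std::printf("%016llx\n", static_cast<unsigned long long>(fmix64_demo(1)));
  std::printf("%016llx\n", static_cast<unsigned long long>(fmix64_demo(3)));
  return 0;
}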
diff --git a/contrib/libs/apache/arrow/cpp/src/parquet/murmur3.h b/contrib/libs/apache/arrow/cpp/src/parquet/murmur3.h
index 2dcb8b5bffa..acf7088e44b 100644
--- a/contrib/libs/apache/arrow/cpp/src/parquet/murmur3.h
+++ b/contrib/libs/apache/arrow/cpp/src/parquet/murmur3.h
@@ -1,54 +1,54 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-//-----------------------------------------------------------------------------
-// MurmurHash3 was written by Austin Appleby, and is placed in the public
-// domain. The author hereby disclaims copyright to this source code.
-
-#pragma once
-
-#include <cstdint>
-
-#include "parquet/hasher.h"
-#include "parquet/platform.h"
-#include "parquet/types.h"
-
-namespace parquet {
-
-/// Source:
-/// https://github.com/aappleby/smhasher/blob/master/src/MurmurHash3.cpp
-/// (Modified to adapt to coding conventions and to inherit the Hasher abstract class)
-class PARQUET_EXPORT MurmurHash3 : public Hasher {
- public:
- MurmurHash3() : seed_(DEFAULT_SEED) {}
- uint64_t Hash(int32_t value) const override;
- uint64_t Hash(int64_t value) const override;
- uint64_t Hash(float value) const override;
- uint64_t Hash(double value) const override;
- uint64_t Hash(const Int96* value) const override;
- uint64_t Hash(const ByteArray* value) const override;
- uint64_t Hash(const FLBA* val, uint32_t len) const override;
-
- private:
- // Default seed for the hash; it comes from the Bloom filter in parquet-mr and
- // was generated by Java's System.nanoTime().
- static constexpr int DEFAULT_SEED = 1361930890;
-
- uint32_t seed_;
-};
-
-} // namespace parquet
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+//-----------------------------------------------------------------------------
+// MurmurHash3 was written by Austin Appleby, and is placed in the public
+// domain. The author hereby disclaims copyright to this source code.
+
+#pragma once
+
+#include <cstdint>
+
+#include "parquet/hasher.h"
+#include "parquet/platform.h"
+#include "parquet/types.h"
+
+namespace parquet {
+
+/// Source:
+/// https://github.com/aappleby/smhasher/blob/master/src/MurmurHash3.cpp
+/// (Modified to adapt to coding conventions and to inherit the Hasher abstract class)
+class PARQUET_EXPORT MurmurHash3 : public Hasher {
+ public:
+ MurmurHash3() : seed_(DEFAULT_SEED) {}
+ uint64_t Hash(int32_t value) const override;
+ uint64_t Hash(int64_t value) const override;
+ uint64_t Hash(float value) const override;
+ uint64_t Hash(double value) const override;
+ uint64_t Hash(const Int96* value) const override;
+ uint64_t Hash(const ByteArray* value) const override;
+ uint64_t Hash(const FLBA* val, uint32_t len) const override;
+
+ private:
+ // Default seed for the hash; it comes from the Bloom filter in parquet-mr and
+ // was generated by Java's System.nanoTime().
+ static constexpr int DEFAULT_SEED = 1361930890;
+
+ uint32_t seed_;
+};
+
+} // namespace parquet
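
A short usage sketch of the hasher declared above, assuming the (len, ptr) ByteArray constructor from parquet/types.h; all values here are arbitrary:

#include <cstdint>

#include "parquet/murmur3.h"
#include "parquet/types.h"

uint64_t HashExamples() {
  parquet::MurmurHash3 hasher;  // seeded with DEFAULT_SEED
  uint64_t h_int = hasher.Hash(int64_t{42});
  static const uint8_t bytes[] = {'a', 'b', 'c'};
  parquet::ByteArray ba(3, bytes);
  uint64_t h_bytes = hasher.Hash(&ba);
  return h_int ^ h_bytes;  // combine just to exercise both overloads
}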
diff --git a/contrib/libs/apache/arrow/cpp/src/parquet/platform.cc b/contrib/libs/apache/arrow/cpp/src/parquet/platform.cc
index 70ed6f73df3..5c355c28be1 100644
--- a/contrib/libs/apache/arrow/cpp/src/parquet/platform.cc
+++ b/contrib/libs/apache/arrow/cpp/src/parquet/platform.cc
@@ -1,41 +1,41 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-#include "parquet/platform.h"
-
-#include <cstdint>
-#include <memory>
-#include <utility>
-
-#include "arrow/io/memory.h"
-
-#include "parquet/exception.h"
-
-namespace parquet {
-
-std::shared_ptr<::arrow::io::BufferOutputStream> CreateOutputStream(MemoryPool* pool) {
- PARQUET_ASSIGN_OR_THROW(auto stream, ::arrow::io::BufferOutputStream::Create(
- kDefaultOutputStreamSize, pool));
- return stream;
-}
-
-std::shared_ptr<ResizableBuffer> AllocateBuffer(MemoryPool* pool, int64_t size) {
- PARQUET_ASSIGN_OR_THROW(auto result, ::arrow::AllocateResizableBuffer(size, pool));
- return std::move(result);
-}
-
-} // namespace parquet
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "parquet/platform.h"
+
+#include <cstdint>
+#include <memory>
+#include <utility>
+
+#include "arrow/io/memory.h"
+
+#include "parquet/exception.h"
+
+namespace parquet {
+
+std::shared_ptr<::arrow::io::BufferOutputStream> CreateOutputStream(MemoryPool* pool) {
+ PARQUET_ASSIGN_OR_THROW(auto stream, ::arrow::io::BufferOutputStream::Create(
+ kDefaultOutputStreamSize, pool));
+ return stream;
+}
+
+std::shared_ptr<ResizableBuffer> AllocateBuffer(MemoryPool* pool, int64_t size) {
+ PARQUET_ASSIGN_OR_THROW(auto result, ::arrow::AllocateResizableBuffer(size, pool));
+ return std::move(result);
+}
+
+} // namespace parquet
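
Both helpers above throw ParquetException on failure instead of returning a Status; a minimal round trip through them might look like this (a sketch using only the APIs shown in this file plus parquet/exception.h):

#include <memory>

#include "arrow/buffer.h"
#include "arrow/io/memory.h"
#include "parquet/exception.h"
#include "parquet/platform.h"

std::shared_ptr<arrow::Buffer> WriteAndFinish() {
  // The stream starts at kDefaultOutputStreamSize bytes and grows as needed.
  std::shared_ptr<arrow::io::BufferOutputStream> sink =
      parquet::CreateOutputStream();
  PARQUET_THROW_NOT_OK(sink->Write("hello", 5));

  // Scratch space from the default pool; resizable later via Resize().
  std::shared_ptr<parquet::ResizableBuffer> scratch =
      parquet::AllocateBuffer(arrow::default_memory_pool(), 64);
  (void)scratch;

  PARQUET_ASSIGN_OR_THROW(auto buffer, sink->Finish());
  return buffer;
}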
diff --git a/contrib/libs/apache/arrow/cpp/src/parquet/platform.h b/contrib/libs/apache/arrow/cpp/src/parquet/platform.h
index cd41aa7f5c6..00a193f144a 100644
--- a/contrib/libs/apache/arrow/cpp/src/parquet/platform.h
+++ b/contrib/libs/apache/arrow/cpp/src/parquet/platform.h
@@ -1,110 +1,110 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-#pragma once
-
-#include <cstdint>
-#include <memory>
-
-#include "arrow/buffer.h" // IWYU pragma: export
-#include "arrow/io/interfaces.h" // IWYU pragma: export
-#include "arrow/status.h" // IWYU pragma: export
-#include "arrow/type_fwd.h" // IWYU pragma: export
-#include "arrow/util/macros.h" // IWYU pragma: export
-
-#if defined(_WIN32) || defined(__CYGWIN__)
-
-#if defined(_MSC_VER)
-#pragma warning(push)
-// Disable warning for STL types usage in DLL interface
-// https://web.archive.org/web/20130317015847/http://connect.microsoft.com/VisualStudio/feedback/details/696593/vc-10-vs-2010-basic-string-exports
-#pragma warning(disable : 4275 4251)
-// Disable diamond inheritance warnings
-#pragma warning(disable : 4250)
-// Disable macro redefinition warnings
-#pragma warning(disable : 4005)
-// Disable extern before exported template warnings
-#pragma warning(disable : 4910)
-#else
-#pragma GCC diagnostic ignored "-Wattributes"
-#endif
-
-#ifdef PARQUET_STATIC
-#define PARQUET_EXPORT
-#elif defined(PARQUET_EXPORTING)
-#define PARQUET_EXPORT __declspec(dllexport)
-#else
-#define PARQUET_EXPORT __declspec(dllimport)
-#endif
-
-#define PARQUET_NO_EXPORT
-
-#else // Not Windows
-#ifndef PARQUET_EXPORT
-#define PARQUET_EXPORT __attribute__((visibility("default")))
-#endif
-#ifndef PARQUET_NO_EXPORT
-#define PARQUET_NO_EXPORT __attribute__((visibility("hidden")))
-#endif
-#endif // Non-Windows
-
-// This is a complicated topic; some reading on it:
-// http://www.codesynthesis.com/~boris/blog/2010/01/18/dll-export-cxx-templates/
-#if defined(_MSC_VER) || defined(__clang__)
-#define PARQUET_TEMPLATE_CLASS_EXPORT
-#define PARQUET_TEMPLATE_EXPORT PARQUET_EXPORT
-#else
-#define PARQUET_TEMPLATE_CLASS_EXPORT PARQUET_EXPORT
-#define PARQUET_TEMPLATE_EXPORT
-#endif
-
-#define PARQUET_DISALLOW_COPY_AND_ASSIGN ARROW_DISALLOW_COPY_AND_ASSIGN
-
-#define PARQUET_NORETURN ARROW_NORETURN
-#define PARQUET_DEPRECATED ARROW_DEPRECATED
-
-// If ARROW_VALGRIND is set when compiling unit tests, also define
-// PARQUET_VALGRIND
-#ifdef ARROW_VALGRIND
-#define PARQUET_VALGRIND
-#endif
-
-namespace parquet {
-
-using Buffer = ::arrow::Buffer;
-using Codec = ::arrow::util::Codec;
-using Compression = ::arrow::Compression;
-using MemoryPool = ::arrow::MemoryPool;
-using MutableBuffer = ::arrow::MutableBuffer;
-using ResizableBuffer = ::arrow::ResizableBuffer;
-using ArrowInputFile = ::arrow::io::RandomAccessFile;
-using ArrowInputStream = ::arrow::io::InputStream;
-using ArrowOutputStream = ::arrow::io::OutputStream;
-
-constexpr int64_t kDefaultOutputStreamSize = 1024;
-
-constexpr int16_t kNonPageOrdinal = static_cast<int16_t>(-1);
-
-PARQUET_EXPORT
-std::shared_ptr<::arrow::io::BufferOutputStream> CreateOutputStream(
- ::arrow::MemoryPool* pool = ::arrow::default_memory_pool());
-
-PARQUET_EXPORT
-std::shared_ptr<ResizableBuffer> AllocateBuffer(
- ::arrow::MemoryPool* pool = ::arrow::default_memory_pool(), int64_t size = 0);
-
-} // namespace parquet
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+
+#include <cstdint>
+#include <memory>
+
+#include "arrow/buffer.h" // IWYU pragma: export
+#include "arrow/io/interfaces.h" // IWYU pragma: export
+#include "arrow/status.h" // IWYU pragma: export
+#include "arrow/type_fwd.h" // IWYU pragma: export
+#include "arrow/util/macros.h" // IWYU pragma: export
+
+#if defined(_WIN32) || defined(__CYGWIN__)
+
+#if defined(_MSC_VER)
+#pragma warning(push)
+// Disable warning for STL types usage in DLL interface
+// https://web.archive.org/web/20130317015847/http://connect.microsoft.com/VisualStudio/feedback/details/696593/vc-10-vs-2010-basic-string-exports
+#pragma warning(disable : 4275 4251)
+// Disable diamond inheritance warnings
+#pragma warning(disable : 4250)
+// Disable macro redefinition warnings
+#pragma warning(disable : 4005)
+// Disable extern before exported template warnings
+#pragma warning(disable : 4910)
+#else
+#pragma GCC diagnostic ignored "-Wattributes"
+#endif
+
+#ifdef PARQUET_STATIC
+#define PARQUET_EXPORT
+#elif defined(PARQUET_EXPORTING)
+#define PARQUET_EXPORT __declspec(dllexport)
+#else
+#define PARQUET_EXPORT __declspec(dllimport)
+#endif
+
+#define PARQUET_NO_EXPORT
+
+#else // Not Windows
+#ifndef PARQUET_EXPORT
+#define PARQUET_EXPORT __attribute__((visibility("default")))
+#endif
+#ifndef PARQUET_NO_EXPORT
+#define PARQUET_NO_EXPORT __attribute__((visibility("hidden")))
+#endif
+#endif // Non-Windows
+
+// This is a complicated topic; some reading on it:
+// http://www.codesynthesis.com/~boris/blog/2010/01/18/dll-export-cxx-templates/
+#if defined(_MSC_VER) || defined(__clang__)
+#define PARQUET_TEMPLATE_CLASS_EXPORT
+#define PARQUET_TEMPLATE_EXPORT PARQUET_EXPORT
+#else
+#define PARQUET_TEMPLATE_CLASS_EXPORT PARQUET_EXPORT
+#define PARQUET_TEMPLATE_EXPORT
+#endif
+
+#define PARQUET_DISALLOW_COPY_AND_ASSIGN ARROW_DISALLOW_COPY_AND_ASSIGN
+
+#define PARQUET_NORETURN ARROW_NORETURN
+#define PARQUET_DEPRECATED ARROW_DEPRECATED
+
+// If ARROW_VALGRIND is set when compiling unit tests, also define
+// PARQUET_VALGRIND
+#ifdef ARROW_VALGRIND
+#define PARQUET_VALGRIND
+#endif
+
+namespace parquet {
+
+using Buffer = ::arrow::Buffer;
+using Codec = ::arrow::util::Codec;
+using Compression = ::arrow::Compression;
+using MemoryPool = ::arrow::MemoryPool;
+using MutableBuffer = ::arrow::MutableBuffer;
+using ResizableBuffer = ::arrow::ResizableBuffer;
+using ArrowInputFile = ::arrow::io::RandomAccessFile;
+using ArrowInputStream = ::arrow::io::InputStream;
+using ArrowOutputStream = ::arrow::io::OutputStream;
+
+constexpr int64_t kDefaultOutputStreamSize = 1024;
+
+constexpr int16_t kNonPageOrdinal = static_cast<int16_t>(-1);
+
+PARQUET_EXPORT
+std::shared_ptr<::arrow::io::BufferOutputStream> CreateOutputStream(
+ ::arrow::MemoryPool* pool = ::arrow::default_memory_pool());
+
+PARQUET_EXPORT
+std::shared_ptr<ResizableBuffer> AllocateBuffer(
+ ::arrow::MemoryPool* pool = ::arrow::default_memory_pool(), int64_t size = 0);
+
+} // namespace parquet
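
How the export macros above are meant to be applied — `PageIndexLocator` is a made-up class name used purely to illustrate placement; on Windows builds PARQUET_EXPORT expands to dllexport/dllimport, elsewhere to default visibility:

#include <cstdint>

#include "parquet/platform.h"

// Part of the shared-library interface: annotate with PARQUET_EXPORT.
class PARQUET_EXPORT PageIndexLocator {
 public:
  int64_t Find(int64_t row_index) const;
};

// Internal helper: PARQUET_NO_EXPORT keeps it hidden on non-Windows builds.
class PARQUET_NO_EXPORT PageIndexLocatorImpl {};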
diff --git a/contrib/libs/apache/arrow/cpp/src/parquet/printer.cc b/contrib/libs/apache/arrow/cpp/src/parquet/printer.cc
index df2b4c50b5d..dfd4bd802ee 100644
--- a/contrib/libs/apache/arrow/cpp/src/parquet/printer.cc
+++ b/contrib/libs/apache/arrow/cpp/src/parquet/printer.cc
@@ -1,297 +1,297 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-#include "parquet/printer.h"
-
-#include <cstdint>
-#include <cstdio>
-#include <memory>
-#include <ostream>
-#include <string>
-#include <vector>
-
-#include "arrow/util/key_value_metadata.h"
-#include "arrow/util/string.h"
-
-#include "parquet/column_scanner.h"
-#include "parquet/exception.h"
-#include "parquet/file_reader.h"
-#include "parquet/metadata.h"
-#include "parquet/schema.h"
-#include "parquet/statistics.h"
-#include "parquet/types.h"
-
-namespace parquet {
-
-class ColumnReader;
-
-// ----------------------------------------------------------------------
-// ParquetFilePrinter::DebugPrint
-
-// The fixed column width below is just an example value
-#define COL_WIDTH 30
-
-void ParquetFilePrinter::DebugPrint(std::ostream& stream, std::list<int> selected_columns,
- bool print_values, bool format_dump,
- bool print_key_value_metadata, const char* filename) {
- const FileMetaData* file_metadata = fileReader->metadata().get();
-
- stream << "File Name: " << filename << "\n";
- stream << "Version: " << ParquetVersionToString(file_metadata->version()) << "\n";
- stream << "Created By: " << file_metadata->created_by() << "\n";
- stream << "Total rows: " << file_metadata->num_rows() << "\n";
-
- if (print_key_value_metadata && file_metadata->key_value_metadata()) {
- auto key_value_metadata = file_metadata->key_value_metadata();
- int64_t size_of_key_value_metadata = key_value_metadata->size();
- stream << "Key Value File Metadata: " << size_of_key_value_metadata << " entries\n";
- for (int64_t i = 0; i < size_of_key_value_metadata; i++) {
- stream << " Key nr " << i << " " << key_value_metadata->key(i) << ": "
- << key_value_metadata->value(i) << "\n";
- }
- }
-
- stream << "Number of RowGroups: " << file_metadata->num_row_groups() << "\n";
- stream << "Number of Real Columns: "
- << file_metadata->schema()->group_node()->field_count() << "\n";
-
- if (selected_columns.size() == 0) {
- for (int i = 0; i < file_metadata->num_columns(); i++) {
- selected_columns.push_back(i);
- }
- } else {
- for (auto i : selected_columns) {
- if (i < 0 || i >= file_metadata->num_columns()) {
- throw ParquetException("Selected column is out of range");
- }
- }
- }
-
- stream << "Number of Columns: " << file_metadata->num_columns() << "\n";
- stream << "Number of Selected Columns: " << selected_columns.size() << "\n";
- for (auto i : selected_columns) {
- const ColumnDescriptor* descr = file_metadata->schema()->Column(i);
- stream << "Column " << i << ": " << descr->path()->ToDotString() << " ("
- << TypeToString(descr->physical_type());
- const auto& logical_type = descr->logical_type();
- if (!logical_type->is_none()) {
- stream << " / " << logical_type->ToString();
- }
- if (descr->converted_type() != ConvertedType::NONE) {
- stream << " / " << ConvertedTypeToString(descr->converted_type());
- if (descr->converted_type() == ConvertedType::DECIMAL) {
- stream << "(" << descr->type_precision() << "," << descr->type_scale() << ")";
- }
- }
- stream << ")" << std::endl;
- }
-
- for (int r = 0; r < file_metadata->num_row_groups(); ++r) {
- stream << "--- Row Group: " << r << " ---\n";
-
- auto group_reader = fileReader->RowGroup(r);
- std::unique_ptr<RowGroupMetaData> group_metadata = file_metadata->RowGroup(r);
-
- stream << "--- Total Bytes: " << group_metadata->total_byte_size() << " ---\n";
- stream << "--- Total Compressed Bytes: " << group_metadata->total_compressed_size()
- << " ---\n";
- stream << "--- Rows: " << group_metadata->num_rows() << " ---\n";
-
- // Print column metadata
- for (auto i : selected_columns) {
- auto column_chunk = group_metadata->ColumnChunk(i);
- std::shared_ptr<Statistics> stats = column_chunk->statistics();
-
- const ColumnDescriptor* descr = file_metadata->schema()->Column(i);
- stream << "Column " << i << std::endl << " Values: " << column_chunk->num_values();
- if (column_chunk->is_stats_set()) {
- std::string min = stats->EncodeMin(), max = stats->EncodeMax();
- stream << ", Null Values: " << stats->null_count()
- << ", Distinct Values: " << stats->distinct_count() << std::endl
- << " Max: " << FormatStatValue(descr->physical_type(), max)
- << ", Min: " << FormatStatValue(descr->physical_type(), min);
- } else {
- stream << " Statistics Not Set";
- }
- stream << std::endl
- << " Compression: "
- << ::arrow::internal::AsciiToUpper(
- Codec::GetCodecAsString(column_chunk->compression()))
- << ", Encodings:";
- for (auto encoding : column_chunk->encodings()) {
- stream << " " << EncodingToString(encoding);
- }
- stream << std::endl
- << " Uncompressed Size: " << column_chunk->total_uncompressed_size()
- << ", Compressed Size: " << column_chunk->total_compressed_size()
- << std::endl;
- }
-
- if (!print_values) {
- continue;
- }
- stream << "--- Values ---\n";
-
- static constexpr int bufsize = COL_WIDTH + 1;
- char buffer[bufsize];
-
- // Create readers for selected columns and print contents
- std::vector<std::shared_ptr<Scanner>> scanners(selected_columns.size(), nullptr);
- int j = 0;
- for (auto i : selected_columns) {
- std::shared_ptr<ColumnReader> col_reader = group_reader->Column(i);
- // This is OK in this method as long as the RowGroupReader does not get
- // deleted
- auto& scanner = scanners[j++] = Scanner::Make(col_reader);
-
- if (format_dump) {
- stream << "Column " << i << std::endl;
- while (scanner->HasNext()) {
- scanner->PrintNext(stream, 0, true);
- stream << "\n";
- }
- continue;
- }
-
- snprintf(buffer, bufsize, "%-*s", COL_WIDTH,
- file_metadata->schema()->Column(i)->name().c_str());
- stream << buffer << '|';
- }
- if (format_dump) {
- continue;
- }
- stream << "\n";
-
- bool hasRow;
- do {
- hasRow = false;
- for (auto scanner : scanners) {
- if (scanner->HasNext()) {
- hasRow = true;
- scanner->PrintNext(stream, COL_WIDTH);
- stream << '|';
- }
- }
- stream << "\n";
- } while (hasRow);
- }
-}
-
-void ParquetFilePrinter::JSONPrint(std::ostream& stream, std::list<int> selected_columns,
- const char* filename) {
- const FileMetaData* file_metadata = fileReader->metadata().get();
- stream << "{\n";
- stream << " \"FileName\": \"" << filename << "\",\n";
- stream << " \"Version\": \"" << ParquetVersionToString(file_metadata->version())
- << "\",\n";
- stream << " \"CreatedBy\": \"" << file_metadata->created_by() << "\",\n";
- stream << " \"TotalRows\": \"" << file_metadata->num_rows() << "\",\n";
- stream << " \"NumberOfRowGroups\": \"" << file_metadata->num_row_groups() << "\",\n";
- stream << " \"NumberOfRealColumns\": \""
- << file_metadata->schema()->group_node()->field_count() << "\",\n";
- stream << " \"NumberOfColumns\": \"" << file_metadata->num_columns() << "\",\n";
-
- if (selected_columns.size() == 0) {
- for (int i = 0; i < file_metadata->num_columns(); i++) {
- selected_columns.push_back(i);
- }
- } else {
- for (auto i : selected_columns) {
- if (i < 0 || i >= file_metadata->num_columns()) {
- throw ParquetException("Selected column is out of range");
- }
- }
- }
-
- stream << " \"Columns\": [\n";
- int c = 0;
- for (auto i : selected_columns) {
- const ColumnDescriptor* descr = file_metadata->schema()->Column(i);
- stream << " { \"Id\": \"" << i << "\","
- << " \"Name\": \"" << descr->path()->ToDotString() << "\","
- << " \"PhysicalType\": \"" << TypeToString(descr->physical_type()) << "\","
- << " \"ConvertedType\": \"" << ConvertedTypeToString(descr->converted_type())
- << "\","
- << " \"LogicalType\": " << (descr->logical_type())->ToJSON() << " }";
- c++;
- if (c != static_cast<int>(selected_columns.size())) {
- stream << ",\n";
- }
- }
-
- stream << "\n ],\n \"RowGroups\": [\n";
- for (int r = 0; r < file_metadata->num_row_groups(); ++r) {
- stream << " {\n \"Id\": \"" << r << "\", ";
-
- auto group_reader = fileReader->RowGroup(r);
- std::unique_ptr<RowGroupMetaData> group_metadata = file_metadata->RowGroup(r);
-
- stream << " \"TotalBytes\": \"" << group_metadata->total_byte_size() << "\", ";
- stream << " \"TotalCompressedBytes\": \"" << group_metadata->total_compressed_size()
- << "\", ";
- stream << " \"Rows\": \"" << group_metadata->num_rows() << "\",\n";
-
- // Print column metadata
- stream << " \"ColumnChunks\": [\n";
- int c1 = 0;
- for (auto i : selected_columns) {
- auto column_chunk = group_metadata->ColumnChunk(i);
- std::shared_ptr<Statistics> stats = column_chunk->statistics();
-
- const ColumnDescriptor* descr = file_metadata->schema()->Column(i);
- stream << " {\"Id\": \"" << i << "\", \"Values\": \""
- << column_chunk->num_values() << "\", "
- << "\"StatsSet\": ";
- if (column_chunk->is_stats_set()) {
- stream << "\"True\", \"Stats\": {";
- std::string min = stats->EncodeMin(), max = stats->EncodeMax();
- stream << "\"NumNulls\": \"" << stats->null_count() << "\", "
- << "\"DistinctValues\": \"" << stats->distinct_count() << "\", "
- << "\"Max\": \"" << FormatStatValue(descr->physical_type(), max) << "\", "
- << "\"Min\": \"" << FormatStatValue(descr->physical_type(), min)
- << "\" },";
- } else {
- stream << "\"False\",";
- }
- stream << "\n \"Compression\": \""
- << ::arrow::internal::AsciiToUpper(
- Codec::GetCodecAsString(column_chunk->compression()))
- << "\", \"Encodings\": \"";
- for (auto encoding : column_chunk->encodings()) {
- stream << EncodingToString(encoding) << " ";
- }
- stream << "\", "
- << "\"UncompressedSize\": \"" << column_chunk->total_uncompressed_size()
- << "\", \"CompressedSize\": \"" << column_chunk->total_compressed_size();
-
- // end of a ColumnChunk
- stream << "\" }";
- c1++;
- if (c1 != static_cast<int>(selected_columns.size())) {
- stream << ",\n";
- }
- }
-
- stream << "\n ]\n }";
- if ((r + 1) != static_cast<int>(file_metadata->num_row_groups())) {
- stream << ",\n";
- }
- }
- stream << "\n ]\n}\n";
-}
-
-} // namespace parquet
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "parquet/printer.h"
+
+#include <cstdint>
+#include <cstdio>
+#include <memory>
+#include <ostream>
+#include <string>
+#include <vector>
+
+#include "arrow/util/key_value_metadata.h"
+#include "arrow/util/string.h"
+
+#include "parquet/column_scanner.h"
+#include "parquet/exception.h"
+#include "parquet/file_reader.h"
+#include "parquet/metadata.h"
+#include "parquet/schema.h"
+#include "parquet/statistics.h"
+#include "parquet/types.h"
+
+namespace parquet {
+
+class ColumnReader;
+
+// ----------------------------------------------------------------------
+// ParquetFilePrinter::DebugPrint
+
+// The fixed column width below is just an example value
+#define COL_WIDTH 30
+
+void ParquetFilePrinter::DebugPrint(std::ostream& stream, std::list<int> selected_columns,
+ bool print_values, bool format_dump,
+ bool print_key_value_metadata, const char* filename) {
+ const FileMetaData* file_metadata = fileReader->metadata().get();
+
+ stream << "File Name: " << filename << "\n";
+ stream << "Version: " << ParquetVersionToString(file_metadata->version()) << "\n";
+ stream << "Created By: " << file_metadata->created_by() << "\n";
+ stream << "Total rows: " << file_metadata->num_rows() << "\n";
+
+ if (print_key_value_metadata && file_metadata->key_value_metadata()) {
+ auto key_value_metadata = file_metadata->key_value_metadata();
+ int64_t size_of_key_value_metadata = key_value_metadata->size();
+ stream << "Key Value File Metadata: " << size_of_key_value_metadata << " entries\n";
+ for (int64_t i = 0; i < size_of_key_value_metadata; i++) {
+ stream << " Key nr " << i << " " << key_value_metadata->key(i) << ": "
+ << key_value_metadata->value(i) << "\n";
+ }
+ }
+
+ stream << "Number of RowGroups: " << file_metadata->num_row_groups() << "\n";
+ stream << "Number of Real Columns: "
+ << file_metadata->schema()->group_node()->field_count() << "\n";
+
+ if (selected_columns.size() == 0) {
+ for (int i = 0; i < file_metadata->num_columns(); i++) {
+ selected_columns.push_back(i);
+ }
+ } else {
+ for (auto i : selected_columns) {
+ if (i < 0 || i >= file_metadata->num_columns()) {
+ throw ParquetException("Selected column is out of range");
+ }
+ }
+ }
+
+ stream << "Number of Columns: " << file_metadata->num_columns() << "\n";
+ stream << "Number of Selected Columns: " << selected_columns.size() << "\n";
+ for (auto i : selected_columns) {
+ const ColumnDescriptor* descr = file_metadata->schema()->Column(i);
+ stream << "Column " << i << ": " << descr->path()->ToDotString() << " ("
+ << TypeToString(descr->physical_type());
+ const auto& logical_type = descr->logical_type();
+ if (!logical_type->is_none()) {
+ stream << " / " << logical_type->ToString();
+ }
+ if (descr->converted_type() != ConvertedType::NONE) {
+ stream << " / " << ConvertedTypeToString(descr->converted_type());
+ if (descr->converted_type() == ConvertedType::DECIMAL) {
+ stream << "(" << descr->type_precision() << "," << descr->type_scale() << ")";
+ }
+ }
+ stream << ")" << std::endl;
+ }
+
+ for (int r = 0; r < file_metadata->num_row_groups(); ++r) {
+ stream << "--- Row Group: " << r << " ---\n";
+
+ auto group_reader = fileReader->RowGroup(r);
+ std::unique_ptr<RowGroupMetaData> group_metadata = file_metadata->RowGroup(r);
+
+ stream << "--- Total Bytes: " << group_metadata->total_byte_size() << " ---\n";
+ stream << "--- Total Compressed Bytes: " << group_metadata->total_compressed_size()
+ << " ---\n";
+ stream << "--- Rows: " << group_metadata->num_rows() << " ---\n";
+
+ // Print column metadata
+ for (auto i : selected_columns) {
+ auto column_chunk = group_metadata->ColumnChunk(i);
+ std::shared_ptr<Statistics> stats = column_chunk->statistics();
+
+ const ColumnDescriptor* descr = file_metadata->schema()->Column(i);
+ stream << "Column " << i << std::endl << " Values: " << column_chunk->num_values();
+ if (column_chunk->is_stats_set()) {
+ std::string min = stats->EncodeMin(), max = stats->EncodeMax();
+ stream << ", Null Values: " << stats->null_count()
+ << ", Distinct Values: " << stats->distinct_count() << std::endl
+ << " Max: " << FormatStatValue(descr->physical_type(), max)
+ << ", Min: " << FormatStatValue(descr->physical_type(), min);
+ } else {
+ stream << " Statistics Not Set";
+ }
+ stream << std::endl
+ << " Compression: "
+ << ::arrow::internal::AsciiToUpper(
+ Codec::GetCodecAsString(column_chunk->compression()))
+ << ", Encodings:";
+ for (auto encoding : column_chunk->encodings()) {
+ stream << " " << EncodingToString(encoding);
+ }
+ stream << std::endl
+ << " Uncompressed Size: " << column_chunk->total_uncompressed_size()
+ << ", Compressed Size: " << column_chunk->total_compressed_size()
+ << std::endl;
+ }
+
+ if (!print_values) {
+ continue;
+ }
+ stream << "--- Values ---\n";
+
+ static constexpr int bufsize = COL_WIDTH + 1;
+ char buffer[bufsize];
+
+ // Create readers for selected columns and print contents
+ std::vector<std::shared_ptr<Scanner>> scanners(selected_columns.size(), nullptr);
+ int j = 0;
+ for (auto i : selected_columns) {
+ std::shared_ptr<ColumnReader> col_reader = group_reader->Column(i);
+ // This is OK in this method as long as the RowGroupReader does not get
+ // deleted
+ auto& scanner = scanners[j++] = Scanner::Make(col_reader);
+
+ if (format_dump) {
+ stream << "Column " << i << std::endl;
+ while (scanner->HasNext()) {
+ scanner->PrintNext(stream, 0, true);
+ stream << "\n";
+ }
+ continue;
+ }
+
+ snprintf(buffer, bufsize, "%-*s", COL_WIDTH,
+ file_metadata->schema()->Column(i)->name().c_str());
+ stream << buffer << '|';
+ }
+ if (format_dump) {
+ continue;
+ }
+ stream << "\n";
+
+ bool hasRow;
+ do {
+ hasRow = false;
+ for (auto scanner : scanners) {
+ if (scanner->HasNext()) {
+ hasRow = true;
+ scanner->PrintNext(stream, COL_WIDTH);
+ stream << '|';
+ }
+ }
+ stream << "\n";
+ } while (hasRow);
+ }
+}
+
+void ParquetFilePrinter::JSONPrint(std::ostream& stream, std::list<int> selected_columns,
+ const char* filename) {
+ const FileMetaData* file_metadata = fileReader->metadata().get();
+ stream << "{\n";
+ stream << " \"FileName\": \"" << filename << "\",\n";
+ stream << " \"Version\": \"" << ParquetVersionToString(file_metadata->version())
+ << "\",\n";
+ stream << " \"CreatedBy\": \"" << file_metadata->created_by() << "\",\n";
+ stream << " \"TotalRows\": \"" << file_metadata->num_rows() << "\",\n";
+ stream << " \"NumberOfRowGroups\": \"" << file_metadata->num_row_groups() << "\",\n";
+ stream << " \"NumberOfRealColumns\": \""
+ << file_metadata->schema()->group_node()->field_count() << "\",\n";
+ stream << " \"NumberOfColumns\": \"" << file_metadata->num_columns() << "\",\n";
+
+ if (selected_columns.size() == 0) {
+ for (int i = 0; i < file_metadata->num_columns(); i++) {
+ selected_columns.push_back(i);
+ }
+ } else {
+ for (auto i : selected_columns) {
+ if (i < 0 || i >= file_metadata->num_columns()) {
+ throw ParquetException("Selected column is out of range");
+ }
+ }
+ }
+
+ stream << " \"Columns\": [\n";
+ int c = 0;
+ for (auto i : selected_columns) {
+ const ColumnDescriptor* descr = file_metadata->schema()->Column(i);
+ stream << " { \"Id\": \"" << i << "\","
+ << " \"Name\": \"" << descr->path()->ToDotString() << "\","
+ << " \"PhysicalType\": \"" << TypeToString(descr->physical_type()) << "\","
+ << " \"ConvertedType\": \"" << ConvertedTypeToString(descr->converted_type())
+ << "\","
+ << " \"LogicalType\": " << (descr->logical_type())->ToJSON() << " }";
+ c++;
+ if (c != static_cast<int>(selected_columns.size())) {
+ stream << ",\n";
+ }
+ }
+
+ stream << "\n ],\n \"RowGroups\": [\n";
+ for (int r = 0; r < file_metadata->num_row_groups(); ++r) {
+ stream << " {\n \"Id\": \"" << r << "\", ";
+
+ auto group_reader = fileReader->RowGroup(r);
+ std::unique_ptr<RowGroupMetaData> group_metadata = file_metadata->RowGroup(r);
+
+ stream << " \"TotalBytes\": \"" << group_metadata->total_byte_size() << "\", ";
+ stream << " \"TotalCompressedBytes\": \"" << group_metadata->total_compressed_size()
+ << "\", ";
+ stream << " \"Rows\": \"" << group_metadata->num_rows() << "\",\n";
+
+ // Print column metadata
+ stream << " \"ColumnChunks\": [\n";
+ int c1 = 0;
+ for (auto i : selected_columns) {
+ auto column_chunk = group_metadata->ColumnChunk(i);
+ std::shared_ptr<Statistics> stats = column_chunk->statistics();
+
+ const ColumnDescriptor* descr = file_metadata->schema()->Column(i);
+ stream << " {\"Id\": \"" << i << "\", \"Values\": \""
+ << column_chunk->num_values() << "\", "
+ << "\"StatsSet\": ";
+ if (column_chunk->is_stats_set()) {
+ stream << "\"True\", \"Stats\": {";
+ std::string min = stats->EncodeMin(), max = stats->EncodeMax();
+ stream << "\"NumNulls\": \"" << stats->null_count() << "\", "
+ << "\"DistinctValues\": \"" << stats->distinct_count() << "\", "
+ << "\"Max\": \"" << FormatStatValue(descr->physical_type(), max) << "\", "
+ << "\"Min\": \"" << FormatStatValue(descr->physical_type(), min)
+ << "\" },";
+ } else {
+ stream << "\"False\",";
+ }
+ stream << "\n \"Compression\": \""
+ << ::arrow::internal::AsciiToUpper(
+ Codec::GetCodecAsString(column_chunk->compression()))
+ << "\", \"Encodings\": \"";
+ for (auto encoding : column_chunk->encodings()) {
+ stream << EncodingToString(encoding) << " ";
+ }
+ stream << "\", "
+ << "\"UncompressedSize\": \"" << column_chunk->total_uncompressed_size()
+ << "\", \"CompressedSize\": \"" << column_chunk->total_compressed_size();
+
+ // end of a ColumnChunk
+ stream << "\" }";
+ c1++;
+ if (c1 != static_cast<int>(selected_columns.size())) {
+ stream << ",\n";
+ }
+ }
+
+ stream << "\n ]\n }";
+ if ((r + 1) != static_cast<int>(file_metadata->num_row_groups())) {
+ stream << ",\n";
+ }
+ }
+ stream << "\n ]\n}\n";
+}
+
+} // namespace parquet
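
A sketch of driving JSONPrint, assuming ParquetFileReader::OpenFile from parquet/file_reader.h; passing an empty column list selects every column, per the fallback loop above:

#include <list>
#include <memory>
#include <sstream>
#include <string>

#include "parquet/file_reader.h"
#include "parquet/printer.h"

std::string DumpJson(const std::string& path) {
  std::unique_ptr<parquet::ParquetFileReader> reader =
      parquet::ParquetFileReader::OpenFile(path);
  parquet::ParquetFilePrinter printer(reader.get());
  std::ostringstream out;
  printer.JSONPrint(out, std::list<int>{}, path.c_str());
  return out.str();
}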
diff --git a/contrib/libs/apache/arrow/cpp/src/parquet/printer.h b/contrib/libs/apache/arrow/cpp/src/parquet/printer.h
index b29b1bd6d7a..6bdf5b456fa 100644
--- a/contrib/libs/apache/arrow/cpp/src/parquet/printer.h
+++ b/contrib/libs/apache/arrow/cpp/src/parquet/printer.h
@@ -1,46 +1,46 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-#pragma once
-
-#include <iosfwd>
-#include <list>
-
-#include "parquet/platform.h"
-
-namespace parquet {
-
-class ParquetFileReader;
-
-class PARQUET_EXPORT ParquetFilePrinter {
- private:
- ParquetFileReader* fileReader;
-
- public:
- explicit ParquetFilePrinter(ParquetFileReader* reader) : fileReader(reader) {}
- ~ParquetFilePrinter() {}
-
- void DebugPrint(std::ostream& stream, std::list<int> selected_columns,
- bool print_values = false, bool format_dump = false,
- bool print_key_value_metadata = false,
- const char* filename = "No Name");
-
- void JSONPrint(std::ostream& stream, std::list<int> selected_columns,
- const char* filename = "No Name");
-};
-
-} // namespace parquet
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+
+#include <iosfwd>
+#include <list>
+
+#include "parquet/platform.h"
+
+namespace parquet {
+
+class ParquetFileReader;
+
+class PARQUET_EXPORT ParquetFilePrinter {
+ private:
+ ParquetFileReader* fileReader;
+
+ public:
+ explicit ParquetFilePrinter(ParquetFileReader* reader) : fileReader(reader) {}
+ ~ParquetFilePrinter() {}
+
+ void DebugPrint(std::ostream& stream, std::list<int> selected_columns,
+ bool print_values = false, bool format_dump = false,
+ bool print_key_value_metadata = false,
+ const char* filename = "No Name");
+
+ void JSONPrint(std::ostream& stream, std::list<int> selected_columns,
+ const char* filename = "No Name");
+};
+
+} // namespace parquet
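
And the debug-printing side of the same class, again assuming ParquetFileReader::OpenFile; the defaults print metadata only, so print_values is set explicitly here:

#include <iostream>
#include <list>

#include "parquet/file_reader.h"
#include "parquet/printer.h"

void DumpFirstTwoColumns(const char* path) {
  auto reader = parquet::ParquetFileReader::OpenFile(path);
  parquet::ParquetFilePrinter printer(reader.get());
  // Metadata, key-value metadata, and values for columns 0 and 1 only.
  printer.DebugPrint(std::cout, std::list<int>{0, 1},
                     /*print_values=*/true, /*format_dump=*/false,
                     /*print_key_value_metadata=*/true, path);
}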
diff --git a/contrib/libs/apache/arrow/cpp/src/parquet/properties.cc b/contrib/libs/apache/arrow/cpp/src/parquet/properties.cc
index 1a28fb81e40..93638dbe28a 100644
--- a/contrib/libs/apache/arrow/cpp/src/parquet/properties.cc
+++ b/contrib/libs/apache/arrow/cpp/src/parquet/properties.cc
@@ -1,64 +1,64 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-#include <sstream>
-#include <utility>
-
-#include "parquet/properties.h"
-
-#include "arrow/io/buffered.h"
-#include "arrow/io/memory.h"
-#include "arrow/util/logging.h"
-
-namespace parquet {
-
-std::shared_ptr<ArrowInputStream> ReaderProperties::GetStream(
- std::shared_ptr<ArrowInputFile> source, int64_t start, int64_t num_bytes) {
- if (buffered_stream_enabled_) {
- // ARROW-6180 / PARQUET-1636: Create an isolated reader that references a
- // segment of the source
- std::shared_ptr<::arrow::io::InputStream> safe_stream =
- ::arrow::io::RandomAccessFile::GetStream(source, start, num_bytes);
- PARQUET_ASSIGN_OR_THROW(
- auto stream, ::arrow::io::BufferedInputStream::Create(buffer_size_, pool_,
- safe_stream, num_bytes));
- return std::move(stream);
- } else {
- PARQUET_ASSIGN_OR_THROW(auto data, source->ReadAt(start, num_bytes));
-
- if (data->size() != num_bytes) {
- std::stringstream ss;
- ss << "Tried reading " << num_bytes << " bytes starting at position " << start
- << " from file but only got " << data->size();
- throw ParquetException(ss.str());
- }
- return std::make_shared<::arrow::io::BufferReader>(data);
- }
-}
-
-ArrowReaderProperties default_arrow_reader_properties() {
- static ArrowReaderProperties default_reader_props;
- return default_reader_props;
-}
-
-std::shared_ptr<ArrowWriterProperties> default_arrow_writer_properties() {
- static std::shared_ptr<ArrowWriterProperties> default_writer_properties =
- ArrowWriterProperties::Builder().build();
- return default_writer_properties;
-}
-
-} // namespace parquet
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include <sstream>
+#include <utility>
+
+#include "parquet/properties.h"
+
+#include "arrow/io/buffered.h"
+#include "arrow/io/memory.h"
+#include "arrow/util/logging.h"
+
+namespace parquet {
+
+std::shared_ptr<ArrowInputStream> ReaderProperties::GetStream(
+ std::shared_ptr<ArrowInputFile> source, int64_t start, int64_t num_bytes) {
+ if (buffered_stream_enabled_) {
+ // ARROW-6180 / PARQUET-1636: Create an isolated reader that references a
+ // segment of the source
+ std::shared_ptr<::arrow::io::InputStream> safe_stream =
+ ::arrow::io::RandomAccessFile::GetStream(source, start, num_bytes);
+ PARQUET_ASSIGN_OR_THROW(
+ auto stream, ::arrow::io::BufferedInputStream::Create(buffer_size_, pool_,
+ safe_stream, num_bytes));
+ return std::move(stream);
+ } else {
+ PARQUET_ASSIGN_OR_THROW(auto data, source->ReadAt(start, num_bytes));
+
+ if (data->size() != num_bytes) {
+ std::stringstream ss;
+ ss << "Tried reading " << num_bytes << " bytes starting at position " << start
+ << " from file but only got " << data->size();
+ throw ParquetException(ss.str());
+ }
+ return std::make_shared<::arrow::io::BufferReader>(data);
+ }
+}
+
+ArrowReaderProperties default_arrow_reader_properties() {
+ static ArrowReaderProperties default_reader_props;
+ return default_reader_props;
+}
+
+std::shared_ptr<ArrowWriterProperties> default_arrow_writer_properties() {
+ static std::shared_ptr<ArrowWriterProperties> default_writer_properties =
+ ArrowWriterProperties::Builder().build();
+ return default_writer_properties;
+}
+
+} // namespace parquet
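
A sketch of the buffered-read path in GetStream above, using an in-memory BufferReader as the ArrowInputFile; the sizes are arbitrary:

#include <memory>

#include "arrow/io/memory.h"
#include "parquet/properties.h"

std::shared_ptr<parquet::ArrowInputStream> ReadSlice(
    std::shared_ptr<arrow::Buffer> file_contents) {
  parquet::ReaderProperties props;   // buffered reads are off by default
  props.enable_buffered_stream();    // cap per-read memory at buffer_size()
  props.set_buffer_size(1 << 16);
  auto source = std::make_shared<arrow::io::BufferReader>(file_contents);
  // Returns a BufferedInputStream over bytes [128, 128 + 4096) of the source.
  return props.GetStream(source, /*start=*/128, /*num_bytes=*/4096);
}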
diff --git a/contrib/libs/apache/arrow/cpp/src/parquet/properties.h b/contrib/libs/apache/arrow/cpp/src/parquet/properties.h
index bc86f98ef7f..d217b8efa52 100644
--- a/contrib/libs/apache/arrow/cpp/src/parquet/properties.h
+++ b/contrib/libs/apache/arrow/cpp/src/parquet/properties.h
@@ -1,813 +1,813 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-#pragma once
-
-#include <memory>
-#include <string>
-#include <unordered_map>
-#include <unordered_set>
-#include <utility>
-
-#include "arrow/io/caching.h"
-#include "arrow/type.h"
-#include "arrow/util/compression.h"
-#include "parquet/encryption/encryption.h"
-#include "parquet/exception.h"
-#include "parquet/parquet_version.h"
-#include "parquet/platform.h"
-#include "parquet/schema.h"
-#include "parquet/type_fwd.h"
-#include "parquet/types.h"
-
-namespace parquet {
-
-/// Determines use of Parquet Format version >= 2.0.0 logical types. For
-/// example, when writing from Arrow data structures, PARQUET_2_0 will enable
-/// use of INT_* and UINT_* converted types as well as nanosecond timestamps
-/// stored physically as INT64. Since some Parquet implementations do not
-/// support the logical types added in the 2.0.0 format version, if you want to
-/// maximize compatibility of your files you may want to use PARQUET_1_0.
-///
-/// Note that the 2.x format version series also introduced new serialized
-/// data page metadata and an on-disk data page layout. To enable this, use
-/// ParquetDataPageVersion.
-struct ParquetVersion;
-
-/// Controls serialization format of data pages. parquet-format v2.0.0
-/// introduced a new data page metadata type DataPageV2 and serialized page
-/// structure (for example, encoded levels are no longer compressed). Prior to
-/// the completion of PARQUET-457 in 2020, this library did not implement
-/// DataPageV2 correctly, so if you use the V2 data page format, you may have
-/// forward compatibility issues (older versions of the library will be unable
-/// to read the files). Note that some Parquet implementations do not implement
-/// DataPageV2 at all.
-enum class ParquetDataPageVersion { V1, V2 };
-
-/// Align the default buffer size to a small multiple of a page size.
-constexpr int64_t kDefaultBufferSize = 4096 * 4;
-
-class PARQUET_EXPORT ReaderProperties {
- public:
- explicit ReaderProperties(MemoryPool* pool = ::arrow::default_memory_pool())
- : pool_(pool) {}
-
- MemoryPool* memory_pool() const { return pool_; }
-
- std::shared_ptr<ArrowInputStream> GetStream(std::shared_ptr<ArrowInputFile> source,
- int64_t start, int64_t num_bytes);
-
- /// Buffered stream reading allows the user to control the memory usage of
- /// parquet readers. This ensures that all `RandomAccessFile::ReadAt` calls
- /// are wrapped in a buffered reader that uses a fixed-size buffer (of size
- /// `buffer_size()`) instead of reading the full extent of the ReadAt call.
- ///
- /// The primary reason for this control knob is resource control, not
- /// performance.
- bool is_buffered_stream_enabled() const { return buffered_stream_enabled_; }
- void enable_buffered_stream() { buffered_stream_enabled_ = true; }
- void disable_buffered_stream() { buffered_stream_enabled_ = false; }
-
- int64_t buffer_size() const { return buffer_size_; }
- void set_buffer_size(int64_t size) { buffer_size_ = size; }
-
- void file_decryption_properties(std::shared_ptr<FileDecryptionProperties> decryption) {
- file_decryption_properties_ = std::move(decryption);
- }
-
- const std::shared_ptr<FileDecryptionProperties>& file_decryption_properties() const {
- return file_decryption_properties_;
- }
-
- private:
- MemoryPool* pool_;
- int64_t buffer_size_ = kDefaultBufferSize;
- bool buffered_stream_enabled_ = false;
- std::shared_ptr<FileDecryptionProperties> file_decryption_properties_;
-};
-
-ReaderProperties PARQUET_EXPORT default_reader_properties();
-
-static constexpr int64_t kDefaultDataPageSize = 1024 * 1024;
-static constexpr bool DEFAULT_IS_DICTIONARY_ENABLED = true;
-static constexpr int64_t DEFAULT_DICTIONARY_PAGE_SIZE_LIMIT = kDefaultDataPageSize;
-static constexpr int64_t DEFAULT_WRITE_BATCH_SIZE = 1024;
-static constexpr int64_t DEFAULT_MAX_ROW_GROUP_LENGTH = 64 * 1024 * 1024;
-static constexpr bool DEFAULT_ARE_STATISTICS_ENABLED = true;
-static constexpr int64_t DEFAULT_MAX_STATISTICS_SIZE = 4096;
-static constexpr Encoding::type DEFAULT_ENCODING = Encoding::PLAIN;
-static const char DEFAULT_CREATED_BY[] = CREATED_BY_VERSION;
-static constexpr Compression::type DEFAULT_COMPRESSION_TYPE = Compression::UNCOMPRESSED;
-
-class PARQUET_EXPORT ColumnProperties {
- public:
- ColumnProperties(Encoding::type encoding = DEFAULT_ENCODING,
- Compression::type codec = DEFAULT_COMPRESSION_TYPE,
- bool dictionary_enabled = DEFAULT_IS_DICTIONARY_ENABLED,
- bool statistics_enabled = DEFAULT_ARE_STATISTICS_ENABLED,
- size_t max_stats_size = DEFAULT_MAX_STATISTICS_SIZE)
- : encoding_(encoding),
- codec_(codec),
- dictionary_enabled_(dictionary_enabled),
- statistics_enabled_(statistics_enabled),
- max_stats_size_(max_stats_size),
- compression_level_(Codec::UseDefaultCompressionLevel()) {}
-
- void set_encoding(Encoding::type encoding) { encoding_ = encoding; }
-
- void set_compression(Compression::type codec) { codec_ = codec; }
-
- void set_dictionary_enabled(bool dictionary_enabled) {
- dictionary_enabled_ = dictionary_enabled;
- }
-
- void set_statistics_enabled(bool statistics_enabled) {
- statistics_enabled_ = statistics_enabled;
- }
-
- void set_max_statistics_size(size_t max_stats_size) {
- max_stats_size_ = max_stats_size;
- }
-
- void set_compression_level(int compression_level) {
- compression_level_ = compression_level;
- }
-
- Encoding::type encoding() const { return encoding_; }
-
- Compression::type compression() const { return codec_; }
-
- bool dictionary_enabled() const { return dictionary_enabled_; }
-
- bool statistics_enabled() const { return statistics_enabled_; }
-
- size_t max_statistics_size() const { return max_stats_size_; }
-
- int compression_level() const { return compression_level_; }
-
- private:
- Encoding::type encoding_;
- Compression::type codec_;
- bool dictionary_enabled_;
- bool statistics_enabled_;
- size_t max_stats_size_;
- int compression_level_;
-};
-
-class PARQUET_EXPORT WriterProperties {
- public:
- class Builder {
- public:
- Builder()
- : pool_(::arrow::default_memory_pool()),
- dictionary_pagesize_limit_(DEFAULT_DICTIONARY_PAGE_SIZE_LIMIT),
- write_batch_size_(DEFAULT_WRITE_BATCH_SIZE),
- max_row_group_length_(DEFAULT_MAX_ROW_GROUP_LENGTH),
- pagesize_(kDefaultDataPageSize),
- version_(ParquetVersion::PARQUET_1_0),
- data_page_version_(ParquetDataPageVersion::V1),
- created_by_(DEFAULT_CREATED_BY) {}
- virtual ~Builder() {}
-
- Builder* memory_pool(MemoryPool* pool) {
- pool_ = pool;
- return this;
- }
-
- Builder* enable_dictionary() {
- default_column_properties_.set_dictionary_enabled(true);
- return this;
- }
-
- Builder* disable_dictionary() {
- default_column_properties_.set_dictionary_enabled(false);
- return this;
- }
-
- Builder* enable_dictionary(const std::string& path) {
- dictionary_enabled_[path] = true;
- return this;
- }
-
- Builder* enable_dictionary(const std::shared_ptr<schema::ColumnPath>& path) {
- return this->enable_dictionary(path->ToDotString());
- }
-
- Builder* disable_dictionary(const std::string& path) {
- dictionary_enabled_[path] = false;
- return this;
- }
-
- Builder* disable_dictionary(const std::shared_ptr<schema::ColumnPath>& path) {
- return this->disable_dictionary(path->ToDotString());
- }
-
- Builder* dictionary_pagesize_limit(int64_t dictionary_psize_limit) {
- dictionary_pagesize_limit_ = dictionary_psize_limit;
- return this;
- }
-
- Builder* write_batch_size(int64_t write_batch_size) {
- write_batch_size_ = write_batch_size;
- return this;
- }
-
- Builder* max_row_group_length(int64_t max_row_group_length) {
- max_row_group_length_ = max_row_group_length;
- return this;
- }
-
- Builder* data_pagesize(int64_t pg_size) {
- pagesize_ = pg_size;
- return this;
- }
-
- Builder* data_page_version(ParquetDataPageVersion data_page_version) {
- data_page_version_ = data_page_version;
- return this;
- }
-
- Builder* version(ParquetVersion::type version) {
- version_ = version;
- return this;
- }
-
- Builder* created_by(const std::string& created_by) {
- created_by_ = created_by;
- return this;
- }
-
- /**
- * Define the encoding that is used when we don't use dictionary encoding.
- *
- * This applies either if dictionary encoding is disabled or if we fall back
- * because the dictionary grew too large.
- */
- Builder* encoding(Encoding::type encoding_type) {
- if (encoding_type == Encoding::PLAIN_DICTIONARY ||
- encoding_type == Encoding::RLE_DICTIONARY) {
- throw ParquetException("Can't use dictionary encoding as fallback encoding");
- }
-
- default_column_properties_.set_encoding(encoding_type);
- return this;
- }
-
- /**
- * Define the encoding that is used when we don't use dictionary encoding.
- *
- * This applies either if dictionary encoding is disabled or if we fall back
- * because the dictionary grew too large.
- */
- Builder* encoding(const std::string& path, Encoding::type encoding_type) {
- if (encoding_type == Encoding::PLAIN_DICTIONARY ||
- encoding_type == Encoding::RLE_DICTIONARY) {
- throw ParquetException("Can't use dictionary encoding as fallback encoding");
- }
-
- encodings_[path] = encoding_type;
- return this;
- }
-
- /**
- * Define the encoding that is used when we don't use dictionary encoding.
- *
- * This applies either if dictionary encoding is disabled or if we fall back
- * because the dictionary grew too large.
- */
- Builder* encoding(const std::shared_ptr<schema::ColumnPath>& path,
- Encoding::type encoding_type) {
- return this->encoding(path->ToDotString(), encoding_type);
- }
-
- Builder* compression(Compression::type codec) {
- default_column_properties_.set_compression(codec);
- return this;
- }
-
- Builder* max_statistics_size(size_t max_stats_sz) {
- default_column_properties_.set_max_statistics_size(max_stats_sz);
- return this;
- }
-
- Builder* compression(const std::string& path, Compression::type codec) {
- codecs_[path] = codec;
- return this;
- }
-
- Builder* compression(const std::shared_ptr<schema::ColumnPath>& path,
- Compression::type codec) {
- return this->compression(path->ToDotString(), codec);
- }
-
- /// \brief Specify the default compression level for the compressor in
- /// every column. If a column does not have an explicitly specified
- /// compression level, this default is used.
- ///
- /// The provided compression level is compressor specific. Users have to
- /// familiarize themselves with the available levels for the selected
- /// compressor. If the compressor does not allow selecting different
- /// compression levels, calling this function has no effect.
- /// Parquet and Arrow do not validate the passed compression level. If no
- /// level is selected by the user, or if the special
- /// std::numeric_limits<int>::min() value is passed, then Arrow selects the
- /// compression level.
- Builder* compression_level(int compression_level) {
- default_column_properties_.set_compression_level(compression_level);
- return this;
- }
-
- /// \brief Specify a compression level for the compressor for the column
- /// described by path.
- ///
- /// The provided compression level is compressor specific. Users have to
- /// familiarize themselves with the available levels for the selected
- /// compressor. If the compressor does not allow selecting different
- /// compression levels, calling this function has no effect.
- /// Parquet and Arrow do not validate the passed compression level. If no
- /// level is selected by the user, or if the special
- /// std::numeric_limits<int>::min() value is passed, then Arrow selects the
- /// compression level.
- Builder* compression_level(const std::string& path, int compression_level) {
- codecs_compression_level_[path] = compression_level;
- return this;
- }
-
- /// \brief Specify a compression level for the compressor for the column
- /// described by path.
- ///
- /// The provided compression level is compressor specific. Users have to
- /// familiarize themselves with the available levels for the selected
- /// compressor. If the compressor does not allow selecting different
- /// compression levels, calling this function has no effect.
- /// Parquet and Arrow do not validate the passed compression level. If no
- /// level is selected by the user, or if the special
- /// std::numeric_limits<int>::min() value is passed, then Arrow selects the
- /// compression level.
- Builder* compression_level(const std::shared_ptr<schema::ColumnPath>& path,
- int compression_level) {
- return this->compression_level(path->ToDotString(), compression_level);
- }
-
- Builder* encryption(
- std::shared_ptr<FileEncryptionProperties> file_encryption_properties) {
- file_encryption_properties_ = std::move(file_encryption_properties);
- return this;
- }
-
- Builder* enable_statistics() {
- default_column_properties_.set_statistics_enabled(true);
- return this;
- }
-
- Builder* disable_statistics() {
- default_column_properties_.set_statistics_enabled(false);
- return this;
- }
-
- Builder* enable_statistics(const std::string& path) {
- statistics_enabled_[path] = true;
- return this;
- }
-
- Builder* enable_statistics(const std::shared_ptr<schema::ColumnPath>& path) {
- return this->enable_statistics(path->ToDotString());
- }
-
- Builder* disable_statistics(const std::string& path) {
- statistics_enabled_[path] = false;
- return this;
- }
-
- Builder* disable_statistics(const std::shared_ptr<schema::ColumnPath>& path) {
- return this->disable_statistics(path->ToDotString());
- }
-
- std::shared_ptr<WriterProperties> build() {
- std::unordered_map<std::string, ColumnProperties> column_properties;
- auto get = [&](const std::string& key) -> ColumnProperties& {
- auto it = column_properties.find(key);
- if (it == column_properties.end())
- return column_properties[key] = default_column_properties_;
- else
- return it->second;
- };
-
- for (const auto& item : encodings_) get(item.first).set_encoding(item.second);
- for (const auto& item : codecs_) get(item.first).set_compression(item.second);
- for (const auto& item : codecs_compression_level_)
- get(item.first).set_compression_level(item.second);
- for (const auto& item : dictionary_enabled_)
- get(item.first).set_dictionary_enabled(item.second);
- for (const auto& item : statistics_enabled_)
- get(item.first).set_statistics_enabled(item.second);
-
- return std::shared_ptr<WriterProperties>(new WriterProperties(
- pool_, dictionary_pagesize_limit_, write_batch_size_, max_row_group_length_,
- pagesize_, version_, created_by_, std::move(file_encryption_properties_),
- default_column_properties_, column_properties, data_page_version_));
- }
-
- private:
- MemoryPool* pool_;
- int64_t dictionary_pagesize_limit_;
- int64_t write_batch_size_;
- int64_t max_row_group_length_;
- int64_t pagesize_;
- ParquetVersion::type version_;
- ParquetDataPageVersion data_page_version_;
- std::string created_by_;
-
- std::shared_ptr<FileEncryptionProperties> file_encryption_properties_;
-
- // Settings used for each column unless overridden in any of the maps below
- ColumnProperties default_column_properties_;
- std::unordered_map<std::string, Encoding::type> encodings_;
- std::unordered_map<std::string, Compression::type> codecs_;
- std::unordered_map<std::string, int32_t> codecs_compression_level_;
- std::unordered_map<std::string, bool> dictionary_enabled_;
- std::unordered_map<std::string, bool> statistics_enabled_;
- };
-
- inline MemoryPool* memory_pool() const { return pool_; }
-
- inline int64_t dictionary_pagesize_limit() const { return dictionary_pagesize_limit_; }
-
- inline int64_t write_batch_size() const { return write_batch_size_; }
-
- inline int64_t max_row_group_length() const { return max_row_group_length_; }
-
- inline int64_t data_pagesize() const { return pagesize_; }
-
- inline ParquetDataPageVersion data_page_version() const {
- return parquet_data_page_version_;
- }
-
- inline ParquetVersion::type version() const { return parquet_version_; }
-
- inline std::string created_by() const { return parquet_created_by_; }
-
- inline Encoding::type dictionary_index_encoding() const {
- if (parquet_version_ == ParquetVersion::PARQUET_1_0) {
- return Encoding::PLAIN_DICTIONARY;
- } else {
- return Encoding::RLE_DICTIONARY;
- }
- }
-
- inline Encoding::type dictionary_page_encoding() const {
- if (parquet_version_ == ParquetVersion::PARQUET_1_0) {
- return Encoding::PLAIN_DICTIONARY;
- } else {
- return Encoding::PLAIN;
- }
- }
-
- const ColumnProperties& column_properties(
- const std::shared_ptr<schema::ColumnPath>& path) const {
- auto it = column_properties_.find(path->ToDotString());
- if (it != column_properties_.end()) return it->second;
- return default_column_properties_;
- }
-
- Encoding::type encoding(const std::shared_ptr<schema::ColumnPath>& path) const {
- return column_properties(path).encoding();
- }
-
- Compression::type compression(const std::shared_ptr<schema::ColumnPath>& path) const {
- return column_properties(path).compression();
- }
-
- int compression_level(const std::shared_ptr<schema::ColumnPath>& path) const {
- return column_properties(path).compression_level();
- }
-
- bool dictionary_enabled(const std::shared_ptr<schema::ColumnPath>& path) const {
- return column_properties(path).dictionary_enabled();
- }
-
- bool statistics_enabled(const std::shared_ptr<schema::ColumnPath>& path) const {
- return column_properties(path).statistics_enabled();
- }
-
- size_t max_statistics_size(const std::shared_ptr<schema::ColumnPath>& path) const {
- return column_properties(path).max_statistics_size();
- }
-
- inline FileEncryptionProperties* file_encryption_properties() const {
- return file_encryption_properties_.get();
- }
-
- std::shared_ptr<ColumnEncryptionProperties> column_encryption_properties(
- const std::string& path) const {
- if (file_encryption_properties_) {
- return file_encryption_properties_->column_encryption_properties(path);
- } else {
- return NULLPTR;
- }
- }
-
- private:
- explicit WriterProperties(
- MemoryPool* pool, int64_t dictionary_pagesize_limit, int64_t write_batch_size,
- int64_t max_row_group_length, int64_t pagesize, ParquetVersion::type version,
- const std::string& created_by,
- std::shared_ptr<FileEncryptionProperties> file_encryption_properties,
- const ColumnProperties& default_column_properties,
- const std::unordered_map<std::string, ColumnProperties>& column_properties,
- ParquetDataPageVersion data_page_version)
- : pool_(pool),
- dictionary_pagesize_limit_(dictionary_pagesize_limit),
- write_batch_size_(write_batch_size),
- max_row_group_length_(max_row_group_length),
- pagesize_(pagesize),
- parquet_data_page_version_(data_page_version),
- parquet_version_(version),
- parquet_created_by_(created_by),
- file_encryption_properties_(file_encryption_properties),
- default_column_properties_(default_column_properties),
- column_properties_(column_properties) {}
-
- MemoryPool* pool_;
- int64_t dictionary_pagesize_limit_;
- int64_t write_batch_size_;
- int64_t max_row_group_length_;
- int64_t pagesize_;
- ParquetDataPageVersion parquet_data_page_version_;
- ParquetVersion::type parquet_version_;
- std::string parquet_created_by_;
-
- std::shared_ptr<FileEncryptionProperties> file_encryption_properties_;
-
- ColumnProperties default_column_properties_;
- std::unordered_map<std::string, ColumnProperties> column_properties_;
-};
-
-PARQUET_EXPORT const std::shared_ptr<WriterProperties>& default_writer_properties();
-
-// ----------------------------------------------------------------------
-// Properties specific to Apache Arrow columnar read and write
-
-static constexpr bool kArrowDefaultUseThreads = false;
-
-// Default number of rows to read when using ::arrow::RecordBatchReader
-static constexpr int64_t kArrowDefaultBatchSize = 64 * 1024;
-
-/// EXPERIMENTAL: Properties for configuring FileReader behavior.
-class PARQUET_EXPORT ArrowReaderProperties {
- public:
- explicit ArrowReaderProperties(bool use_threads = kArrowDefaultUseThreads)
- : use_threads_(use_threads),
- read_dict_indices_(),
- batch_size_(kArrowDefaultBatchSize),
- pre_buffer_(false),
- cache_options_(::arrow::io::CacheOptions::Defaults()),
- coerce_int96_timestamp_unit_(::arrow::TimeUnit::NANO) {}
-
- void set_use_threads(bool use_threads) { use_threads_ = use_threads; }
-
- bool use_threads() const { return use_threads_; }
-
- void set_read_dictionary(int column_index, bool read_dict) {
- if (read_dict) {
- read_dict_indices_.insert(column_index);
- } else {
- read_dict_indices_.erase(column_index);
- }
- }
- bool read_dictionary(int column_index) const {
- return read_dict_indices_.find(column_index) != read_dict_indices_.end();
- }
-
- void set_batch_size(int64_t batch_size) { batch_size_ = batch_size; }
-
- int64_t batch_size() const { return batch_size_; }
-
- /// Enable read coalescing.
- ///
- /// When enabled, the Arrow reader will pre-buffer necessary regions
- /// of the file in-memory. This is intended to improve performance on
- /// high-latency filesystems (e.g. Amazon S3).
- void set_pre_buffer(bool pre_buffer) { pre_buffer_ = pre_buffer; }
-
- bool pre_buffer() const { return pre_buffer_; }
-
- /// Set options for read coalescing. This can be used to tune the
- /// implementation for characteristics of different filesystems.
- void set_cache_options(::arrow::io::CacheOptions options) { cache_options_ = options; }
-
- const ::arrow::io::CacheOptions& cache_options() const { return cache_options_; }
-
- /// Set execution context for read coalescing.
- void set_io_context(const ::arrow::io::IOContext& ctx) { io_context_ = ctx; }
-
- const ::arrow::io::IOContext& io_context() const { return io_context_; }
-
- /// Set timestamp unit to use for deprecated INT96-encoded timestamps
- /// (default is NANO).
- void set_coerce_int96_timestamp_unit(::arrow::TimeUnit::type unit) {
- coerce_int96_timestamp_unit_ = unit;
- }
-
- ::arrow::TimeUnit::type coerce_int96_timestamp_unit() const {
- return coerce_int96_timestamp_unit_;
- }
-
- private:
- bool use_threads_;
- std::unordered_set<int> read_dict_indices_;
- int64_t batch_size_;
- bool pre_buffer_;
- ::arrow::io::IOContext io_context_;
- ::arrow::io::CacheOptions cache_options_;
- ::arrow::TimeUnit::type coerce_int96_timestamp_unit_;
-};
-
-/// EXPERIMENTAL: Constructs the default ArrowReaderProperties
-PARQUET_EXPORT
-ArrowReaderProperties default_arrow_reader_properties();
-
-class PARQUET_EXPORT ArrowWriterProperties {
- public:
- enum EngineVersion {
- V1, // Supports only nested lists.
- V2 // Full support for all nesting combinations
- };
- class Builder {
- public:
- Builder()
- : write_timestamps_as_int96_(false),
- coerce_timestamps_enabled_(false),
- coerce_timestamps_unit_(::arrow::TimeUnit::SECOND),
- truncated_timestamps_allowed_(false),
- store_schema_(false),
- // TODO: At some point we should flip this.
- compliant_nested_types_(false),
- engine_version_(V2) {}
- virtual ~Builder() = default;
-
- Builder* disable_deprecated_int96_timestamps() {
- write_timestamps_as_int96_ = false;
- return this;
- }
-
- Builder* enable_deprecated_int96_timestamps() {
- write_timestamps_as_int96_ = true;
- return this;
- }
-
- Builder* coerce_timestamps(::arrow::TimeUnit::type unit) {
- coerce_timestamps_enabled_ = true;
- coerce_timestamps_unit_ = unit;
- return this;
- }
-
- Builder* allow_truncated_timestamps() {
- truncated_timestamps_allowed_ = true;
- return this;
- }
-
- Builder* disallow_truncated_timestamps() {
- truncated_timestamps_allowed_ = false;
- return this;
- }
-
- /// \brief EXPERIMENTAL: Write binary serialized Arrow schema to the file,
- /// to enable certain read options (like "read_dictionary") to be set
- /// automatically
- Builder* store_schema() {
- store_schema_ = true;
- return this;
- }
-
- Builder* enable_compliant_nested_types() {
- compliant_nested_types_ = true;
- return this;
- }
-
- Builder* disable_compliant_nested_types() {
- compliant_nested_types_ = false;
- return this;
- }
-
- Builder* set_engine_version(EngineVersion version) {
- engine_version_ = version;
- return this;
- }
-
- std::shared_ptr<ArrowWriterProperties> build() {
- return std::shared_ptr<ArrowWriterProperties>(new ArrowWriterProperties(
- write_timestamps_as_int96_, coerce_timestamps_enabled_, coerce_timestamps_unit_,
- truncated_timestamps_allowed_, store_schema_, compliant_nested_types_,
- engine_version_));
- }
-
- private:
- bool write_timestamps_as_int96_;
-
- bool coerce_timestamps_enabled_;
- ::arrow::TimeUnit::type coerce_timestamps_unit_;
- bool truncated_timestamps_allowed_;
-
- bool store_schema_;
- bool compliant_nested_types_;
- EngineVersion engine_version_;
- };
-
- bool support_deprecated_int96_timestamps() const { return write_timestamps_as_int96_; }
-
- bool coerce_timestamps_enabled() const { return coerce_timestamps_enabled_; }
- ::arrow::TimeUnit::type coerce_timestamps_unit() const {
- return coerce_timestamps_unit_;
- }
-
- bool truncated_timestamps_allowed() const { return truncated_timestamps_allowed_; }
-
- bool store_schema() const { return store_schema_; }
-
- /// \brief Enable nested type naming according to the parquet specification.
- ///
- /// Older versions of Arrow wrote out field names for nested lists based on
- /// the name of the field. According to the Parquet specification, they
- /// should always be "element".
- bool compliant_nested_types() const { return compliant_nested_types_; }
-
- /// \brief The underlying engine version to use when writing Arrow data.
- ///
- /// V2 is currently the latest; V1 is considered deprecated but left in
- /// place in case bugs are detected in V2.
- EngineVersion engine_version() const { return engine_version_; }
-
- private:
- explicit ArrowWriterProperties(bool write_nanos_as_int96,
- bool coerce_timestamps_enabled,
- ::arrow::TimeUnit::type coerce_timestamps_unit,
- bool truncated_timestamps_allowed, bool store_schema,
- bool compliant_nested_types,
- EngineVersion engine_version)
- : write_timestamps_as_int96_(write_nanos_as_int96),
- coerce_timestamps_enabled_(coerce_timestamps_enabled),
- coerce_timestamps_unit_(coerce_timestamps_unit),
- truncated_timestamps_allowed_(truncated_timestamps_allowed),
- store_schema_(store_schema),
- compliant_nested_types_(compliant_nested_types),
- engine_version_(engine_version) {}
-
- const bool write_timestamps_as_int96_;
- const bool coerce_timestamps_enabled_;
- const ::arrow::TimeUnit::type coerce_timestamps_unit_;
- const bool truncated_timestamps_allowed_;
- const bool store_schema_;
- const bool compliant_nested_types_;
- const EngineVersion engine_version_;
-};
-
-/// \brief State object used for writing Arrow data directly to a Parquet
-/// column chunk. This API is possibly not stable.
-struct ArrowWriteContext {
- ArrowWriteContext(MemoryPool* memory_pool, ArrowWriterProperties* properties)
- : memory_pool(memory_pool),
- properties(properties),
- data_buffer(AllocateBuffer(memory_pool)),
- def_levels_buffer(AllocateBuffer(memory_pool)) {}
-
- template <typename T>
- ::arrow::Status GetScratchData(const int64_t num_values, T** out) {
- ARROW_RETURN_NOT_OK(this->data_buffer->Resize(num_values * sizeof(T), false));
- *out = reinterpret_cast<T*>(this->data_buffer->mutable_data());
- return ::arrow::Status::OK();
- }
-
- MemoryPool* memory_pool;
- const ArrowWriterProperties* properties;
-
- // Buffer used for storing the data of an array converted to the physical type
- // as expected by parquet-cpp.
- std::shared_ptr<ResizableBuffer> data_buffer;
-
- // We use shared ownership of this buffer.
- std::shared_ptr<ResizableBuffer> def_levels_buffer;
-};
-
-PARQUET_EXPORT
-std::shared_ptr<ArrowWriterProperties> default_arrow_writer_properties();
-
-} // namespace parquet
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+
+#include <memory>
+#include <string>
+#include <unordered_map>
+#include <unordered_set>
+#include <utility>
+
+#include "arrow/io/caching.h"
+#include "arrow/type.h"
+#include "arrow/util/compression.h"
+#include "parquet/encryption/encryption.h"
+#include "parquet/exception.h"
+#include "parquet/parquet_version.h"
+#include "parquet/platform.h"
+#include "parquet/schema.h"
+#include "parquet/type_fwd.h"
+#include "parquet/types.h"
+
+namespace parquet {
+
+/// Determines use of Parquet Format version >= 2.0.0 logical types. For
+/// example, when writing from Arrow data structures, PARQUET_2_0 will enable
+/// use of INT_* and UINT_* converted types as well as nanosecond timestamps
+/// stored physically as INT64. Since some Parquet implementations do not
+/// support the logical types added in the 2.0.0 format version, if you want to
+/// maximize compatibility of your files you may want to use PARQUET_1_0.
+///
+/// Note that the 2.x format version series also introduced new serialized
+/// data page metadata and an on-disk data page layout. To enable this, use
+/// ParquetDataPageVersion.
+struct ParquetVersion;
+
+/// Controls serialization format of data pages. parquet-format v2.0.0
+/// introduced a new data page metadata type DataPageV2 and serialized page
+/// structure (for example, encoded levels are no longer compressed). Prior to
+/// the completion of PARQUET-457 in 2020, this library did not implement
+/// DataPageV2 correctly, so if you use the V2 data page format, you may have
+/// forward compatibility issues (older versions of the library will be unable
+/// to read the files). Note that some Parquet implementations do not implement
+/// DataPageV2 at all.
+enum class ParquetDataPageVersion { V1, V2 };
+
+/// Align the default buffer size to a small multiple of a page size.
+constexpr int64_t kDefaultBufferSize = 4096 * 4;
+
+class PARQUET_EXPORT ReaderProperties {
+ public:
+ explicit ReaderProperties(MemoryPool* pool = ::arrow::default_memory_pool())
+ : pool_(pool) {}
+
+ MemoryPool* memory_pool() const { return pool_; }
+
+ std::shared_ptr<ArrowInputStream> GetStream(std::shared_ptr<ArrowInputFile> source,
+ int64_t start, int64_t num_bytes);
+
+ /// Buffered stream reading allows the user to control the memory usage of
+ /// parquet readers. This ensures that all `RandomAccessFile::ReadAt` calls
+ /// are wrapped in a buffered reader that uses a fixed-size buffer (of size
+ /// `buffer_size()`) instead of reading the full extent of the ReadAt call.
+ ///
+ /// The primary reason for this control knob is resource control, not
+ /// performance.
+ bool is_buffered_stream_enabled() const { return buffered_stream_enabled_; }
+ void enable_buffered_stream() { buffered_stream_enabled_ = true; }
+ void disable_buffered_stream() { buffered_stream_enabled_ = false; }
+
+ int64_t buffer_size() const { return buffer_size_; }
+ void set_buffer_size(int64_t size) { buffer_size_ = size; }
+
+ void file_decryption_properties(std::shared_ptr<FileDecryptionProperties> decryption) {
+ file_decryption_properties_ = std::move(decryption);
+ }
+
+ const std::shared_ptr<FileDecryptionProperties>& file_decryption_properties() const {
+ return file_decryption_properties_;
+ }
+
+ private:
+ MemoryPool* pool_;
+ int64_t buffer_size_ = kDefaultBufferSize;
+ bool buffered_stream_enabled_ = false;
+ std::shared_ptr<FileDecryptionProperties> file_decryption_properties_;
+};
+
+ReaderProperties PARQUET_EXPORT default_reader_properties();
+
+static constexpr int64_t kDefaultDataPageSize = 1024 * 1024;
+static constexpr bool DEFAULT_IS_DICTIONARY_ENABLED = true;
+static constexpr int64_t DEFAULT_DICTIONARY_PAGE_SIZE_LIMIT = kDefaultDataPageSize;
+static constexpr int64_t DEFAULT_WRITE_BATCH_SIZE = 1024;
+static constexpr int64_t DEFAULT_MAX_ROW_GROUP_LENGTH = 64 * 1024 * 1024;
+static constexpr bool DEFAULT_ARE_STATISTICS_ENABLED = true;
+static constexpr int64_t DEFAULT_MAX_STATISTICS_SIZE = 4096;
+static constexpr Encoding::type DEFAULT_ENCODING = Encoding::PLAIN;
+static const char DEFAULT_CREATED_BY[] = CREATED_BY_VERSION;
+static constexpr Compression::type DEFAULT_COMPRESSION_TYPE = Compression::UNCOMPRESSED;
+
+class PARQUET_EXPORT ColumnProperties {
+ public:
+ ColumnProperties(Encoding::type encoding = DEFAULT_ENCODING,
+ Compression::type codec = DEFAULT_COMPRESSION_TYPE,
+ bool dictionary_enabled = DEFAULT_IS_DICTIONARY_ENABLED,
+ bool statistics_enabled = DEFAULT_ARE_STATISTICS_ENABLED,
+ size_t max_stats_size = DEFAULT_MAX_STATISTICS_SIZE)
+ : encoding_(encoding),
+ codec_(codec),
+ dictionary_enabled_(dictionary_enabled),
+ statistics_enabled_(statistics_enabled),
+ max_stats_size_(max_stats_size),
+ compression_level_(Codec::UseDefaultCompressionLevel()) {}
+
+ void set_encoding(Encoding::type encoding) { encoding_ = encoding; }
+
+ void set_compression(Compression::type codec) { codec_ = codec; }
+
+ void set_dictionary_enabled(bool dictionary_enabled) {
+ dictionary_enabled_ = dictionary_enabled;
+ }
+
+ void set_statistics_enabled(bool statistics_enabled) {
+ statistics_enabled_ = statistics_enabled;
+ }
+
+ void set_max_statistics_size(size_t max_stats_size) {
+ max_stats_size_ = max_stats_size;
+ }
+
+ void set_compression_level(int compression_level) {
+ compression_level_ = compression_level;
+ }
+
+ Encoding::type encoding() const { return encoding_; }
+
+ Compression::type compression() const { return codec_; }
+
+ bool dictionary_enabled() const { return dictionary_enabled_; }
+
+ bool statistics_enabled() const { return statistics_enabled_; }
+
+ size_t max_statistics_size() const { return max_stats_size_; }
+
+ int compression_level() const { return compression_level_; }
+
+ private:
+ Encoding::type encoding_;
+ Compression::type codec_;
+ bool dictionary_enabled_;
+ bool statistics_enabled_;
+ size_t max_stats_size_;
+ int compression_level_;
+};
+
+class PARQUET_EXPORT WriterProperties {
+ public:
+ class Builder {
+ public:
+ Builder()
+ : pool_(::arrow::default_memory_pool()),
+ dictionary_pagesize_limit_(DEFAULT_DICTIONARY_PAGE_SIZE_LIMIT),
+ write_batch_size_(DEFAULT_WRITE_BATCH_SIZE),
+ max_row_group_length_(DEFAULT_MAX_ROW_GROUP_LENGTH),
+ pagesize_(kDefaultDataPageSize),
+ version_(ParquetVersion::PARQUET_1_0),
+ data_page_version_(ParquetDataPageVersion::V1),
+ created_by_(DEFAULT_CREATED_BY) {}
+ virtual ~Builder() {}
+
+ Builder* memory_pool(MemoryPool* pool) {
+ pool_ = pool;
+ return this;
+ }
+
+ Builder* enable_dictionary() {
+ default_column_properties_.set_dictionary_enabled(true);
+ return this;
+ }
+
+ Builder* disable_dictionary() {
+ default_column_properties_.set_dictionary_enabled(false);
+ return this;
+ }
+
+ Builder* enable_dictionary(const std::string& path) {
+ dictionary_enabled_[path] = true;
+ return this;
+ }
+
+ Builder* enable_dictionary(const std::shared_ptr<schema::ColumnPath>& path) {
+ return this->enable_dictionary(path->ToDotString());
+ }
+
+ Builder* disable_dictionary(const std::string& path) {
+ dictionary_enabled_[path] = false;
+ return this;
+ }
+
+ Builder* disable_dictionary(const std::shared_ptr<schema::ColumnPath>& path) {
+ return this->disable_dictionary(path->ToDotString());
+ }
+
+ Builder* dictionary_pagesize_limit(int64_t dictionary_psize_limit) {
+ dictionary_pagesize_limit_ = dictionary_psize_limit;
+ return this;
+ }
+
+ Builder* write_batch_size(int64_t write_batch_size) {
+ write_batch_size_ = write_batch_size;
+ return this;
+ }
+
+ Builder* max_row_group_length(int64_t max_row_group_length) {
+ max_row_group_length_ = max_row_group_length;
+ return this;
+ }
+
+ Builder* data_pagesize(int64_t pg_size) {
+ pagesize_ = pg_size;
+ return this;
+ }
+
+ Builder* data_page_version(ParquetDataPageVersion data_page_version) {
+ data_page_version_ = data_page_version;
+ return this;
+ }
+
+ Builder* version(ParquetVersion::type version) {
+ version_ = version;
+ return this;
+ }
+
+ Builder* created_by(const std::string& created_by) {
+ created_by_ = created_by;
+ return this;
+ }
+
+ /**
+ * Define the encoding that is used when we don't use dictionary encoding.
+ *
+ * This applies either if dictionary encoding is disabled or if we fall back
+ * because the dictionary grew too large.
+ */
+ Builder* encoding(Encoding::type encoding_type) {
+ if (encoding_type == Encoding::PLAIN_DICTIONARY ||
+ encoding_type == Encoding::RLE_DICTIONARY) {
+ throw ParquetException("Can't use dictionary encoding as fallback encoding");
+ }
+
+ default_column_properties_.set_encoding(encoding_type);
+ return this;
+ }
+
+ /**
+ * Define the encoding that is used when we don't use dictionary encoding.
+ *
+ * This applies either if dictionary encoding is disabled or if we fall back
+ * because the dictionary grew too large.
+ */
+ Builder* encoding(const std::string& path, Encoding::type encoding_type) {
+ if (encoding_type == Encoding::PLAIN_DICTIONARY ||
+ encoding_type == Encoding::RLE_DICTIONARY) {
+ throw ParquetException("Can't use dictionary encoding as fallback encoding");
+ }
+
+ encodings_[path] = encoding_type;
+ return this;
+ }
+
+ /**
+ * Define the encoding that is used when we don't use dictionary encoding.
+ *
+ * This applies either if dictionary encoding is disabled or if we fall back
+ * because the dictionary grew too large.
+ */
+ Builder* encoding(const std::shared_ptr<schema::ColumnPath>& path,
+ Encoding::type encoding_type) {
+ return this->encoding(path->ToDotString(), encoding_type);
+ }
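+
+ // Example (illustrative sketch; "a.ints" is a hypothetical path to an
+ // integer column): pick a non-dictionary fallback encoding per column.
+ //
+ //   builder.encoding("a.ints", Encoding::DELTA_BINARY_PACKED);
+ //
+ // PLAIN_DICTIONARY and RLE_DICTIONARY are rejected here, as checked above.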
+
+ Builder* compression(Compression::type codec) {
+ default_column_properties_.set_compression(codec);
+ return this;
+ }
+
+ Builder* max_statistics_size(size_t max_stats_sz) {
+ default_column_properties_.set_max_statistics_size(max_stats_sz);
+ return this;
+ }
+
+ Builder* compression(const std::string& path, Compression::type codec) {
+ codecs_[path] = codec;
+ return this;
+ }
+
+ Builder* compression(const std::shared_ptr<schema::ColumnPath>& path,
+ Compression::type codec) {
+ return this->compression(path->ToDotString(), codec);
+ }
+
+ /// \brief Specify the default compression level for the compressor in
+ /// every column. If a column does not have an explicitly specified
+ /// compression level, this default is used.
+ ///
+ /// The provided compression level is compressor specific. Users have to
+ /// familiarize themselves with the available levels for the selected
+ /// compressor. If the compressor does not allow selecting different
+ /// compression levels, calling this function has no effect.
+ /// Parquet and Arrow do not validate the passed compression level. If no
+ /// level is selected by the user, or if the special
+ /// std::numeric_limits<int>::min() value is passed, then Arrow selects the
+ /// compression level.
+ Builder* compression_level(int compression_level) {
+ default_column_properties_.set_compression_level(compression_level);
+ return this;
+ }
+
+ /// \brief Specify a compression level for the compressor for the column
+ /// described by path.
+ ///
+ /// The provided compression level is compressor specific. Users have to
+ /// familiarize themselves with the available levels for the selected
+ /// compressor. If the compressor does not allow selecting different
+ /// compression levels, calling this function has no effect.
+ /// Parquet and Arrow do not validate the passed compression level. If no
+ /// level is selected by the user, or if the special
+ /// std::numeric_limits<int>::min() value is passed, then Arrow selects the
+ /// compression level.
+ Builder* compression_level(const std::string& path, int compression_level) {
+ codecs_compression_level_[path] = compression_level;
+ return this;
+ }
+
+ /// \brief Specify a compression level for the compressor for the column
+ /// described by path.
+ ///
+ /// The provided compression level is compressor specific. Users have to
+ /// familiarize themselves with the available levels for the selected
+ /// compressor. If the compressor does not allow selecting different
+ /// compression levels, calling this function has no effect.
+ /// Parquet and Arrow do not validate the passed compression level. If no
+ /// level is selected by the user, or if the special
+ /// std::numeric_limits<int>::min() value is passed, then Arrow selects the
+ /// compression level.
+ Builder* compression_level(const std::shared_ptr<schema::ColumnPath>& path,
+ int compression_level) {
+ return this->compression_level(path->ToDotString(), compression_level);
+ }
+
+ Builder* encryption(
+ std::shared_ptr<FileEncryptionProperties> file_encryption_properties) {
+ file_encryption_properties_ = std::move(file_encryption_properties);
+ return this;
+ }
+
+ Builder* enable_statistics() {
+ default_column_properties_.set_statistics_enabled(true);
+ return this;
+ }
+
+ Builder* disable_statistics() {
+ default_column_properties_.set_statistics_enabled(false);
+ return this;
+ }
+
+ Builder* enable_statistics(const std::string& path) {
+ statistics_enabled_[path] = true;
+ return this;
+ }
+
+ Builder* enable_statistics(const std::shared_ptr<schema::ColumnPath>& path) {
+ return this->enable_statistics(path->ToDotString());
+ }
+
+ Builder* disable_statistics(const std::string& path) {
+ statistics_enabled_[path] = false;
+ return this;
+ }
+
+ Builder* disable_statistics(const std::shared_ptr<schema::ColumnPath>& path) {
+ return this->disable_statistics(path->ToDotString());
+ }
+
+ std::shared_ptr<WriterProperties> build() {
+ std::unordered_map<std::string, ColumnProperties> column_properties;
+ auto get = [&](const std::string& key) -> ColumnProperties& {
+ auto it = column_properties.find(key);
+ if (it == column_properties.end())
+ return column_properties[key] = default_column_properties_;
+ else
+ return it->second;
+ };
+
+ for (const auto& item : encodings_) get(item.first).set_encoding(item.second);
+ for (const auto& item : codecs_) get(item.first).set_compression(item.second);
+ for (const auto& item : codecs_compression_level_)
+ get(item.first).set_compression_level(item.second);
+ for (const auto& item : dictionary_enabled_)
+ get(item.first).set_dictionary_enabled(item.second);
+ for (const auto& item : statistics_enabled_)
+ get(item.first).set_statistics_enabled(item.second);
+
+ return std::shared_ptr<WriterProperties>(new WriterProperties(
+ pool_, dictionary_pagesize_limit_, write_batch_size_, max_row_group_length_,
+ pagesize_, version_, created_by_, std::move(file_encryption_properties_),
+ default_column_properties_, column_properties, data_page_version_));
+ }
+
+ private:
+ MemoryPool* pool_;
+ int64_t dictionary_pagesize_limit_;
+ int64_t write_batch_size_;
+ int64_t max_row_group_length_;
+ int64_t pagesize_;
+ ParquetVersion::type version_;
+ ParquetDataPageVersion data_page_version_;
+ std::string created_by_;
+
+ std::shared_ptr<FileEncryptionProperties> file_encryption_properties_;
+
+ // Settings used for each column unless overridden in any of the maps below
+ ColumnProperties default_column_properties_;
+ std::unordered_map<std::string, Encoding::type> encodings_;
+ std::unordered_map<std::string, Compression::type> codecs_;
+ std::unordered_map<std::string, int32_t> codecs_compression_level_;
+ std::unordered_map<std::string, bool> dictionary_enabled_;
+ std::unordered_map<std::string, bool> statistics_enabled_;
+ };
+
+ inline MemoryPool* memory_pool() const { return pool_; }
+
+ inline int64_t dictionary_pagesize_limit() const { return dictionary_pagesize_limit_; }
+
+ inline int64_t write_batch_size() const { return write_batch_size_; }
+
+ inline int64_t max_row_group_length() const { return max_row_group_length_; }
+
+ inline int64_t data_pagesize() const { return pagesize_; }
+
+ inline ParquetDataPageVersion data_page_version() const {
+ return parquet_data_page_version_;
+ }
+
+ inline ParquetVersion::type version() const { return parquet_version_; }
+
+ inline std::string created_by() const { return parquet_created_by_; }
+
+ inline Encoding::type dictionary_index_encoding() const {
+ if (parquet_version_ == ParquetVersion::PARQUET_1_0) {
+ return Encoding::PLAIN_DICTIONARY;
+ } else {
+ return Encoding::RLE_DICTIONARY;
+ }
+ }
+
+ inline Encoding::type dictionary_page_encoding() const {
+ if (parquet_version_ == ParquetVersion::PARQUET_1_0) {
+ return Encoding::PLAIN_DICTIONARY;
+ } else {
+ return Encoding::PLAIN;
+ }
+ }
+
+ const ColumnProperties& column_properties(
+ const std::shared_ptr<schema::ColumnPath>& path) const {
+ auto it = column_properties_.find(path->ToDotString());
+ if (it != column_properties_.end()) return it->second;
+ return default_column_properties_;
+ }
+
+ Encoding::type encoding(const std::shared_ptr<schema::ColumnPath>& path) const {
+ return column_properties(path).encoding();
+ }
+
+ Compression::type compression(const std::shared_ptr<schema::ColumnPath>& path) const {
+ return column_properties(path).compression();
+ }
+
+ int compression_level(const std::shared_ptr<schema::ColumnPath>& path) const {
+ return column_properties(path).compression_level();
+ }
+
+ bool dictionary_enabled(const std::shared_ptr<schema::ColumnPath>& path) const {
+ return column_properties(path).dictionary_enabled();
+ }
+
+ bool statistics_enabled(const std::shared_ptr<schema::ColumnPath>& path) const {
+ return column_properties(path).statistics_enabled();
+ }
+
+ size_t max_statistics_size(const std::shared_ptr<schema::ColumnPath>& path) const {
+ return column_properties(path).max_statistics_size();
+ }
+
+ inline FileEncryptionProperties* file_encryption_properties() const {
+ return file_encryption_properties_.get();
+ }
+
+ std::shared_ptr<ColumnEncryptionProperties> column_encryption_properties(
+ const std::string& path) const {
+ if (file_encryption_properties_) {
+ return file_encryption_properties_->column_encryption_properties(path);
+ } else {
+ return NULLPTR;
+ }
+ }
+
+ private:
+ explicit WriterProperties(
+ MemoryPool* pool, int64_t dictionary_pagesize_limit, int64_t write_batch_size,
+ int64_t max_row_group_length, int64_t pagesize, ParquetVersion::type version,
+ const std::string& created_by,
+ std::shared_ptr<FileEncryptionProperties> file_encryption_properties,
+ const ColumnProperties& default_column_properties,
+ const std::unordered_map<std::string, ColumnProperties>& column_properties,
+ ParquetDataPageVersion data_page_version)
+ : pool_(pool),
+ dictionary_pagesize_limit_(dictionary_pagesize_limit),
+ write_batch_size_(write_batch_size),
+ max_row_group_length_(max_row_group_length),
+ pagesize_(pagesize),
+ parquet_data_page_version_(data_page_version),
+ parquet_version_(version),
+ parquet_created_by_(created_by),
+ file_encryption_properties_(file_encryption_properties),
+ default_column_properties_(default_column_properties),
+ column_properties_(column_properties) {}
+
+ MemoryPool* pool_;
+ int64_t dictionary_pagesize_limit_;
+ int64_t write_batch_size_;
+ int64_t max_row_group_length_;
+ int64_t pagesize_;
+ ParquetDataPageVersion parquet_data_page_version_;
+ ParquetVersion::type parquet_version_;
+ std::string parquet_created_by_;
+
+ std::shared_ptr<FileEncryptionProperties> file_encryption_properties_;
+
+ ColumnProperties default_column_properties_;
+ std::unordered_map<std::string, ColumnProperties> column_properties_;
+};
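+
+// Example (illustrative sketch; "part.color" is a hypothetical column path):
+//
+//   WriterProperties::Builder builder;
+//   builder.compression(Compression::SNAPPY)  // default for all columns
+//       ->compression("part.color", Compression::ZSTD)
+//       ->compression_level("part.color", 6)  // codec-specific level
+//       ->disable_dictionary("part.color")
+//       ->max_row_group_length(1 << 20);
+//   std::shared_ptr<WriterProperties> props = builder.build();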
+
+PARQUET_EXPORT const std::shared_ptr<WriterProperties>& default_writer_properties();
+
+// ----------------------------------------------------------------------
+// Properties specific to Apache Arrow columnar read and write
+
+static constexpr bool kArrowDefaultUseThreads = false;
+
+// Default number of rows to read when using ::arrow::RecordBatchReader
+static constexpr int64_t kArrowDefaultBatchSize = 64 * 1024;
+
+/// EXPERIMENTAL: Properties for configuring FileReader behavior.
+class PARQUET_EXPORT ArrowReaderProperties {
+ public:
+ explicit ArrowReaderProperties(bool use_threads = kArrowDefaultUseThreads)
+ : use_threads_(use_threads),
+ read_dict_indices_(),
+ batch_size_(kArrowDefaultBatchSize),
+ pre_buffer_(false),
+ cache_options_(::arrow::io::CacheOptions::Defaults()),
+ coerce_int96_timestamp_unit_(::arrow::TimeUnit::NANO) {}
+
+ void set_use_threads(bool use_threads) { use_threads_ = use_threads; }
+
+ bool use_threads() const { return use_threads_; }
+
+ void set_read_dictionary(int column_index, bool read_dict) {
+ if (read_dict) {
+ read_dict_indices_.insert(column_index);
+ } else {
+ read_dict_indices_.erase(column_index);
+ }
+ }
+ bool read_dictionary(int column_index) const {
+ return read_dict_indices_.find(column_index) != read_dict_indices_.end();
+ }
+
+ void set_batch_size(int64_t batch_size) { batch_size_ = batch_size; }
+
+ int64_t batch_size() const { return batch_size_; }
+
+ /// Enable read coalescing.
+ ///
+ /// When enabled, the Arrow reader will pre-buffer necessary regions
+ /// of the file in-memory. This is intended to improve performance on
+ /// high-latency filesystems (e.g. Amazon S3).
+ void set_pre_buffer(bool pre_buffer) { pre_buffer_ = pre_buffer; }
+
+ bool pre_buffer() const { return pre_buffer_; }
+
+ /// Set options for read coalescing. This can be used to tune the
+ /// implementation for characteristics of different filesystems.
+ void set_cache_options(::arrow::io::CacheOptions options) { cache_options_ = options; }
+
+ const ::arrow::io::CacheOptions& cache_options() const { return cache_options_; }
+
+ /// Set execution context for read coalescing.
+ void set_io_context(const ::arrow::io::IOContext& ctx) { io_context_ = ctx; }
+
+ const ::arrow::io::IOContext& io_context() const { return io_context_; }
+
+ /// Set timestamp unit to use for deprecated INT96-encoded timestamps
+ /// (default is NANO).
+ void set_coerce_int96_timestamp_unit(::arrow::TimeUnit::type unit) {
+ coerce_int96_timestamp_unit_ = unit;
+ }
+
+ ::arrow::TimeUnit::type coerce_int96_timestamp_unit() const {
+ return coerce_int96_timestamp_unit_;
+ }
+
+ private:
+ bool use_threads_;
+ std::unordered_set<int> read_dict_indices_;
+ int64_t batch_size_;
+ bool pre_buffer_;
+ ::arrow::io::IOContext io_context_;
+ ::arrow::io::CacheOptions cache_options_;
+ ::arrow::TimeUnit::type coerce_int96_timestamp_unit_;
+};
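+
+// Example (illustrative sketch): tuning the reader for a high-latency
+// filesystem by enabling read coalescing, and decoding column 0 directly
+// to a dictionary-encoded Arrow array:
+//
+//   ArrowReaderProperties props(/*use_threads=*/true);
+//   props.set_pre_buffer(true);
+//   props.set_batch_size(128 * 1024);  // rows per record batch
+//   props.set_read_dictionary(0, true);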
+
+/// EXPERIMENTAL: Constructs the default ArrowReaderProperties
+PARQUET_EXPORT
+ArrowReaderProperties default_arrow_reader_properties();
+
+class PARQUET_EXPORT ArrowWriterProperties {
+ public:
+ enum EngineVersion {
+ V1, // Supports only nested lists.
+ V2 // Full support for all nesting combinations
+ };
+ class Builder {
+ public:
+ Builder()
+ : write_timestamps_as_int96_(false),
+ coerce_timestamps_enabled_(false),
+ coerce_timestamps_unit_(::arrow::TimeUnit::SECOND),
+ truncated_timestamps_allowed_(false),
+ store_schema_(false),
+ // TODO: At some point we should flip this.
+ compliant_nested_types_(false),
+ engine_version_(V2) {}
+ virtual ~Builder() = default;
+
+ Builder* disable_deprecated_int96_timestamps() {
+ write_timestamps_as_int96_ = false;
+ return this;
+ }
+
+ Builder* enable_deprecated_int96_timestamps() {
+ write_timestamps_as_int96_ = true;
+ return this;
+ }
+
+ Builder* coerce_timestamps(::arrow::TimeUnit::type unit) {
+ coerce_timestamps_enabled_ = true;
+ coerce_timestamps_unit_ = unit;
+ return this;
+ }
+
+ Builder* allow_truncated_timestamps() {
+ truncated_timestamps_allowed_ = true;
+ return this;
+ }
+
+ Builder* disallow_truncated_timestamps() {
+ truncated_timestamps_allowed_ = false;
+ return this;
+ }
+
+ /// \brief EXPERIMENTAL: Write binary serialized Arrow schema to the file,
+ /// to enable certain read options (like "read_dictionary") to be set
+ /// automatically
+ Builder* store_schema() {
+ store_schema_ = true;
+ return this;
+ }
+
+ Builder* enable_compliant_nested_types() {
+ compliant_nested_types_ = true;
+ return this;
+ }
+
+ Builder* disable_compliant_nested_types() {
+ compliant_nested_types_ = false;
+ return this;
+ }
+
+ Builder* set_engine_version(EngineVersion version) {
+ engine_version_ = version;
+ return this;
+ }
+
+ std::shared_ptr<ArrowWriterProperties> build() {
+ return std::shared_ptr<ArrowWriterProperties>(new ArrowWriterProperties(
+ write_timestamps_as_int96_, coerce_timestamps_enabled_, coerce_timestamps_unit_,
+ truncated_timestamps_allowed_, store_schema_, compliant_nested_types_,
+ engine_version_));
+ }
+
+ private:
+ bool write_timestamps_as_int96_;
+
+ bool coerce_timestamps_enabled_;
+ ::arrow::TimeUnit::type coerce_timestamps_unit_;
+ bool truncated_timestamps_allowed_;
+
+ bool store_schema_;
+ bool compliant_nested_types_;
+ EngineVersion engine_version_;
+ };
+
+ bool support_deprecated_int96_timestamps() const { return write_timestamps_as_int96_; }
+
+ bool coerce_timestamps_enabled() const { return coerce_timestamps_enabled_; }
+ ::arrow::TimeUnit::type coerce_timestamps_unit() const {
+ return coerce_timestamps_unit_;
+ }
+
+ bool truncated_timestamps_allowed() const { return truncated_timestamps_allowed_; }
+
+ bool store_schema() const { return store_schema_; }
+
+ /// \brief Enable nested type naming according to the parquet specification.
+ ///
+ /// Older versions of Arrow wrote out field names for nested lists based on
+ /// the name of the field. According to the Parquet specification, they
+ /// should always be "element".
+ bool compliant_nested_types() const { return compliant_nested_types_; }
+
+ /// \brief The underlying engine version to use when writing Arrow data.
+ ///
+ /// V2 is currently the latest; V1 is considered deprecated but left in
+ /// place in case bugs are detected in V2.
+ EngineVersion engine_version() const { return engine_version_; }
+
+ private:
+ explicit ArrowWriterProperties(bool write_nanos_as_int96,
+ bool coerce_timestamps_enabled,
+ ::arrow::TimeUnit::type coerce_timestamps_unit,
+ bool truncated_timestamps_allowed, bool store_schema,
+ bool compliant_nested_types,
+ EngineVersion engine_version)
+ : write_timestamps_as_int96_(write_nanos_as_int96),
+ coerce_timestamps_enabled_(coerce_timestamps_enabled),
+ coerce_timestamps_unit_(coerce_timestamps_unit),
+ truncated_timestamps_allowed_(truncated_timestamps_allowed),
+ store_schema_(store_schema),
+ compliant_nested_types_(compliant_nested_types),
+ engine_version_(engine_version) {}
+
+ const bool write_timestamps_as_int96_;
+ const bool coerce_timestamps_enabled_;
+ const ::arrow::TimeUnit::type coerce_timestamps_unit_;
+ const bool truncated_timestamps_allowed_;
+ const bool store_schema_;
+ const bool compliant_nested_types_;
+ const EngineVersion engine_version_;
+};
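+
+// Example (illustrative sketch): coerce timestamps to milliseconds, tolerate
+// the resulting truncation, and store the Arrow schema for round-tripping:
+//
+//   std::shared_ptr<ArrowWriterProperties> arrow_props =
+//       ArrowWriterProperties::Builder()
+//           .coerce_timestamps(::arrow::TimeUnit::MILLI)
+//           ->allow_truncated_timestamps()
+//           ->store_schema()
+//           ->build();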
+
+/// \brief State object used for writing Arrow data directly to a Parquet
+/// column chunk. This API is possibly not stable.
+struct ArrowWriteContext {
+ ArrowWriteContext(MemoryPool* memory_pool, ArrowWriterProperties* properties)
+ : memory_pool(memory_pool),
+ properties(properties),
+ data_buffer(AllocateBuffer(memory_pool)),
+ def_levels_buffer(AllocateBuffer(memory_pool)) {}
+
+ template <typename T>
+ ::arrow::Status GetScratchData(const int64_t num_values, T** out) {
+ ARROW_RETURN_NOT_OK(this->data_buffer->Resize(num_values * sizeof(T), false));
+ *out = reinterpret_cast<T*>(this->data_buffer->mutable_data());
+ return ::arrow::Status::OK();
+ }
+
+ MemoryPool* memory_pool;
+ const ArrowWriterProperties* properties;
+
+ // Buffer used for storing the data of an array converted to the physical type
+ // as expected by parquet-cpp.
+ std::shared_ptr<ResizableBuffer> data_buffer;
+
+ // We use shared ownership of this buffer.
+ std::shared_ptr<ResizableBuffer> def_levels_buffer;
+};
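+
+// Example (illustrative sketch; `ctx` is a hypothetical ArrowWriteContext*):
+//
+//   int64_t* scratch = NULLPTR;
+//   PARQUET_THROW_NOT_OK(ctx->GetScratchData<int64_t>(num_values, &scratch));
+//
+// `scratch` then points at num_values writable int64_t slots backed by
+// ctx->data_buffer; a subsequent GetScratchData call may invalidate it.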
+
+PARQUET_EXPORT
+std::shared_ptr<ArrowWriterProperties> default_arrow_writer_properties();
+
+} // namespace parquet
diff --git a/contrib/libs/apache/arrow/cpp/src/parquet/schema.cc b/contrib/libs/apache/arrow/cpp/src/parquet/schema.cc
index fe4e10d8514..cfa6bdb2912 100644
--- a/contrib/libs/apache/arrow/cpp/src/parquet/schema.cc
+++ b/contrib/libs/apache/arrow/cpp/src/parquet/schema.cc
@@ -1,945 +1,945 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-#include "parquet/schema.h"
-
-#include <algorithm>
-#include <cstring>
-#include <memory>
-#include <string>
-#include <type_traits>
-#include <utility>
-
-#include "arrow/util/logging.h"
-#include "parquet/exception.h"
-#include "parquet/schema_internal.h"
-#include "parquet/thrift_internal.h"
-
-using parquet::format::SchemaElement;
-
-namespace parquet {
-
-namespace schema {
-
-namespace {
-
-void ThrowInvalidLogicalType(const LogicalType& logical_type) {
- std::stringstream ss;
- ss << "Invalid logical type: " << logical_type.ToString();
- throw ParquetException(ss.str());
-}
-
-} // namespace
-
-// ----------------------------------------------------------------------
-// ColumnPath
-
-std::shared_ptr<ColumnPath> ColumnPath::FromDotString(const std::string& dotstring) {
- std::stringstream ss(dotstring);
- std::string item;
- std::vector<std::string> path;
- while (std::getline(ss, item, '.')) {
- path.push_back(item);
- }
- return std::make_shared<ColumnPath>(std::move(path));
-}
-
-std::shared_ptr<ColumnPath> ColumnPath::FromNode(const Node& node) {
- // Build the path in reverse order as we traverse the nodes to the top
- std::vector<std::string> rpath_;
- const Node* cursor = &node;
- // The schema node is not part of the ColumnPath
- while (cursor->parent()) {
- rpath_.push_back(cursor->name());
- cursor = cursor->parent();
- }
-
- // Build ColumnPath in correct order
- std::vector<std::string> path(rpath_.crbegin(), rpath_.crend());
- return std::make_shared<ColumnPath>(std::move(path));
-}
-
-std::shared_ptr<ColumnPath> ColumnPath::extend(const std::string& node_name) const {
-  std::vector<std::string> path;
-  path.reserve(path_.size() + 1);
-  path.assign(path_.cbegin(), path_.cend());
-  path.push_back(node_name);
-
- return std::make_shared<ColumnPath>(std::move(path));
-}
-
-std::string ColumnPath::ToDotString() const {
- std::stringstream ss;
- for (auto it = path_.cbegin(); it != path_.cend(); ++it) {
- if (it != path_.cbegin()) {
- ss << ".";
- }
- ss << *it;
- }
- return ss.str();
-}
-
-const std::vector<std::string>& ColumnPath::ToDotVector() const { return path_; }
-
-// ----------------------------------------------------------------------
-// Base node
-
-const std::shared_ptr<ColumnPath> Node::path() const {
- // TODO(itaiin): Cache the result, or more precisely, cache ->ToDotString()
- // since it is being used to access the leaf nodes
- return ColumnPath::FromNode(*this);
-}
-
-bool Node::EqualsInternal(const Node* other) const {
- return type_ == other->type_ && name_ == other->name_ &&
- repetition_ == other->repetition_ && converted_type_ == other->converted_type_ &&
- field_id_ == other->field_id() &&
- logical_type_->Equals(*(other->logical_type()));
-}
-
-void Node::SetParent(const Node* parent) { parent_ = parent; }
-
-// ----------------------------------------------------------------------
-// Primitive node
-
-PrimitiveNode::PrimitiveNode(const std::string& name, Repetition::type repetition,
- Type::type type, ConvertedType::type converted_type,
- int length, int precision, int scale, int id)
- : Node(Node::PRIMITIVE, name, repetition, converted_type, id),
- physical_type_(type),
- type_length_(length) {
- std::stringstream ss;
-
- // PARQUET-842: In an earlier revision, decimal_metadata_.isset was being
-  // set to true, but Impala would raise an "incompatible metadata" error in such cases.
- memset(&decimal_metadata_, 0, sizeof(decimal_metadata_));
-
-  // Check that the physical type and converted type are compatible.
-  // Mapping taken from Apache parquet-mr as of 2016-02-22.
- switch (converted_type) {
- case ConvertedType::NONE:
- // Logical type not set
- break;
- case ConvertedType::UTF8:
- case ConvertedType::JSON:
- case ConvertedType::BSON:
- if (type != Type::BYTE_ARRAY) {
- ss << ConvertedTypeToString(converted_type);
- ss << " can only annotate BYTE_ARRAY fields";
- throw ParquetException(ss.str());
- }
- break;
- case ConvertedType::DECIMAL:
- if ((type != Type::INT32) && (type != Type::INT64) && (type != Type::BYTE_ARRAY) &&
- (type != Type::FIXED_LEN_BYTE_ARRAY)) {
- ss << "DECIMAL can only annotate INT32, INT64, BYTE_ARRAY, and FIXED";
- throw ParquetException(ss.str());
- }
- if (precision <= 0) {
- ss << "Invalid DECIMAL precision: " << precision
- << ". Precision must be a number between 1 and 38 inclusive";
- throw ParquetException(ss.str());
- }
- if (scale < 0) {
- ss << "Invalid DECIMAL scale: " << scale
- << ". Scale must be a number between 0 and precision inclusive";
- throw ParquetException(ss.str());
- }
- if (scale > precision) {
- ss << "Invalid DECIMAL scale " << scale;
- ss << " cannot be greater than precision " << precision;
- throw ParquetException(ss.str());
- }
- decimal_metadata_.isset = true;
- decimal_metadata_.precision = precision;
- decimal_metadata_.scale = scale;
- break;
- case ConvertedType::DATE:
- case ConvertedType::TIME_MILLIS:
- case ConvertedType::UINT_8:
- case ConvertedType::UINT_16:
- case ConvertedType::UINT_32:
- case ConvertedType::INT_8:
- case ConvertedType::INT_16:
- case ConvertedType::INT_32:
- if (type != Type::INT32) {
- ss << ConvertedTypeToString(converted_type);
- ss << " can only annotate INT32";
- throw ParquetException(ss.str());
- }
- break;
- case ConvertedType::TIME_MICROS:
- case ConvertedType::TIMESTAMP_MILLIS:
- case ConvertedType::TIMESTAMP_MICROS:
- case ConvertedType::UINT_64:
- case ConvertedType::INT_64:
- if (type != Type::INT64) {
- ss << ConvertedTypeToString(converted_type);
- ss << " can only annotate INT64";
- throw ParquetException(ss.str());
- }
- break;
- case ConvertedType::INTERVAL:
- if ((type != Type::FIXED_LEN_BYTE_ARRAY) || (length != 12)) {
- ss << "INTERVAL can only annotate FIXED_LEN_BYTE_ARRAY(12)";
- throw ParquetException(ss.str());
- }
- break;
- case ConvertedType::ENUM:
- if (type != Type::BYTE_ARRAY) {
- ss << "ENUM can only annotate BYTE_ARRAY fields";
- throw ParquetException(ss.str());
- }
- break;
- case ConvertedType::NA:
- // NA can annotate any type
- break;
- default:
- ss << ConvertedTypeToString(converted_type);
- ss << " cannot be applied to a primitive type";
- throw ParquetException(ss.str());
- }
- // For forward compatibility, create an equivalent logical type
- logical_type_ = LogicalType::FromConvertedType(converted_type_, decimal_metadata_);
- if (!(logical_type_ && !logical_type_->is_nested() &&
- logical_type_->is_compatible(converted_type_, decimal_metadata_))) {
- ThrowInvalidLogicalType(*logical_type_);
- }
-
- if (type == Type::FIXED_LEN_BYTE_ARRAY) {
- if (length <= 0) {
- ss << "Invalid FIXED_LEN_BYTE_ARRAY length: " << length;
- throw ParquetException(ss.str());
- }
- type_length_ = length;
- }
-}
-
-PrimitiveNode::PrimitiveNode(const std::string& name, Repetition::type repetition,
- std::shared_ptr<const LogicalType> logical_type,
- Type::type physical_type, int physical_length, int id)
- : Node(Node::PRIMITIVE, name, repetition, std::move(logical_type), id),
- physical_type_(physical_type),
- type_length_(physical_length) {
- std::stringstream error;
- if (logical_type_) {
- // Check for logical type <=> node type consistency
- if (!logical_type_->is_nested()) {
- // Check for logical type <=> physical type consistency
- if (logical_type_->is_applicable(physical_type, physical_length)) {
- // For backward compatibility, assign equivalent legacy
- // converted type (if possible)
- converted_type_ = logical_type_->ToConvertedType(&decimal_metadata_);
- } else {
- error << logical_type_->ToString();
- error << " can not be applied to primitive type ";
- error << TypeToString(physical_type);
- throw ParquetException(error.str());
- }
- } else {
- error << "Nested logical type ";
- error << logical_type_->ToString();
- error << " can not be applied to non-group node";
- throw ParquetException(error.str());
- }
- } else {
- logical_type_ = NoLogicalType::Make();
- converted_type_ = logical_type_->ToConvertedType(&decimal_metadata_);
- }
- if (!(logical_type_ && !logical_type_->is_nested() &&
- logical_type_->is_compatible(converted_type_, decimal_metadata_))) {
- ThrowInvalidLogicalType(*logical_type_);
- }
-
- if (physical_type == Type::FIXED_LEN_BYTE_ARRAY) {
- if (physical_length <= 0) {
- error << "Invalid FIXED_LEN_BYTE_ARRAY length: " << physical_length;
- throw ParquetException(error.str());
- }
- }
-}
-
-bool PrimitiveNode::EqualsInternal(const PrimitiveNode* other) const {
- bool is_equal = true;
- if (physical_type_ != other->physical_type_) {
- return false;
- }
- if (converted_type_ == ConvertedType::DECIMAL) {
- is_equal &= (decimal_metadata_.precision == other->decimal_metadata_.precision) &&
- (decimal_metadata_.scale == other->decimal_metadata_.scale);
- }
- if (physical_type_ == Type::FIXED_LEN_BYTE_ARRAY) {
- is_equal &= (type_length_ == other->type_length_);
- }
- return is_equal;
-}
-
-bool PrimitiveNode::Equals(const Node* other) const {
- if (!Node::EqualsInternal(other)) {
- return false;
- }
- return EqualsInternal(static_cast<const PrimitiveNode*>(other));
-}
-
-void PrimitiveNode::Visit(Node::Visitor* visitor) { visitor->Visit(this); }
-
-void PrimitiveNode::VisitConst(Node::ConstVisitor* visitor) const {
- visitor->Visit(this);
-}
-
-// ----------------------------------------------------------------------
-// Group node
-
-GroupNode::GroupNode(const std::string& name, Repetition::type repetition,
- const NodeVector& fields, ConvertedType::type converted_type, int id)
- : Node(Node::GROUP, name, repetition, converted_type, id), fields_(fields) {
- // For forward compatibility, create an equivalent logical type
- logical_type_ = LogicalType::FromConvertedType(converted_type_);
- if (!(logical_type_ && (logical_type_->is_nested() || logical_type_->is_none()) &&
- logical_type_->is_compatible(converted_type_))) {
- ThrowInvalidLogicalType(*logical_type_);
- }
-
- field_name_to_idx_.clear();
- auto field_idx = 0;
- for (NodePtr& field : fields_) {
- field->SetParent(this);
- field_name_to_idx_.emplace(field->name(), field_idx++);
- }
-}
-
-GroupNode::GroupNode(const std::string& name, Repetition::type repetition,
- const NodeVector& fields,
- std::shared_ptr<const LogicalType> logical_type, int id)
- : Node(Node::GROUP, name, repetition, std::move(logical_type), id), fields_(fields) {
- if (logical_type_) {
- // Check for logical type <=> node type consistency
- if (logical_type_->is_nested()) {
- // For backward compatibility, assign equivalent legacy converted type (if possible)
- converted_type_ = logical_type_->ToConvertedType(nullptr);
- } else {
- std::stringstream error;
- error << "Logical type ";
- error << logical_type_->ToString();
- error << " can not be applied to group node";
- throw ParquetException(error.str());
- }
- } else {
- logical_type_ = NoLogicalType::Make();
- converted_type_ = logical_type_->ToConvertedType(nullptr);
- }
- if (!(logical_type_ && (logical_type_->is_nested() || logical_type_->is_none()) &&
- logical_type_->is_compatible(converted_type_))) {
- ThrowInvalidLogicalType(*logical_type_);
- }
-
- field_name_to_idx_.clear();
- auto field_idx = 0;
- for (NodePtr& field : fields_) {
- field->SetParent(this);
- field_name_to_idx_.emplace(field->name(), field_idx++);
- }
-}
-
-bool GroupNode::EqualsInternal(const GroupNode* other) const {
- if (this == other) {
- return true;
- }
- if (this->field_count() != other->field_count()) {
- return false;
- }
- for (int i = 0; i < this->field_count(); ++i) {
- if (!this->field(i)->Equals(other->field(i).get())) {
- return false;
- }
- }
- return true;
-}
-
-bool GroupNode::Equals(const Node* other) const {
- if (!Node::EqualsInternal(other)) {
- return false;
- }
- return EqualsInternal(static_cast<const GroupNode*>(other));
-}
-
-int GroupNode::FieldIndex(const std::string& name) const {
- auto search = field_name_to_idx_.find(name);
- if (search == field_name_to_idx_.end()) {
- // Not found
- return -1;
- }
- return search->second;
-}
-
-int GroupNode::FieldIndex(const Node& node) const {
- auto search = field_name_to_idx_.equal_range(node.name());
- for (auto it = search.first; it != search.second; ++it) {
- const int idx = it->second;
- if (&node == field(idx).get()) {
- return idx;
- }
- }
- return -1;
-}
-
-void GroupNode::Visit(Node::Visitor* visitor) { visitor->Visit(this); }
-
-void GroupNode::VisitConst(Node::ConstVisitor* visitor) const { visitor->Visit(this); }
-
-// ----------------------------------------------------------------------
-// Node construction from Parquet metadata
-
-std::unique_ptr<Node> GroupNode::FromParquet(const void* opaque_element,
- NodeVector fields) {
- const format::SchemaElement* element =
- static_cast<const format::SchemaElement*>(opaque_element);
-
- int field_id = -1;
- if (element->__isset.field_id) {
- field_id = element->field_id;
- }
-
- std::unique_ptr<GroupNode> group_node;
- if (element->__isset.logicalType) {
- // updated writer with logical type present
- group_node = std::unique_ptr<GroupNode>(
- new GroupNode(element->name, LoadEnumSafe(&element->repetition_type), fields,
- LogicalType::FromThrift(element->logicalType), field_id));
- } else {
- group_node = std::unique_ptr<GroupNode>(new GroupNode(
- element->name, LoadEnumSafe(&element->repetition_type), fields,
- (element->__isset.converted_type ? LoadEnumSafe(&element->converted_type)
- : ConvertedType::NONE),
- field_id));
- }
-
- return std::unique_ptr<Node>(group_node.release());
-}
-
-std::unique_ptr<Node> PrimitiveNode::FromParquet(const void* opaque_element) {
- const format::SchemaElement* element =
- static_cast<const format::SchemaElement*>(opaque_element);
-
- int field_id = -1;
- if (element->__isset.field_id) {
- field_id = element->field_id;
- }
-
- std::unique_ptr<PrimitiveNode> primitive_node;
- if (element->__isset.logicalType) {
- // updated writer with logical type present
- primitive_node = std::unique_ptr<PrimitiveNode>(
- new PrimitiveNode(element->name, LoadEnumSafe(&element->repetition_type),
- LogicalType::FromThrift(element->logicalType),
- LoadEnumSafe(&element->type), element->type_length, field_id));
- } else if (element->__isset.converted_type) {
- // legacy writer with converted type present
- primitive_node = std::unique_ptr<PrimitiveNode>(new PrimitiveNode(
- element->name, LoadEnumSafe(&element->repetition_type),
- LoadEnumSafe(&element->type), LoadEnumSafe(&element->converted_type),
- element->type_length, element->precision, element->scale, field_id));
- } else {
- // logical type not present
- primitive_node = std::unique_ptr<PrimitiveNode>(new PrimitiveNode(
- element->name, LoadEnumSafe(&element->repetition_type), NoLogicalType::Make(),
- LoadEnumSafe(&element->type), element->type_length, field_id));
- }
-
- // Return as unique_ptr to the base type
- return std::unique_ptr<Node>(primitive_node.release());
-}
-
-bool GroupNode::HasRepeatedFields() const {
- for (int i = 0; i < this->field_count(); ++i) {
- auto field = this->field(i);
- if (field->repetition() == Repetition::REPEATED) {
- return true;
- }
-    if (field->is_group()) {
-      const auto& group = static_cast<const GroupNode&>(*field);
-      // Recurse, but keep scanning the remaining sibling fields when the
-      // subgroup has no repeated fields.
-      if (group.HasRepeatedFields()) {
-        return true;
-      }
-    }
- }
- return false;
-}
-
-void GroupNode::ToParquet(void* opaque_element) const {
- format::SchemaElement* element = static_cast<format::SchemaElement*>(opaque_element);
- element->__set_name(name_);
- element->__set_num_children(field_count());
- element->__set_repetition_type(ToThrift(repetition_));
- if (converted_type_ != ConvertedType::NONE) {
- element->__set_converted_type(ToThrift(converted_type_));
- }
- if (field_id_ >= 0) {
- element->__set_field_id(field_id_);
- }
- if (logical_type_ && logical_type_->is_serialized()) {
- element->__set_logicalType(logical_type_->ToThrift());
- }
- return;
-}
-
-void PrimitiveNode::ToParquet(void* opaque_element) const {
- format::SchemaElement* element = static_cast<format::SchemaElement*>(opaque_element);
- element->__set_name(name_);
- element->__set_repetition_type(ToThrift(repetition_));
- if (converted_type_ != ConvertedType::NONE) {
- if (converted_type_ != ConvertedType::NA) {
- element->__set_converted_type(ToThrift(converted_type_));
- } else {
- // ConvertedType::NA is an unreleased, obsolete synonym for LogicalType::Null.
- // Never emit it (see PARQUET-1990 for discussion).
- if (!logical_type_ || !logical_type_->is_null()) {
- throw ParquetException(
- "ConvertedType::NA is obsolete, please use LogicalType::Null instead");
- }
- }
- }
- if (field_id_ >= 0) {
- element->__set_field_id(field_id_);
- }
- if (logical_type_ && logical_type_->is_serialized() &&
- // TODO(tpboudreau): remove the following conjunct to enable serialization
- // of IntervalTypes after parquet.thrift recognizes them
- !logical_type_->is_interval()) {
- element->__set_logicalType(logical_type_->ToThrift());
- }
- element->__set_type(ToThrift(physical_type_));
- if (physical_type_ == Type::FIXED_LEN_BYTE_ARRAY) {
- element->__set_type_length(type_length_);
- }
- if (decimal_metadata_.isset) {
- element->__set_precision(decimal_metadata_.precision);
- element->__set_scale(decimal_metadata_.scale);
- }
- return;
-}
-
-// ----------------------------------------------------------------------
-// Schema converters
-
-std::unique_ptr<Node> Unflatten(const format::SchemaElement* elements, int length) {
- if (elements[0].num_children == 0) {
- if (length == 1) {
- // Degenerate case of Parquet file with no columns
- return GroupNode::FromParquet(elements, {});
- } else {
- throw ParquetException(
- "Parquet schema had multiple nodes but root had no children");
- }
- }
-
- // We don't check that the root node is repeated since this is not
- // consistently set by implementations
-
- int pos = 0;
-
- std::function<std::unique_ptr<Node>()> NextNode = [&]() {
- if (pos == length) {
- throw ParquetException("Malformed schema: not enough elements");
- }
- const SchemaElement& element = elements[pos++];
- const void* opaque_element = static_cast<const void*>(&element);
-
- if (element.num_children == 0 && element.__isset.type) {
- // Leaf (primitive) node: always has a type
- return PrimitiveNode::FromParquet(opaque_element);
- } else {
- // Group node (may have 0 children, but cannot have a type)
- NodeVector fields;
- for (int i = 0; i < element.num_children; ++i) {
- std::unique_ptr<Node> field = NextNode();
- fields.push_back(NodePtr(field.release()));
- }
- return GroupNode::FromParquet(opaque_element, std::move(fields));
- }
- };
- return NextNode();
-}
-
-std::shared_ptr<SchemaDescriptor> FromParquet(const std::vector<SchemaElement>& schema) {
- if (schema.empty()) {
- throw ParquetException("Empty file schema (no root)");
- }
- std::unique_ptr<Node> root = Unflatten(&schema[0], static_cast<int>(schema.size()));
- std::shared_ptr<SchemaDescriptor> descr = std::make_shared<SchemaDescriptor>();
- descr->Init(std::shared_ptr<GroupNode>(static_cast<GroupNode*>(root.release())));
- return descr;
-}
-
-class SchemaVisitor : public Node::ConstVisitor {
- public:
- explicit SchemaVisitor(std::vector<format::SchemaElement>* elements)
- : elements_(elements) {}
-
- void Visit(const Node* node) override {
- format::SchemaElement element;
- node->ToParquet(&element);
- elements_->push_back(element);
-
- if (node->is_group()) {
- const GroupNode* group_node = static_cast<const GroupNode*>(node);
- for (int i = 0; i < group_node->field_count(); ++i) {
- group_node->field(i)->VisitConst(this);
- }
- }
- }
-
- private:
- std::vector<format::SchemaElement>* elements_;
-};
-
-void ToParquet(const GroupNode* schema, std::vector<format::SchemaElement>* out) {
- SchemaVisitor visitor(out);
- schema->VisitConst(&visitor);
-}
-
-// ----------------------------------------------------------------------
-// Schema printing
-
-static void PrintRepLevel(Repetition::type repetition, std::ostream& stream) {
- switch (repetition) {
- case Repetition::REQUIRED:
- stream << "required";
- break;
- case Repetition::OPTIONAL:
- stream << "optional";
- break;
- case Repetition::REPEATED:
- stream << "repeated";
- break;
- default:
- break;
- }
-}
-
-static void PrintType(const PrimitiveNode* node, std::ostream& stream) {
- switch (node->physical_type()) {
- case Type::BOOLEAN:
- stream << "boolean";
- break;
- case Type::INT32:
- stream << "int32";
- break;
- case Type::INT64:
- stream << "int64";
- break;
- case Type::INT96:
- stream << "int96";
- break;
- case Type::FLOAT:
- stream << "float";
- break;
- case Type::DOUBLE:
- stream << "double";
- break;
- case Type::BYTE_ARRAY:
- stream << "binary";
- break;
- case Type::FIXED_LEN_BYTE_ARRAY:
- stream << "fixed_len_byte_array(" << node->type_length() << ")";
- break;
- default:
- break;
- }
-}
-
-static void PrintConvertedType(const PrimitiveNode* node, std::ostream& stream) {
- auto lt = node->converted_type();
- auto la = node->logical_type();
- if (la && la->is_valid() && !la->is_none()) {
- stream << " (" << la->ToString() << ")";
- } else if (lt == ConvertedType::DECIMAL) {
- stream << " (" << ConvertedTypeToString(lt) << "("
- << node->decimal_metadata().precision << "," << node->decimal_metadata().scale
- << "))";
- } else if (lt != ConvertedType::NONE) {
- stream << " (" << ConvertedTypeToString(lt) << ")";
- }
-}
-
-struct SchemaPrinter : public Node::ConstVisitor {
- explicit SchemaPrinter(std::ostream& stream, int indent_width)
-      : stream_(stream), indent_(0), indent_width_(indent_width) {}
-
- void Indent() {
- if (indent_ > 0) {
- std::string spaces(indent_, ' ');
- stream_ << spaces;
- }
- }
-
- void Visit(const Node* node) {
- Indent();
- if (node->is_group()) {
- Visit(static_cast<const GroupNode*>(node));
- } else {
- // Primitive
- Visit(static_cast<const PrimitiveNode*>(node));
- }
- }
-
- void Visit(const PrimitiveNode* node) {
- PrintRepLevel(node->repetition(), stream_);
- stream_ << " ";
- PrintType(node, stream_);
- stream_ << " field_id=" << node->field_id() << " " << node->name();
- PrintConvertedType(node, stream_);
- stream_ << ";" << std::endl;
- }
-
- void Visit(const GroupNode* node) {
- PrintRepLevel(node->repetition(), stream_);
- stream_ << " group "
- << "field_id=" << node->field_id() << " " << node->name();
- auto lt = node->converted_type();
- auto la = node->logical_type();
- if (la && la->is_valid() && !la->is_none()) {
- stream_ << " (" << la->ToString() << ")";
- } else if (lt != ConvertedType::NONE) {
- stream_ << " (" << ConvertedTypeToString(lt) << ")";
- }
- stream_ << " {" << std::endl;
-
- indent_ += indent_width_;
- for (int i = 0; i < node->field_count(); ++i) {
- node->field(i)->VisitConst(this);
- }
- indent_ -= indent_width_;
- Indent();
- stream_ << "}" << std::endl;
- }
-
- std::ostream& stream_;
- int indent_;
- int indent_width_;
-};
-
-void PrintSchema(const Node* schema, std::ostream& stream, int indent_width) {
- SchemaPrinter printer(stream, indent_width);
- printer.Visit(schema);
-}
-
-} // namespace schema
-
-using schema::ColumnPath;
-using schema::GroupNode;
-using schema::Node;
-using schema::NodePtr;
-using schema::PrimitiveNode;
-
-void SchemaDescriptor::Init(std::unique_ptr<schema::Node> schema) {
- Init(NodePtr(schema.release()));
-}
-
-class SchemaUpdater : public Node::Visitor {
- public:
- explicit SchemaUpdater(const std::vector<ColumnOrder>& column_orders)
- : column_orders_(column_orders), leaf_count_(0) {}
-
- void Visit(Node* node) override {
- if (node->is_group()) {
- GroupNode* group_node = static_cast<GroupNode*>(node);
- for (int i = 0; i < group_node->field_count(); ++i) {
- group_node->field(i)->Visit(this);
- }
- } else { // leaf node
- PrimitiveNode* leaf_node = static_cast<PrimitiveNode*>(node);
- leaf_node->SetColumnOrder(column_orders_[leaf_count_++]);
- }
- }
-
- private:
- const std::vector<ColumnOrder>& column_orders_;
- int leaf_count_;
-};
-
-void SchemaDescriptor::updateColumnOrders(const std::vector<ColumnOrder>& column_orders) {
- if (static_cast<int>(column_orders.size()) != num_columns()) {
- throw ParquetException("Malformed schema: not enough ColumnOrder values");
- }
- SchemaUpdater visitor(column_orders);
- const_cast<GroupNode*>(group_node_)->Visit(&visitor);
-}
-
-void SchemaDescriptor::Init(NodePtr schema) {
- schema_ = std::move(schema);
-
- if (!schema_->is_group()) {
- throw ParquetException("Must initialize with a schema group");
- }
-
- group_node_ = static_cast<const GroupNode*>(schema_.get());
- leaves_.clear();
-
- for (int i = 0; i < group_node_->field_count(); ++i) {
- BuildTree(group_node_->field(i), 0, 0, group_node_->field(i));
- }
-}
-
-bool SchemaDescriptor::Equals(const SchemaDescriptor& other) const {
- if (this->num_columns() != other.num_columns()) {
- return false;
- }
-
- for (int i = 0; i < this->num_columns(); ++i) {
- if (!this->Column(i)->Equals(*other.Column(i))) {
- return false;
- }
- }
-
- return true;
-}
-
-void SchemaDescriptor::BuildTree(const NodePtr& node, int16_t max_def_level,
- int16_t max_rep_level, const NodePtr& base) {
- if (node->is_optional()) {
- ++max_def_level;
- } else if (node->is_repeated()) {
- // Repeated fields add a definition level. This is used to distinguish
- // between an empty list and a list with an item in it.
- ++max_rep_level;
- ++max_def_level;
- }
-
- // Now, walk the schema and create a ColumnDescriptor for each leaf node
- if (node->is_group()) {
- const GroupNode* group = static_cast<const GroupNode*>(node.get());
- for (int i = 0; i < group->field_count(); ++i) {
- BuildTree(group->field(i), max_def_level, max_rep_level, base);
- }
- } else {
- node_to_leaf_index_[static_cast<const PrimitiveNode*>(node.get())] =
- static_cast<int>(leaves_.size());
-
- // Primitive node, append to leaves
- leaves_.push_back(ColumnDescriptor(node, max_def_level, max_rep_level, this));
- leaf_to_base_.emplace(static_cast<int>(leaves_.size()) - 1, base);
- leaf_to_idx_.emplace(node->path()->ToDotString(),
- static_cast<int>(leaves_.size()) - 1);
- }
-}
-
-int SchemaDescriptor::GetColumnIndex(const PrimitiveNode& node) const {
- auto it = node_to_leaf_index_.find(&node);
- if (it == node_to_leaf_index_.end()) {
- return -1;
- }
- return it->second;
-}
-
-ColumnDescriptor::ColumnDescriptor(schema::NodePtr node, int16_t max_definition_level,
- int16_t max_repetition_level,
- const SchemaDescriptor* schema_descr)
- : node_(std::move(node)),
- max_definition_level_(max_definition_level),
- max_repetition_level_(max_repetition_level) {
- if (!node_->is_primitive()) {
- throw ParquetException("Must be a primitive type");
- }
- primitive_node_ = static_cast<const PrimitiveNode*>(node_.get());
-}
-
-bool ColumnDescriptor::Equals(const ColumnDescriptor& other) const {
- return primitive_node_->Equals(other.primitive_node_) &&
- max_repetition_level() == other.max_repetition_level() &&
- max_definition_level() == other.max_definition_level();
-}
-
-const ColumnDescriptor* SchemaDescriptor::Column(int i) const {
- DCHECK(i >= 0 && i < static_cast<int>(leaves_.size()));
- return &leaves_[i];
-}
-
-int SchemaDescriptor::ColumnIndex(const std::string& node_path) const {
- auto search = leaf_to_idx_.find(node_path);
- if (search == leaf_to_idx_.end()) {
- // Not found
- return -1;
- }
- return search->second;
-}
-
-int SchemaDescriptor::ColumnIndex(const Node& node) const {
- auto search = leaf_to_idx_.equal_range(node.path()->ToDotString());
- for (auto it = search.first; it != search.second; ++it) {
- const int idx = it->second;
- if (&node == Column(idx)->schema_node().get()) {
- return idx;
- }
- }
- return -1;
-}
-
-const schema::Node* SchemaDescriptor::GetColumnRoot(int i) const {
- DCHECK(i >= 0 && i < static_cast<int>(leaves_.size()));
- return leaf_to_base_.find(i)->second.get();
-}
-
-bool SchemaDescriptor::HasRepeatedFields() const {
- return group_node_->HasRepeatedFields();
-}
-
-std::string SchemaDescriptor::ToString() const {
- std::ostringstream ss;
- PrintSchema(schema_.get(), ss);
- return ss.str();
-}
-
-std::string ColumnDescriptor::ToString() const {
- std::ostringstream ss;
- ss << "column descriptor = {" << std::endl
- << " name: " << name() << "," << std::endl
- << " path: " << path()->ToDotString() << "," << std::endl
- << " physical_type: " << TypeToString(physical_type()) << "," << std::endl
- << " converted_type: " << ConvertedTypeToString(converted_type()) << ","
- << std::endl
- << " logical_type: " << logical_type()->ToString() << "," << std::endl
- << " max_definition_level: " << max_definition_level() << "," << std::endl
- << " max_repetition_level: " << max_repetition_level() << "," << std::endl;
-
- if (physical_type() == ::parquet::Type::FIXED_LEN_BYTE_ARRAY) {
- ss << " length: " << type_length() << "," << std::endl;
- }
-
- if (converted_type() == parquet::ConvertedType::DECIMAL) {
- ss << " precision: " << type_precision() << "," << std::endl
- << " scale: " << type_scale() << "," << std::endl;
- }
-
- ss << "}";
- return ss.str();
-}
-
-int ColumnDescriptor::type_scale() const {
- return primitive_node_->decimal_metadata().scale;
-}
-
-int ColumnDescriptor::type_precision() const {
- return primitive_node_->decimal_metadata().precision;
-}
-
-int ColumnDescriptor::type_length() const { return primitive_node_->type_length(); }
-
-const std::shared_ptr<ColumnPath> ColumnDescriptor::path() const {
- return primitive_node_->path();
-}
-
-} // namespace parquet
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "parquet/schema.h"
+
+#include <algorithm>
+#include <cstring>
+#include <memory>
+#include <string>
+#include <type_traits>
+#include <utility>
+
+#include "arrow/util/logging.h"
+#include "parquet/exception.h"
+#include "parquet/schema_internal.h"
+#include "parquet/thrift_internal.h"
+
+using parquet::format::SchemaElement;
+
+namespace parquet {
+
+namespace schema {
+
+namespace {
+
+void ThrowInvalidLogicalType(const LogicalType& logical_type) {
+ std::stringstream ss;
+ ss << "Invalid logical type: " << logical_type.ToString();
+ throw ParquetException(ss.str());
+}
+
+} // namespace
+
+// ----------------------------------------------------------------------
+// ColumnPath
+
+std::shared_ptr<ColumnPath> ColumnPath::FromDotString(const std::string& dotstring) {
+ std::stringstream ss(dotstring);
+ std::string item;
+ std::vector<std::string> path;
+ while (std::getline(ss, item, '.')) {
+ path.push_back(item);
+ }
+ return std::make_shared<ColumnPath>(std::move(path));
+}
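+
+// Illustrative behavior (sketch):
+//   auto p = ColumnPath::FromDotString("a.b.c");
+//   p->ToDotVector();  // {"a", "b", "c"}
+//   p->ToDotString();  // "a.b.c" (round-trips)
+// An empty dotstring yields an empty path; interior empty segments ("a..b")
+// are preserved as empty strings by std::getline.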
+
+std::shared_ptr<ColumnPath> ColumnPath::FromNode(const Node& node) {
+ // Build the path in reverse order as we traverse the nodes to the top
+ std::vector<std::string> rpath_;
+ const Node* cursor = &node;
+ // The schema node is not part of the ColumnPath
+ while (cursor->parent()) {
+ rpath_.push_back(cursor->name());
+ cursor = cursor->parent();
+ }
+
+ // Build ColumnPath in correct order
+ std::vector<std::string> path(rpath_.crbegin(), rpath_.crend());
+ return std::make_shared<ColumnPath>(std::move(path));
+}
+
+std::shared_ptr<ColumnPath> ColumnPath::extend(const std::string& node_name) const {
+  std::vector<std::string> path;
+  path.reserve(path_.size() + 1);
+  path.assign(path_.cbegin(), path_.cend());
+  path.push_back(node_name);
+
+ return std::make_shared<ColumnPath>(std::move(path));
+}
+
+std::string ColumnPath::ToDotString() const {
+ std::stringstream ss;
+ for (auto it = path_.cbegin(); it != path_.cend(); ++it) {
+ if (it != path_.cbegin()) {
+ ss << ".";
+ }
+ ss << *it;
+ }
+ return ss.str();
+}
+
+const std::vector<std::string>& ColumnPath::ToDotVector() const { return path_; }
+
+// ----------------------------------------------------------------------
+// Base node
+
+const std::shared_ptr<ColumnPath> Node::path() const {
+ // TODO(itaiin): Cache the result, or more precisely, cache ->ToDotString()
+ // since it is being used to access the leaf nodes
+ return ColumnPath::FromNode(*this);
+}
+
+bool Node::EqualsInternal(const Node* other) const {
+ return type_ == other->type_ && name_ == other->name_ &&
+ repetition_ == other->repetition_ && converted_type_ == other->converted_type_ &&
+ field_id_ == other->field_id() &&
+ logical_type_->Equals(*(other->logical_type()));
+}
+
+void Node::SetParent(const Node* parent) { parent_ = parent; }
+
+// ----------------------------------------------------------------------
+// Primitive node
+
+PrimitiveNode::PrimitiveNode(const std::string& name, Repetition::type repetition,
+ Type::type type, ConvertedType::type converted_type,
+ int length, int precision, int scale, int id)
+ : Node(Node::PRIMITIVE, name, repetition, converted_type, id),
+ physical_type_(type),
+ type_length_(length) {
+ std::stringstream ss;
+
+ // PARQUET-842: In an earlier revision, decimal_metadata_.isset was being
+  // set to true, but Impala would raise an "incompatible metadata" error in such cases.
+ memset(&decimal_metadata_, 0, sizeof(decimal_metadata_));
+
+  // Check that the physical type and converted type are compatible.
+  // Mapping taken from Apache parquet-mr as of 2016-02-22.
+ switch (converted_type) {
+ case ConvertedType::NONE:
+ // Logical type not set
+ break;
+ case ConvertedType::UTF8:
+ case ConvertedType::JSON:
+ case ConvertedType::BSON:
+ if (type != Type::BYTE_ARRAY) {
+ ss << ConvertedTypeToString(converted_type);
+ ss << " can only annotate BYTE_ARRAY fields";
+ throw ParquetException(ss.str());
+ }
+ break;
+ case ConvertedType::DECIMAL:
+ if ((type != Type::INT32) && (type != Type::INT64) && (type != Type::BYTE_ARRAY) &&
+ (type != Type::FIXED_LEN_BYTE_ARRAY)) {
+ ss << "DECIMAL can only annotate INT32, INT64, BYTE_ARRAY, and FIXED";
+ throw ParquetException(ss.str());
+ }
+ if (precision <= 0) {
+ ss << "Invalid DECIMAL precision: " << precision
+ << ". Precision must be a number between 1 and 38 inclusive";
+ throw ParquetException(ss.str());
+ }
+ if (scale < 0) {
+ ss << "Invalid DECIMAL scale: " << scale
+ << ". Scale must be a number between 0 and precision inclusive";
+ throw ParquetException(ss.str());
+ }
+ if (scale > precision) {
+ ss << "Invalid DECIMAL scale " << scale;
+ ss << " cannot be greater than precision " << precision;
+ throw ParquetException(ss.str());
+ }
+ decimal_metadata_.isset = true;
+ decimal_metadata_.precision = precision;
+ decimal_metadata_.scale = scale;
+ break;
+ case ConvertedType::DATE:
+ case ConvertedType::TIME_MILLIS:
+ case ConvertedType::UINT_8:
+ case ConvertedType::UINT_16:
+ case ConvertedType::UINT_32:
+ case ConvertedType::INT_8:
+ case ConvertedType::INT_16:
+ case ConvertedType::INT_32:
+ if (type != Type::INT32) {
+ ss << ConvertedTypeToString(converted_type);
+ ss << " can only annotate INT32";
+ throw ParquetException(ss.str());
+ }
+ break;
+ case ConvertedType::TIME_MICROS:
+ case ConvertedType::TIMESTAMP_MILLIS:
+ case ConvertedType::TIMESTAMP_MICROS:
+ case ConvertedType::UINT_64:
+ case ConvertedType::INT_64:
+ if (type != Type::INT64) {
+ ss << ConvertedTypeToString(converted_type);
+ ss << " can only annotate INT64";
+ throw ParquetException(ss.str());
+ }
+ break;
+ case ConvertedType::INTERVAL:
+ if ((type != Type::FIXED_LEN_BYTE_ARRAY) || (length != 12)) {
+ ss << "INTERVAL can only annotate FIXED_LEN_BYTE_ARRAY(12)";
+ throw ParquetException(ss.str());
+ }
+ break;
+ case ConvertedType::ENUM:
+ if (type != Type::BYTE_ARRAY) {
+ ss << "ENUM can only annotate BYTE_ARRAY fields";
+ throw ParquetException(ss.str());
+ }
+ break;
+ case ConvertedType::NA:
+ // NA can annotate any type
+ break;
+ default:
+ ss << ConvertedTypeToString(converted_type);
+ ss << " cannot be applied to a primitive type";
+ throw ParquetException(ss.str());
+ }
+ // For forward compatibility, create an equivalent logical type
+ logical_type_ = LogicalType::FromConvertedType(converted_type_, decimal_metadata_);
+ if (!(logical_type_ && !logical_type_->is_nested() &&
+ logical_type_->is_compatible(converted_type_, decimal_metadata_))) {
+ ThrowInvalidLogicalType(*logical_type_);
+ }
+
+ if (type == Type::FIXED_LEN_BYTE_ARRAY) {
+ if (length <= 0) {
+ ss << "Invalid FIXED_LEN_BYTE_ARRAY length: " << length;
+ throw ParquetException(ss.str());
+ }
+ type_length_ = length;
+ }
+}
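+
+// Example of the checks above (sketch, via the PrimitiveNode::Make factory
+// declared in schema.h): a DECIMAL annotation is accepted only on the listed
+// physical types and only with a valid precision/scale pair, so
+//
+//   PrimitiveNode::Make("price", Repetition::REQUIRED, Type::INT64,
+//                       ConvertedType::DECIMAL, /*length=*/-1,
+//                       /*precision=*/10, /*scale=*/2);
+//
+// succeeds, while the same call with Type::BOOLEAN (or with scale > precision)
+// throws ParquetException.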
+
+PrimitiveNode::PrimitiveNode(const std::string& name, Repetition::type repetition,
+ std::shared_ptr<const LogicalType> logical_type,
+ Type::type physical_type, int physical_length, int id)
+ : Node(Node::PRIMITIVE, name, repetition, std::move(logical_type), id),
+ physical_type_(physical_type),
+ type_length_(physical_length) {
+ std::stringstream error;
+ if (logical_type_) {
+ // Check for logical type <=> node type consistency
+ if (!logical_type_->is_nested()) {
+ // Check for logical type <=> physical type consistency
+ if (logical_type_->is_applicable(physical_type, physical_length)) {
+ // For backward compatibility, assign equivalent legacy
+ // converted type (if possible)
+ converted_type_ = logical_type_->ToConvertedType(&decimal_metadata_);
+ } else {
+ error << logical_type_->ToString();
+ error << " can not be applied to primitive type ";
+ error << TypeToString(physical_type);
+ throw ParquetException(error.str());
+ }
+ } else {
+ error << "Nested logical type ";
+ error << logical_type_->ToString();
+ error << " can not be applied to non-group node";
+ throw ParquetException(error.str());
+ }
+ } else {
+ logical_type_ = NoLogicalType::Make();
+ converted_type_ = logical_type_->ToConvertedType(&decimal_metadata_);
+ }
+ if (!(logical_type_ && !logical_type_->is_nested() &&
+ logical_type_->is_compatible(converted_type_, decimal_metadata_))) {
+ ThrowInvalidLogicalType(*logical_type_);
+ }
+
+ if (physical_type == Type::FIXED_LEN_BYTE_ARRAY) {
+ if (physical_length <= 0) {
+ error << "Invalid FIXED_LEN_BYTE_ARRAY length: " << physical_length;
+ throw ParquetException(error.str());
+ }
+ }
+}
+
+bool PrimitiveNode::EqualsInternal(const PrimitiveNode* other) const {
+ bool is_equal = true;
+ if (physical_type_ != other->physical_type_) {
+ return false;
+ }
+ if (converted_type_ == ConvertedType::DECIMAL) {
+ is_equal &= (decimal_metadata_.precision == other->decimal_metadata_.precision) &&
+ (decimal_metadata_.scale == other->decimal_metadata_.scale);
+ }
+ if (physical_type_ == Type::FIXED_LEN_BYTE_ARRAY) {
+ is_equal &= (type_length_ == other->type_length_);
+ }
+ return is_equal;
+}
+
+bool PrimitiveNode::Equals(const Node* other) const {
+ if (!Node::EqualsInternal(other)) {
+ return false;
+ }
+ return EqualsInternal(static_cast<const PrimitiveNode*>(other));
+}
+
+void PrimitiveNode::Visit(Node::Visitor* visitor) { visitor->Visit(this); }
+
+void PrimitiveNode::VisitConst(Node::ConstVisitor* visitor) const {
+ visitor->Visit(this);
+}
+
+// ----------------------------------------------------------------------
+// Group node
+
+GroupNode::GroupNode(const std::string& name, Repetition::type repetition,
+ const NodeVector& fields, ConvertedType::type converted_type, int id)
+ : Node(Node::GROUP, name, repetition, converted_type, id), fields_(fields) {
+ // For forward compatibility, create an equivalent logical type
+ logical_type_ = LogicalType::FromConvertedType(converted_type_);
+ if (!(logical_type_ && (logical_type_->is_nested() || logical_type_->is_none()) &&
+ logical_type_->is_compatible(converted_type_))) {
+ ThrowInvalidLogicalType(*logical_type_);
+ }
+
+ field_name_to_idx_.clear();
+ auto field_idx = 0;
+ for (NodePtr& field : fields_) {
+ field->SetParent(this);
+ field_name_to_idx_.emplace(field->name(), field_idx++);
+ }
+}
+
+GroupNode::GroupNode(const std::string& name, Repetition::type repetition,
+ const NodeVector& fields,
+ std::shared_ptr<const LogicalType> logical_type, int id)
+ : Node(Node::GROUP, name, repetition, std::move(logical_type), id), fields_(fields) {
+ if (logical_type_) {
+ // Check for logical type <=> node type consistency
+ if (logical_type_->is_nested()) {
+ // For backward compatibility, assign equivalent legacy converted type (if possible)
+ converted_type_ = logical_type_->ToConvertedType(nullptr);
+ } else {
+ std::stringstream error;
+ error << "Logical type ";
+ error << logical_type_->ToString();
+ error << " can not be applied to group node";
+ throw ParquetException(error.str());
+ }
+ } else {
+ logical_type_ = NoLogicalType::Make();
+ converted_type_ = logical_type_->ToConvertedType(nullptr);
+ }
+ if (!(logical_type_ && (logical_type_->is_nested() || logical_type_->is_none()) &&
+ logical_type_->is_compatible(converted_type_))) {
+ ThrowInvalidLogicalType(*logical_type_);
+ }
+
+ field_name_to_idx_.clear();
+ auto field_idx = 0;
+ for (NodePtr& field : fields_) {
+ field->SetParent(this);
+ field_name_to_idx_.emplace(field->name(), field_idx++);
+ }
+}
+
+bool GroupNode::EqualsInternal(const GroupNode* other) const {
+ if (this == other) {
+ return true;
+ }
+ if (this->field_count() != other->field_count()) {
+ return false;
+ }
+ for (int i = 0; i < this->field_count(); ++i) {
+ if (!this->field(i)->Equals(other->field(i).get())) {
+ return false;
+ }
+ }
+ return true;
+}
+
+bool GroupNode::Equals(const Node* other) const {
+ if (!Node::EqualsInternal(other)) {
+ return false;
+ }
+ return EqualsInternal(static_cast<const GroupNode*>(other));
+}
+
+int GroupNode::FieldIndex(const std::string& name) const {
+ auto search = field_name_to_idx_.find(name);
+ if (search == field_name_to_idx_.end()) {
+ // Not found
+ return -1;
+ }
+ return search->second;
+}
+
+int GroupNode::FieldIndex(const Node& node) const {
+ auto search = field_name_to_idx_.equal_range(node.name());
+ for (auto it = search.first; it != search.second; ++it) {
+ const int idx = it->second;
+ if (&node == field(idx).get()) {
+ return idx;
+ }
+ }
+ return -1;
+}
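+
+// Note on duplicate names (sketch): field_name_to_idx_ behaves as a multimap
+// (hence the equal_range above), so a group may contain several fields with
+// the same name. This overload disambiguates by node identity, e.g.
+//   group->FieldIndex(*group->field(1)) == 1
+// even when field(0) and field(1) share a name, whereas the string overload
+// returns a single matching index.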
+
+void GroupNode::Visit(Node::Visitor* visitor) { visitor->Visit(this); }
+
+void GroupNode::VisitConst(Node::ConstVisitor* visitor) const { visitor->Visit(this); }
+
+// ----------------------------------------------------------------------
+// Node construction from Parquet metadata
+
+std::unique_ptr<Node> GroupNode::FromParquet(const void* opaque_element,
+ NodeVector fields) {
+ const format::SchemaElement* element =
+ static_cast<const format::SchemaElement*>(opaque_element);
+
+ int field_id = -1;
+ if (element->__isset.field_id) {
+ field_id = element->field_id;
+ }
+
+ std::unique_ptr<GroupNode> group_node;
+ if (element->__isset.logicalType) {
+ // updated writer with logical type present
+ group_node = std::unique_ptr<GroupNode>(
+ new GroupNode(element->name, LoadEnumSafe(&element->repetition_type), fields,
+ LogicalType::FromThrift(element->logicalType), field_id));
+ } else {
+ group_node = std::unique_ptr<GroupNode>(new GroupNode(
+ element->name, LoadEnumSafe(&element->repetition_type), fields,
+ (element->__isset.converted_type ? LoadEnumSafe(&element->converted_type)
+ : ConvertedType::NONE),
+ field_id));
+ }
+
+ return std::unique_ptr<Node>(group_node.release());
+}
+
+std::unique_ptr<Node> PrimitiveNode::FromParquet(const void* opaque_element) {
+ const format::SchemaElement* element =
+ static_cast<const format::SchemaElement*>(opaque_element);
+
+ int field_id = -1;
+ if (element->__isset.field_id) {
+ field_id = element->field_id;
+ }
+
+ std::unique_ptr<PrimitiveNode> primitive_node;
+ if (element->__isset.logicalType) {
+ // updated writer with logical type present
+ primitive_node = std::unique_ptr<PrimitiveNode>(
+ new PrimitiveNode(element->name, LoadEnumSafe(&element->repetition_type),
+ LogicalType::FromThrift(element->logicalType),
+ LoadEnumSafe(&element->type), element->type_length, field_id));
+ } else if (element->__isset.converted_type) {
+ // legacy writer with converted type present
+ primitive_node = std::unique_ptr<PrimitiveNode>(new PrimitiveNode(
+ element->name, LoadEnumSafe(&element->repetition_type),
+ LoadEnumSafe(&element->type), LoadEnumSafe(&element->converted_type),
+ element->type_length, element->precision, element->scale, field_id));
+ } else {
+ // logical type not present
+ primitive_node = std::unique_ptr<PrimitiveNode>(new PrimitiveNode(
+ element->name, LoadEnumSafe(&element->repetition_type), NoLogicalType::Make(),
+ LoadEnumSafe(&element->type), element->type_length, field_id));
+ }
+
+ // Return as unique_ptr to the base type
+ return std::unique_ptr<Node>(primitive_node.release());
+}
+
+bool GroupNode::HasRepeatedFields() const {
+ for (int i = 0; i < this->field_count(); ++i) {
+ auto field = this->field(i);
+ if (field->repetition() == Repetition::REPEATED) {
+ return true;
+ }
+    if (field->is_group()) {
+      const auto& group = static_cast<const GroupNode&>(*field);
+      // Recurse, but keep scanning the remaining sibling fields when the
+      // subgroup has no repeated fields.
+      if (group.HasRepeatedFields()) {
+        return true;
+      }
+    }
+ }
+ return false;
+}
+
+void GroupNode::ToParquet(void* opaque_element) const {
+ format::SchemaElement* element = static_cast<format::SchemaElement*>(opaque_element);
+ element->__set_name(name_);
+ element->__set_num_children(field_count());
+ element->__set_repetition_type(ToThrift(repetition_));
+ if (converted_type_ != ConvertedType::NONE) {
+ element->__set_converted_type(ToThrift(converted_type_));
+ }
+ if (field_id_ >= 0) {
+ element->__set_field_id(field_id_);
+ }
+ if (logical_type_ && logical_type_->is_serialized()) {
+ element->__set_logicalType(logical_type_->ToThrift());
+ }
+ return;
+}
+
+void PrimitiveNode::ToParquet(void* opaque_element) const {
+ format::SchemaElement* element = static_cast<format::SchemaElement*>(opaque_element);
+ element->__set_name(name_);
+ element->__set_repetition_type(ToThrift(repetition_));
+ if (converted_type_ != ConvertedType::NONE) {
+ if (converted_type_ != ConvertedType::NA) {
+ element->__set_converted_type(ToThrift(converted_type_));
+ } else {
+ // ConvertedType::NA is an unreleased, obsolete synonym for LogicalType::Null.
+ // Never emit it (see PARQUET-1990 for discussion).
+ if (!logical_type_ || !logical_type_->is_null()) {
+ throw ParquetException(
+ "ConvertedType::NA is obsolete, please use LogicalType::Null instead");
+ }
+ }
+ }
+ if (field_id_ >= 0) {
+ element->__set_field_id(field_id_);
+ }
+ if (logical_type_ && logical_type_->is_serialized() &&
+ // TODO(tpboudreau): remove the following conjunct to enable serialization
+ // of IntervalTypes after parquet.thrift recognizes them
+ !logical_type_->is_interval()) {
+ element->__set_logicalType(logical_type_->ToThrift());
+ }
+ element->__set_type(ToThrift(physical_type_));
+ if (physical_type_ == Type::FIXED_LEN_BYTE_ARRAY) {
+ element->__set_type_length(type_length_);
+ }
+ if (decimal_metadata_.isset) {
+ element->__set_precision(decimal_metadata_.precision);
+ element->__set_scale(decimal_metadata_.scale);
+ }
+ return;
+}
+
+// ----------------------------------------------------------------------
+// Schema converters
+
+std::unique_ptr<Node> Unflatten(const format::SchemaElement* elements, int length) {
+ if (elements[0].num_children == 0) {
+ if (length == 1) {
+ // Degenerate case of Parquet file with no columns
+ return GroupNode::FromParquet(elements, {});
+ } else {
+ throw ParquetException(
+ "Parquet schema had multiple nodes but root had no children");
+ }
+ }
+
+ // We don't check that the root node is repeated since this is not
+ // consistently set by implementations
+
+ int pos = 0;
+
+ std::function<std::unique_ptr<Node>()> NextNode = [&]() {
+ if (pos == length) {
+ throw ParquetException("Malformed schema: not enough elements");
+ }
+ const SchemaElement& element = elements[pos++];
+ const void* opaque_element = static_cast<const void*>(&element);
+
+ if (element.num_children == 0 && element.__isset.type) {
+ // Leaf (primitive) node: always has a type
+ return PrimitiveNode::FromParquet(opaque_element);
+ } else {
+ // Group node (may have 0 children, but cannot have a type)
+ NodeVector fields;
+ for (int i = 0; i < element.num_children; ++i) {
+ std::unique_ptr<Node> field = NextNode();
+ fields.push_back(NodePtr(field.release()));
+ }
+ return GroupNode::FromParquet(opaque_element, std::move(fields));
+ }
+ };
+ return NextNode();
+}
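+
+// Worked example (sketch): the flattened element sequence
+//
+//   {name: "schema", num_children: 2}
+//   {name: "id",     type: INT64}
+//   {name: "tags",   repetition: REPEATED, num_children: 1}
+//   {name: "tag",    type: BYTE_ARRAY}
+//
+// is consumed depth-first: "schema" claims the next two nodes, and "tags"
+// (a group, since it has children and no type) claims "tag", reconstructing
+// schema -> { id, tags -> { tag } }.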
+
+std::shared_ptr<SchemaDescriptor> FromParquet(const std::vector<SchemaElement>& schema) {
+ if (schema.empty()) {
+ throw ParquetException("Empty file schema (no root)");
+ }
+ std::unique_ptr<Node> root = Unflatten(&schema[0], static_cast<int>(schema.size()));
+ std::shared_ptr<SchemaDescriptor> descr = std::make_shared<SchemaDescriptor>();
+ descr->Init(std::shared_ptr<GroupNode>(static_cast<GroupNode*>(root.release())));
+ return descr;
+}
+
+class SchemaVisitor : public Node::ConstVisitor {
+ public:
+ explicit SchemaVisitor(std::vector<format::SchemaElement>* elements)
+ : elements_(elements) {}
+
+ void Visit(const Node* node) override {
+ format::SchemaElement element;
+ node->ToParquet(&element);
+ elements_->push_back(element);
+
+ if (node->is_group()) {
+ const GroupNode* group_node = static_cast<const GroupNode*>(node);
+ for (int i = 0; i < group_node->field_count(); ++i) {
+ group_node->field(i)->VisitConst(this);
+ }
+ }
+ }
+
+ private:
+ std::vector<format::SchemaElement>* elements_;
+};
+
+void ToParquet(const GroupNode* schema, std::vector<format::SchemaElement>* out) {
+ SchemaVisitor visitor(out);
+ schema->VisitConst(&visitor);
+}
+
+// ----------------------------------------------------------------------
+// Schema printing
+
+static void PrintRepLevel(Repetition::type repetition, std::ostream& stream) {
+ switch (repetition) {
+ case Repetition::REQUIRED:
+ stream << "required";
+ break;
+ case Repetition::OPTIONAL:
+ stream << "optional";
+ break;
+ case Repetition::REPEATED:
+ stream << "repeated";
+ break;
+ default:
+ break;
+ }
+}
+
+static void PrintType(const PrimitiveNode* node, std::ostream& stream) {
+ switch (node->physical_type()) {
+ case Type::BOOLEAN:
+ stream << "boolean";
+ break;
+ case Type::INT32:
+ stream << "int32";
+ break;
+ case Type::INT64:
+ stream << "int64";
+ break;
+ case Type::INT96:
+ stream << "int96";
+ break;
+ case Type::FLOAT:
+ stream << "float";
+ break;
+ case Type::DOUBLE:
+ stream << "double";
+ break;
+ case Type::BYTE_ARRAY:
+ stream << "binary";
+ break;
+ case Type::FIXED_LEN_BYTE_ARRAY:
+ stream << "fixed_len_byte_array(" << node->type_length() << ")";
+ break;
+ default:
+ break;
+ }
+}
+
+static void PrintConvertedType(const PrimitiveNode* node, std::ostream& stream) {
+ auto lt = node->converted_type();
+ auto la = node->logical_type();
+ if (la && la->is_valid() && !la->is_none()) {
+ stream << " (" << la->ToString() << ")";
+ } else if (lt == ConvertedType::DECIMAL) {
+ stream << " (" << ConvertedTypeToString(lt) << "("
+ << node->decimal_metadata().precision << "," << node->decimal_metadata().scale
+ << "))";
+ } else if (lt != ConvertedType::NONE) {
+ stream << " (" << ConvertedTypeToString(lt) << ")";
+ }
+}
+
+struct SchemaPrinter : public Node::ConstVisitor {
+ explicit SchemaPrinter(std::ostream& stream, int indent_width)
+      : stream_(stream), indent_(0), indent_width_(indent_width) {}
+
+ void Indent() {
+ if (indent_ > 0) {
+ std::string spaces(indent_, ' ');
+ stream_ << spaces;
+ }
+ }
+
+ void Visit(const Node* node) {
+ Indent();
+ if (node->is_group()) {
+ Visit(static_cast<const GroupNode*>(node));
+ } else {
+ // Primitive
+ Visit(static_cast<const PrimitiveNode*>(node));
+ }
+ }
+
+ void Visit(const PrimitiveNode* node) {
+ PrintRepLevel(node->repetition(), stream_);
+ stream_ << " ";
+ PrintType(node, stream_);
+ stream_ << " field_id=" << node->field_id() << " " << node->name();
+ PrintConvertedType(node, stream_);
+ stream_ << ";" << std::endl;
+ }
+
+ void Visit(const GroupNode* node) {
+ PrintRepLevel(node->repetition(), stream_);
+ stream_ << " group "
+ << "field_id=" << node->field_id() << " " << node->name();
+ auto lt = node->converted_type();
+ auto la = node->logical_type();
+ if (la && la->is_valid() && !la->is_none()) {
+ stream_ << " (" << la->ToString() << ")";
+ } else if (lt != ConvertedType::NONE) {
+ stream_ << " (" << ConvertedTypeToString(lt) << ")";
+ }
+ stream_ << " {" << std::endl;
+
+ indent_ += indent_width_;
+ for (int i = 0; i < node->field_count(); ++i) {
+ node->field(i)->VisitConst(this);
+ }
+ indent_ -= indent_width_;
+ Indent();
+ stream_ << "}" << std::endl;
+ }
+
+ std::ostream& stream_;
+ int indent_;
+ int indent_width_;
+};
+
+void PrintSchema(const Node* schema, std::ostream& stream, int indent_width) {
+ SchemaPrinter printer(stream, indent_width);
+ printer.Visit(schema);
+}
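+
+// For the small schema sketched above Unflatten, PrintSchema emits output of
+// the form (field_id is -1 when the source metadata carried none):
+//
+//   required group field_id=-1 schema {
+//     required int64 field_id=-1 id;
+//     repeated group field_id=-1 tags {
+//       required binary field_id=-1 tag;
+//     }
+//   }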
+
+} // namespace schema
+
+using schema::ColumnPath;
+using schema::GroupNode;
+using schema::Node;
+using schema::NodePtr;
+using schema::PrimitiveNode;
+
+void SchemaDescriptor::Init(std::unique_ptr<schema::Node> schema) {
+ Init(NodePtr(schema.release()));
+}
+
+class SchemaUpdater : public Node::Visitor {
+ public:
+ explicit SchemaUpdater(const std::vector<ColumnOrder>& column_orders)
+ : column_orders_(column_orders), leaf_count_(0) {}
+
+ void Visit(Node* node) override {
+ if (node->is_group()) {
+ GroupNode* group_node = static_cast<GroupNode*>(node);
+ for (int i = 0; i < group_node->field_count(); ++i) {
+ group_node->field(i)->Visit(this);
+ }
+ } else { // leaf node
+ PrimitiveNode* leaf_node = static_cast<PrimitiveNode*>(node);
+ leaf_node->SetColumnOrder(column_orders_[leaf_count_++]);
+ }
+ }
+
+ private:
+ const std::vector<ColumnOrder>& column_orders_;
+ int leaf_count_;
+};
+
+void SchemaDescriptor::updateColumnOrders(const std::vector<ColumnOrder>& column_orders) {
+ if (static_cast<int>(column_orders.size()) != num_columns()) {
+ throw ParquetException("Malformed schema: not enough ColumnOrder values");
+ }
+ SchemaUpdater visitor(column_orders);
+ const_cast<GroupNode*>(group_node_)->Visit(&visitor);
+}
+
+void SchemaDescriptor::Init(NodePtr schema) {
+ schema_ = std::move(schema);
+
+ if (!schema_->is_group()) {
+ throw ParquetException("Must initialize with a schema group");
+ }
+
+ group_node_ = static_cast<const GroupNode*>(schema_.get());
+ leaves_.clear();
+
+ for (int i = 0; i < group_node_->field_count(); ++i) {
+ BuildTree(group_node_->field(i), 0, 0, group_node_->field(i));
+ }
+}
+
+bool SchemaDescriptor::Equals(const SchemaDescriptor& other) const {
+ if (this->num_columns() != other.num_columns()) {
+ return false;
+ }
+
+ for (int i = 0; i < this->num_columns(); ++i) {
+ if (!this->Column(i)->Equals(*other.Column(i))) {
+ return false;
+ }
+ }
+
+ return true;
+}
+
+void SchemaDescriptor::BuildTree(const NodePtr& node, int16_t max_def_level,
+ int16_t max_rep_level, const NodePtr& base) {
+ if (node->is_optional()) {
+ ++max_def_level;
+ } else if (node->is_repeated()) {
+ // Repeated fields add a definition level. This is used to distinguish
+ // between an empty list and a list with an item in it.
+ ++max_rep_level;
+ ++max_def_level;
+ }
+
+ // Now, walk the schema and create a ColumnDescriptor for each leaf node
+ if (node->is_group()) {
+ const GroupNode* group = static_cast<const GroupNode*>(node.get());
+ for (int i = 0; i < group->field_count(); ++i) {
+ BuildTree(group->field(i), max_def_level, max_rep_level, base);
+ }
+ } else {
+ node_to_leaf_index_[static_cast<const PrimitiveNode*>(node.get())] =
+ static_cast<int>(leaves_.size());
+
+ // Primitive node, append to leaves
+ leaves_.push_back(ColumnDescriptor(node, max_def_level, max_rep_level, this));
+ leaf_to_base_.emplace(static_cast<int>(leaves_.size()) - 1, base);
+ leaf_to_idx_.emplace(node->path()->ToDotString(),
+ static_cast<int>(leaves_.size()) - 1);
+ }
+}
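+
+// Worked example of the level arithmetic (sketch): for
+//
+//   required group schema {
+//     optional group user {          // +1 definition level
+//       repeated group addresses {   // +1 repetition and +1 definition level
+//         required binary city;      // leaf: levels unchanged
+//       }
+//     }
+//   }
+//
+// the leaf "user.addresses.city" gets max_definition_level == 2 and
+// max_repetition_level == 1, and its base (top-level ancestor) is "user".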
+
+int SchemaDescriptor::GetColumnIndex(const PrimitiveNode& node) const {
+ auto it = node_to_leaf_index_.find(&node);
+ if (it == node_to_leaf_index_.end()) {
+ return -1;
+ }
+ return it->second;
+}
+
+ColumnDescriptor::ColumnDescriptor(schema::NodePtr node, int16_t max_definition_level,
+ int16_t max_repetition_level,
+ const SchemaDescriptor* schema_descr)
+ : node_(std::move(node)),
+ max_definition_level_(max_definition_level),
+ max_repetition_level_(max_repetition_level) {
+ if (!node_->is_primitive()) {
+ throw ParquetException("Must be a primitive type");
+ }
+ primitive_node_ = static_cast<const PrimitiveNode*>(node_.get());
+}
+
+bool ColumnDescriptor::Equals(const ColumnDescriptor& other) const {
+ return primitive_node_->Equals(other.primitive_node_) &&
+ max_repetition_level() == other.max_repetition_level() &&
+ max_definition_level() == other.max_definition_level();
+}
+
+const ColumnDescriptor* SchemaDescriptor::Column(int i) const {
+ DCHECK(i >= 0 && i < static_cast<int>(leaves_.size()));
+ return &leaves_[i];
+}
+
+int SchemaDescriptor::ColumnIndex(const std::string& node_path) const {
+ auto search = leaf_to_idx_.find(node_path);
+ if (search == leaf_to_idx_.end()) {
+ // Not found
+ return -1;
+ }
+ return search->second;
+}
+
+int SchemaDescriptor::ColumnIndex(const Node& node) const {
+ auto search = leaf_to_idx_.equal_range(node.path()->ToDotString());
+ for (auto it = search.first; it != search.second; ++it) {
+ const int idx = it->second;
+ if (&node == Column(idx)->schema_node().get()) {
+ return idx;
+ }
+ }
+ return -1;
+}
+
+const schema::Node* SchemaDescriptor::GetColumnRoot(int i) const {
+ DCHECK(i >= 0 && i < static_cast<int>(leaves_.size()));
+ return leaf_to_base_.find(i)->second.get();
+}
+
+bool SchemaDescriptor::HasRepeatedFields() const {
+ return group_node_->HasRepeatedFields();
+}
+
+std::string SchemaDescriptor::ToString() const {
+ std::ostringstream ss;
+ PrintSchema(schema_.get(), ss);
+ return ss.str();
+}
+
+std::string ColumnDescriptor::ToString() const {
+ std::ostringstream ss;
+ ss << "column descriptor = {" << std::endl
+ << " name: " << name() << "," << std::endl
+ << " path: " << path()->ToDotString() << "," << std::endl
+ << " physical_type: " << TypeToString(physical_type()) << "," << std::endl
+ << " converted_type: " << ConvertedTypeToString(converted_type()) << ","
+ << std::endl
+ << " logical_type: " << logical_type()->ToString() << "," << std::endl
+ << " max_definition_level: " << max_definition_level() << "," << std::endl
+ << " max_repetition_level: " << max_repetition_level() << "," << std::endl;
+
+ if (physical_type() == ::parquet::Type::FIXED_LEN_BYTE_ARRAY) {
+ ss << " length: " << type_length() << "," << std::endl;
+ }
+
+ if (converted_type() == parquet::ConvertedType::DECIMAL) {
+ ss << " precision: " << type_precision() << "," << std::endl
+ << " scale: " << type_scale() << "," << std::endl;
+ }
+
+ ss << "}";
+ return ss.str();
+}
+
+int ColumnDescriptor::type_scale() const {
+ return primitive_node_->decimal_metadata().scale;
+}
+
+int ColumnDescriptor::type_precision() const {
+ return primitive_node_->decimal_metadata().precision;
+}
+
+int ColumnDescriptor::type_length() const { return primitive_node_->type_length(); }
+
+const std::shared_ptr<ColumnPath> ColumnDescriptor::path() const {
+ return primitive_node_->path();
+}
+
+} // namespace parquet
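
The BuildTree() walk above is what assigns every leaf its maximum definition and repetition levels: each optional ancestor adds one definition level, and each repeated ancestor adds one repetition level plus one definition level. A minimal sketch (an editorial illustration, not part of the diff) of how that plays out for a conventional three-level LIST schema, using only the factories declared in parquet/schema.h:

#include <iostream>
#include "parquet/schema.h"

int main() {
  using namespace parquet;
  using namespace parquet::schema;

  // optional group bag (LIST) { repeated group list { optional int32 item } }
  NodePtr item = PrimitiveNode::Make("item", Repetition::OPTIONAL, Type::INT32);
  NodePtr list = GroupNode::Make("list", Repetition::REPEATED, {item});
  NodePtr bag =
      GroupNode::Make("bag", Repetition::OPTIONAL, {list}, ConvertedType::LIST);

  SchemaDescriptor descr;
  descr.Init(GroupNode::Make("schema", Repetition::REQUIRED, {bag}));

  const ColumnDescriptor* col = descr.Column(0);
  // optional bag: +1 def; repeated list: +1 rep, +1 def; optional item: +1 def
  std::cout << col->path()->ToDotString() << "\n";   // bag.list.item
  std::cout << col->max_definition_level() << "\n";  // 3
  std::cout << col->max_repetition_level() << "\n";  // 1
  return 0;
}
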
diff --git a/contrib/libs/apache/arrow/cpp/src/parquet/schema.h b/contrib/libs/apache/arrow/cpp/src/parquet/schema.h
index 63fc4706c7e..7dcfa7d144e 100644
--- a/contrib/libs/apache/arrow/cpp/src/parquet/schema.h
+++ b/contrib/libs/apache/arrow/cpp/src/parquet/schema.h
@@ -1,494 +1,494 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-// This module contains the logical parquet-cpp types (independent of Thrift
-// structures), schema nodes, and related type tools
-
-#pragma once
-
-#include <cstdint>
-#include <memory>
-#include <ostream>
-#include <string>
-#include <unordered_map>
-#include <utility>
-#include <vector>
-
-#include "parquet/platform.h"
-#include "parquet/types.h"
-
-namespace parquet {
-
-class SchemaDescriptor;
-
-namespace schema {
-
-class Node;
-
-// List encodings: using the terminology from Impala to define different styles
-// of representing logical lists (a.k.a. ARRAY types) in Parquet schemas. Since
-// the converted type named in the Parquet metadata is ConvertedType::LIST we
-// use that terminology here. It also helps distinguish from the *_ARRAY
-// primitive types.
-//
-// One-level encoding: Only allows required lists with required cells
-// repeated value_type name
-//
-// Two-level encoding: Enables optional lists with only required cells
-// <required/optional> group list
-// repeated value_type item
-//
-// Three-level encoding: Enables optional lists with optional cells
-// <required/optional> group bag
-// repeated group list
-// <required/optional> value_type item
-//
-// 2- and 1-level encoding are respectively equivalent to 3-level encoding with
-// the non-repeated nodes set to required.
-//
-// The "official" encoding recommended in the Parquet spec is the 3-level, and
-// we use that as the default when creating list types. For semantic completeness
-// we allow the other two. Since all types of encodings will occur "in the
-// wild" we need to be able to interpret the associated definition levels in
-// the context of the actual encoding used in the file.
-//
-// NB: Some Parquet writers may not set ConvertedType::LIST on the repeated
-// SchemaElement, which could make things challenging if we are trying to infer
-// that a sequence of nodes semantically represents an array according to one
-// of these encodings (versus a struct containing an array). We should refuse
-// the temptation to guess, as they say.
-struct ListEncoding {
- enum type { ONE_LEVEL, TWO_LEVEL, THREE_LEVEL };
-};
-
-class PARQUET_EXPORT ColumnPath {
- public:
- ColumnPath() : path_() {}
- explicit ColumnPath(const std::vector<std::string>& path) : path_(path) {}
- explicit ColumnPath(std::vector<std::string>&& path) : path_(std::move(path)) {}
-
- static std::shared_ptr<ColumnPath> FromDotString(const std::string& dotstring);
- static std::shared_ptr<ColumnPath> FromNode(const Node& node);
-
- std::shared_ptr<ColumnPath> extend(const std::string& node_name) const;
- std::string ToDotString() const;
- const std::vector<std::string>& ToDotVector() const;
-
- protected:
- std::vector<std::string> path_;
-};
-
-// Base class for logical schema types. A type has a name, repetition level,
-// and optionally a logical type (ConvertedType in Parquet metadata parlance)
-class PARQUET_EXPORT Node {
- public:
- enum type { PRIMITIVE, GROUP };
-
- virtual ~Node() {}
-
- bool is_primitive() const { return type_ == Node::PRIMITIVE; }
-
- bool is_group() const { return type_ == Node::GROUP; }
-
- bool is_optional() const { return repetition_ == Repetition::OPTIONAL; }
-
- bool is_repeated() const { return repetition_ == Repetition::REPEATED; }
-
- bool is_required() const { return repetition_ == Repetition::REQUIRED; }
-
- virtual bool Equals(const Node* other) const = 0;
-
- const std::string& name() const { return name_; }
-
- Node::type node_type() const { return type_; }
-
- Repetition::type repetition() const { return repetition_; }
-
- ConvertedType::type converted_type() const { return converted_type_; }
-
- const std::shared_ptr<const LogicalType>& logical_type() const { return logical_type_; }
-
- /// \brief The field_id value for the serialized SchemaElement. If the
- /// field_id is less than 0 (e.g. -1), it will not be set when serialized to
- /// Thrift.
- int field_id() const { return field_id_; }
-
- PARQUET_DEPRECATED("id() is deprecated. Use field_id() instead")
- int id() const { return field_id_; }
-
- const Node* parent() const { return parent_; }
-
- const std::shared_ptr<ColumnPath> path() const;
-
- virtual void ToParquet(void* element) const = 0;
-
- // Node::Visitor abstract class for walking schemas with the visitor pattern
- class Visitor {
- public:
- virtual ~Visitor() {}
-
- virtual void Visit(Node* node) = 0;
- };
- class ConstVisitor {
- public:
- virtual ~ConstVisitor() {}
-
- virtual void Visit(const Node* node) = 0;
- };
-
- virtual void Visit(Visitor* visitor) = 0;
- virtual void VisitConst(ConstVisitor* visitor) const = 0;
-
- protected:
- friend class GroupNode;
-
- Node(Node::type type, const std::string& name, Repetition::type repetition,
- ConvertedType::type converted_type = ConvertedType::NONE, int field_id = -1)
- : type_(type),
- name_(name),
- repetition_(repetition),
- converted_type_(converted_type),
- field_id_(field_id),
- parent_(NULLPTR) {}
-
- Node(Node::type type, const std::string& name, Repetition::type repetition,
- std::shared_ptr<const LogicalType> logical_type, int field_id = -1)
- : type_(type),
- name_(name),
- repetition_(repetition),
- logical_type_(std::move(logical_type)),
- field_id_(field_id),
- parent_(NULLPTR) {}
-
- Node::type type_;
- std::string name_;
- Repetition::type repetition_;
- ConvertedType::type converted_type_;
- std::shared_ptr<const LogicalType> logical_type_;
- int field_id_;
-  // Nodes should not be shared; they have a single parent.
- const Node* parent_;
-
- bool EqualsInternal(const Node* other) const;
- void SetParent(const Node* p_parent);
-
- private:
- PARQUET_DISALLOW_COPY_AND_ASSIGN(Node);
-};
-
-// Save our breath all over the place with these typedefs
-typedef std::shared_ptr<Node> NodePtr;
-typedef std::vector<NodePtr> NodeVector;
-
-// A type that is one of the primitive Parquet storage types. In addition to
-// the other type metadata (name, repetition level, logical type), it also has
-// the physical storage type and its type-specific metadata (byte width,
-// decimal parameters)
-class PARQUET_EXPORT PrimitiveNode : public Node {
- public:
- static std::unique_ptr<Node> FromParquet(const void* opaque_element);
-
- // A field_id -1 (or any negative value) will be serialized as null in Thrift
- static inline NodePtr Make(const std::string& name, Repetition::type repetition,
- Type::type type,
- ConvertedType::type converted_type = ConvertedType::NONE,
- int length = -1, int precision = -1, int scale = -1,
- int field_id = -1) {
- return NodePtr(new PrimitiveNode(name, repetition, type, converted_type, length,
- precision, scale, field_id));
- }
-
- // If no logical type, pass LogicalType::None() or nullptr
- // A field_id -1 (or any negative value) will be serialized as null in Thrift
- static inline NodePtr Make(const std::string& name, Repetition::type repetition,
- std::shared_ptr<const LogicalType> logical_type,
- Type::type primitive_type, int primitive_length = -1,
- int field_id = -1) {
- return NodePtr(new PrimitiveNode(name, repetition, logical_type, primitive_type,
- primitive_length, field_id));
- }
-
- bool Equals(const Node* other) const override;
-
- Type::type physical_type() const { return physical_type_; }
-
- ColumnOrder column_order() const { return column_order_; }
-
- void SetColumnOrder(ColumnOrder column_order) { column_order_ = column_order; }
-
- int32_t type_length() const { return type_length_; }
-
- const DecimalMetadata& decimal_metadata() const { return decimal_metadata_; }
-
- void ToParquet(void* element) const override;
- void Visit(Visitor* visitor) override;
- void VisitConst(ConstVisitor* visitor) const override;
-
- private:
- PrimitiveNode(const std::string& name, Repetition::type repetition, Type::type type,
- ConvertedType::type converted_type = ConvertedType::NONE, int length = -1,
- int precision = -1, int scale = -1, int field_id = -1);
-
- PrimitiveNode(const std::string& name, Repetition::type repetition,
- std::shared_ptr<const LogicalType> logical_type,
- Type::type primitive_type, int primitive_length = -1, int field_id = -1);
-
- Type::type physical_type_;
- int32_t type_length_;
- DecimalMetadata decimal_metadata_;
- ColumnOrder column_order_;
-
- // For FIXED_LEN_BYTE_ARRAY
- void SetTypeLength(int32_t length) { type_length_ = length; }
-
- bool EqualsInternal(const PrimitiveNode* other) const;
-
- FRIEND_TEST(TestPrimitiveNode, Attrs);
- FRIEND_TEST(TestPrimitiveNode, Equals);
- FRIEND_TEST(TestPrimitiveNode, PhysicalLogicalMapping);
- FRIEND_TEST(TestPrimitiveNode, FromParquet);
-};
-
-class PARQUET_EXPORT GroupNode : public Node {
- public:
- static std::unique_ptr<Node> FromParquet(const void* opaque_element,
- NodeVector fields = {});
-
- // A field_id -1 (or any negative value) will be serialized as null in Thrift
- static inline NodePtr Make(const std::string& name, Repetition::type repetition,
- const NodeVector& fields,
- ConvertedType::type converted_type = ConvertedType::NONE,
- int field_id = -1) {
- return NodePtr(new GroupNode(name, repetition, fields, converted_type, field_id));
- }
-
- // If no logical type, pass nullptr
- // A field_id -1 (or any negative value) will be serialized as null in Thrift
- static inline NodePtr Make(const std::string& name, Repetition::type repetition,
- const NodeVector& fields,
- std::shared_ptr<const LogicalType> logical_type,
- int field_id = -1) {
- return NodePtr(new GroupNode(name, repetition, fields, logical_type, field_id));
- }
-
- bool Equals(const Node* other) const override;
-
- NodePtr field(int i) const { return fields_[i]; }
- // Get the index of a field by its name, or negative value if not found.
- // If several fields share the same name, it is unspecified which one
- // is returned.
- int FieldIndex(const std::string& name) const;
- // Get the index of a field by its node, or negative value if not found.
- int FieldIndex(const Node& node) const;
-
- int field_count() const { return static_cast<int>(fields_.size()); }
-
- void ToParquet(void* element) const override;
- void Visit(Visitor* visitor) override;
- void VisitConst(ConstVisitor* visitor) const override;
-
- /// \brief Return true if this node or any child node has REPEATED repetition
- /// type
- bool HasRepeatedFields() const;
-
- private:
- GroupNode(const std::string& name, Repetition::type repetition,
- const NodeVector& fields,
- ConvertedType::type converted_type = ConvertedType::NONE, int field_id = -1);
-
- GroupNode(const std::string& name, Repetition::type repetition,
- const NodeVector& fields, std::shared_ptr<const LogicalType> logical_type,
- int field_id = -1);
-
- NodeVector fields_;
- bool EqualsInternal(const GroupNode* other) const;
-
-  // Mapping from field name to the field index
- std::unordered_multimap<std::string, int> field_name_to_idx_;
-
- FRIEND_TEST(TestGroupNode, Attrs);
- FRIEND_TEST(TestGroupNode, Equals);
- FRIEND_TEST(TestGroupNode, FieldIndex);
- FRIEND_TEST(TestGroupNode, FieldIndexDuplicateName);
-};
-
-// ----------------------------------------------------------------------
-// Convenience primitive type factory functions
-
-#define PRIMITIVE_FACTORY(FuncName, TYPE) \
- static inline NodePtr FuncName(const std::string& name, \
- Repetition::type repetition = Repetition::OPTIONAL, \
- int field_id = -1) { \
- return PrimitiveNode::Make(name, repetition, Type::TYPE, ConvertedType::NONE, \
- /*length=*/-1, /*precision=*/-1, /*scale=*/-1, field_id); \
- }
-
-PRIMITIVE_FACTORY(Boolean, BOOLEAN)
-PRIMITIVE_FACTORY(Int32, INT32)
-PRIMITIVE_FACTORY(Int64, INT64)
-PRIMITIVE_FACTORY(Int96, INT96)
-PRIMITIVE_FACTORY(Float, FLOAT)
-PRIMITIVE_FACTORY(Double, DOUBLE)
-PRIMITIVE_FACTORY(ByteArray, BYTE_ARRAY)
-
-void PARQUET_EXPORT PrintSchema(const schema::Node* schema, std::ostream& stream,
- int indent_width = 2);
-
-} // namespace schema
-
-// The ColumnDescriptor encapsulates information necessary to interpret
-// primitive column data in the context of a particular schema. We have to
-// examine the node structure of a column's path to the root in the schema tree
-// to be able to reassemble the nested structure from the repetition and
-// definition levels.
-class PARQUET_EXPORT ColumnDescriptor {
- public:
- ColumnDescriptor(schema::NodePtr node, int16_t max_definition_level,
- int16_t max_repetition_level,
- const SchemaDescriptor* schema_descr = NULLPTR);
-
- bool Equals(const ColumnDescriptor& other) const;
-
- int16_t max_definition_level() const { return max_definition_level_; }
-
- int16_t max_repetition_level() const { return max_repetition_level_; }
-
- Type::type physical_type() const { return primitive_node_->physical_type(); }
-
- ConvertedType::type converted_type() const { return primitive_node_->converted_type(); }
-
- const std::shared_ptr<const LogicalType>& logical_type() const {
- return primitive_node_->logical_type();
- }
-
- ColumnOrder column_order() const { return primitive_node_->column_order(); }
-
- SortOrder::type sort_order() const {
- auto la = logical_type();
- auto pt = physical_type();
- return la ? GetSortOrder(la, pt) : GetSortOrder(converted_type(), pt);
- }
-
- const std::string& name() const { return primitive_node_->name(); }
-
- const std::shared_ptr<schema::ColumnPath> path() const;
-
- const schema::NodePtr& schema_node() const { return node_; }
-
- std::string ToString() const;
-
- int type_length() const;
-
- int type_precision() const;
-
- int type_scale() const;
-
- private:
- schema::NodePtr node_;
- const schema::PrimitiveNode* primitive_node_;
-
- int16_t max_definition_level_;
- int16_t max_repetition_level_;
-};
-
-// Container for the converted Parquet schema, with computed information from
-// the schema analysis needed for file reading
-//
-// * Column index to Node
-// * Max repetition / definition levels for each primitive node
-//
-// The ColumnDescriptor objects produced by this class can be used to assist in
-// the reconstruction of fully materialized data structures from the
-// repetition-definition level encoding of nested data
-//
-// TODO(wesm): this object can be recomputed from a Schema
-class PARQUET_EXPORT SchemaDescriptor {
- public:
- SchemaDescriptor() {}
- ~SchemaDescriptor() {}
-
- // Analyze the schema
- void Init(std::unique_ptr<schema::Node> schema);
- void Init(schema::NodePtr schema);
-
- const ColumnDescriptor* Column(int i) const;
-
- // Get the index of a column by its dotstring path, or negative value if not found.
- // If several columns share the same dotstring path, it is unspecified which one
- // is returned.
- int ColumnIndex(const std::string& node_path) const;
- // Get the index of a column by its node, or negative value if not found.
- int ColumnIndex(const schema::Node& node) const;
-
- bool Equals(const SchemaDescriptor& other) const;
-
- // The number of physical columns appearing in the file
- int num_columns() const { return static_cast<int>(leaves_.size()); }
-
- const schema::NodePtr& schema_root() const { return schema_; }
-
- const schema::GroupNode* group_node() const { return group_node_; }
-
-  // Returns the root node (child of the schema root) of the given leaf (column) node
- const schema::Node* GetColumnRoot(int i) const;
-
- const std::string& name() const { return group_node_->name(); }
-
- std::string ToString() const;
-
- void updateColumnOrders(const std::vector<ColumnOrder>& column_orders);
-
- /// \brief Return column index corresponding to a particular
- /// PrimitiveNode. Returns -1 if not found
- int GetColumnIndex(const schema::PrimitiveNode& node) const;
-
-  /// \brief Return true if any field, or any of its children, has REPEATED
-  /// repetition type
- bool HasRepeatedFields() const;
-
- private:
- friend class ColumnDescriptor;
-
- // Root Node
- schema::NodePtr schema_;
-  // Root node, cast to a GroupNode
- const schema::GroupNode* group_node_;
-
- void BuildTree(const schema::NodePtr& node, int16_t max_def_level,
- int16_t max_rep_level, const schema::NodePtr& base);
-
- // Result of leaf node / tree analysis
- std::vector<ColumnDescriptor> leaves_;
-
- std::unordered_map<const schema::PrimitiveNode*, int> node_to_leaf_index_;
-
- // Mapping between leaf nodes and root group of leaf (first node
- // below the schema's root group)
- //
- // For example, the leaf `a.b.c.d` would have a link back to `a`
- //
- // -- a <------
- // -- -- b |
- // -- -- -- c |
- // -- -- -- -- d
- std::unordered_map<int, schema::NodePtr> leaf_to_base_;
-
-  // Mapping from ColumnPath dotstring to the leaf index
- std::unordered_multimap<std::string, int> leaf_to_idx_;
-};
-
-} // namespace parquet
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+// This module contains the logical parquet-cpp types (independent of Thrift
+// structures), schema nodes, and related type tools
+
+#pragma once
+
+#include <cstdint>
+#include <memory>
+#include <ostream>
+#include <string>
+#include <unordered_map>
+#include <utility>
+#include <vector>
+
+#include "parquet/platform.h"
+#include "parquet/types.h"
+
+namespace parquet {
+
+class SchemaDescriptor;
+
+namespace schema {
+
+class Node;
+
+// List encodings: using the terminology from Impala to define different styles
+// of representing logical lists (a.k.a. ARRAY types) in Parquet schemas. Since
+// the converted type named in the Parquet metadata is ConvertedType::LIST we
+// use that terminology here. It also helps distinguish from the *_ARRAY
+// primitive types.
+//
+// One-level encoding: Only allows required lists with required cells
+// repeated value_type name
+//
+// Two-level encoding: Enables optional lists with only required cells
+// <required/optional> group list
+// repeated value_type item
+//
+// Three-level encoding: Enables optional lists with optional cells
+// <required/optional> group bag
+// repeated group list
+// <required/optional> value_type item
+//
+// 2- and 1-level encoding are respectively equivalent to 3-level encoding with
+// the non-repeated nodes set to required.
+//
+// The "official" encoding recommended in the Parquet spec is the 3-level, and
+// we use that as the default when creating list types. For semantic completeness
+// we allow the other two. Since all types of encodings will occur "in the
+// wild" we need to be able to interpret the associated definition levels in
+// the context of the actual encoding used in the file.
+//
+// NB: Some Parquet writers may not set ConvertedType::LIST on the repeated
+// SchemaElement, which could make things challenging if we are trying to infer
+// that a sequence of nodes semantically represents an array according to one
+// of these encodings (versus a struct containing an array). We should refuse
+// the temptation to guess, as they say.
+struct ListEncoding {
+ enum type { ONE_LEVEL, TWO_LEVEL, THREE_LEVEL };
+};
+
+class PARQUET_EXPORT ColumnPath {
+ public:
+ ColumnPath() : path_() {}
+ explicit ColumnPath(const std::vector<std::string>& path) : path_(path) {}
+ explicit ColumnPath(std::vector<std::string>&& path) : path_(std::move(path)) {}
+
+ static std::shared_ptr<ColumnPath> FromDotString(const std::string& dotstring);
+ static std::shared_ptr<ColumnPath> FromNode(const Node& node);
+
+ std::shared_ptr<ColumnPath> extend(const std::string& node_name) const;
+ std::string ToDotString() const;
+ const std::vector<std::string>& ToDotVector() const;
+
+ protected:
+ std::vector<std::string> path_;
+};
+
+// Base class for logical schema types. A type has a name, repetition level,
+// and optionally a logical type (ConvertedType in Parquet metadata parlance)
+class PARQUET_EXPORT Node {
+ public:
+ enum type { PRIMITIVE, GROUP };
+
+ virtual ~Node() {}
+
+ bool is_primitive() const { return type_ == Node::PRIMITIVE; }
+
+ bool is_group() const { return type_ == Node::GROUP; }
+
+ bool is_optional() const { return repetition_ == Repetition::OPTIONAL; }
+
+ bool is_repeated() const { return repetition_ == Repetition::REPEATED; }
+
+ bool is_required() const { return repetition_ == Repetition::REQUIRED; }
+
+ virtual bool Equals(const Node* other) const = 0;
+
+ const std::string& name() const { return name_; }
+
+ Node::type node_type() const { return type_; }
+
+ Repetition::type repetition() const { return repetition_; }
+
+ ConvertedType::type converted_type() const { return converted_type_; }
+
+ const std::shared_ptr<const LogicalType>& logical_type() const { return logical_type_; }
+
+ /// \brief The field_id value for the serialized SchemaElement. If the
+ /// field_id is less than 0 (e.g. -1), it will not be set when serialized to
+ /// Thrift.
+ int field_id() const { return field_id_; }
+
+ PARQUET_DEPRECATED("id() is deprecated. Use field_id() instead")
+ int id() const { return field_id_; }
+
+ const Node* parent() const { return parent_; }
+
+ const std::shared_ptr<ColumnPath> path() const;
+
+ virtual void ToParquet(void* element) const = 0;
+
+ // Node::Visitor abstract class for walking schemas with the visitor pattern
+ class Visitor {
+ public:
+ virtual ~Visitor() {}
+
+ virtual void Visit(Node* node) = 0;
+ };
+ class ConstVisitor {
+ public:
+ virtual ~ConstVisitor() {}
+
+ virtual void Visit(const Node* node) = 0;
+ };
+
+ virtual void Visit(Visitor* visitor) = 0;
+ virtual void VisitConst(ConstVisitor* visitor) const = 0;
+
+ protected:
+ friend class GroupNode;
+
+ Node(Node::type type, const std::string& name, Repetition::type repetition,
+ ConvertedType::type converted_type = ConvertedType::NONE, int field_id = -1)
+ : type_(type),
+ name_(name),
+ repetition_(repetition),
+ converted_type_(converted_type),
+ field_id_(field_id),
+ parent_(NULLPTR) {}
+
+ Node(Node::type type, const std::string& name, Repetition::type repetition,
+ std::shared_ptr<const LogicalType> logical_type, int field_id = -1)
+ : type_(type),
+ name_(name),
+ repetition_(repetition),
+ logical_type_(std::move(logical_type)),
+ field_id_(field_id),
+ parent_(NULLPTR) {}
+
+ Node::type type_;
+ std::string name_;
+ Repetition::type repetition_;
+ ConvertedType::type converted_type_;
+ std::shared_ptr<const LogicalType> logical_type_;
+ int field_id_;
+  // Nodes should not be shared; they have a single parent.
+ const Node* parent_;
+
+ bool EqualsInternal(const Node* other) const;
+ void SetParent(const Node* p_parent);
+
+ private:
+ PARQUET_DISALLOW_COPY_AND_ASSIGN(Node);
+};
+
+// Save our breath all over the place with these typedefs
+typedef std::shared_ptr<Node> NodePtr;
+typedef std::vector<NodePtr> NodeVector;
+
+// A type that is one of the primitive Parquet storage types. In addition to
+// the other type metadata (name, repetition level, logical type), it also has
+// the physical storage type and its type-specific metadata (byte width,
+// decimal parameters)
+class PARQUET_EXPORT PrimitiveNode : public Node {
+ public:
+ static std::unique_ptr<Node> FromParquet(const void* opaque_element);
+
+ // A field_id -1 (or any negative value) will be serialized as null in Thrift
+ static inline NodePtr Make(const std::string& name, Repetition::type repetition,
+ Type::type type,
+ ConvertedType::type converted_type = ConvertedType::NONE,
+ int length = -1, int precision = -1, int scale = -1,
+ int field_id = -1) {
+ return NodePtr(new PrimitiveNode(name, repetition, type, converted_type, length,
+ precision, scale, field_id));
+ }
+
+ // If no logical type, pass LogicalType::None() or nullptr
+ // A field_id -1 (or any negative value) will be serialized as null in Thrift
+ static inline NodePtr Make(const std::string& name, Repetition::type repetition,
+ std::shared_ptr<const LogicalType> logical_type,
+ Type::type primitive_type, int primitive_length = -1,
+ int field_id = -1) {
+ return NodePtr(new PrimitiveNode(name, repetition, logical_type, primitive_type,
+ primitive_length, field_id));
+ }
+
+ bool Equals(const Node* other) const override;
+
+ Type::type physical_type() const { return physical_type_; }
+
+ ColumnOrder column_order() const { return column_order_; }
+
+ void SetColumnOrder(ColumnOrder column_order) { column_order_ = column_order; }
+
+ int32_t type_length() const { return type_length_; }
+
+ const DecimalMetadata& decimal_metadata() const { return decimal_metadata_; }
+
+ void ToParquet(void* element) const override;
+ void Visit(Visitor* visitor) override;
+ void VisitConst(ConstVisitor* visitor) const override;
+
+ private:
+ PrimitiveNode(const std::string& name, Repetition::type repetition, Type::type type,
+ ConvertedType::type converted_type = ConvertedType::NONE, int length = -1,
+ int precision = -1, int scale = -1, int field_id = -1);
+
+ PrimitiveNode(const std::string& name, Repetition::type repetition,
+ std::shared_ptr<const LogicalType> logical_type,
+ Type::type primitive_type, int primitive_length = -1, int field_id = -1);
+
+ Type::type physical_type_;
+ int32_t type_length_;
+ DecimalMetadata decimal_metadata_;
+ ColumnOrder column_order_;
+
+ // For FIXED_LEN_BYTE_ARRAY
+ void SetTypeLength(int32_t length) { type_length_ = length; }
+
+ bool EqualsInternal(const PrimitiveNode* other) const;
+
+ FRIEND_TEST(TestPrimitiveNode, Attrs);
+ FRIEND_TEST(TestPrimitiveNode, Equals);
+ FRIEND_TEST(TestPrimitiveNode, PhysicalLogicalMapping);
+ FRIEND_TEST(TestPrimitiveNode, FromParquet);
+};
+
+class PARQUET_EXPORT GroupNode : public Node {
+ public:
+ static std::unique_ptr<Node> FromParquet(const void* opaque_element,
+ NodeVector fields = {});
+
+ // A field_id -1 (or any negative value) will be serialized as null in Thrift
+ static inline NodePtr Make(const std::string& name, Repetition::type repetition,
+ const NodeVector& fields,
+ ConvertedType::type converted_type = ConvertedType::NONE,
+ int field_id = -1) {
+ return NodePtr(new GroupNode(name, repetition, fields, converted_type, field_id));
+ }
+
+ // If no logical type, pass nullptr
+ // A field_id -1 (or any negative value) will be serialized as null in Thrift
+ static inline NodePtr Make(const std::string& name, Repetition::type repetition,
+ const NodeVector& fields,
+ std::shared_ptr<const LogicalType> logical_type,
+ int field_id = -1) {
+ return NodePtr(new GroupNode(name, repetition, fields, logical_type, field_id));
+ }
+
+ bool Equals(const Node* other) const override;
+
+ NodePtr field(int i) const { return fields_[i]; }
+ // Get the index of a field by its name, or negative value if not found.
+ // If several fields share the same name, it is unspecified which one
+ // is returned.
+ int FieldIndex(const std::string& name) const;
+ // Get the index of a field by its node, or negative value if not found.
+ int FieldIndex(const Node& node) const;
+
+ int field_count() const { return static_cast<int>(fields_.size()); }
+
+ void ToParquet(void* element) const override;
+ void Visit(Visitor* visitor) override;
+ void VisitConst(ConstVisitor* visitor) const override;
+
+ /// \brief Return true if this node or any child node has REPEATED repetition
+ /// type
+ bool HasRepeatedFields() const;
+
+ private:
+ GroupNode(const std::string& name, Repetition::type repetition,
+ const NodeVector& fields,
+ ConvertedType::type converted_type = ConvertedType::NONE, int field_id = -1);
+
+ GroupNode(const std::string& name, Repetition::type repetition,
+ const NodeVector& fields, std::shared_ptr<const LogicalType> logical_type,
+ int field_id = -1);
+
+ NodeVector fields_;
+ bool EqualsInternal(const GroupNode* other) const;
+
+  // Mapping from field name to the field index
+ std::unordered_multimap<std::string, int> field_name_to_idx_;
+
+ FRIEND_TEST(TestGroupNode, Attrs);
+ FRIEND_TEST(TestGroupNode, Equals);
+ FRIEND_TEST(TestGroupNode, FieldIndex);
+ FRIEND_TEST(TestGroupNode, FieldIndexDuplicateName);
+};
+
+// ----------------------------------------------------------------------
+// Convenience primitive type factory functions
+
+#define PRIMITIVE_FACTORY(FuncName, TYPE) \
+ static inline NodePtr FuncName(const std::string& name, \
+ Repetition::type repetition = Repetition::OPTIONAL, \
+ int field_id = -1) { \
+ return PrimitiveNode::Make(name, repetition, Type::TYPE, ConvertedType::NONE, \
+ /*length=*/-1, /*precision=*/-1, /*scale=*/-1, field_id); \
+ }
+
+PRIMITIVE_FACTORY(Boolean, BOOLEAN)
+PRIMITIVE_FACTORY(Int32, INT32)
+PRIMITIVE_FACTORY(Int64, INT64)
+PRIMITIVE_FACTORY(Int96, INT96)
+PRIMITIVE_FACTORY(Float, FLOAT)
+PRIMITIVE_FACTORY(Double, DOUBLE)
+PRIMITIVE_FACTORY(ByteArray, BYTE_ARRAY)
+
+void PARQUET_EXPORT PrintSchema(const schema::Node* schema, std::ostream& stream,
+ int indent_width = 2);
+
+} // namespace schema
+
+// The ColumnDescriptor encapsulates information necessary to interpret
+// primitive column data in the context of a particular schema. We have to
+// examine the node structure of a column's path to the root in the schema tree
+// to be able to reassemble the nested structure from the repetition and
+// definition levels.
+class PARQUET_EXPORT ColumnDescriptor {
+ public:
+ ColumnDescriptor(schema::NodePtr node, int16_t max_definition_level,
+ int16_t max_repetition_level,
+ const SchemaDescriptor* schema_descr = NULLPTR);
+
+ bool Equals(const ColumnDescriptor& other) const;
+
+ int16_t max_definition_level() const { return max_definition_level_; }
+
+ int16_t max_repetition_level() const { return max_repetition_level_; }
+
+ Type::type physical_type() const { return primitive_node_->physical_type(); }
+
+ ConvertedType::type converted_type() const { return primitive_node_->converted_type(); }
+
+ const std::shared_ptr<const LogicalType>& logical_type() const {
+ return primitive_node_->logical_type();
+ }
+
+ ColumnOrder column_order() const { return primitive_node_->column_order(); }
+
+ SortOrder::type sort_order() const {
+ auto la = logical_type();
+ auto pt = physical_type();
+ return la ? GetSortOrder(la, pt) : GetSortOrder(converted_type(), pt);
+ }
+
+ const std::string& name() const { return primitive_node_->name(); }
+
+ const std::shared_ptr<schema::ColumnPath> path() const;
+
+ const schema::NodePtr& schema_node() const { return node_; }
+
+ std::string ToString() const;
+
+ int type_length() const;
+
+ int type_precision() const;
+
+ int type_scale() const;
+
+ private:
+ schema::NodePtr node_;
+ const schema::PrimitiveNode* primitive_node_;
+
+ int16_t max_definition_level_;
+ int16_t max_repetition_level_;
+};
+
+// Container for the converted Parquet schema, with computed information from
+// the schema analysis needed for file reading
+//
+// * Column index to Node
+// * Max repetition / definition levels for each primitive node
+//
+// The ColumnDescriptor objects produced by this class can be used to assist in
+// the reconstruction of fully materialized data structures from the
+// repetition-definition level encoding of nested data
+//
+// TODO(wesm): this object can be recomputed from a Schema
+class PARQUET_EXPORT SchemaDescriptor {
+ public:
+ SchemaDescriptor() {}
+ ~SchemaDescriptor() {}
+
+ // Analyze the schema
+ void Init(std::unique_ptr<schema::Node> schema);
+ void Init(schema::NodePtr schema);
+
+ const ColumnDescriptor* Column(int i) const;
+
+ // Get the index of a column by its dotstring path, or negative value if not found.
+ // If several columns share the same dotstring path, it is unspecified which one
+ // is returned.
+ int ColumnIndex(const std::string& node_path) const;
+ // Get the index of a column by its node, or negative value if not found.
+ int ColumnIndex(const schema::Node& node) const;
+
+ bool Equals(const SchemaDescriptor& other) const;
+
+ // The number of physical columns appearing in the file
+ int num_columns() const { return static_cast<int>(leaves_.size()); }
+
+ const schema::NodePtr& schema_root() const { return schema_; }
+
+ const schema::GroupNode* group_node() const { return group_node_; }
+
+  // Returns the root node (child of the schema root) of the given leaf (column) node
+ const schema::Node* GetColumnRoot(int i) const;
+
+ const std::string& name() const { return group_node_->name(); }
+
+ std::string ToString() const;
+
+ void updateColumnOrders(const std::vector<ColumnOrder>& column_orders);
+
+ /// \brief Return column index corresponding to a particular
+ /// PrimitiveNode. Returns -1 if not found
+ int GetColumnIndex(const schema::PrimitiveNode& node) const;
+
+  /// \brief Return true if any field, or any of its children, has REPEATED
+  /// repetition type
+ bool HasRepeatedFields() const;
+
+ private:
+ friend class ColumnDescriptor;
+
+ // Root Node
+ schema::NodePtr schema_;
+  // Root node, cast to a GroupNode
+ const schema::GroupNode* group_node_;
+
+ void BuildTree(const schema::NodePtr& node, int16_t max_def_level,
+ int16_t max_rep_level, const schema::NodePtr& base);
+
+ // Result of leaf node / tree analysis
+ std::vector<ColumnDescriptor> leaves_;
+
+ std::unordered_map<const schema::PrimitiveNode*, int> node_to_leaf_index_;
+
+ // Mapping between leaf nodes and root group of leaf (first node
+ // below the schema's root group)
+ //
+ // For example, the leaf `a.b.c.d` would have a link back to `a`
+ //
+ // -- a <------
+ // -- -- b |
+ // -- -- -- c |
+ // -- -- -- -- d
+ std::unordered_map<int, schema::NodePtr> leaf_to_base_;
+
+  // Mapping from ColumnPath dotstring to the leaf index
+ std::unordered_multimap<std::string, int> leaf_to_idx_;
+};
+
+} // namespace parquet
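
As a quick illustration of the header above (again an editorial sketch, not part of the diff): the PRIMITIVE_FACTORY shorthands default to Repetition::OPTIONAL, and the SchemaDescriptor lookups return a negative value when a path is not found.

#include <cassert>
#include "parquet/schema.h"

int main() {
  using namespace parquet;
  using namespace parquet::schema;

  // Int32/ByteArray/Double expand to PrimitiveNode::Make (see PRIMITIVE_FACTORY).
  NodePtr root = GroupNode::Make(
      "schema", Repetition::REQUIRED,
      {Int32("id", Repetition::REQUIRED), ByteArray("name"), Double("score")});

  SchemaDescriptor descr;
  descr.Init(root);

  assert(descr.num_columns() == 3);
  assert(descr.ColumnIndex("name") == 1);      // leaf found by its dotstring path
  assert(descr.ColumnIndex("missing") == -1);  // not found -> negative value
  assert(descr.GetColumnRoot(1)->name() == "name");  // a flat leaf is its own root
  return 0;
}
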
diff --git a/contrib/libs/apache/arrow/cpp/src/parquet/schema_internal.h b/contrib/libs/apache/arrow/cpp/src/parquet/schema_internal.h
index 42102884bb0..c0cfffc87e2 100644
--- a/contrib/libs/apache/arrow/cpp/src/parquet/schema_internal.h
+++ b/contrib/libs/apache/arrow/cpp/src/parquet/schema_internal.h
@@ -1,54 +1,54 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-// Non-public Thrift schema serialization utilities
-
-#pragma once
-
-#include <memory>
-#include <vector>
-
-#include "parquet/platform.h"
-#include "parquet/schema.h"
-#include "parquet/types.h"
-
-namespace parquet {
-
-namespace format {
-class SchemaElement;
-}
-
-namespace schema {
-
-// ----------------------------------------------------------------------
-// Conversion from Parquet Thrift metadata
-
-PARQUET_EXPORT
-std::shared_ptr<SchemaDescriptor> FromParquet(
- const std::vector<format::SchemaElement>& schema);
-
-PARQUET_EXPORT
-std::unique_ptr<Node> Unflatten(const format::SchemaElement* elements, int length);
-
-// ----------------------------------------------------------------------
-// Conversion to Parquet Thrift metadata
-
-PARQUET_EXPORT
-void ToParquet(const GroupNode* schema, std::vector<format::SchemaElement>* out);
-
-} // namespace schema
-} // namespace parquet
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+// Non-public Thrift schema serialization utilities
+
+#pragma once
+
+#include <memory>
+#include <vector>
+
+#include "parquet/platform.h"
+#include "parquet/schema.h"
+#include "parquet/types.h"
+
+namespace parquet {
+
+namespace format {
+class SchemaElement;
+}
+
+namespace schema {
+
+// ----------------------------------------------------------------------
+// Conversion from Parquet Thrift metadata
+
+PARQUET_EXPORT
+std::shared_ptr<SchemaDescriptor> FromParquet(
+ const std::vector<format::SchemaElement>& schema);
+
+PARQUET_EXPORT
+std::unique_ptr<Node> Unflatten(const format::SchemaElement* elements, int length);
+
+// ----------------------------------------------------------------------
+// Conversion to Parquet Thrift metadata
+
+PARQUET_EXPORT
+void ToParquet(const GroupNode* schema, std::vector<format::SchemaElement>* out);
+
+} // namespace schema
+} // namespace parquet
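
The two declarations above round-trip a schema through its Thrift representation. A hedged sketch of that round trip follows; the include path for the Thrift-generated types ("generated/parquet_types.h") is an assumption about the internal build tree, since schema_internal.h only forward-declares format::SchemaElement.

#include <cassert>
#include <memory>
#include <vector>
#include "generated/parquet_types.h"  // assumed location of format::SchemaElement
#include "parquet/schema.h"
#include "parquet/schema_internal.h"

int main() {
  using namespace parquet;
  using namespace parquet::schema;

  NodePtr root = GroupNode::Make("schema", Repetition::REQUIRED,
                                 {Int64("ts"), ByteArray("payload")});

  // Flatten the node tree into Thrift SchemaElements (root element first)...
  std::vector<format::SchemaElement> elements;
  ToParquet(static_cast<const GroupNode*>(root.get()), &elements);
  assert(elements.size() == 3);  // root group plus two leaves

  // ...then rebuild the tree and compare the logical structure.
  std::unique_ptr<Node> rebuilt =
      Unflatten(elements.data(), static_cast<int>(elements.size()));
  assert(rebuilt->Equals(root.get()));
  return 0;
}
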
diff --git a/contrib/libs/apache/arrow/cpp/src/parquet/statistics.cc b/contrib/libs/apache/arrow/cpp/src/parquet/statistics.cc
index 3b037ac74bf..72341590e75 100644
--- a/contrib/libs/apache/arrow/cpp/src/parquet/statistics.cc
+++ b/contrib/libs/apache/arrow/cpp/src/parquet/statistics.cc
@@ -1,885 +1,885 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-#include "parquet/statistics.h"
-
-#include <algorithm>
-#include <cmath>
-#include <cstring>
-#include <limits>
-#include <type_traits>
-#include <utility>
-
-#include "arrow/array.h"
-#include "arrow/type.h"
-#include "arrow/type_traits.h"
-#include "arrow/util/bit_run_reader.h"
-#include "arrow/util/checked_cast.h"
-#include "arrow/util/logging.h"
-#include "arrow/util/optional.h"
-#include "arrow/util/ubsan.h"
-#include "arrow/visitor_inline.h"
-#include "parquet/encoding.h"
-#include "parquet/exception.h"
-#include "parquet/platform.h"
-#include "parquet/schema.h"
-
-using arrow::default_memory_pool;
-using arrow::MemoryPool;
-using arrow::internal::checked_cast;
-using arrow::util::SafeCopy;
-
-namespace parquet {
-namespace {
-
-// ----------------------------------------------------------------------
-// Comparator implementations
-
-constexpr int value_length(int value_length, const ByteArray& value) { return value.len; }
-constexpr int value_length(int type_length, const FLBA& value) { return type_length; }
-
-template <typename DType, bool is_signed>
-struct CompareHelper {
- using T = typename DType::c_type;
-
- static_assert(!std::is_unsigned<T>::value || std::is_same<T, bool>::value,
- "T is an unsigned numeric");
-
- constexpr static T DefaultMin() { return std::numeric_limits<T>::max(); }
- constexpr static T DefaultMax() { return std::numeric_limits<T>::lowest(); }
-
-  // MSVC17 fix: std::isnan is not overloaded for integral types, as per C++11
-  // standard requirements.
- template <typename T1 = T>
- static ::arrow::enable_if_t<std::is_floating_point<T1>::value, T> Coalesce(T val,
- T fallback) {
- return std::isnan(val) ? fallback : val;
- }
-
- template <typename T1 = T>
- static ::arrow::enable_if_t<!std::is_floating_point<T1>::value, T> Coalesce(
- T val, T fallback) {
- return val;
- }
-
- static inline bool Compare(int type_length, const T& a, const T& b) { return a < b; }
-
- static T Min(int type_length, T a, T b) { return a < b ? a : b; }
- static T Max(int type_length, T a, T b) { return a < b ? b : a; }
-};
-
-template <typename DType>
-struct UnsignedCompareHelperBase {
- using T = typename DType::c_type;
- using UCType = typename std::make_unsigned<T>::type;
-
- static_assert(!std::is_same<T, UCType>::value, "T is unsigned");
- static_assert(sizeof(T) == sizeof(UCType), "T and UCType not the same size");
-
- // NOTE: according to the C++ spec, unsigned-to-signed conversion is
- // implementation-defined if the original value does not fit in the signed type
- // (i.e., two's complement cannot be assumed even on mainstream machines,
- // because the compiler may decide otherwise). Hence the use of `SafeCopy`
- // below for deterministic bit-casting.
- // (see "Integer conversions" in
- // https://en.cppreference.com/w/cpp/language/implicit_conversion)
-
- static const T DefaultMin() { return SafeCopy<T>(std::numeric_limits<UCType>::max()); }
- static const T DefaultMax() { return 0; }
-
- static T Coalesce(T val, T fallback) { return val; }
-
- static bool Compare(int type_length, T a, T b) {
- return SafeCopy<UCType>(a) < SafeCopy<UCType>(b);
- }
-
- static T Min(int type_length, T a, T b) { return Compare(type_length, a, b) ? a : b; }
- static T Max(int type_length, T a, T b) { return Compare(type_length, a, b) ? b : a; }
-};
-
-template <>
-struct CompareHelper<Int32Type, false> : public UnsignedCompareHelperBase<Int32Type> {};
-
-template <>
-struct CompareHelper<Int64Type, false> : public UnsignedCompareHelperBase<Int64Type> {};
-
-template <bool is_signed>
-struct CompareHelper<Int96Type, is_signed> {
- using T = typename Int96Type::c_type;
- using msb_type = typename std::conditional<is_signed, int32_t, uint32_t>::type;
-
- static T DefaultMin() {
- uint32_t kMsbMax = SafeCopy<uint32_t>(std::numeric_limits<msb_type>::max());
- uint32_t kMax = std::numeric_limits<uint32_t>::max();
- return {kMax, kMax, kMsbMax};
- }
- static T DefaultMax() {
- uint32_t kMsbMin = SafeCopy<uint32_t>(std::numeric_limits<msb_type>::min());
- uint32_t kMin = std::numeric_limits<uint32_t>::min();
- return {kMin, kMin, kMsbMin};
- }
- static T Coalesce(T val, T fallback) { return val; }
-
- static inline bool Compare(int type_length, const T& a, const T& b) {
- if (a.value[2] != b.value[2]) {
-      // Only the most significant word uses signed comparison. For little-endian,
-      // this is the last 32-bit word of the Int96 value.
- return SafeCopy<msb_type>(a.value[2]) < SafeCopy<msb_type>(b.value[2]);
- } else if (a.value[1] != b.value[1]) {
- return (a.value[1] < b.value[1]);
- }
- return (a.value[0] < b.value[0]);
- }
-
- static T Min(int type_length, const T& a, const T& b) {
- return Compare(0, a, b) ? a : b;
- }
- static T Max(int type_length, const T& a, const T& b) {
- return Compare(0, a, b) ? b : a;
- }
-};
-
-template <typename T, bool is_signed>
-struct BinaryLikeComparer {};
-
-template <typename T>
-struct BinaryLikeComparer<T, /*is_signed=*/false> {
- static bool Compare(int type_length, const T& a, const T& b) {
- int a_length = value_length(type_length, a);
- int b_length = value_length(type_length, b);
-    // Unsigned comparison is used for non-numeric types, so straight
-    // lexicographic comparison makes sense (a.ptr is always unsigned).
- return std::lexicographical_compare(a.ptr, a.ptr + a_length, b.ptr, b.ptr + b_length);
- }
-};
-
-template <typename T>
-struct BinaryLikeComparer<T, /*is_signed=*/true> {
- static bool Compare(int type_length, const T& a, const T& b) {
-    // Signed comparison is used for integers encoded as big-endian two's
-    // complement integers (e.g. decimals).
- int a_length = value_length(type_length, a);
- int b_length = value_length(type_length, b);
-
-    // At least one of the lengths is zero.
- if (a_length == 0 || b_length == 0) {
- return a_length == 0 && b_length > 0;
- }
-
- int8_t first_a = *a.ptr;
- int8_t first_b = *b.ptr;
-    // We can short-circuit for numbers of different sign, or for equal-length
-    // byte arrays that have different first bytes. The equality requirement is
-    // necessary for sign-extension cases: 0xFF10 should be equal to 0x10
-    // (due to big-endian sign extension).
- if ((0x80 & first_a) != (0x80 & first_b) ||
- (a_length == b_length && first_a != first_b)) {
- return first_a < first_b;
- }
-    // When the lengths are unequal and the numbers are of the same sign, we
-    // need to compare by sign-extending the shorter value first; once we get
-    // to equal-sized arrays, lexicographical unsigned comparison of everything
-    // but the first byte is sufficient.
- const uint8_t* a_start = a.ptr;
- const uint8_t* b_start = b.ptr;
- if (a_length != b_length) {
- const uint8_t* lead_start = nullptr;
- const uint8_t* lead_end = nullptr;
- if (a_length > b_length) {
- int lead_length = a_length - b_length;
- lead_start = a.ptr;
- lead_end = a.ptr + lead_length;
- a_start += lead_length;
- } else {
- DCHECK_LT(a_length, b_length);
- int lead_length = b_length - a_length;
- lead_start = b.ptr;
- lead_end = b.ptr + lead_length;
- b_start += lead_length;
- }
- // Compare extra bytes to the sign extension of the first
- // byte of the other number.
- uint8_t extension = first_a < 0 ? 0xFF : 0;
- bool not_equal = std::any_of(lead_start, lead_end,
- [extension](uint8_t a) { return extension != a; });
- if (not_equal) {
-        // Since sign-extension bytes are extrema values for unsigned bytes:
- //
- // Four cases exist:
- // negative values:
- // b is the longer value.
- // b must be the lesser value: return false
- // else:
- // a must be the lesser value: return true
- //
- // positive values:
- // b is the longer value.
- // values in b must be greater than a: return true
- // else:
- // values in a must be greater than b: return false
- bool negative_values = first_a < 0;
- bool b_longer = a_length < b_length;
- return negative_values != b_longer;
- }
- } else {
- a_start++;
- b_start++;
- }
- return std::lexicographical_compare(a_start, a.ptr + a_length, b_start,
- b.ptr + b_length);
- }
-};
-
-template <typename DType, bool is_signed>
-struct BinaryLikeCompareHelperBase {
- using T = typename DType::c_type;
-
- static T DefaultMin() { return {}; }
- static T DefaultMax() { return {}; }
- static T Coalesce(T val, T fallback) { return val; }
-
- static inline bool Compare(int type_length, const T& a, const T& b) {
- return BinaryLikeComparer<T, is_signed>::Compare(type_length, a, b);
- }
- static T Min(int type_length, const T& a, const T& b) {
- if (a.ptr == nullptr) return b;
- if (b.ptr == nullptr) return a;
- return Compare(type_length, a, b) ? a : b;
- }
-
- static T Max(int type_length, const T& a, const T& b) {
- if (a.ptr == nullptr) return b;
- if (b.ptr == nullptr) return a;
- return Compare(type_length, a, b) ? b : a;
- }
-};
-
-template <bool is_signed>
-struct CompareHelper<ByteArrayType, is_signed>
- : public BinaryLikeCompareHelperBase<ByteArrayType, is_signed> {};
-
-template <bool is_signed>
-struct CompareHelper<FLBAType, is_signed>
- : public BinaryLikeCompareHelperBase<FLBAType, is_signed> {};
-
-using ::arrow::util::optional;
-
-template <typename T>
-::arrow::enable_if_t<std::is_integral<T>::value, optional<std::pair<T, T>>>
-CleanStatistic(std::pair<T, T> min_max) {
- return min_max;
-}
-
-// In case of floating point types, the following rules are applied (as per
-// upstream parquet-mr):
-// - If any of min/max is NaN, return nothing.
-// - If min is 0.0f, replace with -0.0f
-// - If max is -0.0f, replace with 0.0f
-template <typename T>
-::arrow::enable_if_t<std::is_floating_point<T>::value, optional<std::pair<T, T>>>
-CleanStatistic(std::pair<T, T> min_max) {
- T min = min_max.first;
- T max = min_max.second;
-
-  // Ignore if one of the values is NaN.
- if (std::isnan(min) || std::isnan(max)) {
- return ::arrow::util::nullopt;
- }
-
- if (min == std::numeric_limits<T>::max() && max == std::numeric_limits<T>::lowest()) {
- return ::arrow::util::nullopt;
- }
-
- T zero{};
-
- if (min == zero && !std::signbit(min)) {
- min = -min;
- }
-
- if (max == zero && std::signbit(max)) {
- max = -max;
- }
-
- return {{min, max}};
-}
-
-optional<std::pair<FLBA, FLBA>> CleanStatistic(std::pair<FLBA, FLBA> min_max) {
- if (min_max.first.ptr == nullptr || min_max.second.ptr == nullptr) {
- return ::arrow::util::nullopt;
- }
- return min_max;
-}
-
-optional<std::pair<ByteArray, ByteArray>> CleanStatistic(
- std::pair<ByteArray, ByteArray> min_max) {
- if (min_max.first.ptr == nullptr || min_max.second.ptr == nullptr) {
- return ::arrow::util::nullopt;
- }
- return min_max;
-}
-
-template <bool is_signed, typename DType>
-class TypedComparatorImpl : virtual public TypedComparator<DType> {
- public:
- using T = typename DType::c_type;
- using Helper = CompareHelper<DType, is_signed>;
-
- explicit TypedComparatorImpl(int type_length = -1) : type_length_(type_length) {}
-
- bool CompareInline(const T& a, const T& b) const {
- return Helper::Compare(type_length_, a, b);
- }
-
- bool Compare(const T& a, const T& b) override { return CompareInline(a, b); }
-
- std::pair<T, T> GetMinMax(const T* values, int64_t length) override {
- DCHECK_GT(length, 0);
-
- T min = Helper::DefaultMin();
- T max = Helper::DefaultMax();
-
- for (int64_t i = 0; i < length; i++) {
- auto val = values[i];
- min = Helper::Min(type_length_, min, Helper::Coalesce(val, Helper::DefaultMin()));
- max = Helper::Max(type_length_, max, Helper::Coalesce(val, Helper::DefaultMax()));
- }
-
- return {min, max};
- }
-
- std::pair<T, T> GetMinMaxSpaced(const T* values, int64_t length,
- const uint8_t* valid_bits,
- int64_t valid_bits_offset) override {
- DCHECK_GT(length, 0);
-
- T min = Helper::DefaultMin();
- T max = Helper::DefaultMax();
-
- ::arrow::internal::VisitSetBitRunsVoid(
- valid_bits, valid_bits_offset, length, [&](int64_t position, int64_t length) {
- for (int64_t i = 0; i < length; i++) {
- const auto val = values[i + position];
- min = Helper::Min(type_length_, min,
- Helper::Coalesce(val, Helper::DefaultMin()));
- max = Helper::Max(type_length_, max,
- Helper::Coalesce(val, Helper::DefaultMax()));
- }
- });
-
- return {min, max};
- }
-
- std::pair<T, T> GetMinMax(const ::arrow::Array& values) override;
-
- private:
- int type_length_;
-};
-
-// ARROW-11675: A hand-written version of GetMinMax(), to work around
-// what looks like an MSVC code generation bug.
-// This does not seem to be required for GetMinMaxSpaced().
-template <>
-std::pair<int32_t, int32_t>
-TypedComparatorImpl</*is_signed=*/false, Int32Type>::GetMinMax(const int32_t* values,
- int64_t length) {
- DCHECK_GT(length, 0);
-
- const uint32_t* unsigned_values = reinterpret_cast<const uint32_t*>(values);
- uint32_t min = std::numeric_limits<uint32_t>::max();
- uint32_t max = std::numeric_limits<uint32_t>::lowest();
-
- for (int64_t i = 0; i < length; i++) {
- const auto val = unsigned_values[i];
- min = std::min<uint32_t>(min, val);
- max = std::max<uint32_t>(max, val);
- }
-
- return {SafeCopy<int32_t>(min), SafeCopy<int32_t>(max)};
-}
-
-template <bool is_signed, typename DType>
-std::pair<typename DType::c_type, typename DType::c_type>
-TypedComparatorImpl<is_signed, DType>::GetMinMax(const ::arrow::Array& values) {
- ParquetException::NYI(values.type()->ToString());
-}
-
-template <bool is_signed>
-std::pair<ByteArray, ByteArray> GetMinMaxBinaryHelper(
- const TypedComparatorImpl<is_signed, ByteArrayType>& comparator,
- const ::arrow::Array& values) {
- using Helper = CompareHelper<ByteArrayType, is_signed>;
-
- ByteArray min = Helper::DefaultMin();
- ByteArray max = Helper::DefaultMax();
- constexpr int type_length = -1;
-
- const auto valid_func = [&](ByteArray val) {
- min = Helper::Min(type_length, val, min);
- max = Helper::Max(type_length, val, max);
- };
- const auto null_func = [&]() {};
-
- if (::arrow::is_binary_like(values.type_id())) {
- ::arrow::VisitArrayDataInline<::arrow::BinaryType>(
- *values.data(), std::move(valid_func), std::move(null_func));
- } else {
- DCHECK(::arrow::is_large_binary_like(values.type_id()));
- ::arrow::VisitArrayDataInline<::arrow::LargeBinaryType>(
- *values.data(), std::move(valid_func), std::move(null_func));
- }
-
- return {min, max};
-}
-
-template <>
-std::pair<ByteArray, ByteArray> TypedComparatorImpl<true, ByteArrayType>::GetMinMax(
- const ::arrow::Array& values) {
- return GetMinMaxBinaryHelper<true>(*this, values);
-}
-
-template <>
-std::pair<ByteArray, ByteArray> TypedComparatorImpl<false, ByteArrayType>::GetMinMax(
- const ::arrow::Array& values) {
- return GetMinMaxBinaryHelper<false>(*this, values);
-}
-
-template <typename DType>
-class TypedStatisticsImpl : public TypedStatistics<DType> {
- public:
- using T = typename DType::c_type;
-
- TypedStatisticsImpl(const ColumnDescriptor* descr, MemoryPool* pool)
- : descr_(descr),
- pool_(pool),
- min_buffer_(AllocateBuffer(pool_, 0)),
- max_buffer_(AllocateBuffer(pool_, 0)) {
- auto comp = Comparator::Make(descr);
- comparator_ = std::static_pointer_cast<TypedComparator<DType>>(comp);
- Reset();
- has_null_count_ = true;
- has_distinct_count_ = true;
- }
-
- TypedStatisticsImpl(const T& min, const T& max, int64_t num_values, int64_t null_count,
- int64_t distinct_count)
- : pool_(default_memory_pool()),
- min_buffer_(AllocateBuffer(pool_, 0)),
- max_buffer_(AllocateBuffer(pool_, 0)) {
- IncrementNumValues(num_values);
- IncrementNullCount(null_count);
- IncrementDistinctCount(distinct_count);
-
- Copy(min, &min_, min_buffer_.get());
- Copy(max, &max_, max_buffer_.get());
- has_min_max_ = true;
- }
-
- TypedStatisticsImpl(const ColumnDescriptor* descr, const std::string& encoded_min,
- const std::string& encoded_max, int64_t num_values,
- int64_t null_count, int64_t distinct_count, bool has_min_max,
- bool has_null_count, bool has_distinct_count, MemoryPool* pool)
- : TypedStatisticsImpl(descr, pool) {
- IncrementNumValues(num_values);
- if (has_null_count_) {
- IncrementNullCount(null_count);
- }
- if (has_distinct_count) {
- IncrementDistinctCount(distinct_count);
- }
-
- if (!encoded_min.empty()) {
- PlainDecode(encoded_min, &min_);
- }
- if (!encoded_max.empty()) {
- PlainDecode(encoded_max, &max_);
- }
- has_min_max_ = has_min_max;
- }
-
- bool HasDistinctCount() const override { return has_distinct_count_; }
- bool HasMinMax() const override { return has_min_max_; }
- bool HasNullCount() const override { return has_null_count_; }
-
- bool Equals(const Statistics& raw_other) const override {
- if (physical_type() != raw_other.physical_type()) return false;
-
- const auto& other = checked_cast<const TypedStatisticsImpl&>(raw_other);
-
- if (has_min_max_ != other.has_min_max_) return false;
-
- return (!has_min_max_ || MinMaxEqual(other)) && null_count() == other.null_count() &&
- distinct_count() == other.distinct_count() &&
- num_values() == other.num_values();
- }
-
- bool MinMaxEqual(const TypedStatisticsImpl& other) const;
-
- void Reset() override {
- ResetCounts();
- has_min_max_ = false;
- has_distinct_count_ = false;
- has_null_count_ = false;
- }
-
- void SetMinMax(const T& arg_min, const T& arg_max) override {
- SetMinMaxPair({arg_min, arg_max});
- }
-
- void Merge(const TypedStatistics<DType>& other) override {
- this->num_values_ += other.num_values();
- if (other.HasNullCount()) {
- this->statistics_.null_count += other.null_count();
- }
- if (other.HasDistinctCount()) {
- this->statistics_.distinct_count += other.distinct_count();
- }
- if (other.HasMinMax()) {
- SetMinMax(other.min(), other.max());
- }
- }
-
- void Update(const T* values, int64_t num_not_null, int64_t num_null) override;
- void UpdateSpaced(const T* values, const uint8_t* valid_bits, int64_t valid_bits_offset,
- int64_t num_not_null, int64_t num_null) override;
-
- void Update(const ::arrow::Array& values) override {
- IncrementNullCount(values.null_count());
- IncrementNumValues(values.length() - values.null_count());
-
- if (values.null_count() == values.length()) {
- return;
- }
-
- SetMinMaxPair(comparator_->GetMinMax(values));
- }
-
- const T& min() const override { return min_; }
-
- const T& max() const override { return max_; }
-
- Type::type physical_type() const override { return descr_->physical_type(); }
-
- const ColumnDescriptor* descr() const override { return descr_; }
-
- std::string EncodeMin() const override {
- std::string s;
- if (HasMinMax()) this->PlainEncode(min_, &s);
- return s;
- }
-
- std::string EncodeMax() const override {
- std::string s;
- if (HasMinMax()) this->PlainEncode(max_, &s);
- return s;
- }
-
- EncodedStatistics Encode() override {
- EncodedStatistics s;
- if (HasMinMax()) {
- s.set_min(this->EncodeMin());
- s.set_max(this->EncodeMax());
- }
- if (HasNullCount()) {
- s.set_null_count(this->null_count());
- }
- return s;
- }
-
- int64_t null_count() const override { return statistics_.null_count; }
- int64_t distinct_count() const override { return statistics_.distinct_count; }
- int64_t num_values() const override { return num_values_; }
-
- private:
- const ColumnDescriptor* descr_;
- bool has_min_max_ = false;
- bool has_null_count_ = false;
- bool has_distinct_count_ = false;
- T min_;
- T max_;
- ::arrow::MemoryPool* pool_;
- int64_t num_values_ = 0;
- EncodedStatistics statistics_;
- std::shared_ptr<TypedComparator<DType>> comparator_;
- std::shared_ptr<ResizableBuffer> min_buffer_, max_buffer_;
-
- void PlainEncode(const T& src, std::string* dst) const;
- void PlainDecode(const std::string& src, T* dst) const;
-
- void Copy(const T& src, T* dst, ResizableBuffer*) { *dst = src; }
-
- void IncrementNullCount(int64_t n) {
- statistics_.null_count += n;
- has_null_count_ = true;
- }
-
- void IncrementNumValues(int64_t n) { num_values_ += n; }
-
- void IncrementDistinctCount(int64_t n) {
- statistics_.distinct_count += n;
- has_distinct_count_ = true;
- }
-
- void ResetCounts() {
- this->statistics_.null_count = 0;
- this->statistics_.distinct_count = 0;
- this->num_values_ = 0;
- }
-
- void SetMinMaxPair(std::pair<T, T> min_max) {
- // CleanStatistic can return a nullopt in case of erroneous values, e.g. NaN
- auto maybe_min_max = CleanStatistic(min_max);
- if (!maybe_min_max) return;
-
- auto min = maybe_min_max.value().first;
- auto max = maybe_min_max.value().second;
-
- if (!has_min_max_) {
- has_min_max_ = true;
- Copy(min, &min_, min_buffer_.get());
- Copy(max, &max_, max_buffer_.get());
- } else {
- Copy(comparator_->Compare(min_, min) ? min_ : min, &min_, min_buffer_.get());
- Copy(comparator_->Compare(max_, max) ? max : max_, &max_, max_buffer_.get());
- }
- }
-};
-
-template <>
-inline bool TypedStatisticsImpl<FLBAType>::MinMaxEqual(
- const TypedStatisticsImpl<FLBAType>& other) const {
- uint32_t len = descr_->type_length();
- return std::memcmp(min_.ptr, other.min_.ptr, len) == 0 &&
- std::memcmp(max_.ptr, other.max_.ptr, len) == 0;
-}
-
-template <typename DType>
-bool TypedStatisticsImpl<DType>::MinMaxEqual(
- const TypedStatisticsImpl<DType>& other) const {
- return min_ == other.min_ && max_ == other.max_;
-}
-
-template <>
-inline void TypedStatisticsImpl<FLBAType>::Copy(const FLBA& src, FLBA* dst,
- ResizableBuffer* buffer) {
- if (dst->ptr == src.ptr) return;
- uint32_t len = descr_->type_length();
- PARQUET_THROW_NOT_OK(buffer->Resize(len, false));
- std::memcpy(buffer->mutable_data(), src.ptr, len);
- *dst = FLBA(buffer->data());
-}
-
-template <>
-inline void TypedStatisticsImpl<ByteArrayType>::Copy(const ByteArray& src, ByteArray* dst,
- ResizableBuffer* buffer) {
- if (dst->ptr == src.ptr) return;
- PARQUET_THROW_NOT_OK(buffer->Resize(src.len, false));
- std::memcpy(buffer->mutable_data(), src.ptr, src.len);
- *dst = ByteArray(src.len, buffer->data());
-}
-
-template <typename DType>
-void TypedStatisticsImpl<DType>::Update(const T* values, int64_t num_not_null,
- int64_t num_null) {
- DCHECK_GE(num_not_null, 0);
- DCHECK_GE(num_null, 0);
-
- IncrementNullCount(num_null);
- IncrementNumValues(num_not_null);
-
- if (num_not_null == 0) return;
- SetMinMaxPair(comparator_->GetMinMax(values, num_not_null));
-}
-
-template <typename DType>
-void TypedStatisticsImpl<DType>::UpdateSpaced(const T* values, const uint8_t* valid_bits,
- int64_t valid_bits_offset,
- int64_t num_not_null, int64_t num_null) {
- DCHECK_GE(num_not_null, 0);
- DCHECK_GE(num_null, 0);
-
- IncrementNullCount(num_null);
- IncrementNumValues(num_not_null);
-
- if (num_not_null == 0) return;
-
- int64_t length = num_null + num_not_null;
- SetMinMaxPair(
- comparator_->GetMinMaxSpaced(values, length, valid_bits, valid_bits_offset));
-}
-
-template <typename DType>
-void TypedStatisticsImpl<DType>::PlainEncode(const T& src, std::string* dst) const {
- auto encoder = MakeTypedEncoder<DType>(Encoding::PLAIN, false, descr_, pool_);
- encoder->Put(&src, 1);
- auto buffer = encoder->FlushValues();
- auto ptr = reinterpret_cast<const char*>(buffer->data());
- dst->assign(ptr, buffer->size());
-}
-
-template <typename DType>
-void TypedStatisticsImpl<DType>::PlainDecode(const std::string& src, T* dst) const {
- auto decoder = MakeTypedDecoder<DType>(Encoding::PLAIN, descr_);
- decoder->SetData(1, reinterpret_cast<const uint8_t*>(src.c_str()),
- static_cast<int>(src.size()));
- decoder->Decode(dst, 1);
-}
-
-template <>
-void TypedStatisticsImpl<ByteArrayType>::PlainEncode(const T& src,
- std::string* dst) const {
- dst->assign(reinterpret_cast<const char*>(src.ptr), src.len);
-}
-
-template <>
-void TypedStatisticsImpl<ByteArrayType>::PlainDecode(const std::string& src,
- T* dst) const {
- dst->len = static_cast<uint32_t>(src.size());
- dst->ptr = reinterpret_cast<const uint8_t*>(src.c_str());
-}
-
-} // namespace
-
-// ----------------------------------------------------------------------
-// Public factory functions
-
-std::shared_ptr<Comparator> Comparator::Make(Type::type physical_type,
- SortOrder::type sort_order,
- int type_length) {
- if (SortOrder::SIGNED == sort_order) {
- switch (physical_type) {
- case Type::BOOLEAN:
- return std::make_shared<TypedComparatorImpl<true, BooleanType>>();
- case Type::INT32:
- return std::make_shared<TypedComparatorImpl<true, Int32Type>>();
- case Type::INT64:
- return std::make_shared<TypedComparatorImpl<true, Int64Type>>();
- case Type::INT96:
- return std::make_shared<TypedComparatorImpl<true, Int96Type>>();
- case Type::FLOAT:
- return std::make_shared<TypedComparatorImpl<true, FloatType>>();
- case Type::DOUBLE:
- return std::make_shared<TypedComparatorImpl<true, DoubleType>>();
- case Type::BYTE_ARRAY:
- return std::make_shared<TypedComparatorImpl<true, ByteArrayType>>();
- case Type::FIXED_LEN_BYTE_ARRAY:
- return std::make_shared<TypedComparatorImpl<true, FLBAType>>(type_length);
- default:
- ParquetException::NYI("Signed Compare not implemented");
- }
- } else if (SortOrder::UNSIGNED == sort_order) {
- switch (physical_type) {
- case Type::INT32:
- return std::make_shared<TypedComparatorImpl<false, Int32Type>>();
- case Type::INT64:
- return std::make_shared<TypedComparatorImpl<false, Int64Type>>();
- case Type::INT96:
- return std::make_shared<TypedComparatorImpl<false, Int96Type>>();
- case Type::BYTE_ARRAY:
- return std::make_shared<TypedComparatorImpl<false, ByteArrayType>>();
- case Type::FIXED_LEN_BYTE_ARRAY:
- return std::make_shared<TypedComparatorImpl<false, FLBAType>>(type_length);
- default:
- ParquetException::NYI("Unsigned Compare not implemented");
- }
- } else {
- throw ParquetException("UNKNOWN Sort Order");
- }
- return nullptr;
-}
-
-std::shared_ptr<Comparator> Comparator::Make(const ColumnDescriptor* descr) {
- return Make(descr->physical_type(), descr->sort_order(), descr->type_length());
-}
-
-std::shared_ptr<Statistics> Statistics::Make(const ColumnDescriptor* descr,
- ::arrow::MemoryPool* pool) {
- switch (descr->physical_type()) {
- case Type::BOOLEAN:
- return std::make_shared<TypedStatisticsImpl<BooleanType>>(descr, pool);
- case Type::INT32:
- return std::make_shared<TypedStatisticsImpl<Int32Type>>(descr, pool);
- case Type::INT64:
- return std::make_shared<TypedStatisticsImpl<Int64Type>>(descr, pool);
- case Type::FLOAT:
- return std::make_shared<TypedStatisticsImpl<FloatType>>(descr, pool);
- case Type::DOUBLE:
- return std::make_shared<TypedStatisticsImpl<DoubleType>>(descr, pool);
- case Type::BYTE_ARRAY:
- return std::make_shared<TypedStatisticsImpl<ByteArrayType>>(descr, pool);
- case Type::FIXED_LEN_BYTE_ARRAY:
- return std::make_shared<TypedStatisticsImpl<FLBAType>>(descr, pool);
- default:
- ParquetException::NYI("Statistics not implemented");
- }
-}
-
-std::shared_ptr<Statistics> Statistics::Make(Type::type physical_type, const void* min,
- const void* max, int64_t num_values,
- int64_t null_count, int64_t distinct_count) {
-#define MAKE_STATS(CAP_TYPE, KLASS) \
- case Type::CAP_TYPE: \
- return std::make_shared<TypedStatisticsImpl<KLASS>>( \
- *reinterpret_cast<const typename KLASS::c_type*>(min), \
- *reinterpret_cast<const typename KLASS::c_type*>(max), num_values, null_count, \
- distinct_count)
-
- switch (physical_type) {
- MAKE_STATS(BOOLEAN, BooleanType);
- MAKE_STATS(INT32, Int32Type);
- MAKE_STATS(INT64, Int64Type);
- MAKE_STATS(FLOAT, FloatType);
- MAKE_STATS(DOUBLE, DoubleType);
- MAKE_STATS(BYTE_ARRAY, ByteArrayType);
- MAKE_STATS(FIXED_LEN_BYTE_ARRAY, FLBAType);
- default:
- break;
- }
-#undef MAKE_STATS
- DCHECK(false) << "Cannot reach here";
- return nullptr;
-}
-
-std::shared_ptr<Statistics> Statistics::Make(const ColumnDescriptor* descr,
- const std::string& encoded_min,
- const std::string& encoded_max,
- int64_t num_values, int64_t null_count,
- int64_t distinct_count, bool has_min_max,
- bool has_null_count, bool has_distinct_count,
- ::arrow::MemoryPool* pool) {
-#define MAKE_STATS(CAP_TYPE, KLASS) \
- case Type::CAP_TYPE: \
- return std::make_shared<TypedStatisticsImpl<KLASS>>( \
- descr, encoded_min, encoded_max, num_values, null_count, distinct_count, \
- has_min_max, has_null_count, has_distinct_count, pool)
-
- switch (descr->physical_type()) {
- MAKE_STATS(BOOLEAN, BooleanType);
- MAKE_STATS(INT32, Int32Type);
- MAKE_STATS(INT64, Int64Type);
- MAKE_STATS(FLOAT, FloatType);
- MAKE_STATS(DOUBLE, DoubleType);
- MAKE_STATS(BYTE_ARRAY, ByteArrayType);
- MAKE_STATS(FIXED_LEN_BYTE_ARRAY, FLBAType);
- default:
- break;
- }
-#undef MAKE_STATS
- DCHECK(false) << "Cannot reach here";
- return nullptr;
-}
-
-} // namespace parquet
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "parquet/statistics.h"
+
+#include <algorithm>
+#include <cmath>
+#include <cstring>
+#include <limits>
+#include <type_traits>
+#include <utility>
+
+#include "arrow/array.h"
+#include "arrow/type.h"
+#include "arrow/type_traits.h"
+#include "arrow/util/bit_run_reader.h"
+#include "arrow/util/checked_cast.h"
+#include "arrow/util/logging.h"
+#include "arrow/util/optional.h"
+#include "arrow/util/ubsan.h"
+#include "arrow/visitor_inline.h"
+#include "parquet/encoding.h"
+#include "parquet/exception.h"
+#include "parquet/platform.h"
+#include "parquet/schema.h"
+
+using arrow::default_memory_pool;
+using arrow::MemoryPool;
+using arrow::internal::checked_cast;
+using arrow::util::SafeCopy;
+
+namespace parquet {
+namespace {
+
+// ----------------------------------------------------------------------
+// Comparator implementations
+
+constexpr int value_length(int value_length, const ByteArray& value) { return value.len; }
+constexpr int value_length(int type_length, const FLBA& value) { return type_length; }
+
+template <typename DType, bool is_signed>
+struct CompareHelper {
+ using T = typename DType::c_type;
+
+ static_assert(!std::is_unsigned<T>::value || std::is_same<T, bool>::value,
+ "T is an unsigned numeric");
+
+ constexpr static T DefaultMin() { return std::numeric_limits<T>::max(); }
+ constexpr static T DefaultMax() { return std::numeric_limits<T>::lowest(); }
+
+ // MSVC 2017 workaround: std::isnan is not overloaded for integral types,
+ // contrary to the C++11 standard requirements.
+ template <typename T1 = T>
+ static ::arrow::enable_if_t<std::is_floating_point<T1>::value, T> Coalesce(T val,
+ T fallback) {
+ return std::isnan(val) ? fallback : val;
+ }
+
+ template <typename T1 = T>
+ static ::arrow::enable_if_t<!std::is_floating_point<T1>::value, T> Coalesce(
+ T val, T fallback) {
+ return val;
+ }
+
+ static inline bool Compare(int type_length, const T& a, const T& b) { return a < b; }
+
+ static T Min(int type_length, T a, T b) { return a < b ? a : b; }
+ static T Max(int type_length, T a, T b) { return a < b ? b : a; }
+};
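+
+// Editorial sketch (not part of the upstream sources): how Coalesce() keeps
+// NaN from poisoning the min/max accumulation in GetMinMax() further below.
+// With the float values {1.0f, NAN, -2.0f}:
+//
+//   using Helper = CompareHelper<FloatType, /*is_signed=*/true>;
+//   float min = Helper::DefaultMin();  // +FLT_MAX
+//   float max = Helper::DefaultMax();  // -FLT_MAX
+//   for (float v : {1.0f, NAN, -2.0f}) {
+//     min = Helper::Min(-1, min, Helper::Coalesce(v, Helper::DefaultMin()));
+//     max = Helper::Max(-1, max, Helper::Coalesce(v, Helper::DefaultMax()));
+//   }
+//   // min == -2.0f, max == 1.0f: each NaN was replaced by a neutral element.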
+
+template <typename DType>
+struct UnsignedCompareHelperBase {
+ using T = typename DType::c_type;
+ using UCType = typename std::make_unsigned<T>::type;
+
+ static_assert(!std::is_same<T, UCType>::value, "T is unsigned");
+ static_assert(sizeof(T) == sizeof(UCType), "T and UCType not the same size");
+
+ // NOTE: according to the C++ spec, unsigned-to-signed conversion is
+ // implementation-defined if the original value does not fit in the signed type
+ // (i.e., two's complement cannot be assumed even on mainstream machines,
+ // because the compiler may decide otherwise). Hence the use of `SafeCopy`
+ // below for deterministic bit-casting.
+ // (see "Integer conversions" in
+ // https://en.cppreference.com/w/cpp/language/implicit_conversion)
+
+ static const T DefaultMin() { return SafeCopy<T>(std::numeric_limits<UCType>::max()); }
+ static const T DefaultMax() { return 0; }
+
+ static T Coalesce(T val, T fallback) { return val; }
+
+ static bool Compare(int type_length, T a, T b) {
+ return SafeCopy<UCType>(a) < SafeCopy<UCType>(b);
+ }
+
+ static T Min(int type_length, T a, T b) { return Compare(type_length, a, b) ? a : b; }
+ static T Max(int type_length, T a, T b) { return Compare(type_length, a, b) ? b : a; }
+};
+
+template <>
+struct CompareHelper<Int32Type, false> : public UnsignedCompareHelperBase<Int32Type> {};
+
+template <>
+struct CompareHelper<Int64Type, false> : public UnsignedCompareHelperBase<Int64Type> {};
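+
+// Worked example (editorial): under the unsigned helpers, values that are
+// negative when viewed as signed sort above all non-negative values, because
+// the comparison happens on the bit-cast unsigned representation:
+//
+//   using Helper = CompareHelper<Int32Type, /*is_signed=*/false>;
+//   Helper::Compare(/*type_length=*/-1, -1, 1);  // false: 0xFFFFFFFFu > 1u
+//   Helper::Min(/*type_length=*/-1, -1, 1);      // 1
+//   Helper::Max(/*type_length=*/-1, -1, 1);      // -1, i.e. 0xFFFFFFFFu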
+
+template <bool is_signed>
+struct CompareHelper<Int96Type, is_signed> {
+ using T = typename Int96Type::c_type;
+ using msb_type = typename std::conditional<is_signed, int32_t, uint32_t>::type;
+
+ static T DefaultMin() {
+ uint32_t kMsbMax = SafeCopy<uint32_t>(std::numeric_limits<msb_type>::max());
+ uint32_t kMax = std::numeric_limits<uint32_t>::max();
+ return {kMax, kMax, kMsbMax};
+ }
+ static T DefaultMax() {
+ uint32_t kMsbMin = SafeCopy<uint32_t>(std::numeric_limits<msb_type>::min());
+ uint32_t kMin = std::numeric_limits<uint32_t>::min();
+ return {kMin, kMin, kMsbMin};
+ }
+ static T Coalesce(T val, T fallback) { return val; }
+
+ static inline bool Compare(int type_length, const T& a, const T& b) {
+ if (a.value[2] != b.value[2]) {
+ // Only the most significant of the three 32-bit words is compared as
+ // signed; for little-endian Int96 this is the last word.
+ return SafeCopy<msb_type>(a.value[2]) < SafeCopy<msb_type>(b.value[2]);
+ } else if (a.value[1] != b.value[1]) {
+ return (a.value[1] < b.value[1]);
+ }
+ return (a.value[0] < b.value[0]);
+ }
+
+ static T Min(int type_length, const T& a, const T& b) {
+ return Compare(0, a, b) ? a : b;
+ }
+ static T Max(int type_length, const T& a, const T& b) {
+ return Compare(0, a, b) ? b : a;
+ }
+};
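+
+// Worked example (editorial): Int96 is compared word by word from the most
+// significant 32-bit word (value[2]) down. With
+//
+//   Int96 a{{0u, 0u, 1u}};    // high word 1
+//   Int96 b{{~0u, ~0u, 0u}};  // high word 0
+//
+// Compare(0, a, b) is false under both sort orders: a.value[2] (1) already
+// exceeds b.value[2] (0), so the lower words are never inspected.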
+
+template <typename T, bool is_signed>
+struct BinaryLikeComparer {};
+
+template <typename T>
+struct BinaryLikeComparer<T, /*is_signed=*/false> {
+ static bool Compare(int type_length, const T& a, const T& b) {
+ int a_length = value_length(type_length, a);
+ int b_length = value_length(type_length, b);
+ // Unsigned comparison is used for non-numeric types, so a straight
+ // lexicographic comparison makes sense (a.ptr is always unsigned).
+ return std::lexicographical_compare(a.ptr, a.ptr + a_length, b.ptr, b.ptr + b_length);
+ }
+};
+
+template <typename T>
+struct BinaryLikeComparer<T, /*is_signed=*/true> {
+ static bool Compare(int type_length, const T& a, const T& b) {
+ // Signed comparison is used for integers encoded as big-endian
+ // two's complement (e.g. decimals).
+ int a_length = value_length(type_length, a);
+ int b_length = value_length(type_length, b);
+
+ // At least one of the lengths is zero.
+ if (a_length == 0 || b_length == 0) {
+ return a_length == 0 && b_length > 0;
+ }
+
+ int8_t first_a = *a.ptr;
+ int8_t first_b = *b.ptr;
+ // We can short circuit for numbers of different sign, or for
+ // equal-length byte arrays that have different first bytes.
+ // The equal-length requirement matters for sign extension cases:
+ // 0xFF80 should compare equal to 0x80 (big-endian sign extension
+ // of a negative value), even though their first bytes differ.
+ if ((0x80 & first_a) != (0x80 & first_b) ||
+ (a_length == b_length && first_a != first_b)) {
+ return first_a < first_b;
+ }
+ // When the lengths are unequal and the numbers are of the same
+ // sign we need to do comparison by sign extending the shorter
+ // value first, and once we get to equal sized arrays, lexicographical
+ // unsigned comparison of everything but the first byte is sufficient.
+ const uint8_t* a_start = a.ptr;
+ const uint8_t* b_start = b.ptr;
+ if (a_length != b_length) {
+ const uint8_t* lead_start = nullptr;
+ const uint8_t* lead_end = nullptr;
+ if (a_length > b_length) {
+ int lead_length = a_length - b_length;
+ lead_start = a.ptr;
+ lead_end = a.ptr + lead_length;
+ a_start += lead_length;
+ } else {
+ DCHECK_LT(a_length, b_length);
+ int lead_length = b_length - a_length;
+ lead_start = b.ptr;
+ lead_end = b.ptr + lead_length;
+ b_start += lead_length;
+ }
+ // Compare extra bytes to the sign extension of the first
+ // byte of the other number.
+ uint8_t extension = first_a < 0 ? 0xFF : 0;
+ bool not_equal = std::any_of(lead_start, lead_end,
+ [extension](uint8_t a) { return extension != a; });
+ if (not_equal) {
+ // Since sign-extension bytes are extrema values for unsigned bytes:
+ //
+ // Four cases exist:
+ // negative values:
+ // b is the longer value.
+ // b must be the lesser value: return false
+ // else:
+ // a must be the lesser value: return true
+ //
+ // positive values:
+ // b is the longer value.
+ // values in b must be greater than a: return true
+ // else:
+ // values in a must be greater than b: return false
+ bool negative_values = first_a < 0;
+ bool b_longer = a_length < b_length;
+ return negative_values != b_longer;
+ }
+ } else {
+ a_start++;
+ b_start++;
+ }
+ return std::lexicographical_compare(a_start, a.ptr + a_length, b_start,
+ b.ptr + b_length);
+ }
+};
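+
+// Worked example (editorial): under the signed (big-endian two's complement)
+// order with variable-length values:
+//
+//   const uint8_t kA[] = {0xFF, 0x80};  // -128, encoded on two bytes
+//   const uint8_t kB[] = {0x80};        // -128, encoded on one byte
+//   ByteArray a(2, kA);
+//   ByteArray b(1, kB);
+//   using Cmp = BinaryLikeComparer<ByteArray, /*is_signed=*/true>;
+//   Cmp::Compare(/*type_length=*/-1, a, b);  // false
+//   Cmp::Compare(/*type_length=*/-1, b, a);  // false, so a and b are equal
+//
+// The extra 0xFF lead byte of `a` matches the sign extension of `b`, so the
+// comparison falls through to the identical suffixes {0x80} and {0x80}.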
+
+template <typename DType, bool is_signed>
+struct BinaryLikeCompareHelperBase {
+ using T = typename DType::c_type;
+
+ static T DefaultMin() { return {}; }
+ static T DefaultMax() { return {}; }
+ static T Coalesce(T val, T fallback) { return val; }
+
+ static inline bool Compare(int type_length, const T& a, const T& b) {
+ return BinaryLikeComparer<T, is_signed>::Compare(type_length, a, b);
+ }
+ static T Min(int type_length, const T& a, const T& b) {
+ if (a.ptr == nullptr) return b;
+ if (b.ptr == nullptr) return a;
+ return Compare(type_length, a, b) ? a : b;
+ }
+
+ static T Max(int type_length, const T& a, const T& b) {
+ if (a.ptr == nullptr) return b;
+ if (b.ptr == nullptr) return a;
+ return Compare(type_length, a, b) ? b : a;
+ }
+};
+
+template <bool is_signed>
+struct CompareHelper<ByteArrayType, is_signed>
+ : public BinaryLikeCompareHelperBase<ByteArrayType, is_signed> {};
+
+template <bool is_signed>
+struct CompareHelper<FLBAType, is_signed>
+ : public BinaryLikeCompareHelperBase<FLBAType, is_signed> {};
+
+using ::arrow::util::optional;
+
+template <typename T>
+::arrow::enable_if_t<std::is_integral<T>::value, optional<std::pair<T, T>>>
+CleanStatistic(std::pair<T, T> min_max) {
+ return min_max;
+}
+
+// In case of floating point types, the following rules are applied (as per
+// upstream parquet-mr):
+// - If any of min/max is NaN, return nothing.
+// - If min is 0.0f, replace with -0.0f
+// - If max is -0.0f, replace with 0.0f
+template <typename T>
+::arrow::enable_if_t<std::is_floating_point<T>::value, optional<std::pair<T, T>>>
+CleanStatistic(std::pair<T, T> min_max) {
+ T min = min_max.first;
+ T max = min_max.second;
+
+ // Ignore the pair if either value is NaN.
+ if (std::isnan(min) || std::isnan(max)) {
+ return ::arrow::util::nullopt;
+ }
+
+ if (min == std::numeric_limits<T>::max() && max == std::numeric_limits<T>::lowest()) {
+ return ::arrow::util::nullopt;
+ }
+
+ T zero{};
+
+ if (min == zero && !std::signbit(min)) {
+ min = -min;
+ }
+
+ if (max == zero && std::signbit(max)) {
+ max = -max;
+ }
+
+ return {{min, max}};
+}
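+
+// Editorial sketch of the rules above:
+//
+//   CleanStatistic<double>({0.0, 1.0});   // -> {-0.0, 1.0}  (min +0.0 -> -0.0)
+//   CleanStatistic<double>({-1.0, -0.0}); // -> {-1.0, 0.0}  (max -0.0 -> +0.0)
+//   CleanStatistic<double>({NAN, 1.0});   // -> nullopt, the pair is dropped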
+
+optional<std::pair<FLBA, FLBA>> CleanStatistic(std::pair<FLBA, FLBA> min_max) {
+ if (min_max.first.ptr == nullptr || min_max.second.ptr == nullptr) {
+ return ::arrow::util::nullopt;
+ }
+ return min_max;
+}
+
+optional<std::pair<ByteArray, ByteArray>> CleanStatistic(
+ std::pair<ByteArray, ByteArray> min_max) {
+ if (min_max.first.ptr == nullptr || min_max.second.ptr == nullptr) {
+ return ::arrow::util::nullopt;
+ }
+ return min_max;
+}
+
+template <bool is_signed, typename DType>
+class TypedComparatorImpl : virtual public TypedComparator<DType> {
+ public:
+ using T = typename DType::c_type;
+ using Helper = CompareHelper<DType, is_signed>;
+
+ explicit TypedComparatorImpl(int type_length = -1) : type_length_(type_length) {}
+
+ bool CompareInline(const T& a, const T& b) const {
+ return Helper::Compare(type_length_, a, b);
+ }
+
+ bool Compare(const T& a, const T& b) override { return CompareInline(a, b); }
+
+ std::pair<T, T> GetMinMax(const T* values, int64_t length) override {
+ DCHECK_GT(length, 0);
+
+ T min = Helper::DefaultMin();
+ T max = Helper::DefaultMax();
+
+ for (int64_t i = 0; i < length; i++) {
+ auto val = values[i];
+ min = Helper::Min(type_length_, min, Helper::Coalesce(val, Helper::DefaultMin()));
+ max = Helper::Max(type_length_, max, Helper::Coalesce(val, Helper::DefaultMax()));
+ }
+
+ return {min, max};
+ }
+
+ std::pair<T, T> GetMinMaxSpaced(const T* values, int64_t length,
+ const uint8_t* valid_bits,
+ int64_t valid_bits_offset) override {
+ DCHECK_GT(length, 0);
+
+ T min = Helper::DefaultMin();
+ T max = Helper::DefaultMax();
+
+ ::arrow::internal::VisitSetBitRunsVoid(
+ valid_bits, valid_bits_offset, length, [&](int64_t position, int64_t length) {
+ for (int64_t i = 0; i < length; i++) {
+ const auto val = values[i + position];
+ min = Helper::Min(type_length_, min,
+ Helper::Coalesce(val, Helper::DefaultMin()));
+ max = Helper::Max(type_length_, max,
+ Helper::Coalesce(val, Helper::DefaultMax()));
+ }
+ });
+
+ return {min, max};
+ }
+
+ std::pair<T, T> GetMinMax(const ::arrow::Array& values) override;
+
+ private:
+ int type_length_;
+};
+
+// ARROW-11675: A hand-written version of GetMinMax(), to work around
+// what looks like an MSVC code generation bug.
+// This does not seem to be required for GetMinMaxSpaced().
+template <>
+std::pair<int32_t, int32_t>
+TypedComparatorImpl</*is_signed=*/false, Int32Type>::GetMinMax(const int32_t* values,
+ int64_t length) {
+ DCHECK_GT(length, 0);
+
+ const uint32_t* unsigned_values = reinterpret_cast<const uint32_t*>(values);
+ uint32_t min = std::numeric_limits<uint32_t>::max();
+ uint32_t max = std::numeric_limits<uint32_t>::lowest();
+
+ for (int64_t i = 0; i < length; i++) {
+ const auto val = unsigned_values[i];
+ min = std::min<uint32_t>(min, val);
+ max = std::max<uint32_t>(max, val);
+ }
+
+ return {SafeCopy<int32_t>(min), SafeCopy<int32_t>(max)};
+}
+
+template <bool is_signed, typename DType>
+std::pair<typename DType::c_type, typename DType::c_type>
+TypedComparatorImpl<is_signed, DType>::GetMinMax(const ::arrow::Array& values) {
+ ParquetException::NYI(values.type()->ToString());
+}
+
+template <bool is_signed>
+std::pair<ByteArray, ByteArray> GetMinMaxBinaryHelper(
+ const TypedComparatorImpl<is_signed, ByteArrayType>& comparator,
+ const ::arrow::Array& values) {
+ using Helper = CompareHelper<ByteArrayType, is_signed>;
+
+ ByteArray min = Helper::DefaultMin();
+ ByteArray max = Helper::DefaultMax();
+ constexpr int type_length = -1;
+
+ const auto valid_func = [&](ByteArray val) {
+ min = Helper::Min(type_length, val, min);
+ max = Helper::Max(type_length, val, max);
+ };
+ const auto null_func = [&]() {};
+
+ if (::arrow::is_binary_like(values.type_id())) {
+ ::arrow::VisitArrayDataInline<::arrow::BinaryType>(
+ *values.data(), std::move(valid_func), std::move(null_func));
+ } else {
+ DCHECK(::arrow::is_large_binary_like(values.type_id()));
+ ::arrow::VisitArrayDataInline<::arrow::LargeBinaryType>(
+ *values.data(), std::move(valid_func), std::move(null_func));
+ }
+
+ return {min, max};
+}
+
+template <>
+std::pair<ByteArray, ByteArray> TypedComparatorImpl<true, ByteArrayType>::GetMinMax(
+ const ::arrow::Array& values) {
+ return GetMinMaxBinaryHelper<true>(*this, values);
+}
+
+template <>
+std::pair<ByteArray, ByteArray> TypedComparatorImpl<false, ByteArrayType>::GetMinMax(
+ const ::arrow::Array& values) {
+ return GetMinMaxBinaryHelper<false>(*this, values);
+}
+
+template <typename DType>
+class TypedStatisticsImpl : public TypedStatistics<DType> {
+ public:
+ using T = typename DType::c_type;
+
+ TypedStatisticsImpl(const ColumnDescriptor* descr, MemoryPool* pool)
+ : descr_(descr),
+ pool_(pool),
+ min_buffer_(AllocateBuffer(pool_, 0)),
+ max_buffer_(AllocateBuffer(pool_, 0)) {
+ auto comp = Comparator::Make(descr);
+ comparator_ = std::static_pointer_cast<TypedComparator<DType>>(comp);
+ Reset();
+ has_null_count_ = true;
+ has_distinct_count_ = true;
+ }
+
+ TypedStatisticsImpl(const T& min, const T& max, int64_t num_values, int64_t null_count,
+ int64_t distinct_count)
+ : pool_(default_memory_pool()),
+ min_buffer_(AllocateBuffer(pool_, 0)),
+ max_buffer_(AllocateBuffer(pool_, 0)) {
+ IncrementNumValues(num_values);
+ IncrementNullCount(null_count);
+ IncrementDistinctCount(distinct_count);
+
+ Copy(min, &min_, min_buffer_.get());
+ Copy(max, &max_, max_buffer_.get());
+ has_min_max_ = true;
+ }
+
+ TypedStatisticsImpl(const ColumnDescriptor* descr, const std::string& encoded_min,
+ const std::string& encoded_max, int64_t num_values,
+ int64_t null_count, int64_t distinct_count, bool has_min_max,
+ bool has_null_count, bool has_distinct_count, MemoryPool* pool)
+ : TypedStatisticsImpl(descr, pool) {
+ IncrementNumValues(num_values);
+ if (has_null_count_) {
+ IncrementNullCount(null_count);
+ }
+ if (has_distinct_count) {
+ IncrementDistinctCount(distinct_count);
+ }
+
+ if (!encoded_min.empty()) {
+ PlainDecode(encoded_min, &min_);
+ }
+ if (!encoded_max.empty()) {
+ PlainDecode(encoded_max, &max_);
+ }
+ has_min_max_ = has_min_max;
+ }
+
+ bool HasDistinctCount() const override { return has_distinct_count_; }
+ bool HasMinMax() const override { return has_min_max_; }
+ bool HasNullCount() const override { return has_null_count_; }
+
+ bool Equals(const Statistics& raw_other) const override {
+ if (physical_type() != raw_other.physical_type()) return false;
+
+ const auto& other = checked_cast<const TypedStatisticsImpl&>(raw_other);
+
+ if (has_min_max_ != other.has_min_max_) return false;
+
+ return (!has_min_max_ || MinMaxEqual(other)) && null_count() == other.null_count() &&
+ distinct_count() == other.distinct_count() &&
+ num_values() == other.num_values();
+ }
+
+ bool MinMaxEqual(const TypedStatisticsImpl& other) const;
+
+ void Reset() override {
+ ResetCounts();
+ has_min_max_ = false;
+ has_distinct_count_ = false;
+ has_null_count_ = false;
+ }
+
+ void SetMinMax(const T& arg_min, const T& arg_max) override {
+ SetMinMaxPair({arg_min, arg_max});
+ }
+
+ void Merge(const TypedStatistics<DType>& other) override {
+ this->num_values_ += other.num_values();
+ if (other.HasNullCount()) {
+ this->statistics_.null_count += other.null_count();
+ }
+ if (other.HasDistinctCount()) {
+ this->statistics_.distinct_count += other.distinct_count();
+ }
+ if (other.HasMinMax()) {
+ SetMinMax(other.min(), other.max());
+ }
+ }
+
+ void Update(const T* values, int64_t num_not_null, int64_t num_null) override;
+ void UpdateSpaced(const T* values, const uint8_t* valid_bits, int64_t valid_bits_offset,
+ int64_t num_not_null, int64_t num_null) override;
+
+ void Update(const ::arrow::Array& values) override {
+ IncrementNullCount(values.null_count());
+ IncrementNumValues(values.length() - values.null_count());
+
+ if (values.null_count() == values.length()) {
+ return;
+ }
+
+ SetMinMaxPair(comparator_->GetMinMax(values));
+ }
+
+ const T& min() const override { return min_; }
+
+ const T& max() const override { return max_; }
+
+ Type::type physical_type() const override { return descr_->physical_type(); }
+
+ const ColumnDescriptor* descr() const override { return descr_; }
+
+ std::string EncodeMin() const override {
+ std::string s;
+ if (HasMinMax()) this->PlainEncode(min_, &s);
+ return s;
+ }
+
+ std::string EncodeMax() const override {
+ std::string s;
+ if (HasMinMax()) this->PlainEncode(max_, &s);
+ return s;
+ }
+
+ EncodedStatistics Encode() override {
+ EncodedStatistics s;
+ if (HasMinMax()) {
+ s.set_min(this->EncodeMin());
+ s.set_max(this->EncodeMax());
+ }
+ if (HasNullCount()) {
+ s.set_null_count(this->null_count());
+ }
+ return s;
+ }
+
+ int64_t null_count() const override { return statistics_.null_count; }
+ int64_t distinct_count() const override { return statistics_.distinct_count; }
+ int64_t num_values() const override { return num_values_; }
+
+ private:
+ const ColumnDescriptor* descr_;
+ bool has_min_max_ = false;
+ bool has_null_count_ = false;
+ bool has_distinct_count_ = false;
+ T min_;
+ T max_;
+ ::arrow::MemoryPool* pool_;
+ int64_t num_values_ = 0;
+ EncodedStatistics statistics_;
+ std::shared_ptr<TypedComparator<DType>> comparator_;
+ std::shared_ptr<ResizableBuffer> min_buffer_, max_buffer_;
+
+ void PlainEncode(const T& src, std::string* dst) const;
+ void PlainDecode(const std::string& src, T* dst) const;
+
+ void Copy(const T& src, T* dst, ResizableBuffer*) { *dst = src; }
+
+ void IncrementNullCount(int64_t n) {
+ statistics_.null_count += n;
+ has_null_count_ = true;
+ }
+
+ void IncrementNumValues(int64_t n) { num_values_ += n; }
+
+ void IncrementDistinctCount(int64_t n) {
+ statistics_.distinct_count += n;
+ has_distinct_count_ = true;
+ }
+
+ void ResetCounts() {
+ this->statistics_.null_count = 0;
+ this->statistics_.distinct_count = 0;
+ this->num_values_ = 0;
+ }
+
+ void SetMinMaxPair(std::pair<T, T> min_max) {
+ // CleanStatistic can return a nullopt in case of erroneous values, e.g. NaN
+ auto maybe_min_max = CleanStatistic(min_max);
+ if (!maybe_min_max) return;
+
+ auto min = maybe_min_max.value().first;
+ auto max = maybe_min_max.value().second;
+
+ if (!has_min_max_) {
+ has_min_max_ = true;
+ Copy(min, &min_, min_buffer_.get());
+ Copy(max, &max_, max_buffer_.get());
+ } else {
+ Copy(comparator_->Compare(min_, min) ? min_ : min, &min_, min_buffer_.get());
+ Copy(comparator_->Compare(max_, max) ? max : max_, &max_, max_buffer_.get());
+ }
+ }
+};
+
+template <>
+inline bool TypedStatisticsImpl<FLBAType>::MinMaxEqual(
+ const TypedStatisticsImpl<FLBAType>& other) const {
+ uint32_t len = descr_->type_length();
+ return std::memcmp(min_.ptr, other.min_.ptr, len) == 0 &&
+ std::memcmp(max_.ptr, other.max_.ptr, len) == 0;
+}
+
+template <typename DType>
+bool TypedStatisticsImpl<DType>::MinMaxEqual(
+ const TypedStatisticsImpl<DType>& other) const {
+ return min_ == other.min_ && max_ == other.max_;
+}
+
+template <>
+inline void TypedStatisticsImpl<FLBAType>::Copy(const FLBA& src, FLBA* dst,
+ ResizableBuffer* buffer) {
+ if (dst->ptr == src.ptr) return;
+ uint32_t len = descr_->type_length();
+ PARQUET_THROW_NOT_OK(buffer->Resize(len, false));
+ std::memcpy(buffer->mutable_data(), src.ptr, len);
+ *dst = FLBA(buffer->data());
+}
+
+template <>
+inline void TypedStatisticsImpl<ByteArrayType>::Copy(const ByteArray& src, ByteArray* dst,
+ ResizableBuffer* buffer) {
+ if (dst->ptr == src.ptr) return;
+ PARQUET_THROW_NOT_OK(buffer->Resize(src.len, false));
+ std::memcpy(buffer->mutable_data(), src.ptr, src.len);
+ *dst = ByteArray(src.len, buffer->data());
+}
+
+template <typename DType>
+void TypedStatisticsImpl<DType>::Update(const T* values, int64_t num_not_null,
+ int64_t num_null) {
+ DCHECK_GE(num_not_null, 0);
+ DCHECK_GE(num_null, 0);
+
+ IncrementNullCount(num_null);
+ IncrementNumValues(num_not_null);
+
+ if (num_not_null == 0) return;
+ SetMinMaxPair(comparator_->GetMinMax(values, num_not_null));
+}
+
+template <typename DType>
+void TypedStatisticsImpl<DType>::UpdateSpaced(const T* values, const uint8_t* valid_bits,
+ int64_t valid_bits_offset,
+ int64_t num_not_null, int64_t num_null) {
+ DCHECK_GE(num_not_null, 0);
+ DCHECK_GE(num_null, 0);
+
+ IncrementNullCount(num_null);
+ IncrementNumValues(num_not_null);
+
+ if (num_not_null == 0) return;
+
+ int64_t length = num_null + num_not_null;
+ SetMinMaxPair(
+ comparator_->GetMinMaxSpaced(values, length, valid_bits, valid_bits_offset));
+}
+
+template <typename DType>
+void TypedStatisticsImpl<DType>::PlainEncode(const T& src, std::string* dst) const {
+ auto encoder = MakeTypedEncoder<DType>(Encoding::PLAIN, false, descr_, pool_);
+ encoder->Put(&src, 1);
+ auto buffer = encoder->FlushValues();
+ auto ptr = reinterpret_cast<const char*>(buffer->data());
+ dst->assign(ptr, buffer->size());
+}
+
+template <typename DType>
+void TypedStatisticsImpl<DType>::PlainDecode(const std::string& src, T* dst) const {
+ auto decoder = MakeTypedDecoder<DType>(Encoding::PLAIN, descr_);
+ decoder->SetData(1, reinterpret_cast<const uint8_t*>(src.c_str()),
+ static_cast<int>(src.size()));
+ decoder->Decode(dst, 1);
+}
+
+template <>
+void TypedStatisticsImpl<ByteArrayType>::PlainEncode(const T& src,
+ std::string* dst) const {
+ dst->assign(reinterpret_cast<const char*>(src.ptr), src.len);
+}
+
+template <>
+void TypedStatisticsImpl<ByteArrayType>::PlainDecode(const std::string& src,
+ T* dst) const {
+ dst->len = static_cast<uint32_t>(src.size());
+ dst->ptr = reinterpret_cast<const uint8_t*>(src.c_str());
+}
+
+} // namespace
+
+// ----------------------------------------------------------------------
+// Public factory functions
+
+std::shared_ptr<Comparator> Comparator::Make(Type::type physical_type,
+ SortOrder::type sort_order,
+ int type_length) {
+ if (SortOrder::SIGNED == sort_order) {
+ switch (physical_type) {
+ case Type::BOOLEAN:
+ return std::make_shared<TypedComparatorImpl<true, BooleanType>>();
+ case Type::INT32:
+ return std::make_shared<TypedComparatorImpl<true, Int32Type>>();
+ case Type::INT64:
+ return std::make_shared<TypedComparatorImpl<true, Int64Type>>();
+ case Type::INT96:
+ return std::make_shared<TypedComparatorImpl<true, Int96Type>>();
+ case Type::FLOAT:
+ return std::make_shared<TypedComparatorImpl<true, FloatType>>();
+ case Type::DOUBLE:
+ return std::make_shared<TypedComparatorImpl<true, DoubleType>>();
+ case Type::BYTE_ARRAY:
+ return std::make_shared<TypedComparatorImpl<true, ByteArrayType>>();
+ case Type::FIXED_LEN_BYTE_ARRAY:
+ return std::make_shared<TypedComparatorImpl<true, FLBAType>>(type_length);
+ default:
+ ParquetException::NYI("Signed Compare not implemented");
+ }
+ } else if (SortOrder::UNSIGNED == sort_order) {
+ switch (physical_type) {
+ case Type::INT32:
+ return std::make_shared<TypedComparatorImpl<false, Int32Type>>();
+ case Type::INT64:
+ return std::make_shared<TypedComparatorImpl<false, Int64Type>>();
+ case Type::INT96:
+ return std::make_shared<TypedComparatorImpl<false, Int96Type>>();
+ case Type::BYTE_ARRAY:
+ return std::make_shared<TypedComparatorImpl<false, ByteArrayType>>();
+ case Type::FIXED_LEN_BYTE_ARRAY:
+ return std::make_shared<TypedComparatorImpl<false, FLBAType>>(type_length);
+ default:
+ ParquetException::NYI("Unsigned Compare not implemented");
+ }
+ } else {
+ throw ParquetException("UNKNOWN Sort Order");
+ }
+ return nullptr;
+}
+
+std::shared_ptr<Comparator> Comparator::Make(const ColumnDescriptor* descr) {
+ return Make(descr->physical_type(), descr->sort_order(), descr->type_length());
+}
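+
+// Usage sketch (editorial, not part of the upstream sources): obtaining a
+// typed comparator and reducing a batch to its min/max pair.
+//
+//   auto comp = MakeComparator<Int32Type>(Type::INT32, SortOrder::UNSIGNED);
+//   const int32_t values[] = {3, -1, 7};
+//   std::pair<int32_t, int32_t> mm = comp->GetMinMax(values, 3);
+//   // mm.first == 3 and mm.second == -1: under the unsigned sort order,
+//   // -1 is 0xFFFFFFFFu and therefore the largest value in the batch.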
+
+std::shared_ptr<Statistics> Statistics::Make(const ColumnDescriptor* descr,
+ ::arrow::MemoryPool* pool) {
+ switch (descr->physical_type()) {
+ case Type::BOOLEAN:
+ return std::make_shared<TypedStatisticsImpl<BooleanType>>(descr, pool);
+ case Type::INT32:
+ return std::make_shared<TypedStatisticsImpl<Int32Type>>(descr, pool);
+ case Type::INT64:
+ return std::make_shared<TypedStatisticsImpl<Int64Type>>(descr, pool);
+ case Type::FLOAT:
+ return std::make_shared<TypedStatisticsImpl<FloatType>>(descr, pool);
+ case Type::DOUBLE:
+ return std::make_shared<TypedStatisticsImpl<DoubleType>>(descr, pool);
+ case Type::BYTE_ARRAY:
+ return std::make_shared<TypedStatisticsImpl<ByteArrayType>>(descr, pool);
+ case Type::FIXED_LEN_BYTE_ARRAY:
+ return std::make_shared<TypedStatisticsImpl<FLBAType>>(descr, pool);
+ default:
+ ParquetException::NYI("Statistics not implemented");
+ }
+}
+
+std::shared_ptr<Statistics> Statistics::Make(Type::type physical_type, const void* min,
+ const void* max, int64_t num_values,
+ int64_t null_count, int64_t distinct_count) {
+#define MAKE_STATS(CAP_TYPE, KLASS) \
+ case Type::CAP_TYPE: \
+ return std::make_shared<TypedStatisticsImpl<KLASS>>( \
+ *reinterpret_cast<const typename KLASS::c_type*>(min), \
+ *reinterpret_cast<const typename KLASS::c_type*>(max), num_values, null_count, \
+ distinct_count)
+
+ switch (physical_type) {
+ MAKE_STATS(BOOLEAN, BooleanType);
+ MAKE_STATS(INT32, Int32Type);
+ MAKE_STATS(INT64, Int64Type);
+ MAKE_STATS(FLOAT, FloatType);
+ MAKE_STATS(DOUBLE, DoubleType);
+ MAKE_STATS(BYTE_ARRAY, ByteArrayType);
+ MAKE_STATS(FIXED_LEN_BYTE_ARRAY, FLBAType);
+ default:
+ break;
+ }
+#undef MAKE_STATS
+ DCHECK(false) << "Cannot reach here";
+ return nullptr;
+}
+
+std::shared_ptr<Statistics> Statistics::Make(const ColumnDescriptor* descr,
+ const std::string& encoded_min,
+ const std::string& encoded_max,
+ int64_t num_values, int64_t null_count,
+ int64_t distinct_count, bool has_min_max,
+ bool has_null_count, bool has_distinct_count,
+ ::arrow::MemoryPool* pool) {
+#define MAKE_STATS(CAP_TYPE, KLASS) \
+ case Type::CAP_TYPE: \
+ return std::make_shared<TypedStatisticsImpl<KLASS>>( \
+ descr, encoded_min, encoded_max, num_values, null_count, distinct_count, \
+ has_min_max, has_null_count, has_distinct_count, pool)
+
+ switch (descr->physical_type()) {
+ MAKE_STATS(BOOLEAN, BooleanType);
+ MAKE_STATS(INT32, Int32Type);
+ MAKE_STATS(INT64, Int64Type);
+ MAKE_STATS(FLOAT, FloatType);
+ MAKE_STATS(DOUBLE, DoubleType);
+ MAKE_STATS(BYTE_ARRAY, ByteArrayType);
+ MAKE_STATS(FIXED_LEN_BYTE_ARRAY, FLBAType);
+ default:
+ break;
+ }
+#undef MAKE_STATS
+ DCHECK(false) << "Cannot reach here";
+ return nullptr;
+}
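+
+// Usage sketch (editorial): a typical write-side round trip through the
+// factories above, assuming `descr` points at the ColumnDescriptor of an
+// INT64 column.
+//
+//   auto stats = MakeStatistics<Int64Type>(descr);
+//   const int64_t values[] = {5, 2, 9};
+//   stats->Update(values, /*num_not_null=*/3, /*num_null=*/1);
+//   EncodedStatistics encoded = stats->Encode();
+//   // encoded.has_min and encoded.has_max are set (min 2, max 9) and
+//   // encoded.null_count == 1.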
+
+} // namespace parquet
diff --git a/contrib/libs/apache/arrow/cpp/src/parquet/statistics.h b/contrib/libs/apache/arrow/cpp/src/parquet/statistics.h
index 1242180000c..18f68f21b87 100644
--- a/contrib/libs/apache/arrow/cpp/src/parquet/statistics.h
+++ b/contrib/libs/apache/arrow/cpp/src/parquet/statistics.h
@@ -1,342 +1,342 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-#pragma once
-
-#include <algorithm>
-#include <cstddef>
-#include <cstdint>
-#include <memory>
-#include <string>
-#include <utility>
-
-#include "parquet/platform.h"
-#include "parquet/types.h"
-
-namespace arrow {
-
-class Array;
-class BinaryArray;
-
-} // namespace arrow
-
-namespace parquet {
-
-class ColumnDescriptor;
-
-// ----------------------------------------------------------------------
-// Value comparator interfaces
-
-/// \brief Base class for value comparators. Generally used with
-/// TypedComparator<T>
-class PARQUET_EXPORT Comparator {
- public:
- virtual ~Comparator() {}
-
- /// \brief Create a comparator explicitly from physical type and
- /// sort order
- /// \param[in] physical_type the physical type for the typed
- /// comparator
- /// \param[in] sort_order either SortOrder::SIGNED or
- /// SortOrder::UNSIGNED
- /// \param[in] type_length for FIXED_LEN_BYTE_ARRAY only
- static std::shared_ptr<Comparator> Make(Type::type physical_type,
- SortOrder::type sort_order,
- int type_length = -1);
-
- /// \brief Create typed comparator inferring default sort order from
- /// ColumnDescriptor
- /// \param[in] descr the Parquet column schema
- static std::shared_ptr<Comparator> Make(const ColumnDescriptor* descr);
-};
-
-/// \brief Interface for comparison of physical types according to the
-/// semantics of a particular logical type.
-template <typename DType>
-class TypedComparator : public Comparator {
- public:
- using T = typename DType::c_type;
-
- /// \brief Scalar comparison of two elements, return true if first
- /// is strictly less than the second
- virtual bool Compare(const T& a, const T& b) = 0;
-
- /// \brief Compute maximum and minimum elements in a batch of
- /// elements without any nulls
- virtual std::pair<T, T> GetMinMax(const T* values, int64_t length) = 0;
-
- /// \brief Compute minimum and maximum elements from an Arrow array. Only
- /// valid for certain Parquet Type / Arrow Type combinations, like BYTE_ARRAY
- /// / arrow::BinaryArray
- virtual std::pair<T, T> GetMinMax(const ::arrow::Array& values) = 0;
-
- /// \brief Compute maximum and minimum elements in a batch of
- /// elements with accompanying bitmap indicating which elements are
- /// included (bit set) and excluded (bit not set)
- ///
- /// \param[in] values the sequence of values
- /// \param[in] length the length of the sequence
- /// \param[in] valid_bits a bitmap indicating which elements are
- /// included (1) or excluded (0)
- /// \param[in] valid_bits_offset the bit offset into the bitmap of
- /// the first element in the sequence
- virtual std::pair<T, T> GetMinMaxSpaced(const T* values, int64_t length,
- const uint8_t* valid_bits,
- int64_t valid_bits_offset) = 0;
-};
-
-/// \brief Typed version of Comparator::Make
-template <typename DType>
-std::shared_ptr<TypedComparator<DType>> MakeComparator(Type::type physical_type,
- SortOrder::type sort_order,
- int type_length = -1) {
- return std::static_pointer_cast<TypedComparator<DType>>(
- Comparator::Make(physical_type, sort_order, type_length));
-}
-
-/// \brief Typed version of Comparator::Make
-template <typename DType>
-std::shared_ptr<TypedComparator<DType>> MakeComparator(const ColumnDescriptor* descr) {
- return std::static_pointer_cast<TypedComparator<DType>>(Comparator::Make(descr));
-}
-
-// ----------------------------------------------------------------------
-
-/// \brief Structure representing encoded statistics to be written to
-/// and read from Parquet serialized metadata
-class PARQUET_EXPORT EncodedStatistics {
- std::shared_ptr<std::string> max_, min_;
- bool is_signed_ = false;
-
- public:
- EncodedStatistics()
- : max_(std::make_shared<std::string>()), min_(std::make_shared<std::string>()) {}
-
- const std::string& max() const { return *max_; }
- const std::string& min() const { return *min_; }
-
- int64_t null_count = 0;
- int64_t distinct_count = 0;
-
- bool has_min = false;
- bool has_max = false;
- bool has_null_count = false;
- bool has_distinct_count = false;
-
- // From parquet-mr:
- // Rather than truncating, don't write stats larger than the max size at
- // all. The rationale is that some engines may use the minimum value in
- // the page as the true minimum for aggregations, and there is no way to
- // mark a truncated value as only a lower bound rather than a value that
- // is actually present in the page.
- void ApplyStatSizeLimits(size_t length) {
- if (max_->length() > length) {
- has_max = false;
- }
- if (min_->length() > length) {
- has_min = false;
- }
- }
-
- bool is_set() const {
- return has_min || has_max || has_null_count || has_distinct_count;
- }
-
- bool is_signed() const { return is_signed_; }
-
- void set_is_signed(bool is_signed) { is_signed_ = is_signed; }
-
- EncodedStatistics& set_max(const std::string& value) {
- *max_ = value;
- has_max = true;
- return *this;
- }
-
- EncodedStatistics& set_min(const std::string& value) {
- *min_ = value;
- has_min = true;
- return *this;
- }
-
- EncodedStatistics& set_null_count(int64_t value) {
- null_count = value;
- has_null_count = true;
- return *this;
- }
-
- EncodedStatistics& set_distinct_count(int64_t value) {
- distinct_count = value;
- has_distinct_count = true;
- return *this;
- }
-};
-
-/// \brief Base type for computing column statistics while writing a file
-class PARQUET_EXPORT Statistics {
- public:
- virtual ~Statistics() {}
-
- /// \brief Create a new statistics instance given a column schema
- /// definition
- /// \param[in] descr the column schema
- /// \param[in] pool a memory pool to use for any memory allocations, optional
- static std::shared_ptr<Statistics> Make(
- const ColumnDescriptor* descr,
- ::arrow::MemoryPool* pool = ::arrow::default_memory_pool());
-
- /// \brief Create a new statistics instance given a column schema
- /// definition and pre-existing state
- /// \param[in] descr the column schema
- /// \param[in] encoded_min the encoded minimum value
- /// \param[in] encoded_max the encoded maximum value
- /// \param[in] num_values total number of values
- /// \param[in] null_count number of null values
- /// \param[in] distinct_count number of distinct values
- /// \param[in] has_min_max whether the min/max statistics are set
- /// \param[in] has_null_count whether the null_count statistics are set
- /// \param[in] has_distinct_count whether the distinct_count statistics are set
- /// \param[in] pool a memory pool to use for any memory allocations, optional
- static std::shared_ptr<Statistics> Make(
- const ColumnDescriptor* descr, const std::string& encoded_min,
- const std::string& encoded_max, int64_t num_values, int64_t null_count,
- int64_t distinct_count, bool has_min_max, bool has_null_count,
- bool has_distinct_count,
- ::arrow::MemoryPool* pool = ::arrow::default_memory_pool());
-
- /// \brief Return true if the count of null values is set
- virtual bool HasNullCount() const = 0;
-
- /// \brief The number of null values, may not be set
- virtual int64_t null_count() const = 0;
-
- /// \brief Return true if the count of distinct values is set
- virtual bool HasDistinctCount() const = 0;
-
- /// \brief The number of distinct values, may not be set
- virtual int64_t distinct_count() const = 0;
-
- /// \brief The total number of values in the column
- virtual int64_t num_values() const = 0;
-
- /// \brief Return true if the min and max statistics are set. Obtain
- /// with TypedStatistics<T>::min and max
- virtual bool HasMinMax() const = 0;
-
- /// \brief Reset state of object to initial (no data observed) state
- virtual void Reset() = 0;
-
- /// \brief Plain-encoded minimum value
- virtual std::string EncodeMin() const = 0;
-
- /// \brief Plain-encoded maximum value
- virtual std::string EncodeMax() const = 0;
-
- /// \brief The finalized encoded form of the statistics for transport
- virtual EncodedStatistics Encode() = 0;
-
- /// \brief The physical type of the column schema
- virtual Type::type physical_type() const = 0;
-
- /// \brief The full type descriptor from the column schema
- virtual const ColumnDescriptor* descr() const = 0;
-
- /// \brief Check two Statistics for equality
- virtual bool Equals(const Statistics& other) const = 0;
-
- protected:
- static std::shared_ptr<Statistics> Make(Type::type physical_type, const void* min,
- const void* max, int64_t num_values,
- int64_t null_count, int64_t distinct_count);
-};
-
-/// \brief A typed implementation of Statistics
-template <typename DType>
-class TypedStatistics : public Statistics {
- public:
- using T = typename DType::c_type;
-
- /// \brief The current minimum value
- virtual const T& min() const = 0;
-
- /// \brief The current maximum value
- virtual const T& max() const = 0;
-
- /// \brief Update state with state of another Statistics object
- virtual void Merge(const TypedStatistics<DType>& other) = 0;
-
- /// \brief Batch statistics update
- virtual void Update(const T* values, int64_t num_not_null, int64_t num_null) = 0;
-
- /// \brief Batch statistics update with supplied validity bitmap
- virtual void UpdateSpaced(const T* values, const uint8_t* valid_bits,
- int64_t valid_bits_offset, int64_t num_not_null,
- int64_t num_null) = 0;
-
- /// \brief EXPERIMENTAL: Update statistics with an Arrow array without
- /// conversion to a primitive Parquet C type. Only implemented for certain
- /// Parquet type / Arrow type combinations like BYTE_ARRAY /
- /// arrow::BinaryArray
- virtual void Update(const ::arrow::Array& values) = 0;
-
- /// \brief Set min and max values to particular values
- virtual void SetMinMax(const T& min, const T& max) = 0;
-};
-
-using BoolStatistics = TypedStatistics<BooleanType>;
-using Int32Statistics = TypedStatistics<Int32Type>;
-using Int64Statistics = TypedStatistics<Int64Type>;
-using FloatStatistics = TypedStatistics<FloatType>;
-using DoubleStatistics = TypedStatistics<DoubleType>;
-using ByteArrayStatistics = TypedStatistics<ByteArrayType>;
-using FLBAStatistics = TypedStatistics<FLBAType>;
-
-/// \brief Typed version of Statistics::Make
-template <typename DType>
-std::shared_ptr<TypedStatistics<DType>> MakeStatistics(
- const ColumnDescriptor* descr,
- ::arrow::MemoryPool* pool = ::arrow::default_memory_pool()) {
- return std::static_pointer_cast<TypedStatistics<DType>>(Statistics::Make(descr, pool));
-}
-
-/// \brief Create Statistics initialized to a particular state
-/// \param[in] min the minimum value
-/// \param[in] max the maximum value
-/// \param[in] num_values number of values
-/// \param[in] null_count number of null values
-/// \param[in] distinct_count number of distinct values
-template <typename DType>
-std::shared_ptr<TypedStatistics<DType>> MakeStatistics(const typename DType::c_type& min,
- const typename DType::c_type& max,
- int64_t num_values,
- int64_t null_count,
- int64_t distinct_count) {
- return std::static_pointer_cast<TypedStatistics<DType>>(Statistics::Make(
- DType::type_num, &min, &max, num_values, null_count, distinct_count));
-}
-
-/// \brief Typed version of Statistics::Make
-template <typename DType>
-std::shared_ptr<TypedStatistics<DType>> MakeStatistics(
- const ColumnDescriptor* descr, const std::string& encoded_min,
- const std::string& encoded_max, int64_t num_values, int64_t null_count,
- int64_t distinct_count, bool has_min_max, bool has_null_count,
- bool has_distinct_count, ::arrow::MemoryPool* pool = ::arrow::default_memory_pool()) {
- return std::static_pointer_cast<TypedStatistics<DType>>(Statistics::Make(
- descr, encoded_min, encoded_max, num_values, null_count, distinct_count,
- has_min_max, has_null_count, has_distinct_count, pool));
-}
-
-} // namespace parquet
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+
+#include <algorithm>
+#include <cstddef>
+#include <cstdint>
+#include <memory>
+#include <string>
+#include <utility>
+
+#include "parquet/platform.h"
+#include "parquet/types.h"
+
+namespace arrow {
+
+class Array;
+class BinaryArray;
+
+} // namespace arrow
+
+namespace parquet {
+
+class ColumnDescriptor;
+
+// ----------------------------------------------------------------------
+// Value comparator interfaces
+
+/// \brief Base class for value comparators. Generally used with
+/// TypedComparator<T>
+class PARQUET_EXPORT Comparator {
+ public:
+ virtual ~Comparator() {}
+
+ /// \brief Create a comparator explicitly from physical type and
+ /// sort order
+ /// \param[in] physical_type the physical type for the typed
+ /// comparator
+ /// \param[in] sort_order either SortOrder::SIGNED or
+ /// SortOrder::UNSIGNED
+ /// \param[in] type_length for FIXED_LEN_BYTE_ARRAY only
+ static std::shared_ptr<Comparator> Make(Type::type physical_type,
+ SortOrder::type sort_order,
+ int type_length = -1);
+
+ /// \brief Create typed comparator inferring default sort order from
+ /// ColumnDescriptor
+ /// \param[in] descr the Parquet column schema
+ static std::shared_ptr<Comparator> Make(const ColumnDescriptor* descr);
+};
+
+/// \brief Interface for comparison of physical types according to the
+/// semantics of a particular logical type.
+template <typename DType>
+class TypedComparator : public Comparator {
+ public:
+ using T = typename DType::c_type;
+
+ /// \brief Scalar comparison of two elements; returns true if the first
+ /// is strictly less than the second
+ virtual bool Compare(const T& a, const T& b) = 0;
+
+ /// \brief Compute the minimum and maximum elements in a batch of
+ /// elements without any nulls
+ virtual std::pair<T, T> GetMinMax(const T* values, int64_t length) = 0;
+
+ /// \brief Compute minimum and maximum elements from an Arrow array. Only
+ /// valid for certain Parquet Type / Arrow Type combinations, like BYTE_ARRAY
+ /// / arrow::BinaryArray
+ virtual std::pair<T, T> GetMinMax(const ::arrow::Array& values) = 0;
+
+ /// \brief Compute the minimum and maximum elements in a batch of
+ /// elements with an accompanying bitmap indicating which elements are
+ /// included (bit set) and excluded (bit not set)
+ ///
+ /// \param[in] values the sequence of values
+ /// \param[in] length the length of the sequence
+ /// \param[in] valid_bits a bitmap indicating which elements are
+ /// included (1) or excluded (0)
+ /// \param[in] valid_bits_offset the bit offset into the bitmap of
+ /// the first element in the sequence
+ virtual std::pair<T, T> GetMinMaxSpaced(const T* values, int64_t length,
+ const uint8_t* valid_bits,
+ int64_t valid_bits_offset) = 0;
+};
+
+/// \brief Typed version of Comparator::Make
+template <typename DType>
+std::shared_ptr<TypedComparator<DType>> MakeComparator(Type::type physical_type,
+ SortOrder::type sort_order,
+ int type_length = -1) {
+ return std::static_pointer_cast<TypedComparator<DType>>(
+ Comparator::Make(physical_type, sort_order, type_length));
+}
+
+/// \brief Typed version of Comparator::Make
+template <typename DType>
+std::shared_ptr<TypedComparator<DType>> MakeComparator(const ColumnDescriptor* descr) {
+ return std::static_pointer_cast<TypedComparator<DType>>(Comparator::Make(descr));
+}
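+
+// Illustrative usage sketch (not part of the upstream header): building a
+// typed comparator for a signed INT32 column and querying it. The `values`
+// buffer is a hypothetical caller-provided array.
+//
+//   auto cmp = MakeComparator<Int32Type>(Type::INT32, SortOrder::SIGNED);
+//   bool less = cmp->Compare(3, 7);  // true: 3 < 7 under signed ordering
+//   std::vector<int32_t> values = {5, 1, 9};
+//   std::pair<int32_t, int32_t> min_max =
+//       cmp->GetMinMax(values.data(), values.size());  // {1, 9}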
+
+// ----------------------------------------------------------------------
+
+/// \brief Structure representing encoded statistics to be written to
+/// and read from Parquet serialized metadata
+class PARQUET_EXPORT EncodedStatistics {
+ std::shared_ptr<std::string> max_, min_;
+ bool is_signed_ = false;
+
+ public:
+ EncodedStatistics()
+ : max_(std::make_shared<std::string>()), min_(std::make_shared<std::string>()) {}
+
+ const std::string& max() const { return *max_; }
+ const std::string& min() const { return *min_; }
+
+ int64_t null_count = 0;
+ int64_t distinct_count = 0;
+
+ bool has_min = false;
+ bool has_max = false;
+ bool has_null_count = false;
+ bool has_distinct_count = false;
+
+ // From parquet-mr:
+ // Rather than truncating, don't write stats larger than the max size. The
+ // rationale is that some engines may use the minimum value in the page as
+ // the true minimum for aggregations and there is no way to mark that a
+ // value has been truncated and is a lower bound and not in the page.
+ void ApplyStatSizeLimits(size_t length) {
+ if (max_->length() > length) {
+ has_max = false;
+ }
+ if (min_->length() > length) {
+ has_min = false;
+ }
+ }
+
+ bool is_set() const {
+ return has_min || has_max || has_null_count || has_distinct_count;
+ }
+
+ bool is_signed() const { return is_signed_; }
+
+ void set_is_signed(bool is_signed) { is_signed_ = is_signed; }
+
+ EncodedStatistics& set_max(const std::string& value) {
+ *max_ = value;
+ has_max = true;
+ return *this;
+ }
+
+ EncodedStatistics& set_min(const std::string& value) {
+ *min_ = value;
+ has_min = true;
+ return *this;
+ }
+
+ EncodedStatistics& set_null_count(int64_t value) {
+ null_count = value;
+ has_null_count = true;
+ return *this;
+ }
+
+ EncodedStatistics& set_distinct_count(int64_t value) {
+ distinct_count = value;
+ has_distinct_count = true;
+ return *this;
+ }
+};
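+
+// Illustrative usage sketch (not part of the upstream header): populating an
+// EncodedStatistics by hand and enforcing a hypothetical 16-byte stat size
+// limit. Over-long values are dropped rather than truncated, per the
+// rationale documented in ApplyStatSizeLimits above.
+//
+//   EncodedStatistics stats;
+//   stats.set_min("aardvark").set_max("zebra").set_null_count(0);
+//   stats.ApplyStatSizeLimits(16);  // both values fit; has_min/has_max stay true
+//   bool usable = stats.is_set();   // true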
+
+/// \brief Base type for computing column statistics while writing a file
+class PARQUET_EXPORT Statistics {
+ public:
+ virtual ~Statistics() {}
+
+ /// \brief Create a new statistics instance given a column schema
+ /// definition
+ /// \param[in] descr the column schema
+ /// \param[in] pool a memory pool to use for any memory allocations, optional
+ static std::shared_ptr<Statistics> Make(
+ const ColumnDescriptor* descr,
+ ::arrow::MemoryPool* pool = ::arrow::default_memory_pool());
+
+ /// \brief Create a new statistics instance given a column schema
+ /// definition and pre-existing state
+ /// \param[in] descr the column schema
+ /// \param[in] encoded_min the encoded minimum value
+ /// \param[in] encoded_max the encoded maximum value
+ /// \param[in] num_values total number of values
+ /// \param[in] null_count number of null values
+ /// \param[in] distinct_count number of distinct values
+ /// \param[in] has_min_max whether the min/max statistics are set
+ /// \param[in] has_null_count whether the null_count statistics are set
+ /// \param[in] has_distinct_count whether the distinct_count statistics are set
+ /// \param[in] pool a memory pool to use for any memory allocations, optional
+ static std::shared_ptr<Statistics> Make(
+ const ColumnDescriptor* descr, const std::string& encoded_min,
+ const std::string& encoded_max, int64_t num_values, int64_t null_count,
+ int64_t distinct_count, bool has_min_max, bool has_null_count,
+ bool has_distinct_count,
+ ::arrow::MemoryPool* pool = ::arrow::default_memory_pool());
+
+ /// \brief Return true if the count of null values is set
+ virtual bool HasNullCount() const = 0;
+
+ /// \brief The number of null values, may not be set
+ virtual int64_t null_count() const = 0;
+
+ /// \brief Return true if the count of distinct values is set
+ virtual bool HasDistinctCount() const = 0;
+
+ /// \brief The number of distinct values, may not be set
+ virtual int64_t distinct_count() const = 0;
+
+ /// \brief The total number of values in the column
+ virtual int64_t num_values() const = 0;
+
+ /// \brief Return true if the min and max statistics are set. Obtain
+ /// with TypedStatistics<T>::min and max
+ virtual bool HasMinMax() const = 0;
+
+ /// \brief Reset state of object to initial (no data observed) state
+ virtual void Reset() = 0;
+
+ /// \brief Plain-encoded minimum value
+ virtual std::string EncodeMin() const = 0;
+
+ /// \brief Plain-encoded maximum value
+ virtual std::string EncodeMax() const = 0;
+
+ /// \brief The finalized encoded form of the statistics for transport
+ virtual EncodedStatistics Encode() = 0;
+
+ /// \brief The physical type of the column schema
+ virtual Type::type physical_type() const = 0;
+
+ /// \brief The full type descriptor from the column schema
+ virtual const ColumnDescriptor* descr() const = 0;
+
+ /// \brief Check two Statistics for equality
+ virtual bool Equals(const Statistics& other) const = 0;
+
+ protected:
+ static std::shared_ptr<Statistics> Make(Type::type physical_type, const void* min,
+ const void* max, int64_t num_values,
+ int64_t null_count, int64_t distinct_count);
+};
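+
+// Illustrative sketch (not part of the upstream header): reconstructing
+// statistics from pre-existing encoded state, e.g. when metadata is read
+// back. `descr`, `enc_min` and `enc_max` are hypothetical inputs.
+//
+//   std::shared_ptr<Statistics> stats = Statistics::Make(
+//       descr, enc_min, enc_max, /*num_values=*/100, /*null_count=*/2,
+//       /*distinct_count=*/0, /*has_min_max=*/true, /*has_null_count=*/true,
+//       /*has_distinct_count=*/false);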
+
+/// \brief A typed implementation of Statistics
+template <typename DType>
+class TypedStatistics : public Statistics {
+ public:
+ using T = typename DType::c_type;
+
+ /// \brief The current minimum value
+ virtual const T& min() const = 0;
+
+ /// \brief The current maximum value
+ virtual const T& max() const = 0;
+
+ /// \brief Update state with state of another Statistics object
+ virtual void Merge(const TypedStatistics<DType>& other) = 0;
+
+ /// \brief Batch statistics update
+ virtual void Update(const T* values, int64_t num_not_null, int64_t num_null) = 0;
+
+ /// \brief Batch statistics update with supplied validity bitmap
+ virtual void UpdateSpaced(const T* values, const uint8_t* valid_bits,
+ int64_t valid_bits_offset, int64_t num_not_null,
+ int64_t num_null) = 0;
+
+ /// \brief EXPERIMENTAL: Update statistics with an Arrow array without
+ /// conversion to a primitive Parquet C type. Only implemented for certain
+ /// Parquet type / Arrow type combinations like BYTE_ARRAY /
+ /// arrow::BinaryArray
+ virtual void Update(const ::arrow::Array& values) = 0;
+
+ /// \brief Set min and max values to particular values
+ virtual void SetMinMax(const T& min, const T& max) = 0;
+};
+
+using BoolStatistics = TypedStatistics<BooleanType>;
+using Int32Statistics = TypedStatistics<Int32Type>;
+using Int64Statistics = TypedStatistics<Int64Type>;
+using FloatStatistics = TypedStatistics<FloatType>;
+using DoubleStatistics = TypedStatistics<DoubleType>;
+using ByteArrayStatistics = TypedStatistics<ByteArrayType>;
+using FLBAStatistics = TypedStatistics<FLBAType>;
+
+/// \brief Typed version of Statistics::Make
+template <typename DType>
+std::shared_ptr<TypedStatistics<DType>> MakeStatistics(
+ const ColumnDescriptor* descr,
+ ::arrow::MemoryPool* pool = ::arrow::default_memory_pool()) {
+ return std::static_pointer_cast<TypedStatistics<DType>>(Statistics::Make(descr, pool));
+}
+
+/// \brief Create Statistics initialized to a particular state
+/// \param[in] min the minimum value
+/// \param[in] max the maximum value
+/// \param[in] num_values number of values
+/// \param[in] null_count number of null values
+/// \param[in] distinct_count number of distinct values
+template <typename DType>
+std::shared_ptr<TypedStatistics<DType>> MakeStatistics(const typename DType::c_type& min,
+ const typename DType::c_type& max,
+ int64_t num_values,
+ int64_t null_count,
+ int64_t distinct_count) {
+ return std::static_pointer_cast<TypedStatistics<DType>>(Statistics::Make(
+ DType::type_num, &min, &max, num_values, null_count, distinct_count));
+}
+
+/// \brief Typed version of Statistics::Make
+template <typename DType>
+std::shared_ptr<TypedStatistics<DType>> MakeStatistics(
+ const ColumnDescriptor* descr, const std::string& encoded_min,
+ const std::string& encoded_max, int64_t num_values, int64_t null_count,
+ int64_t distinct_count, bool has_min_max, bool has_null_count,
+ bool has_distinct_count, ::arrow::MemoryPool* pool = ::arrow::default_memory_pool()) {
+ return std::static_pointer_cast<TypedStatistics<DType>>(Statistics::Make(
+ descr, encoded_min, encoded_max, num_values, null_count, distinct_count,
+ has_min_max, has_null_count, has_distinct_count, pool));
+}
+
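+// Illustrative usage sketch (not part of the upstream header): accumulating
+// typed statistics while writing a column. `descr` is a hypothetical
+// ColumnDescriptor* describing a required INT64 column.
+//
+//   std::shared_ptr<Int64Statistics> stats = MakeStatistics<Int64Type>(descr);
+//   int64_t batch[] = {4, 8, 15};
+//   stats->Update(batch, /*num_not_null=*/3, /*num_null=*/0);
+//   EncodedStatistics encoded = stats->Encode();  // ready for file metadata
+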
+} // namespace parquet
diff --git a/contrib/libs/apache/arrow/cpp/src/parquet/stream_reader.cc b/contrib/libs/apache/arrow/cpp/src/parquet/stream_reader.cc
index af7a35ddbc1..9a7cc8cdf86 100644
--- a/contrib/libs/apache/arrow/cpp/src/parquet/stream_reader.cc
+++ b/contrib/libs/apache/arrow/cpp/src/parquet/stream_reader.cc
@@ -1,521 +1,521 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-#include "parquet/stream_reader.h"
-
-#include <set>
-#include <utility>
-
-namespace parquet {
-
-constexpr int64_t StreamReader::kBatchSizeOne;
-
-// The converted type expected by the stream reader does not always
-// exactly match the schema in the Parquet file. The following
-// is a list of converted types which are allowed instead of the
-// expected converted type.
-// Each pair given is:
-// {<StreamReader expected type>, <Parquet file converted type>}
-// So for example {ConvertedType::INT_32, ConvertedType::NONE} means
-// that if the StreamReader was expecting the converted type INT_32,
-// then it will allow the Parquet file to use the converted type
-// NONE.
-//
-static const std::set<std::pair<ConvertedType::type, ConvertedType::type> >
- converted_type_exceptions = {{ConvertedType::INT_32, ConvertedType::NONE},
- {ConvertedType::INT_64, ConvertedType::NONE},
- {ConvertedType::INT_32, ConvertedType::DECIMAL},
- {ConvertedType::INT_64, ConvertedType::DECIMAL},
- {ConvertedType::UTF8, ConvertedType::NONE}};
-
-StreamReader::StreamReader(std::unique_ptr<ParquetFileReader> reader)
- : file_reader_{std::move(reader)}, eof_{false} {
- file_metadata_ = file_reader_->metadata();
-
- auto schema = file_metadata_->schema();
- auto group_node = schema->group_node();
-
- nodes_.resize(schema->num_columns());
-
- for (auto i = 0; i < schema->num_columns(); ++i) {
- nodes_[i] = std::static_pointer_cast<schema::PrimitiveNode>(group_node->field(i));
- }
- NextRowGroup();
-}
-
-int StreamReader::num_columns() const {
- // Check for file metadata, i.e. the object was not default constructed.
- if (file_metadata_) {
- return file_metadata_->num_columns();
- }
- return 0;
-}
-
-int64_t StreamReader::num_rows() const {
- // Check for file metadata, i.e. the object was not default constructed.
- if (file_metadata_) {
- return file_metadata_->num_rows();
- }
- return 0;
-}
-
-StreamReader& StreamReader::operator>>(bool& v) {
- CheckColumn(Type::BOOLEAN, ConvertedType::NONE);
- Read<BoolReader>(&v);
- return *this;
-}
-
-StreamReader& StreamReader::operator>>(int8_t& v) {
- CheckColumn(Type::INT32, ConvertedType::INT_8);
- Read<Int32Reader, int32_t>(&v);
- return *this;
-}
-
-StreamReader& StreamReader::operator>>(uint8_t& v) {
- CheckColumn(Type::INT32, ConvertedType::UINT_8);
- Read<Int32Reader, int32_t>(&v);
- return *this;
-}
-
-StreamReader& StreamReader::operator>>(int16_t& v) {
- CheckColumn(Type::INT32, ConvertedType::INT_16);
- Read<Int32Reader, int32_t>(&v);
- return *this;
-}
-
-StreamReader& StreamReader::operator>>(uint16_t& v) {
- CheckColumn(Type::INT32, ConvertedType::UINT_16);
- Read<Int32Reader, int32_t>(&v);
- return *this;
-}
-
-StreamReader& StreamReader::operator>>(int32_t& v) {
- CheckColumn(Type::INT32, ConvertedType::INT_32);
- Read<Int32Reader>(&v);
- return *this;
-}
-
-StreamReader& StreamReader::operator>>(uint32_t& v) {
- CheckColumn(Type::INT32, ConvertedType::UINT_32);
- Read<Int32Reader>(reinterpret_cast<int32_t*>(&v));
- return *this;
-}
-
-StreamReader& StreamReader::operator>>(int64_t& v) {
- CheckColumn(Type::INT64, ConvertedType::INT_64);
- Read<Int64Reader>(&v);
- return *this;
-}
-
-StreamReader& StreamReader::operator>>(uint64_t& v) {
- CheckColumn(Type::INT64, ConvertedType::UINT_64);
- Read<Int64Reader>(reinterpret_cast<int64_t*>(&v));
- return *this;
-}
-
-StreamReader& StreamReader::operator>>(std::chrono::milliseconds& v) {
- CheckColumn(Type::INT64, ConvertedType::TIMESTAMP_MILLIS);
- int64_t tmp;
- Read<Int64Reader>(&tmp);
- v = std::chrono::milliseconds{tmp};
- return *this;
-}
-
-StreamReader& StreamReader::operator>>(std::chrono::microseconds& v) {
- CheckColumn(Type::INT64, ConvertedType::TIMESTAMP_MICROS);
- int64_t tmp;
- Read<Int64Reader>(&tmp);
- v = std::chrono::microseconds{tmp};
- return *this;
-}
-
-StreamReader& StreamReader::operator>>(float& v) {
- CheckColumn(Type::FLOAT, ConvertedType::NONE);
- Read<FloatReader>(&v);
- return *this;
-}
-
-StreamReader& StreamReader::operator>>(double& v) {
- CheckColumn(Type::DOUBLE, ConvertedType::NONE);
- Read<DoubleReader>(&v);
- return *this;
-}
-
-StreamReader& StreamReader::operator>>(char& v) {
- CheckColumn(Type::FIXED_LEN_BYTE_ARRAY, ConvertedType::NONE, 1);
- FixedLenByteArray flba;
-
- Read(&flba);
- v = static_cast<char>(flba.ptr[0]);
- return *this;
-}
-
-StreamReader& StreamReader::operator>>(std::string& v) {
- CheckColumn(Type::BYTE_ARRAY, ConvertedType::UTF8);
- ByteArray ba;
-
- Read(&ba);
- v = std::string(reinterpret_cast<const char*>(ba.ptr), ba.len);
- return *this;
-}
-
-StreamReader& StreamReader::operator>>(optional<bool>& v) {
- CheckColumn(Type::BOOLEAN, ConvertedType::NONE);
- ReadOptional<BoolReader>(&v);
- return *this;
-}
-
-StreamReader& StreamReader::operator>>(optional<int8_t>& v) {
- CheckColumn(Type::INT32, ConvertedType::INT_8);
- ReadOptional<Int32Reader, int32_t>(&v);
- return *this;
-}
-
-StreamReader& StreamReader::operator>>(optional<uint8_t>& v) {
- CheckColumn(Type::INT32, ConvertedType::UINT_8);
- ReadOptional<Int32Reader, int32_t>(&v);
- return *this;
-}
-
-StreamReader& StreamReader::operator>>(optional<int16_t>& v) {
- CheckColumn(Type::INT32, ConvertedType::INT_16);
- ReadOptional<Int32Reader, int32_t>(&v);
- return *this;
-}
-
-StreamReader& StreamReader::operator>>(optional<uint16_t>& v) {
- CheckColumn(Type::INT32, ConvertedType::UINT_16);
- ReadOptional<Int32Reader, int32_t>(&v);
- return *this;
-}
-
-StreamReader& StreamReader::operator>>(optional<int32_t>& v) {
- CheckColumn(Type::INT32, ConvertedType::INT_32);
- ReadOptional<Int32Reader>(&v);
- return *this;
-}
-
-StreamReader& StreamReader::operator>>(optional<uint32_t>& v) {
- CheckColumn(Type::INT32, ConvertedType::UINT_32);
- ReadOptional<Int32Reader>(&v);
- return *this;
-}
-
-StreamReader& StreamReader::operator>>(optional<int64_t>& v) {
- CheckColumn(Type::INT64, ConvertedType::INT_64);
- ReadOptional<Int64Reader>(&v);
- return *this;
-}
-
-StreamReader& StreamReader::operator>>(optional<uint64_t>& v) {
- CheckColumn(Type::INT64, ConvertedType::UINT_64);
- ReadOptional<Int64Reader>(&v);
- return *this;
-}
-
-StreamReader& StreamReader::operator>>(optional<float>& v) {
- CheckColumn(Type::FLOAT, ConvertedType::NONE);
- ReadOptional<FloatReader>(&v);
- return *this;
-}
-
-StreamReader& StreamReader::operator>>(optional<double>& v) {
- CheckColumn(Type::DOUBLE, ConvertedType::NONE);
- ReadOptional<DoubleReader>(&v);
- return *this;
-}
-
-StreamReader& StreamReader::operator>>(optional<std::chrono::milliseconds>& v) {
- CheckColumn(Type::INT64, ConvertedType::TIMESTAMP_MILLIS);
- ReadOptional<Int64Reader, int64_t>(&v);
- return *this;
-}
-
-StreamReader& StreamReader::operator>>(optional<std::chrono::microseconds>& v) {
- CheckColumn(Type::INT64, ConvertedType::TIMESTAMP_MICROS);
- ReadOptional<Int64Reader, int64_t>(&v);
- return *this;
-}
-
-StreamReader& StreamReader::operator>>(optional<char>& v) {
- CheckColumn(Type::FIXED_LEN_BYTE_ARRAY, ConvertedType::NONE, 1);
- FixedLenByteArray flba;
-
- if (ReadOptional(&flba)) {
- v = static_cast<char>(flba.ptr[0]);
- } else {
- v.reset();
- }
- return *this;
-}
-
-StreamReader& StreamReader::operator>>(optional<std::string>& v) {
- CheckColumn(Type::BYTE_ARRAY, ConvertedType::UTF8);
- ByteArray ba;
-
- if (ReadOptional(&ba)) {
- v = std::string(reinterpret_cast<const char*>(ba.ptr), ba.len);
- } else {
- v.reset();
- }
- return *this;
-}
-
-void StreamReader::ReadFixedLength(char* ptr, int len) {
- CheckColumn(Type::FIXED_LEN_BYTE_ARRAY, ConvertedType::NONE, len);
- FixedLenByteArray flba;
- Read(&flba);
- std::memcpy(ptr, flba.ptr, len);
-}
-
-void StreamReader::Read(ByteArray* v) {
- const auto& node = nodes_[column_index_];
- auto reader = static_cast<ByteArrayReader*>(column_readers_[column_index_++].get());
- int16_t def_level;
- int16_t rep_level;
- int64_t values_read;
-
- reader->ReadBatch(kBatchSizeOne, &def_level, &rep_level, v, &values_read);
-
- if (values_read != 1) {
- ThrowReadFailedException(node);
- }
-}
-
-bool StreamReader::ReadOptional(ByteArray* v) {
- const auto& node = nodes_[column_index_];
- auto reader = static_cast<ByteArrayReader*>(column_readers_[column_index_++].get());
- int16_t def_level;
- int16_t rep_level;
- int64_t values_read;
-
- reader->ReadBatch(kBatchSizeOne, &def_level, &rep_level, v, &values_read);
-
- if (values_read == 1) {
- return true;
- } else if ((values_read == 0) && (def_level == 0)) {
- return false;
- }
- ThrowReadFailedException(node);
-}
-
-void StreamReader::Read(FixedLenByteArray* v) {
- const auto& node = nodes_[column_index_];
- auto reader =
- static_cast<FixedLenByteArrayReader*>(column_readers_[column_index_++].get());
- int16_t def_level;
- int16_t rep_level;
- int64_t values_read;
-
- reader->ReadBatch(kBatchSizeOne, &def_level, &rep_level, v, &values_read);
-
- if (values_read != 1) {
- ThrowReadFailedException(node);
- }
-}
-
-bool StreamReader::ReadOptional(FixedLenByteArray* v) {
- const auto& node = nodes_[column_index_];
- auto reader =
- static_cast<FixedLenByteArrayReader*>(column_readers_[column_index_++].get());
- int16_t def_level;
- int16_t rep_level;
- int64_t values_read;
-
- reader->ReadBatch(kBatchSizeOne, &def_level, &rep_level, v, &values_read);
-
- if (values_read == 1) {
- return true;
- } else if ((values_read == 0) && (def_level == 0)) {
- return false;
- }
- ThrowReadFailedException(node);
-}
-
-void StreamReader::EndRow() {
- if (!file_reader_) {
- throw ParquetException("StreamReader not initialized");
- }
- if (static_cast<std::size_t>(column_index_) < nodes_.size()) {
- throw ParquetException("Cannot end row with " + std::to_string(column_index_) +
- " of " + std::to_string(nodes_.size()) + " columns read");
- }
- column_index_ = 0;
- ++current_row_;
-
- if (!column_readers_[0]->HasNext()) {
- NextRowGroup();
- }
-}
-
-void StreamReader::NextRowGroup() {
- // Find the next non-empty row group.
- while (row_group_index_ < file_metadata_->num_row_groups()) {
- row_group_reader_ = file_reader_->RowGroup(row_group_index_);
- ++row_group_index_;
-
- column_readers_.resize(file_metadata_->num_columns());
-
- for (int i = 0; i < file_metadata_->num_columns(); ++i) {
- column_readers_[i] = row_group_reader_->Column(i);
- }
- if (column_readers_[0]->HasNext()) {
- row_group_row_offset_ = current_row_;
- return;
- }
- }
- // No more row groups found.
- SetEof();
-}
-
-void StreamReader::SetEof() {
- // Do not reset file_metadata_ to ensure queries on the number of
- // rows/columns still function.
- eof_ = true;
- file_reader_.reset();
- row_group_reader_.reset();
- column_readers_.clear();
- nodes_.clear();
-}
-
-int64_t StreamReader::SkipRows(int64_t num_rows_to_skip) {
- if (0 != column_index_) {
- throw ParquetException("Must finish reading current row before skipping rows.");
- }
- int64_t num_rows_remaining_to_skip = num_rows_to_skip;
-
- while (!eof_ && (num_rows_remaining_to_skip > 0)) {
- int64_t num_rows_in_row_group = row_group_reader_->metadata()->num_rows();
- int64_t num_rows_remaining_in_row_group =
- num_rows_in_row_group - current_row_ - row_group_row_offset_;
-
- if (num_rows_remaining_in_row_group > num_rows_remaining_to_skip) {
- for (auto reader : column_readers_) {
- SkipRowsInColumn(reader.get(), num_rows_remaining_to_skip);
- }
- current_row_ += num_rows_remaining_to_skip;
- num_rows_remaining_to_skip = 0;
- } else {
- num_rows_remaining_to_skip -= num_rows_remaining_in_row_group;
- current_row_ += num_rows_remaining_in_row_group;
- NextRowGroup();
- }
- }
- return num_rows_to_skip - num_rows_remaining_to_skip;
-}
-
-int64_t StreamReader::SkipColumns(int64_t num_columns_to_skip) {
- int64_t num_columns_skipped = 0;
-
- if (!eof_) {
- for (; (num_columns_to_skip > num_columns_skipped) &&
- static_cast<std::size_t>(column_index_) < nodes_.size();
- ++column_index_) {
- SkipRowsInColumn(column_readers_[column_index_].get(), 1);
- ++num_columns_skipped;
- }
- }
- return num_columns_skipped;
-}
-
-void StreamReader::SkipRowsInColumn(ColumnReader* reader, int64_t num_rows_to_skip) {
- int64_t num_skipped = 0;
-
- switch (reader->type()) {
- case Type::BOOLEAN:
- num_skipped = static_cast<BoolReader*>(reader)->Skip(num_rows_to_skip);
- break;
- case Type::INT32:
- num_skipped = static_cast<Int32Reader*>(reader)->Skip(num_rows_to_skip);
- break;
- case Type::INT64:
- num_skipped = static_cast<Int64Reader*>(reader)->Skip(num_rows_to_skip);
- break;
- case Type::BYTE_ARRAY:
- num_skipped = static_cast<ByteArrayReader*>(reader)->Skip(num_rows_to_skip);
- break;
- case Type::FIXED_LEN_BYTE_ARRAY:
- num_skipped = static_cast<FixedLenByteArrayReader*>(reader)->Skip(num_rows_to_skip);
- break;
- case Type::FLOAT:
- num_skipped = static_cast<FloatReader*>(reader)->Skip(num_rows_to_skip);
- break;
- case Type::DOUBLE:
- num_skipped = static_cast<DoubleReader*>(reader)->Skip(num_rows_to_skip);
- break;
- case Type::INT96:
- num_skipped = static_cast<Int96Reader*>(reader)->Skip(num_rows_to_skip);
- break;
- case Type::UNDEFINED:
- throw ParquetException("Unexpected type: " + TypeToString(reader->type()));
- break;
- }
- if (num_rows_to_skip != num_skipped) {
- throw ParquetException("Skipped " + std::to_string(num_skipped) + "/" +
- std::to_string(num_rows_to_skip) + " rows in column " +
- reader->descr()->name());
- }
-}
-
-void StreamReader::CheckColumn(Type::type physical_type,
- ConvertedType::type converted_type, int length) {
- if (static_cast<std::size_t>(column_index_) >= nodes_.size()) {
- if (eof_) {
- ParquetException::EofException();
- }
- throw ParquetException("Column index out-of-bounds. Index " +
- std::to_string(column_index_) + " is invalid for " +
- std::to_string(nodes_.size()) + " columns");
- }
- const auto& node = nodes_[column_index_];
-
- if (physical_type != node->physical_type()) {
- throw ParquetException("Column physical type mismatch. Column '" + node->name() +
- "' has physical type '" + TypeToString(node->physical_type()) +
- "' not '" + TypeToString(physical_type) + "'");
- }
- if (converted_type != node->converted_type()) {
- // The converted type does not always match with the value
- // provided so check the set of exceptions.
- if (converted_type_exceptions.find({converted_type, node->converted_type()}) ==
- converted_type_exceptions.end()) {
- throw ParquetException("Column converted type mismatch. Column '" + node->name() +
- "' has converted type '" +
- ConvertedTypeToString(node->converted_type()) + "' not '" +
- ConvertedTypeToString(converted_type) + "'");
- }
- }
- // Length must be exact.
- if (length != node->type_length()) {
- throw ParquetException("Column length mismatch. Column '" + node->name() +
- "' has length " + std::to_string(node->type_length()) +
- "] not " + std::to_string(length));
- }
-}
-
-void StreamReader::ThrowReadFailedException(
- const std::shared_ptr<schema::PrimitiveNode>& node) {
- throw ParquetException("Failed to read value for column '" + node->name() +
- "' on row " + std::to_string(current_row_));
-}
-
-StreamReader& operator>>(StreamReader& os, EndRowType) {
- os.EndRow();
- return os;
-}
-
-} // namespace parquet
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "parquet/stream_reader.h"
+
+#include <set>
+#include <utility>
+
+namespace parquet {
+
+constexpr int64_t StreamReader::kBatchSizeOne;
+
+// The converted type expected by the stream reader does not always
+// exactly match the schema in the Parquet file. The following
+// is a list of converted types which are allowed instead of the
+// expected converted type.
+// Each pair given is:
+// {<StreamReader expected type>, <Parquet file converted type>}
+// So for example {ConvertedType::INT_32, ConvertedType::NONE} means
+// that if the StreamReader was expecting the converted type INT_32,
+// then it will allow the Parquet file to use the converted type
+// NONE.
+//
+static const std::set<std::pair<ConvertedType::type, ConvertedType::type> >
+ converted_type_exceptions = {{ConvertedType::INT_32, ConvertedType::NONE},
+ {ConvertedType::INT_64, ConvertedType::NONE},
+ {ConvertedType::INT_32, ConvertedType::DECIMAL},
+ {ConvertedType::INT_64, ConvertedType::DECIMAL},
+ {ConvertedType::UTF8, ConvertedType::NONE}};
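+
+// Illustrative example (not part of the upstream source): a file column
+// declared as plain INT32 (converted type NONE) can still be read with
+//
+//   int32_t v;
+//   stream_reader >> v;  // reader expects {INT_32, ...}; NONE is allowed
+//
+// because the pair {ConvertedType::INT_32, ConvertedType::NONE} appears in
+// the exception set above.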
+
+StreamReader::StreamReader(std::unique_ptr<ParquetFileReader> reader)
+ : file_reader_{std::move(reader)}, eof_{false} {
+ file_metadata_ = file_reader_->metadata();
+
+ auto schema = file_metadata_->schema();
+ auto group_node = schema->group_node();
+
+ nodes_.resize(schema->num_columns());
+
+ for (auto i = 0; i < schema->num_columns(); ++i) {
+ nodes_[i] = std::static_pointer_cast<schema::PrimitiveNode>(group_node->field(i));
+ }
+ NextRowGroup();
+}
+
+int StreamReader::num_columns() const {
+ // Check for file metadata, i.e. the object was not default constructed.
+ if (file_metadata_) {
+ return file_metadata_->num_columns();
+ }
+ return 0;
+}
+
+int64_t StreamReader::num_rows() const {
+ // Check for file metadata, i.e. the object was not default constructed.
+ if (file_metadata_) {
+ return file_metadata_->num_rows();
+ }
+ return 0;
+}
+
+StreamReader& StreamReader::operator>>(bool& v) {
+ CheckColumn(Type::BOOLEAN, ConvertedType::NONE);
+ Read<BoolReader>(&v);
+ return *this;
+}
+
+StreamReader& StreamReader::operator>>(int8_t& v) {
+ CheckColumn(Type::INT32, ConvertedType::INT_8);
+ Read<Int32Reader, int32_t>(&v);
+ return *this;
+}
+
+StreamReader& StreamReader::operator>>(uint8_t& v) {
+ CheckColumn(Type::INT32, ConvertedType::UINT_8);
+ Read<Int32Reader, int32_t>(&v);
+ return *this;
+}
+
+StreamReader& StreamReader::operator>>(int16_t& v) {
+ CheckColumn(Type::INT32, ConvertedType::INT_16);
+ Read<Int32Reader, int32_t>(&v);
+ return *this;
+}
+
+StreamReader& StreamReader::operator>>(uint16_t& v) {
+ CheckColumn(Type::INT32, ConvertedType::UINT_16);
+ Read<Int32Reader, int32_t>(&v);
+ return *this;
+}
+
+StreamReader& StreamReader::operator>>(int32_t& v) {
+ CheckColumn(Type::INT32, ConvertedType::INT_32);
+ Read<Int32Reader>(&v);
+ return *this;
+}
+
+StreamReader& StreamReader::operator>>(uint32_t& v) {
+ CheckColumn(Type::INT32, ConvertedType::UINT_32);
+ Read<Int32Reader>(reinterpret_cast<int32_t*>(&v));
+ return *this;
+}
+
+StreamReader& StreamReader::operator>>(int64_t& v) {
+ CheckColumn(Type::INT64, ConvertedType::INT_64);
+ Read<Int64Reader>(&v);
+ return *this;
+}
+
+StreamReader& StreamReader::operator>>(uint64_t& v) {
+ CheckColumn(Type::INT64, ConvertedType::UINT_64);
+ Read<Int64Reader>(reinterpret_cast<int64_t*>(&v));
+ return *this;
+}
+
+StreamReader& StreamReader::operator>>(std::chrono::milliseconds& v) {
+ CheckColumn(Type::INT64, ConvertedType::TIMESTAMP_MILLIS);
+ int64_t tmp;
+ Read<Int64Reader>(&tmp);
+ v = std::chrono::milliseconds{tmp};
+ return *this;
+}
+
+StreamReader& StreamReader::operator>>(std::chrono::microseconds& v) {
+ CheckColumn(Type::INT64, ConvertedType::TIMESTAMP_MICROS);
+ int64_t tmp;
+ Read<Int64Reader>(&tmp);
+ v = std::chrono::microseconds{tmp};
+ return *this;
+}
+
+StreamReader& StreamReader::operator>>(float& v) {
+ CheckColumn(Type::FLOAT, ConvertedType::NONE);
+ Read<FloatReader>(&v);
+ return *this;
+}
+
+StreamReader& StreamReader::operator>>(double& v) {
+ CheckColumn(Type::DOUBLE, ConvertedType::NONE);
+ Read<DoubleReader>(&v);
+ return *this;
+}
+
+StreamReader& StreamReader::operator>>(char& v) {
+ CheckColumn(Type::FIXED_LEN_BYTE_ARRAY, ConvertedType::NONE, 1);
+ FixedLenByteArray flba;
+
+ Read(&flba);
+ v = static_cast<char>(flba.ptr[0]);
+ return *this;
+}
+
+StreamReader& StreamReader::operator>>(std::string& v) {
+ CheckColumn(Type::BYTE_ARRAY, ConvertedType::UTF8);
+ ByteArray ba;
+
+ Read(&ba);
+ v = std::string(reinterpret_cast<const char*>(ba.ptr), ba.len);
+ return *this;
+}
+
+StreamReader& StreamReader::operator>>(optional<bool>& v) {
+ CheckColumn(Type::BOOLEAN, ConvertedType::NONE);
+ ReadOptional<BoolReader>(&v);
+ return *this;
+}
+
+StreamReader& StreamReader::operator>>(optional<int8_t>& v) {
+ CheckColumn(Type::INT32, ConvertedType::INT_8);
+ ReadOptional<Int32Reader, int32_t>(&v);
+ return *this;
+}
+
+StreamReader& StreamReader::operator>>(optional<uint8_t>& v) {
+ CheckColumn(Type::INT32, ConvertedType::UINT_8);
+ ReadOptional<Int32Reader, int32_t>(&v);
+ return *this;
+}
+
+StreamReader& StreamReader::operator>>(optional<int16_t>& v) {
+ CheckColumn(Type::INT32, ConvertedType::INT_16);
+ ReadOptional<Int32Reader, int32_t>(&v);
+ return *this;
+}
+
+StreamReader& StreamReader::operator>>(optional<uint16_t>& v) {
+ CheckColumn(Type::INT32, ConvertedType::UINT_16);
+ ReadOptional<Int32Reader, int32_t>(&v);
+ return *this;
+}
+
+StreamReader& StreamReader::operator>>(optional<int32_t>& v) {
+ CheckColumn(Type::INT32, ConvertedType::INT_32);
+ ReadOptional<Int32Reader>(&v);
+ return *this;
+}
+
+StreamReader& StreamReader::operator>>(optional<uint32_t>& v) {
+ CheckColumn(Type::INT32, ConvertedType::UINT_32);
+ ReadOptional<Int32Reader>(&v);
+ return *this;
+}
+
+StreamReader& StreamReader::operator>>(optional<int64_t>& v) {
+ CheckColumn(Type::INT64, ConvertedType::INT_64);
+ ReadOptional<Int64Reader>(&v);
+ return *this;
+}
+
+StreamReader& StreamReader::operator>>(optional<uint64_t>& v) {
+ CheckColumn(Type::INT64, ConvertedType::UINT_64);
+ ReadOptional<Int64Reader>(&v);
+ return *this;
+}
+
+StreamReader& StreamReader::operator>>(optional<float>& v) {
+ CheckColumn(Type::FLOAT, ConvertedType::NONE);
+ ReadOptional<FloatReader>(&v);
+ return *this;
+}
+
+StreamReader& StreamReader::operator>>(optional<double>& v) {
+ CheckColumn(Type::DOUBLE, ConvertedType::NONE);
+ ReadOptional<DoubleReader>(&v);
+ return *this;
+}
+
+StreamReader& StreamReader::operator>>(optional<std::chrono::milliseconds>& v) {
+ CheckColumn(Type::INT64, ConvertedType::TIMESTAMP_MILLIS);
+ ReadOptional<Int64Reader, int64_t>(&v);
+ return *this;
+}
+
+StreamReader& StreamReader::operator>>(optional<std::chrono::microseconds>& v) {
+ CheckColumn(Type::INT64, ConvertedType::TIMESTAMP_MICROS);
+ ReadOptional<Int64Reader, int64_t>(&v);
+ return *this;
+}
+
+StreamReader& StreamReader::operator>>(optional<char>& v) {
+ CheckColumn(Type::FIXED_LEN_BYTE_ARRAY, ConvertedType::NONE, 1);
+ FixedLenByteArray flba;
+
+ if (ReadOptional(&flba)) {
+ v = static_cast<char>(flba.ptr[0]);
+ } else {
+ v.reset();
+ }
+ return *this;
+}
+
+StreamReader& StreamReader::operator>>(optional<std::string>& v) {
+ CheckColumn(Type::BYTE_ARRAY, ConvertedType::UTF8);
+ ByteArray ba;
+
+ if (ReadOptional(&ba)) {
+ v = std::string(reinterpret_cast<const char*>(ba.ptr), ba.len);
+ } else {
+ v.reset();
+ }
+ return *this;
+}
+
+void StreamReader::ReadFixedLength(char* ptr, int len) {
+ CheckColumn(Type::FIXED_LEN_BYTE_ARRAY, ConvertedType::NONE, len);
+ FixedLenByteArray flba;
+ Read(&flba);
+ std::memcpy(ptr, flba.ptr, len);
+}
+
+void StreamReader::Read(ByteArray* v) {
+ const auto& node = nodes_[column_index_];
+ auto reader = static_cast<ByteArrayReader*>(column_readers_[column_index_++].get());
+ int16_t def_level;
+ int16_t rep_level;
+ int64_t values_read;
+
+ reader->ReadBatch(kBatchSizeOne, &def_level, &rep_level, v, &values_read);
+
+ if (values_read != 1) {
+ ThrowReadFailedException(node);
+ }
+}
+
+bool StreamReader::ReadOptional(ByteArray* v) {
+ const auto& node = nodes_[column_index_];
+ auto reader = static_cast<ByteArrayReader*>(column_readers_[column_index_++].get());
+ int16_t def_level;
+ int16_t rep_level;
+ int64_t values_read;
+
+ reader->ReadBatch(kBatchSizeOne, &def_level, &rep_level, v, &values_read);
+
+ if (values_read == 1) {
+ return true;
+ } else if ((values_read == 0) && (def_level == 0)) {
+ return false;
+ }
+ ThrowReadFailedException(node);
+}
+
+void StreamReader::Read(FixedLenByteArray* v) {
+ const auto& node = nodes_[column_index_];
+ auto reader =
+ static_cast<FixedLenByteArrayReader*>(column_readers_[column_index_++].get());
+ int16_t def_level;
+ int16_t rep_level;
+ int64_t values_read;
+
+ reader->ReadBatch(kBatchSizeOne, &def_level, &rep_level, v, &values_read);
+
+ if (values_read != 1) {
+ ThrowReadFailedException(node);
+ }
+}
+
+bool StreamReader::ReadOptional(FixedLenByteArray* v) {
+ const auto& node = nodes_[column_index_];
+ auto reader =
+ static_cast<FixedLenByteArrayReader*>(column_readers_[column_index_++].get());
+ int16_t def_level;
+ int16_t rep_level;
+ int64_t values_read;
+
+ reader->ReadBatch(kBatchSizeOne, &def_level, &rep_level, v, &values_read);
+
+ if (values_read == 1) {
+ return true;
+ } else if ((values_read == 0) && (def_level == 0)) {
+ return false;
+ }
+ ThrowReadFailedException(node);
+}
+
+void StreamReader::EndRow() {
+ if (!file_reader_) {
+ throw ParquetException("StreamReader not initialized");
+ }
+ if (static_cast<std::size_t>(column_index_) < nodes_.size()) {
+ throw ParquetException("Cannot end row with " + std::to_string(column_index_) +
+ " of " + std::to_string(nodes_.size()) + " columns read");
+ }
+ column_index_ = 0;
+ ++current_row_;
+
+ if (!column_readers_[0]->HasNext()) {
+ NextRowGroup();
+ }
+}
+
+void StreamReader::NextRowGroup() {
+ // Find the next non-empty row group.
+ while (row_group_index_ < file_metadata_->num_row_groups()) {
+ row_group_reader_ = file_reader_->RowGroup(row_group_index_);
+ ++row_group_index_;
+
+ column_readers_.resize(file_metadata_->num_columns());
+
+ for (int i = 0; i < file_metadata_->num_columns(); ++i) {
+ column_readers_[i] = row_group_reader_->Column(i);
+ }
+ if (column_readers_[0]->HasNext()) {
+ row_group_row_offset_ = current_row_;
+ return;
+ }
+ }
+ // No more row groups found.
+ SetEof();
+}
+
+void StreamReader::SetEof() {
+ // Do not reset file_metadata_ to ensure queries on the number of
+ // rows/columns still function.
+ eof_ = true;
+ file_reader_.reset();
+ row_group_reader_.reset();
+ column_readers_.clear();
+ nodes_.clear();
+}
+
+int64_t StreamReader::SkipRows(int64_t num_rows_to_skip) {
+ if (0 != column_index_) {
+ throw ParquetException("Must finish reading current row before skipping rows.");
+ }
+ int64_t num_rows_remaining_to_skip = num_rows_to_skip;
+
+ while (!eof_ && (num_rows_remaining_to_skip > 0)) {
+ int64_t num_rows_in_row_group = row_group_reader_->metadata()->num_rows();
+ int64_t num_rows_remaining_in_row_group =
+ num_rows_in_row_group - current_row_ - row_group_row_offset_;
+
+ if (num_rows_remaining_in_row_group > num_rows_remaining_to_skip) {
+ for (auto reader : column_readers_) {
+ SkipRowsInColumn(reader.get(), num_rows_remaining_to_skip);
+ }
+ current_row_ += num_rows_remaining_to_skip;
+ num_rows_remaining_to_skip = 0;
+ } else {
+ num_rows_remaining_to_skip -= num_rows_remaining_in_row_group;
+ current_row_ += num_rows_remaining_in_row_group;
+ NextRowGroup();
+ }
+ }
+ return num_rows_to_skip - num_rows_remaining_to_skip;
+}
+
+int64_t StreamReader::SkipColumns(int64_t num_columns_to_skip) {
+ int64_t num_columns_skipped = 0;
+
+ if (!eof_) {
+ for (; (num_columns_to_skip > num_columns_skipped) &&
+ static_cast<std::size_t>(column_index_) < nodes_.size();
+ ++column_index_) {
+ SkipRowsInColumn(column_readers_[column_index_].get(), 1);
+ ++num_columns_skipped;
+ }
+ }
+ return num_columns_skipped;
+}
+
+void StreamReader::SkipRowsInColumn(ColumnReader* reader, int64_t num_rows_to_skip) {
+ int64_t num_skipped = 0;
+
+ switch (reader->type()) {
+ case Type::BOOLEAN:
+ num_skipped = static_cast<BoolReader*>(reader)->Skip(num_rows_to_skip);
+ break;
+ case Type::INT32:
+ num_skipped = static_cast<Int32Reader*>(reader)->Skip(num_rows_to_skip);
+ break;
+ case Type::INT64:
+ num_skipped = static_cast<Int64Reader*>(reader)->Skip(num_rows_to_skip);
+ break;
+ case Type::BYTE_ARRAY:
+ num_skipped = static_cast<ByteArrayReader*>(reader)->Skip(num_rows_to_skip);
+ break;
+ case Type::FIXED_LEN_BYTE_ARRAY:
+ num_skipped = static_cast<FixedLenByteArrayReader*>(reader)->Skip(num_rows_to_skip);
+ break;
+ case Type::FLOAT:
+ num_skipped = static_cast<FloatReader*>(reader)->Skip(num_rows_to_skip);
+ break;
+ case Type::DOUBLE:
+ num_skipped = static_cast<DoubleReader*>(reader)->Skip(num_rows_to_skip);
+ break;
+ case Type::INT96:
+ num_skipped = static_cast<Int96Reader*>(reader)->Skip(num_rows_to_skip);
+ break;
+ case Type::UNDEFINED:
+ throw ParquetException("Unexpected type: " + TypeToString(reader->type()));
+ break;
+ }
+ if (num_rows_to_skip != num_skipped) {
+ throw ParquetException("Skipped " + std::to_string(num_skipped) + "/" +
+ std::to_string(num_rows_to_skip) + " rows in column " +
+ reader->descr()->name());
+ }
+}
+
+void StreamReader::CheckColumn(Type::type physical_type,
+ ConvertedType::type converted_type, int length) {
+ if (static_cast<std::size_t>(column_index_) >= nodes_.size()) {
+ if (eof_) {
+ ParquetException::EofException();
+ }
+ throw ParquetException("Column index out-of-bounds. Index " +
+ std::to_string(column_index_) + " is invalid for " +
+ std::to_string(nodes_.size()) + " columns");
+ }
+ const auto& node = nodes_[column_index_];
+
+ if (physical_type != node->physical_type()) {
+ throw ParquetException("Column physical type mismatch. Column '" + node->name() +
+ "' has physical type '" + TypeToString(node->physical_type()) +
+ "' not '" + TypeToString(physical_type) + "'");
+ }
+ if (converted_type != node->converted_type()) {
+ // The converted type does not always match with the value
+ // provided so check the set of exceptions.
+ if (converted_type_exceptions.find({converted_type, node->converted_type()}) ==
+ converted_type_exceptions.end()) {
+ throw ParquetException("Column converted type mismatch. Column '" + node->name() +
+ "' has converted type '" +
+ ConvertedTypeToString(node->converted_type()) + "' not '" +
+ ConvertedTypeToString(converted_type) + "'");
+ }
+ }
+ // Length must be exact.
+ if (length != node->type_length()) {
+ throw ParquetException("Column length mismatch. Column '" + node->name() +
+ "' has length " + std::to_string(node->type_length()) +
+ "] not " + std::to_string(length));
+ }
+}
+
+void StreamReader::ThrowReadFailedException(
+ const std::shared_ptr<schema::PrimitiveNode>& node) {
+ throw ParquetException("Failed to read value for column '" + node->name() +
+ "' on row " + std::to_string(current_row_));
+}
+
+StreamReader& operator>>(StreamReader& os, EndRowType) {
+ os.EndRow();
+ return os;
+}
+
+} // namespace parquet
diff --git a/contrib/libs/apache/arrow/cpp/src/parquet/stream_reader.h b/contrib/libs/apache/arrow/cpp/src/parquet/stream_reader.h
index 3dfebb27146..806b0e8ad9a 100644
--- a/contrib/libs/apache/arrow/cpp/src/parquet/stream_reader.h
+++ b/contrib/libs/apache/arrow/cpp/src/parquet/stream_reader.h
@@ -1,299 +1,299 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-#pragma once
-
-#include <array>
-#include <chrono>
-#include <cstdint>
-#include <cstring>
-#include <memory>
-#include <string>
-#include <vector>
-
-#include "arrow/util/optional.h"
-#include "parquet/column_reader.h"
-#include "parquet/file_reader.h"
-#include "parquet/stream_writer.h"
-
-namespace parquet {
-
-/// \brief A class for reading Parquet files using an input stream type API.
-///
-/// The values given must be of the correct type, i.e. the type must
-/// match the file schema exactly, otherwise a ParquetException will be
-/// thrown.
-///
-/// The user must explicitly advance to the next row using the
-/// EndRow() function or EndRow input manipulator.
-///
-/// Required and optional fields are supported:
-/// - Required fields are read using operator>>(T)
-/// - Optional fields are read with
-/// operator>>(arrow::util::optional<T>)
-///
-/// Note that operator>>(arrow::util::optional<T>) can be used to read
-/// required fields.
-///
-/// Similarly operator>>(T) can be used to read optional fields.
-/// However, if the value is not present then a ParquetException will
-/// be raised.
-///
-/// Currently there is no support for repeated fields.
-///
-class PARQUET_EXPORT StreamReader {
- public:
- template <typename T>
- using optional = ::arrow::util::optional<T>;
-
- // N.B. Default constructed objects are not usable. This
- // constructor is provided so that the object may be move
- // assigned afterwards.
- StreamReader() = default;
-
- explicit StreamReader(std::unique_ptr<ParquetFileReader> reader);
-
- ~StreamReader() = default;
-
- bool eof() const { return eof_; }
-
- int current_column() const { return column_index_; }
-
- int64_t current_row() const { return current_row_; }
-
- int num_columns() const;
-
- int64_t num_rows() const;
-
- // Moving is possible.
- StreamReader(StreamReader&&) = default;
- StreamReader& operator=(StreamReader&&) = default;
-
- // Copying is not allowed.
- StreamReader(const StreamReader&) = delete;
- StreamReader& operator=(const StreamReader&) = delete;
-
- StreamReader& operator>>(bool& v);
-
- StreamReader& operator>>(int8_t& v);
-
- StreamReader& operator>>(uint8_t& v);
-
- StreamReader& operator>>(int16_t& v);
-
- StreamReader& operator>>(uint16_t& v);
-
- StreamReader& operator>>(int32_t& v);
-
- StreamReader& operator>>(uint32_t& v);
-
- StreamReader& operator>>(int64_t& v);
-
- StreamReader& operator>>(uint64_t& v);
-
- StreamReader& operator>>(std::chrono::milliseconds& v);
-
- StreamReader& operator>>(std::chrono::microseconds& v);
-
- StreamReader& operator>>(float& v);
-
- StreamReader& operator>>(double& v);
-
- StreamReader& operator>>(char& v);
-
- template <int N>
- StreamReader& operator>>(char (&v)[N]) {
- ReadFixedLength(v, N);
- return *this;
- }
-
- template <std::size_t N>
- StreamReader& operator>>(std::array<char, N>& v) {
- ReadFixedLength(v.data(), static_cast<int>(N));
- return *this;
- }
-
- // N.B. Cannot allow reading into an arbitrary char pointer as the
- // length cannot be verified. Also it would overshadow the
- // char[N] input operator.
- // StreamReader& operator>>(char * v);
-
- StreamReader& operator>>(std::string& v);
-
- // Input operators for optional fields.
-
- StreamReader& operator>>(optional<bool>& v);
-
- StreamReader& operator>>(optional<int8_t>& v);
-
- StreamReader& operator>>(optional<uint8_t>& v);
-
- StreamReader& operator>>(optional<int16_t>& v);
-
- StreamReader& operator>>(optional<uint16_t>& v);
-
- StreamReader& operator>>(optional<int32_t>& v);
-
- StreamReader& operator>>(optional<uint32_t>& v);
-
- StreamReader& operator>>(optional<int64_t>& v);
-
- StreamReader& operator>>(optional<uint64_t>& v);
-
- StreamReader& operator>>(optional<float>& v);
-
- StreamReader& operator>>(optional<double>& v);
-
- StreamReader& operator>>(optional<std::chrono::milliseconds>& v);
-
- StreamReader& operator>>(optional<std::chrono::microseconds>& v);
-
- StreamReader& operator>>(optional<char>& v);
-
- StreamReader& operator>>(optional<std::string>& v);
-
- template <std::size_t N>
- StreamReader& operator>>(optional<std::array<char, N>>& v) {
- CheckColumn(Type::FIXED_LEN_BYTE_ARRAY, ConvertedType::NONE, N);
- FixedLenByteArray flba;
- if (ReadOptional(&flba)) {
- v = std::array<char, N>{};
- std::memcpy(v->data(), flba.ptr, N);
- } else {
- v.reset();
- }
- return *this;
- }
-
- /// \brief Terminate current row and advance to next one.
- /// \throws ParquetException if all columns in the row were not
- /// read or skipped.
- void EndRow();
-
- /// \brief Skip the data in the next columns.
- /// If the number of columns exceeds the columns remaining on the
- /// current row then skipping is terminated - it does _not_ continue
- /// skipping columns on the next row.
- /// Skipping of columns still requires the use of 'EndRow' even if all
- /// remaining columns were skipped.
- /// \return Number of columns actually skipped.
- int64_t SkipColumns(int64_t num_columns_to_skip);
-
- /// \brief Skip the data in the next rows.
- /// Skipping of rows is not allowed if reading of data for the
- /// current row is not finished.
- /// Skipping of rows will be terminated if the end of file is
- /// reached.
- /// \return Number of rows actually skipped.
- int64_t SkipRows(int64_t num_rows_to_skip);
-
- protected:
- [[noreturn]] void ThrowReadFailedException(
- const std::shared_ptr<schema::PrimitiveNode>& node);
-
- template <typename ReaderType, typename T>
- void Read(T* v) {
- const auto& node = nodes_[column_index_];
- auto reader = static_cast<ReaderType*>(column_readers_[column_index_++].get());
- int16_t def_level;
- int16_t rep_level;
- int64_t values_read;
-
- reader->ReadBatch(kBatchSizeOne, &def_level, &rep_level, v, &values_read);
-
- if (values_read != 1) {
- ThrowReadFailedException(node);
- }
- }
-
- template <typename ReaderType, typename ReadType, typename T>
- void Read(T* v) {
- const auto& node = nodes_[column_index_];
- auto reader = static_cast<ReaderType*>(column_readers_[column_index_++].get());
- int16_t def_level;
- int16_t rep_level;
- ReadType tmp;
- int64_t values_read;
-
- reader->ReadBatch(kBatchSizeOne, &def_level, &rep_level, &tmp, &values_read);
-
- if (values_read == 1) {
- *v = tmp;
- } else {
- ThrowReadFailedException(node);
- }
- }
-
- template <typename ReaderType, typename ReadType = typename ReaderType::T, typename T>
- void ReadOptional(optional<T>* v) {
- const auto& node = nodes_[column_index_];
- auto reader = static_cast<ReaderType*>(column_readers_[column_index_++].get());
- int16_t def_level;
- int16_t rep_level;
- ReadType tmp;
- int64_t values_read;
-
- reader->ReadBatch(kBatchSizeOne, &def_level, &rep_level, &tmp, &values_read);
-
- if (values_read == 1) {
- *v = T(tmp);
- } else if ((values_read == 0) && (def_level == 0)) {
- v->reset();
- } else {
- ThrowReadFailedException(node);
- }
- }
-
- void ReadFixedLength(char* ptr, int len);
-
- void Read(ByteArray* v);
-
- void Read(FixedLenByteArray* v);
-
- bool ReadOptional(ByteArray* v);
-
- bool ReadOptional(FixedLenByteArray* v);
-
- void NextRowGroup();
-
- void CheckColumn(Type::type physical_type, ConvertedType::type converted_type,
- int length = 0);
-
- void SkipRowsInColumn(ColumnReader* reader, int64_t num_rows_to_skip);
-
- void SetEof();
-
- private:
- std::unique_ptr<ParquetFileReader> file_reader_;
- std::shared_ptr<FileMetaData> file_metadata_;
- std::shared_ptr<RowGroupReader> row_group_reader_;
- std::vector<std::shared_ptr<ColumnReader>> column_readers_;
- std::vector<std::shared_ptr<schema::PrimitiveNode>> nodes_;
-
- bool eof_{true};
- int row_group_index_{0};
- int column_index_{0};
- int64_t current_row_{0};
- int64_t row_group_row_offset_{0};
-
- static constexpr int64_t kBatchSizeOne = 1;
-};
-
-PARQUET_EXPORT
-StreamReader& operator>>(StreamReader&, EndRowType);
-
-} // namespace parquet
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+
+#include <array>
+#include <chrono>
+#include <cstdint>
+#include <cstring>
+#include <memory>
+#include <string>
+#include <vector>
+
+#include "arrow/util/optional.h"
+#include "parquet/column_reader.h"
+#include "parquet/file_reader.h"
+#include "parquet/stream_writer.h"
+
+namespace parquet {
+
+/// \brief A class for reading Parquet files using an input stream type API.
+///
+/// The values given must be of the correct type, i.e. the type must
+/// match the file schema exactly, otherwise a ParquetException will be
+/// thrown.
+///
+/// The user must explicitly advance to the next row using the
+/// EndRow() function or EndRow input manipulator.
+///
+/// Required and optional fields are supported:
+/// - Required fields are read using operator>>(T)
+/// - Optional fields are read with
+/// operator>>(arrow::util::optional<T>)
+///
+/// Note that operator>>(arrow::util::optional<T>) can be used to read
+/// required fields.
+///
+/// Similarly operator>>(T) can be used to read optional fields.
+/// However, if the value is not present then a ParquetException will
+/// be raised.
+///
+/// Currently there is no support for repeated fields.
+///
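+/// A minimal usage sketch (illustrative, not upstream documentation); the
+/// file name and the two-column layout are hypothetical:
+///
+///   StreamReader reader{ParquetFileReader::OpenFile("example.parquet")};
+///   std::string name;
+///   int32_t age;
+///   while (!reader.eof()) {
+///     reader >> name >> age >> EndRow;
+///   }
+///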
+class PARQUET_EXPORT StreamReader {
+ public:
+ template <typename T>
+ using optional = ::arrow::util::optional<T>;
+
+ // N.B. Default constructed objects are not usable. This
+ // constructor is provided so that the object may be move
+ // assigned afterwards.
+ StreamReader() = default;
+
+ explicit StreamReader(std::unique_ptr<ParquetFileReader> reader);
+
+ ~StreamReader() = default;
+
+ bool eof() const { return eof_; }
+
+ int current_column() const { return column_index_; }
+
+ int64_t current_row() const { return current_row_; }
+
+ int num_columns() const;
+
+ int64_t num_rows() const;
+
+ // Moving is possible.
+ StreamReader(StreamReader&&) = default;
+ StreamReader& operator=(StreamReader&&) = default;
+
+ // Copying is not allowed.
+ StreamReader(const StreamReader&) = delete;
+ StreamReader& operator=(const StreamReader&) = delete;
+
+ StreamReader& operator>>(bool& v);
+
+ StreamReader& operator>>(int8_t& v);
+
+ StreamReader& operator>>(uint8_t& v);
+
+ StreamReader& operator>>(int16_t& v);
+
+ StreamReader& operator>>(uint16_t& v);
+
+ StreamReader& operator>>(int32_t& v);
+
+ StreamReader& operator>>(uint32_t& v);
+
+ StreamReader& operator>>(int64_t& v);
+
+ StreamReader& operator>>(uint64_t& v);
+
+ StreamReader& operator>>(std::chrono::milliseconds& v);
+
+ StreamReader& operator>>(std::chrono::microseconds& v);
+
+ StreamReader& operator>>(float& v);
+
+ StreamReader& operator>>(double& v);
+
+ StreamReader& operator>>(char& v);
+
+ template <int N>
+ StreamReader& operator>>(char (&v)[N]) {
+ ReadFixedLength(v, N);
+ return *this;
+ }
+
+ template <std::size_t N>
+ StreamReader& operator>>(std::array<char, N>& v) {
+ ReadFixedLength(v.data(), static_cast<int>(N));
+ return *this;
+ }
+
+ // N.B. Cannot allow reading into an arbitrary char pointer as the
+ // length cannot be verified. Also it would overshadow the
+ // char[N] input operator.
+ // StreamReader& operator>>(char * v);
+
+ StreamReader& operator>>(std::string& v);
+
+ // Input operators for optional fields.
+
+ StreamReader& operator>>(optional<bool>& v);
+
+ StreamReader& operator>>(optional<int8_t>& v);
+
+ StreamReader& operator>>(optional<uint8_t>& v);
+
+ StreamReader& operator>>(optional<int16_t>& v);
+
+ StreamReader& operator>>(optional<uint16_t>& v);
+
+ StreamReader& operator>>(optional<int32_t>& v);
+
+ StreamReader& operator>>(optional<uint32_t>& v);
+
+ StreamReader& operator>>(optional<int64_t>& v);
+
+ StreamReader& operator>>(optional<uint64_t>& v);
+
+ StreamReader& operator>>(optional<float>& v);
+
+ StreamReader& operator>>(optional<double>& v);
+
+ StreamReader& operator>>(optional<std::chrono::milliseconds>& v);
+
+ StreamReader& operator>>(optional<std::chrono::microseconds>& v);
+
+ StreamReader& operator>>(optional<char>& v);
+
+ StreamReader& operator>>(optional<std::string>& v);
+
+ template <std::size_t N>
+ StreamReader& operator>>(optional<std::array<char, N>>& v) {
+ CheckColumn(Type::FIXED_LEN_BYTE_ARRAY, ConvertedType::NONE, N);
+ FixedLenByteArray flba;
+ if (ReadOptional(&flba)) {
+ v = std::array<char, N>{};
+ std::memcpy(v->data(), flba.ptr, N);
+ } else {
+ v.reset();
+ }
+ return *this;
+ }
+
+  /// \brief Terminate the current row and advance to the next one.
+  /// \throws ParquetException if not all columns in the row were
+  /// read or skipped.
+ void EndRow();
+
+  /// \brief Skip the data in the next columns.
+  /// If the number of columns to skip exceeds the columns remaining
+  /// in the current row, skipping stops at the end of the row - it
+  /// does _not_ continue skipping columns on the next row.
+  /// 'EndRow' must still be called even if all remaining columns
+  /// were skipped.
+  /// \return Number of columns actually skipped.
+ int64_t SkipColumns(int64_t num_columns_to_skip);
+
+  /// \brief Skip the data in the next rows.
+  /// Rows may only be skipped once reading of data for the current
+  /// row has finished.
+  /// Skipping stops if the end of the file is reached.
+  /// \return Number of rows actually skipped.
+ int64_t SkipRows(int64_t num_rows_to_skip);
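+
+  // A usage sketch (the counts are illustrative): skip whatever
+  // remains of the current row, finish it, then drop the next 10 rows:
+  //
+  //   os.SkipColumns(os.num_columns() - os.current_column());
+  //   os.EndRow();
+  //   os.SkipRows(10);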
+
+ protected:
+ [[noreturn]] void ThrowReadFailedException(
+ const std::shared_ptr<schema::PrimitiveNode>& node);
+
+ template <typename ReaderType, typename T>
+ void Read(T* v) {
+ const auto& node = nodes_[column_index_];
+ auto reader = static_cast<ReaderType*>(column_readers_[column_index_++].get());
+ int16_t def_level;
+ int16_t rep_level;
+ int64_t values_read;
+
+ reader->ReadBatch(kBatchSizeOne, &def_level, &rep_level, v, &values_read);
+
+ if (values_read != 1) {
+ ThrowReadFailedException(node);
+ }
+ }
+
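+  // As above, but for columns whose physical type (ReadType) differs
+  // from the caller's type T (e.g. an INT32 column read into an
+  // int16_t): the value is read into a temporary and then converted.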
+ template <typename ReaderType, typename ReadType, typename T>
+ void Read(T* v) {
+ const auto& node = nodes_[column_index_];
+ auto reader = static_cast<ReaderType*>(column_readers_[column_index_++].get());
+ int16_t def_level;
+ int16_t rep_level;
+ ReadType tmp;
+ int64_t values_read;
+
+ reader->ReadBatch(kBatchSizeOne, &def_level, &rep_level, &tmp, &values_read);
+
+ if (values_read == 1) {
+ *v = tmp;
+ } else {
+ ThrowReadFailedException(node);
+ }
+ }
+
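+  // Reads one value into *v. An absent value (values_read == 0 with
+  // def_level == 0) resets the optional instead of throwing.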
+ template <typename ReaderType, typename ReadType = typename ReaderType::T, typename T>
+ void ReadOptional(optional<T>* v) {
+ const auto& node = nodes_[column_index_];
+ auto reader = static_cast<ReaderType*>(column_readers_[column_index_++].get());
+ int16_t def_level;
+ int16_t rep_level;
+ ReadType tmp;
+ int64_t values_read;
+
+ reader->ReadBatch(kBatchSizeOne, &def_level, &rep_level, &tmp, &values_read);
+
+ if (values_read == 1) {
+ *v = T(tmp);
+ } else if ((values_read == 0) && (def_level == 0)) {
+ v->reset();
+ } else {
+ ThrowReadFailedException(node);
+ }
+ }
+
+ void ReadFixedLength(char* ptr, int len);
+
+ void Read(ByteArray* v);
+
+ void Read(FixedLenByteArray* v);
+
+ bool ReadOptional(ByteArray* v);
+
+ bool ReadOptional(FixedLenByteArray* v);
+
+ void NextRowGroup();
+
+ void CheckColumn(Type::type physical_type, ConvertedType::type converted_type,
+ int length = 0);
+
+ void SkipRowsInColumn(ColumnReader* reader, int64_t num_rows_to_skip);
+
+ void SetEof();
+
+ private:
+ std::unique_ptr<ParquetFileReader> file_reader_;
+ std::shared_ptr<FileMetaData> file_metadata_;
+ std::shared_ptr<RowGroupReader> row_group_reader_;
+ std::vector<std::shared_ptr<ColumnReader>> column_readers_;
+ std::vector<std::shared_ptr<schema::PrimitiveNode>> nodes_;
+
+ bool eof_{true};
+ int row_group_index_{0};
+ int column_index_{0};
+ int64_t current_row_{0};
+ int64_t row_group_row_offset_{0};
+
+ static constexpr int64_t kBatchSizeOne = 1;
+};
+
+PARQUET_EXPORT
+StreamReader& operator>>(StreamReader&, EndRowType);
+
+} // namespace parquet
diff --git a/contrib/libs/apache/arrow/cpp/src/parquet/stream_writer.cc b/contrib/libs/apache/arrow/cpp/src/parquet/stream_writer.cc
index 2ebbd3c5e23..253ebf1bc91 100644
--- a/contrib/libs/apache/arrow/cpp/src/parquet/stream_writer.cc
+++ b/contrib/libs/apache/arrow/cpp/src/parquet/stream_writer.cc
@@ -1,324 +1,324 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-#include "parquet/stream_writer.h"
-
-#include <utility>
-
-namespace parquet {
-
-int64_t StreamWriter::default_row_group_size_{512 * 1024 * 1024}; // 512MB
-
-constexpr int16_t StreamWriter::kDefLevelZero;
-constexpr int16_t StreamWriter::kDefLevelOne;
-constexpr int16_t StreamWriter::kRepLevelZero;
-constexpr int64_t StreamWriter::kBatchSizeOne;
-
-StreamWriter::FixedStringView::FixedStringView(const char* data_ptr)
- : data{data_ptr}, size{std::strlen(data_ptr)} {}
-
-StreamWriter::FixedStringView::FixedStringView(const char* data_ptr, std::size_t data_len)
- : data{data_ptr}, size{data_len} {}
-
-StreamWriter::StreamWriter(std::unique_ptr<ParquetFileWriter> writer)
- : file_writer_{std::move(writer)},
- row_group_writer_{file_writer_->AppendBufferedRowGroup()} {
- auto schema = file_writer_->schema();
- auto group_node = schema->group_node();
-
- nodes_.resize(schema->num_columns());
-
- for (auto i = 0; i < schema->num_columns(); ++i) {
- nodes_[i] = std::static_pointer_cast<schema::PrimitiveNode>(group_node->field(i));
- }
-}
-
-void StreamWriter::SetDefaultMaxRowGroupSize(int64_t max_size) {
- default_row_group_size_ = max_size;
-}
-
-void StreamWriter::SetMaxRowGroupSize(int64_t max_size) {
- max_row_group_size_ = max_size;
-}
-
-int StreamWriter::num_columns() const { return static_cast<int>(nodes_.size()); }
-
-StreamWriter& StreamWriter::operator<<(bool v) {
- CheckColumn(Type::BOOLEAN, ConvertedType::NONE);
- return Write<BoolWriter>(v);
-}
-
-StreamWriter& StreamWriter::operator<<(int8_t v) {
- CheckColumn(Type::INT32, ConvertedType::INT_8);
- return Write<Int32Writer>(static_cast<int32_t>(v));
-}
-
-StreamWriter& StreamWriter::operator<<(uint8_t v) {
- CheckColumn(Type::INT32, ConvertedType::UINT_8);
- return Write<Int32Writer>(static_cast<int32_t>(v));
-}
-
-StreamWriter& StreamWriter::operator<<(int16_t v) {
- CheckColumn(Type::INT32, ConvertedType::INT_16);
- return Write<Int32Writer>(static_cast<int32_t>(v));
-}
-
-StreamWriter& StreamWriter::operator<<(uint16_t v) {
- CheckColumn(Type::INT32, ConvertedType::UINT_16);
- return Write<Int32Writer>(static_cast<int32_t>(v));
-}
-
-StreamWriter& StreamWriter::operator<<(int32_t v) {
- CheckColumn(Type::INT32, ConvertedType::INT_32);
- return Write<Int32Writer>(v);
-}
-
-StreamWriter& StreamWriter::operator<<(uint32_t v) {
- CheckColumn(Type::INT32, ConvertedType::UINT_32);
- return Write<Int32Writer>(static_cast<int32_t>(v));
-}
-
-StreamWriter& StreamWriter::operator<<(int64_t v) {
- CheckColumn(Type::INT64, ConvertedType::INT_64);
- return Write<Int64Writer>(v);
-}
-
-StreamWriter& StreamWriter::operator<<(uint64_t v) {
- CheckColumn(Type::INT64, ConvertedType::UINT_64);
- return Write<Int64Writer>(static_cast<int64_t>(v));
-}
-
-StreamWriter& StreamWriter::operator<<(const std::chrono::milliseconds& v) {
- CheckColumn(Type::INT64, ConvertedType::TIMESTAMP_MILLIS);
- return Write<Int64Writer>(static_cast<int64_t>(v.count()));
-}
-
-StreamWriter& StreamWriter::operator<<(const std::chrono::microseconds& v) {
- CheckColumn(Type::INT64, ConvertedType::TIMESTAMP_MICROS);
- return Write<Int64Writer>(static_cast<int64_t>(v.count()));
-}
-
-StreamWriter& StreamWriter::operator<<(float v) {
- CheckColumn(Type::FLOAT, ConvertedType::NONE);
- return Write<FloatWriter>(v);
-}
-
-StreamWriter& StreamWriter::operator<<(double v) {
- CheckColumn(Type::DOUBLE, ConvertedType::NONE);
- return Write<DoubleWriter>(v);
-}
-
-StreamWriter& StreamWriter::operator<<(char v) { return WriteFixedLength(&v, 1); }
-
-StreamWriter& StreamWriter::operator<<(FixedStringView v) {
- return WriteFixedLength(v.data, v.size);
-}
-
-StreamWriter& StreamWriter::operator<<(const char* v) {
- return WriteVariableLength(v, std::strlen(v));
-}
-
-StreamWriter& StreamWriter::operator<<(const std::string& v) {
- return WriteVariableLength(v.data(), v.size());
-}
-
-StreamWriter& StreamWriter::operator<<(::arrow::util::string_view v) {
- return WriteVariableLength(v.data(), v.size());
-}
-
-StreamWriter& StreamWriter::WriteVariableLength(const char* data_ptr,
- std::size_t data_len) {
- CheckColumn(Type::BYTE_ARRAY, ConvertedType::UTF8);
-
- auto writer = static_cast<ByteArrayWriter*>(row_group_writer_->column(column_index_++));
-
- if (data_ptr != nullptr) {
- ByteArray ba_value;
-
- ba_value.ptr = reinterpret_cast<const uint8_t*>(data_ptr);
- ba_value.len = static_cast<uint32_t>(data_len);
-
- writer->WriteBatch(kBatchSizeOne, &kDefLevelOne, &kRepLevelZero, &ba_value);
- } else {
- writer->WriteBatch(kBatchSizeOne, &kDefLevelZero, &kRepLevelZero, nullptr);
- }
- if (max_row_group_size_ > 0) {
- row_group_size_ += writer->EstimatedBufferedValueBytes();
- }
- return *this;
-}
-
-StreamWriter& StreamWriter::WriteFixedLength(const char* data_ptr, std::size_t data_len) {
- CheckColumn(Type::FIXED_LEN_BYTE_ARRAY, ConvertedType::NONE,
- static_cast<int>(data_len));
-
- auto writer =
- static_cast<FixedLenByteArrayWriter*>(row_group_writer_->column(column_index_++));
-
- if (data_ptr != nullptr) {
- FixedLenByteArray flba_value;
-
- flba_value.ptr = reinterpret_cast<const uint8_t*>(data_ptr);
- writer->WriteBatch(kBatchSizeOne, &kDefLevelOne, &kRepLevelZero, &flba_value);
- } else {
- writer->WriteBatch(kBatchSizeOne, &kDefLevelZero, &kRepLevelZero, nullptr);
- }
- if (max_row_group_size_ > 0) {
- row_group_size_ += writer->EstimatedBufferedValueBytes();
- }
- return *this;
-}
-
-void StreamWriter::CheckColumn(Type::type physical_type,
- ConvertedType::type converted_type, int length) {
- if (static_cast<std::size_t>(column_index_) >= nodes_.size()) {
- throw ParquetException("Column index out-of-bounds. Index " +
- std::to_string(column_index_) + " is invalid for " +
- std::to_string(nodes_.size()) + " columns");
- }
- const auto& node = nodes_[column_index_];
-
- if (physical_type != node->physical_type()) {
- throw ParquetException("Column physical type mismatch. Column '" + node->name() +
- "' has physical type '" + TypeToString(node->physical_type()) +
- "' not '" + TypeToString(physical_type) + "'");
- }
-  if (converted_type != node->converted_type()) {
-    throw ParquetException("Column converted type mismatch. Column '" + node->name() +
-                           "' has converted type '" +
-                           ConvertedTypeToString(node->converted_type()) + "' not '" +
-                           ConvertedTypeToString(converted_type) + "'");
-  }
-  // Length must be exact.
-  // A shorter-length fixed array is not acceptable as it would
-  // result in array bounds read errors.
-  //
- if (length != node->type_length()) {
- throw ParquetException("Column length mismatch. Column '" + node->name() +
- "' has length " + std::to_string(node->type_length()) +
- " not " + std::to_string(length));
- }
-}
-
-int64_t StreamWriter::SkipColumns(int num_columns_to_skip) {
- int num_columns_skipped = 0;
-
- for (; (num_columns_to_skip > num_columns_skipped) &&
- static_cast<std::size_t>(column_index_) < nodes_.size();
- ++num_columns_skipped) {
- const auto& node = nodes_[column_index_];
-
- if (node->is_required()) {
- throw ParquetException("Cannot skip column '" + node->name() +
- "' as it is required.");
- }
- auto writer = row_group_writer_->column(column_index_++);
-
- WriteNullValue(writer);
- }
- return num_columns_skipped;
-}
-
-void StreamWriter::WriteNullValue(ColumnWriter* writer) {
- switch (writer->type()) {
- case Type::BOOLEAN:
- static_cast<BoolWriter*>(writer)->WriteBatch(kBatchSizeOne, &kDefLevelZero,
- &kRepLevelZero, nullptr);
- break;
- case Type::INT32:
- static_cast<Int32Writer*>(writer)->WriteBatch(kBatchSizeOne, &kDefLevelZero,
- &kRepLevelZero, nullptr);
- break;
- case Type::INT64:
- static_cast<Int64Writer*>(writer)->WriteBatch(kBatchSizeOne, &kDefLevelZero,
- &kRepLevelZero, nullptr);
- break;
- case Type::BYTE_ARRAY:
- static_cast<ByteArrayWriter*>(writer)->WriteBatch(kBatchSizeOne, &kDefLevelZero,
- &kRepLevelZero, nullptr);
- break;
- case Type::FIXED_LEN_BYTE_ARRAY:
- static_cast<FixedLenByteArrayWriter*>(writer)->WriteBatch(
- kBatchSizeOne, &kDefLevelZero, &kRepLevelZero, nullptr);
- break;
- case Type::FLOAT:
- static_cast<FloatWriter*>(writer)->WriteBatch(kBatchSizeOne, &kDefLevelZero,
- &kRepLevelZero, nullptr);
- break;
- case Type::DOUBLE:
- static_cast<DoubleWriter*>(writer)->WriteBatch(kBatchSizeOne, &kDefLevelZero,
- &kRepLevelZero, nullptr);
- break;
- case Type::INT96:
- case Type::UNDEFINED:
- throw ParquetException("Unexpected type: " + TypeToString(writer->type()));
- break;
- }
-}
-
-void StreamWriter::SkipOptionalColumn() {
- if (SkipColumns(1) != 1) {
- throw ParquetException("Failed to skip optional column at column index " +
- std::to_string(column_index_));
- }
-}
-
-void StreamWriter::EndRow() {
- if (!file_writer_) {
- throw ParquetException("StreamWriter not initialized");
- }
- if (static_cast<std::size_t>(column_index_) < nodes_.size()) {
- throw ParquetException("Cannot end row with " + std::to_string(column_index_) +
- " of " + std::to_string(nodes_.size()) + " columns written");
- }
- column_index_ = 0;
- ++current_row_;
-
- if (max_row_group_size_ > 0) {
- if (row_group_size_ > max_row_group_size_) {
- EndRowGroup();
- }
-    // Initialize for each row with the size already written
- // (compressed + uncompressed).
- //
- row_group_size_ = row_group_writer_->total_bytes_written() +
- row_group_writer_->total_compressed_bytes();
- }
-}
-
-void StreamWriter::EndRowGroup() {
- if (!file_writer_) {
- throw ParquetException("StreamWriter not initialized");
- }
- // Avoid creating empty row groups.
- if (row_group_writer_->num_rows() > 0) {
- row_group_writer_->Close();
- row_group_writer_.reset(file_writer_->AppendBufferedRowGroup());
- }
-}
-
-StreamWriter& operator<<(StreamWriter& os, EndRowType) {
- os.EndRow();
- return os;
-}
-
-StreamWriter& operator<<(StreamWriter& os, EndRowGroupType) {
- os.EndRowGroup();
- return os;
-}
-
-} // namespace parquet
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "parquet/stream_writer.h"
+
+#include <utility>
+
+namespace parquet {
+
+int64_t StreamWriter::default_row_group_size_{512 * 1024 * 1024}; // 512MB
+
+constexpr int16_t StreamWriter::kDefLevelZero;
+constexpr int16_t StreamWriter::kDefLevelOne;
+constexpr int16_t StreamWriter::kRepLevelZero;
+constexpr int64_t StreamWriter::kBatchSizeOne;
+
+StreamWriter::FixedStringView::FixedStringView(const char* data_ptr)
+ : data{data_ptr}, size{std::strlen(data_ptr)} {}
+
+StreamWriter::FixedStringView::FixedStringView(const char* data_ptr, std::size_t data_len)
+ : data{data_ptr}, size{data_len} {}
+
+StreamWriter::StreamWriter(std::unique_ptr<ParquetFileWriter> writer)
+ : file_writer_{std::move(writer)},
+ row_group_writer_{file_writer_->AppendBufferedRowGroup()} {
+ auto schema = file_writer_->schema();
+ auto group_node = schema->group_node();
+
+ nodes_.resize(schema->num_columns());
+
+ for (auto i = 0; i < schema->num_columns(); ++i) {
+ nodes_[i] = std::static_pointer_cast<schema::PrimitiveNode>(group_node->field(i));
+ }
+}
+
+void StreamWriter::SetDefaultMaxRowGroupSize(int64_t max_size) {
+ default_row_group_size_ = max_size;
+}
+
+void StreamWriter::SetMaxRowGroupSize(int64_t max_size) {
+ max_row_group_size_ = max_size;
+}
+
+int StreamWriter::num_columns() const { return static_cast<int>(nodes_.size()); }
+
+StreamWriter& StreamWriter::operator<<(bool v) {
+ CheckColumn(Type::BOOLEAN, ConvertedType::NONE);
+ return Write<BoolWriter>(v);
+}
+
+StreamWriter& StreamWriter::operator<<(int8_t v) {
+ CheckColumn(Type::INT32, ConvertedType::INT_8);
+ return Write<Int32Writer>(static_cast<int32_t>(v));
+}
+
+StreamWriter& StreamWriter::operator<<(uint8_t v) {
+ CheckColumn(Type::INT32, ConvertedType::UINT_8);
+ return Write<Int32Writer>(static_cast<int32_t>(v));
+}
+
+StreamWriter& StreamWriter::operator<<(int16_t v) {
+ CheckColumn(Type::INT32, ConvertedType::INT_16);
+ return Write<Int32Writer>(static_cast<int32_t>(v));
+}
+
+StreamWriter& StreamWriter::operator<<(uint16_t v) {
+ CheckColumn(Type::INT32, ConvertedType::UINT_16);
+ return Write<Int32Writer>(static_cast<int32_t>(v));
+}
+
+StreamWriter& StreamWriter::operator<<(int32_t v) {
+ CheckColumn(Type::INT32, ConvertedType::INT_32);
+ return Write<Int32Writer>(v);
+}
+
+StreamWriter& StreamWriter::operator<<(uint32_t v) {
+ CheckColumn(Type::INT32, ConvertedType::UINT_32);
+ return Write<Int32Writer>(static_cast<int32_t>(v));
+}
+
+StreamWriter& StreamWriter::operator<<(int64_t v) {
+ CheckColumn(Type::INT64, ConvertedType::INT_64);
+ return Write<Int64Writer>(v);
+}
+
+StreamWriter& StreamWriter::operator<<(uint64_t v) {
+ CheckColumn(Type::INT64, ConvertedType::UINT_64);
+ return Write<Int64Writer>(static_cast<int64_t>(v));
+}
+
+StreamWriter& StreamWriter::operator<<(const std::chrono::milliseconds& v) {
+ CheckColumn(Type::INT64, ConvertedType::TIMESTAMP_MILLIS);
+ return Write<Int64Writer>(static_cast<int64_t>(v.count()));
+}
+
+StreamWriter& StreamWriter::operator<<(const std::chrono::microseconds& v) {
+ CheckColumn(Type::INT64, ConvertedType::TIMESTAMP_MICROS);
+ return Write<Int64Writer>(static_cast<int64_t>(v.count()));
+}
+
+StreamWriter& StreamWriter::operator<<(float v) {
+ CheckColumn(Type::FLOAT, ConvertedType::NONE);
+ return Write<FloatWriter>(v);
+}
+
+StreamWriter& StreamWriter::operator<<(double v) {
+ CheckColumn(Type::DOUBLE, ConvertedType::NONE);
+ return Write<DoubleWriter>(v);
+}
+
+StreamWriter& StreamWriter::operator<<(char v) { return WriteFixedLength(&v, 1); }
+
+StreamWriter& StreamWriter::operator<<(FixedStringView v) {
+ return WriteFixedLength(v.data, v.size);
+}
+
+StreamWriter& StreamWriter::operator<<(const char* v) {
+ return WriteVariableLength(v, std::strlen(v));
+}
+
+StreamWriter& StreamWriter::operator<<(const std::string& v) {
+ return WriteVariableLength(v.data(), v.size());
+}
+
+StreamWriter& StreamWriter::operator<<(::arrow::util::string_view v) {
+ return WriteVariableLength(v.data(), v.size());
+}
+
+StreamWriter& StreamWriter::WriteVariableLength(const char* data_ptr,
+ std::size_t data_len) {
+ CheckColumn(Type::BYTE_ARRAY, ConvertedType::UTF8);
+
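+  // A present value is written with def_level 1 (kDefLevelOne), a null
+  // with def_level 0 (kDefLevelZero); rep_level is always 0 since
+  // repeated fields are not supported.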
+ auto writer = static_cast<ByteArrayWriter*>(row_group_writer_->column(column_index_++));
+
+ if (data_ptr != nullptr) {
+ ByteArray ba_value;
+
+ ba_value.ptr = reinterpret_cast<const uint8_t*>(data_ptr);
+ ba_value.len = static_cast<uint32_t>(data_len);
+
+ writer->WriteBatch(kBatchSizeOne, &kDefLevelOne, &kRepLevelZero, &ba_value);
+ } else {
+ writer->WriteBatch(kBatchSizeOne, &kDefLevelZero, &kRepLevelZero, nullptr);
+ }
+ if (max_row_group_size_ > 0) {
+ row_group_size_ += writer->EstimatedBufferedValueBytes();
+ }
+ return *this;
+}
+
+StreamWriter& StreamWriter::WriteFixedLength(const char* data_ptr, std::size_t data_len) {
+ CheckColumn(Type::FIXED_LEN_BYTE_ARRAY, ConvertedType::NONE,
+ static_cast<int>(data_len));
+
+ auto writer =
+ static_cast<FixedLenByteArrayWriter*>(row_group_writer_->column(column_index_++));
+
+ if (data_ptr != nullptr) {
+ FixedLenByteArray flba_value;
+
+ flba_value.ptr = reinterpret_cast<const uint8_t*>(data_ptr);
+ writer->WriteBatch(kBatchSizeOne, &kDefLevelOne, &kRepLevelZero, &flba_value);
+ } else {
+ writer->WriteBatch(kBatchSizeOne, &kDefLevelZero, &kRepLevelZero, nullptr);
+ }
+ if (max_row_group_size_ > 0) {
+ row_group_size_ += writer->EstimatedBufferedValueBytes();
+ }
+ return *this;
+}
+
+void StreamWriter::CheckColumn(Type::type physical_type,
+ ConvertedType::type converted_type, int length) {
+ if (static_cast<std::size_t>(column_index_) >= nodes_.size()) {
+ throw ParquetException("Column index out-of-bounds. Index " +
+ std::to_string(column_index_) + " is invalid for " +
+ std::to_string(nodes_.size()) + " columns");
+ }
+ const auto& node = nodes_[column_index_];
+
+ if (physical_type != node->physical_type()) {
+ throw ParquetException("Column physical type mismatch. Column '" + node->name() +
+ "' has physical type '" + TypeToString(node->physical_type()) +
+ "' not '" + TypeToString(physical_type) + "'");
+ }
+  if (converted_type != node->converted_type()) {
+    throw ParquetException("Column converted type mismatch. Column '" + node->name() +
+                           "' has converted type '" +
+                           ConvertedTypeToString(node->converted_type()) + "' not '" +
+                           ConvertedTypeToString(converted_type) + "'");
+  }
+  // Length must be exact.
+  // A shorter-length fixed array is not acceptable as it would
+  // result in array bounds read errors.
+  //
+ if (length != node->type_length()) {
+ throw ParquetException("Column length mismatch. Column '" + node->name() +
+ "' has length " + std::to_string(node->type_length()) +
+ " not " + std::to_string(length));
+ }
+}
+
+int64_t StreamWriter::SkipColumns(int num_columns_to_skip) {
+ int num_columns_skipped = 0;
+
+ for (; (num_columns_to_skip > num_columns_skipped) &&
+ static_cast<std::size_t>(column_index_) < nodes_.size();
+ ++num_columns_skipped) {
+ const auto& node = nodes_[column_index_];
+
+ if (node->is_required()) {
+ throw ParquetException("Cannot skip column '" + node->name() +
+ "' as it is required.");
+ }
+ auto writer = row_group_writer_->column(column_index_++);
+
+ WriteNullValue(writer);
+ }
+ return num_columns_skipped;
+}
+
+void StreamWriter::WriteNullValue(ColumnWriter* writer) {
+ switch (writer->type()) {
+ case Type::BOOLEAN:
+ static_cast<BoolWriter*>(writer)->WriteBatch(kBatchSizeOne, &kDefLevelZero,
+ &kRepLevelZero, nullptr);
+ break;
+ case Type::INT32:
+ static_cast<Int32Writer*>(writer)->WriteBatch(kBatchSizeOne, &kDefLevelZero,
+ &kRepLevelZero, nullptr);
+ break;
+ case Type::INT64:
+ static_cast<Int64Writer*>(writer)->WriteBatch(kBatchSizeOne, &kDefLevelZero,
+ &kRepLevelZero, nullptr);
+ break;
+ case Type::BYTE_ARRAY:
+ static_cast<ByteArrayWriter*>(writer)->WriteBatch(kBatchSizeOne, &kDefLevelZero,
+ &kRepLevelZero, nullptr);
+ break;
+ case Type::FIXED_LEN_BYTE_ARRAY:
+ static_cast<FixedLenByteArrayWriter*>(writer)->WriteBatch(
+ kBatchSizeOne, &kDefLevelZero, &kRepLevelZero, nullptr);
+ break;
+ case Type::FLOAT:
+ static_cast<FloatWriter*>(writer)->WriteBatch(kBatchSizeOne, &kDefLevelZero,
+ &kRepLevelZero, nullptr);
+ break;
+ case Type::DOUBLE:
+ static_cast<DoubleWriter*>(writer)->WriteBatch(kBatchSizeOne, &kDefLevelZero,
+ &kRepLevelZero, nullptr);
+ break;
+ case Type::INT96:
+ case Type::UNDEFINED:
+ throw ParquetException("Unexpected type: " + TypeToString(writer->type()));
+ break;
+ }
+}
+
+void StreamWriter::SkipOptionalColumn() {
+ if (SkipColumns(1) != 1) {
+ throw ParquetException("Failed to skip optional column at column index " +
+ std::to_string(column_index_));
+ }
+}
+
+void StreamWriter::EndRow() {
+ if (!file_writer_) {
+ throw ParquetException("StreamWriter not initialized");
+ }
+ if (static_cast<std::size_t>(column_index_) < nodes_.size()) {
+ throw ParquetException("Cannot end row with " + std::to_string(column_index_) +
+ " of " + std::to_string(nodes_.size()) + " columns written");
+ }
+ column_index_ = 0;
+ ++current_row_;
+
+ if (max_row_group_size_ > 0) {
+ if (row_group_size_ > max_row_group_size_) {
+ EndRowGroup();
+ }
+    // Initialize for each row with the size already written
+ // (compressed + uncompressed).
+ //
+ row_group_size_ = row_group_writer_->total_bytes_written() +
+ row_group_writer_->total_compressed_bytes();
+ }
+}
+
+void StreamWriter::EndRowGroup() {
+ if (!file_writer_) {
+ throw ParquetException("StreamWriter not initialized");
+ }
+ // Avoid creating empty row groups.
+ if (row_group_writer_->num_rows() > 0) {
+ row_group_writer_->Close();
+ row_group_writer_.reset(file_writer_->AppendBufferedRowGroup());
+ }
+}
+
+StreamWriter& operator<<(StreamWriter& os, EndRowType) {
+ os.EndRow();
+ return os;
+}
+
+StreamWriter& operator<<(StreamWriter& os, EndRowGroupType) {
+ os.EndRowGroup();
+ return os;
+}
+
+} // namespace parquet
diff --git a/contrib/libs/apache/arrow/cpp/src/parquet/stream_writer.h b/contrib/libs/apache/arrow/cpp/src/parquet/stream_writer.h
index ebd9a278a2b..d0db850c341 100644
--- a/contrib/libs/apache/arrow/cpp/src/parquet/stream_writer.h
+++ b/contrib/libs/apache/arrow/cpp/src/parquet/stream_writer.h
@@ -1,243 +1,243 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-#pragma once
-
-#include <array>
-#include <chrono>
-#include <cstdint>
-#include <memory>
-#include <string>
-#include <vector>
-
-#include "arrow/util/optional.h"
-#include "arrow/util/string_view.h"
-#include "parquet/column_writer.h"
-#include "parquet/file_writer.h"
-
-namespace parquet {
-
-/// \brief A class for writing Parquet files using an output stream type API.
-///
-/// The values given must be of the correct type, i.e. the type must
-/// match the file schema exactly; otherwise a ParquetException will
-/// be thrown.
-///
-/// The user must explicitly indicate the end of the row using the
-/// EndRow() function or EndRow output manipulator.
-///
-/// A maximum row group size can be configured; the default size is
-/// 512MB. Alternatively, the row group size can be set to zero and the
-/// user can create new row groups by calling the EndRowGroup()
-/// function or using the EndRowGroup output manipulator.
-///
-/// Required and optional fields are supported:
-/// - Required fields are written using operator<<(T)
-/// - Optional fields are written using
-/// operator<<(arrow::util::optional<T>).
-///
-/// Note that operator<<(T) can be used to write optional fields.
-///
-/// Similarly, operator<<(arrow::util::optional<T>) can be used to
-/// write required fields. However, if the optional parameter does not
-/// have a value (i.e. it is nullopt), a ParquetException will be
-/// raised.
-///
-/// Currently there is no support for repeated fields.
-///
-class PARQUET_EXPORT StreamWriter {
- public:
- template <typename T>
- using optional = ::arrow::util::optional<T>;
-
- // N.B. Default constructed objects are not usable. This
- // constructor is provided so that the object may be move
- // assigned afterwards.
- StreamWriter() = default;
-
- explicit StreamWriter(std::unique_ptr<ParquetFileWriter> writer);
-
- ~StreamWriter() = default;
-
- static void SetDefaultMaxRowGroupSize(int64_t max_size);
-
- void SetMaxRowGroupSize(int64_t max_size);
-
- int current_column() const { return column_index_; }
-
- int64_t current_row() const { return current_row_; }
-
- int num_columns() const;
-
- // Moving is possible.
- StreamWriter(StreamWriter&&) = default;
- StreamWriter& operator=(StreamWriter&&) = default;
-
- // Copying is not allowed.
- StreamWriter(const StreamWriter&) = delete;
- StreamWriter& operator=(const StreamWriter&) = delete;
-
- /// \brief Output operators for required fields.
- /// These can also be used for optional fields when a value must be set.
- StreamWriter& operator<<(bool v);
-
- StreamWriter& operator<<(int8_t v);
-
- StreamWriter& operator<<(uint8_t v);
-
- StreamWriter& operator<<(int16_t v);
-
- StreamWriter& operator<<(uint16_t v);
-
- StreamWriter& operator<<(int32_t v);
-
- StreamWriter& operator<<(uint32_t v);
-
- StreamWriter& operator<<(int64_t v);
-
- StreamWriter& operator<<(uint64_t v);
-
- StreamWriter& operator<<(const std::chrono::milliseconds& v);
-
- StreamWriter& operator<<(const std::chrono::microseconds& v);
-
- StreamWriter& operator<<(float v);
-
- StreamWriter& operator<<(double v);
-
- StreamWriter& operator<<(char v);
-
- /// \brief Helper class to write fixed length strings.
- /// This is useful as the standard string view (such as
- /// arrow::util::string_view) is for variable length data.
- struct PARQUET_EXPORT FixedStringView {
- FixedStringView() = default;
-
- explicit FixedStringView(const char* data_ptr);
-
- FixedStringView(const char* data_ptr, std::size_t data_len);
-
- const char* data{NULLPTR};
- std::size_t size{0};
- };
-
- /// \brief Output operators for fixed length strings.
- template <int N>
- StreamWriter& operator<<(const char (&v)[N]) {
- return WriteFixedLength(v, N);
- }
- template <std::size_t N>
- StreamWriter& operator<<(const std::array<char, N>& v) {
- return WriteFixedLength(v.data(), N);
- }
- StreamWriter& operator<<(FixedStringView v);
-
- /// \brief Output operators for variable length strings.
- StreamWriter& operator<<(const char* v);
- StreamWriter& operator<<(const std::string& v);
- StreamWriter& operator<<(::arrow::util::string_view v);
-
- /// \brief Output operator for optional fields.
- template <typename T>
- StreamWriter& operator<<(const optional<T>& v) {
- if (v) {
- return operator<<(*v);
- }
- SkipOptionalColumn();
- return *this;
- }
-
-  /// \brief Skip the next N columns of optional data. If there are
-  /// fewer than N columns remaining, the excess columns are
-  /// ignored.
- /// \throws ParquetException if there is an attempt to skip any
- /// required column.
- /// \return Number of columns actually skipped.
- int64_t SkipColumns(int num_columns_to_skip);
-
-  /// \brief Terminate the current row and advance to the next one.
-  /// \throws ParquetException if not all columns in the row were
-  /// written or skipped.
- void EndRow();
-
-  /// \brief Terminate the current row group and create a new one.
- void EndRowGroup();
-
- protected:
- template <typename WriterType, typename T>
- StreamWriter& Write(const T v) {
- auto writer = static_cast<WriterType*>(row_group_writer_->column(column_index_++));
-
- writer->WriteBatch(kBatchSizeOne, &kDefLevelOne, &kRepLevelZero, &v);
-
- if (max_row_group_size_ > 0) {
- row_group_size_ += writer->EstimatedBufferedValueBytes();
- }
- return *this;
- }
-
- StreamWriter& WriteVariableLength(const char* data_ptr, std::size_t data_len);
-
- StreamWriter& WriteFixedLength(const char* data_ptr, std::size_t data_len);
-
- void CheckColumn(Type::type physical_type, ConvertedType::type converted_type,
- int length = -1);
-
- /// \brief Skip the next column which must be optional.
- /// \throws ParquetException if the next column does not exist or is
- /// not optional.
- void SkipOptionalColumn();
-
- void WriteNullValue(ColumnWriter* writer);
-
- private:
- using node_ptr_type = std::shared_ptr<schema::PrimitiveNode>;
-
- struct null_deleter {
- void operator()(void*) {}
- };
-
- int32_t column_index_{0};
- int64_t current_row_{0};
- int64_t row_group_size_{0};
- int64_t max_row_group_size_{default_row_group_size_};
-
- std::unique_ptr<ParquetFileWriter> file_writer_;
- std::unique_ptr<RowGroupWriter, null_deleter> row_group_writer_;
- std::vector<node_ptr_type> nodes_;
-
- static constexpr int16_t kDefLevelZero = 0;
- static constexpr int16_t kDefLevelOne = 1;
- static constexpr int16_t kRepLevelZero = 0;
- static constexpr int64_t kBatchSizeOne = 1;
-
- static int64_t default_row_group_size_;
-};
-
-struct PARQUET_EXPORT EndRowType {};
-constexpr EndRowType EndRow = {};
-
-struct PARQUET_EXPORT EndRowGroupType {};
-constexpr EndRowGroupType EndRowGroup = {};
-
-PARQUET_EXPORT
-StreamWriter& operator<<(StreamWriter&, EndRowType);
-
-PARQUET_EXPORT
-StreamWriter& operator<<(StreamWriter&, EndRowGroupType);
-
-} // namespace parquet
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+
+#include <array>
+#include <chrono>
+#include <cstdint>
+#include <memory>
+#include <string>
+#include <vector>
+
+#include "arrow/util/optional.h"
+#include "arrow/util/string_view.h"
+#include "parquet/column_writer.h"
+#include "parquet/file_writer.h"
+
+namespace parquet {
+
+/// \brief A class for writing Parquet files using an output stream type API.
+///
+/// The values given must be of the correct type, i.e. the type must
+/// match the file schema exactly; otherwise a ParquetException will
+/// be thrown.
+///
+/// The user must explicitly indicate the end of the row using the
+/// EndRow() function or EndRow output manipulator.
+///
+/// A maximum row group size can be configured; the default size is
+/// 512MB. Alternatively, the row group size can be set to zero and the
+/// user can create new row groups by calling the EndRowGroup()
+/// function or using the EndRowGroup output manipulator.
+///
+/// Required and optional fields are supported:
+/// - Required fields are written using operator<<(T)
+/// - Optional fields are written using
+/// operator<<(arrow::util::optional<T>).
+///
+/// Note that operator<<(T) can be used to write optional fields.
+///
+/// Similarly, operator<<(arrow::util::optional<T>) can be used to
+/// write required fields. However, if the optional parameter does not
+/// have a value (i.e. it is nullopt), a ParquetException will be
+/// raised.
+///
+/// Currently there is no support for repeated fields.
+///
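+/// A minimal usage sketch (assumed schema: a required int32 column
+/// followed by an optional UTF8 string column; "outfile" is a
+/// hypothetical output stream and "schema" the matching GroupNode,
+/// both provided by the caller):
+///
+///   parquet::StreamWriter os{
+///       parquet::ParquetFileWriter::Open(outfile, schema)};
+///   os << int32_t{42} << "fred" << parquet::EndRow;
+///   os << int32_t{43} << parquet::StreamWriter::optional<std::string>{}
+///      << parquet::EndRow;
+///   os << parquet::EndRowGroup;
+///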
+class PARQUET_EXPORT StreamWriter {
+ public:
+ template <typename T>
+ using optional = ::arrow::util::optional<T>;
+
+ // N.B. Default constructed objects are not usable. This
+ // constructor is provided so that the object may be move
+ // assigned afterwards.
+ StreamWriter() = default;
+
+ explicit StreamWriter(std::unique_ptr<ParquetFileWriter> writer);
+
+ ~StreamWriter() = default;
+
+ static void SetDefaultMaxRowGroupSize(int64_t max_size);
+
+ void SetMaxRowGroupSize(int64_t max_size);
+
+ int current_column() const { return column_index_; }
+
+ int64_t current_row() const { return current_row_; }
+
+ int num_columns() const;
+
+ // Moving is possible.
+ StreamWriter(StreamWriter&&) = default;
+ StreamWriter& operator=(StreamWriter&&) = default;
+
+ // Copying is not allowed.
+ StreamWriter(const StreamWriter&) = delete;
+ StreamWriter& operator=(const StreamWriter&) = delete;
+
+ /// \brief Output operators for required fields.
+ /// These can also be used for optional fields when a value must be set.
+ StreamWriter& operator<<(bool v);
+
+ StreamWriter& operator<<(int8_t v);
+
+ StreamWriter& operator<<(uint8_t v);
+
+ StreamWriter& operator<<(int16_t v);
+
+ StreamWriter& operator<<(uint16_t v);
+
+ StreamWriter& operator<<(int32_t v);
+
+ StreamWriter& operator<<(uint32_t v);
+
+ StreamWriter& operator<<(int64_t v);
+
+ StreamWriter& operator<<(uint64_t v);
+
+ StreamWriter& operator<<(const std::chrono::milliseconds& v);
+
+ StreamWriter& operator<<(const std::chrono::microseconds& v);
+
+ StreamWriter& operator<<(float v);
+
+ StreamWriter& operator<<(double v);
+
+ StreamWriter& operator<<(char v);
+
+ /// \brief Helper class to write fixed length strings.
+ /// This is useful as the standard string view (such as
+ /// arrow::util::string_view) is for variable length data.
+ struct PARQUET_EXPORT FixedStringView {
+ FixedStringView() = default;
+
+ explicit FixedStringView(const char* data_ptr);
+
+ FixedStringView(const char* data_ptr, std::size_t data_len);
+
+ const char* data{NULLPTR};
+ std::size_t size{0};
+ };
+
+ /// \brief Output operators for fixed length strings.
+ template <int N>
+ StreamWriter& operator<<(const char (&v)[N]) {
+ return WriteFixedLength(v, N);
+ }
+ template <std::size_t N>
+ StreamWriter& operator<<(const std::array<char, N>& v) {
+ return WriteFixedLength(v.data(), N);
+ }
+ StreamWriter& operator<<(FixedStringView v);
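+
+  // A sketch (the buffer name "buf" is hypothetical): write a 4-byte
+  // FIXED_LEN_BYTE_ARRAY value taken from a longer buffer:
+  //
+  //   os << parquet::StreamWriter::FixedStringView{buf, 4};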
+
+ /// \brief Output operators for variable length strings.
+ StreamWriter& operator<<(const char* v);
+ StreamWriter& operator<<(const std::string& v);
+ StreamWriter& operator<<(::arrow::util::string_view v);
+
+ /// \brief Output operator for optional fields.
+ template <typename T>
+ StreamWriter& operator<<(const optional<T>& v) {
+ if (v) {
+ return operator<<(*v);
+ }
+ SkipOptionalColumn();
+ return *this;
+ }
+
+  /// \brief Skip the next N columns of optional data. If there are
+  /// fewer than N columns remaining, the excess columns are
+  /// ignored.
+ /// \throws ParquetException if there is an attempt to skip any
+ /// required column.
+ /// \return Number of columns actually skipped.
+ int64_t SkipColumns(int num_columns_to_skip);
+
+  /// \brief Terminate the current row and advance to the next one.
+  /// \throws ParquetException if not all columns in the row were
+  /// written or skipped.
+ void EndRow();
+
+  /// \brief Terminate the current row group and create a new one.
+ void EndRowGroup();
+
+ protected:
+ template <typename WriterType, typename T>
+ StreamWriter& Write(const T v) {
+ auto writer = static_cast<WriterType*>(row_group_writer_->column(column_index_++));
+
+ writer->WriteBatch(kBatchSizeOne, &kDefLevelOne, &kRepLevelZero, &v);
+
+ if (max_row_group_size_ > 0) {
+ row_group_size_ += writer->EstimatedBufferedValueBytes();
+ }
+ return *this;
+ }
+
+ StreamWriter& WriteVariableLength(const char* data_ptr, std::size_t data_len);
+
+ StreamWriter& WriteFixedLength(const char* data_ptr, std::size_t data_len);
+
+ void CheckColumn(Type::type physical_type, ConvertedType::type converted_type,
+ int length = -1);
+
+ /// \brief Skip the next column which must be optional.
+ /// \throws ParquetException if the next column does not exist or is
+ /// not optional.
+ void SkipOptionalColumn();
+
+ void WriteNullValue(ColumnWriter* writer);
+
+ private:
+ using node_ptr_type = std::shared_ptr<schema::PrimitiveNode>;
+
+ struct null_deleter {
+ void operator()(void*) {}
+ };
+
+ int32_t column_index_{0};
+ int64_t current_row_{0};
+ int64_t row_group_size_{0};
+ int64_t max_row_group_size_{default_row_group_size_};
+
+ std::unique_ptr<ParquetFileWriter> file_writer_;
+ std::unique_ptr<RowGroupWriter, null_deleter> row_group_writer_;
+ std::vector<node_ptr_type> nodes_;
+
+ static constexpr int16_t kDefLevelZero = 0;
+ static constexpr int16_t kDefLevelOne = 1;
+ static constexpr int16_t kRepLevelZero = 0;
+ static constexpr int64_t kBatchSizeOne = 1;
+
+ static int64_t default_row_group_size_;
+};
+
+struct PARQUET_EXPORT EndRowType {};
+constexpr EndRowType EndRow = {};
+
+struct PARQUET_EXPORT EndRowGroupType {};
+constexpr EndRowGroupType EndRowGroup = {};
+
+PARQUET_EXPORT
+StreamWriter& operator<<(StreamWriter&, EndRowType);
+
+PARQUET_EXPORT
+StreamWriter& operator<<(StreamWriter&, EndRowGroupType);
+
+} // namespace parquet
diff --git a/contrib/libs/apache/arrow/cpp/src/parquet/symbols.map b/contrib/libs/apache/arrow/cpp/src/parquet/symbols.map
index 9df019e5fcd..4bf032dd584 100644
--- a/contrib/libs/apache/arrow/cpp/src/parquet/symbols.map
+++ b/contrib/libs/apache/arrow/cpp/src/parquet/symbols.map
@@ -1,40 +1,40 @@
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements. See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership. The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied. See the License for the
-# specific language governing permissions and limitations
-# under the License.
-
-{
- # Symbols marked as 'local' are not exported by the DSO and thus may not
- # be used by client applications.
- local:
- # devtoolset / static-libstdc++ symbols
- __cxa_*;
- __once_proxy;
-
- extern "C++" {
- # boost
- boost::*;
-
- # thrift
- apache::thrift::*;
-
- # devtoolset or -static-libstdc++ - the Red Hat devtoolset statically
- # links c++11 symbols into binaries so that the result may be executed on
- # a system with an older libstdc++ which doesn't include the necessary
- # c++11 symbols.
- std::*;
- *std::__once_call*;
- };
-};
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+{
+ # Symbols marked as 'local' are not exported by the DSO and thus may not
+ # be used by client applications.
+ local:
+ # devtoolset / static-libstdc++ symbols
+ __cxa_*;
+ __once_proxy;
+
+ extern "C++" {
+ # boost
+ boost::*;
+
+ # thrift
+ apache::thrift::*;
+
+ # devtoolset or -static-libstdc++ - the Red Hat devtoolset statically
+ # links c++11 symbols into binaries so that the result may be executed on
+ # a system with an older libstdc++ which doesn't include the necessary
+ # c++11 symbols.
+ std::*;
+ *std::__once_call*;
+ };
+};
diff --git a/contrib/libs/apache/arrow/cpp/src/parquet/thrift_internal.h b/contrib/libs/apache/arrow/cpp/src/parquet/thrift_internal.h
index 443d948e30a..ea7df209621 100644
--- a/contrib/libs/apache/arrow/cpp/src/parquet/thrift_internal.h
+++ b/contrib/libs/apache/arrow/cpp/src/parquet/thrift_internal.h
@@ -1,494 +1,494 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-#pragma once
-
-#include "arrow/util/windows_compatibility.h"
-
-#include <cstdint>
-// Check if thrift version < 0.11.0
-// or if FORCE_BOOST_SMART_PTR is defined. Ref: https://thrift.apache.org/lib/cpp
-#if defined(PARQUET_THRIFT_USE_BOOST) || defined(FORCE_BOOST_SMART_PTR)
-#include <boost/shared_ptr.hpp>
-#else
-#include <memory>
-#endif
-#include <string>
-#include <vector>
-
-// TCompactProtocol requires some #defines to work right.
-#define SIGNED_RIGHT_SHIFT_IS 1
-#define ARITHMETIC_RIGHT_SHIFT 1
-#include <thrift/TApplicationException.h>
-#include <thrift/protocol/TCompactProtocol.h>
-#include <thrift/protocol/TDebugProtocol.h>
-
-#include <thrift/protocol/TBinaryProtocol.h>
-#include <thrift/transport/TBufferTransports.h>
-#include <sstream>
-
-#include "arrow/util/logging.h"
-
-#include "parquet/encryption/internal_file_decryptor.h"
-#include "parquet/encryption/internal_file_encryptor.h"
-#include "parquet/exception.h"
-#include "parquet/platform.h"
-#include "parquet/statistics.h"
-#include "parquet/types.h"
-
-#include "generated/parquet_types.h" // IYWU pragma: export
-
-namespace parquet {
-
-// Check if thrift version < 0.11.0
-// or if FORCE_BOOST_SMART_PTR is defined. Ref: https://thrift.apache.org/lib/cpp
-#if defined(PARQUET_THRIFT_USE_BOOST) || defined(FORCE_BOOST_SMART_PTR)
-using ::boost::shared_ptr;
-#else
-using ::std::shared_ptr;
-#endif
-
-// ----------------------------------------------------------------------
-// Convert Thrift enums to Parquet enums
-
-// Unsafe enum converters (input is not checked for validity)
-
-static inline Type::type FromThriftUnsafe(format::Type::type type) {
- return static_cast<Type::type>(type);
-}
-
-static inline ConvertedType::type FromThriftUnsafe(format::ConvertedType::type type) {
- // item 0 is NONE
- return static_cast<ConvertedType::type>(static_cast<int>(type) + 1);
-}
-
-static inline Repetition::type FromThriftUnsafe(format::FieldRepetitionType::type type) {
- return static_cast<Repetition::type>(type);
-}
-
-static inline Encoding::type FromThriftUnsafe(format::Encoding::type type) {
- return static_cast<Encoding::type>(type);
-}
-
-static inline PageType::type FromThriftUnsafe(format::PageType::type type) {
- return static_cast<PageType::type>(type);
-}
-
-static inline Compression::type FromThriftUnsafe(format::CompressionCodec::type type) {
- switch (type) {
- case format::CompressionCodec::UNCOMPRESSED:
- return Compression::UNCOMPRESSED;
- case format::CompressionCodec::SNAPPY:
- return Compression::SNAPPY;
- case format::CompressionCodec::GZIP:
- return Compression::GZIP;
- case format::CompressionCodec::LZO:
- return Compression::LZO;
- case format::CompressionCodec::BROTLI:
- return Compression::BROTLI;
- case format::CompressionCodec::LZ4:
- return Compression::LZ4_HADOOP;
- case format::CompressionCodec::LZ4_RAW:
- return Compression::LZ4;
- case format::CompressionCodec::ZSTD:
- return Compression::ZSTD;
- default:
- DCHECK(false) << "Cannot reach here";
- return Compression::UNCOMPRESSED;
- }
-}
-
-namespace internal {
-
-template <typename T>
-struct ThriftEnumTypeTraits {};
-
-template <>
-struct ThriftEnumTypeTraits<::parquet::format::Type::type> {
- using ParquetEnum = Type;
-};
-
-template <>
-struct ThriftEnumTypeTraits<::parquet::format::ConvertedType::type> {
- using ParquetEnum = ConvertedType;
-};
-
-template <>
-struct ThriftEnumTypeTraits<::parquet::format::FieldRepetitionType::type> {
- using ParquetEnum = Repetition;
-};
-
-template <>
-struct ThriftEnumTypeTraits<::parquet::format::Encoding::type> {
- using ParquetEnum = Encoding;
-};
-
-template <>
-struct ThriftEnumTypeTraits<::parquet::format::PageType::type> {
- using ParquetEnum = PageType;
-};
-
-// If the parquet file is corrupted it is possible the enum value decoded
-// will not be in the range of defined values, which is undefined behaviour.
-// This facility prevents this by loading the value as the underlying type
-// and checking to make sure it is in range.
-
-template <typename EnumType,
- typename EnumTypeRaw = typename std::underlying_type<EnumType>::type>
-inline static EnumTypeRaw LoadEnumRaw(const EnumType* in) {
- EnumTypeRaw raw_value;
- // Use memcpy(), as a regular cast would be undefined behaviour on invalid values
- memcpy(&raw_value, in, sizeof(EnumType));
- return raw_value;
-}
-
-template <typename ApiType>
-struct SafeLoader {
- using ApiTypeEnum = typename ApiType::type;
- using ApiTypeRawEnum = typename std::underlying_type<ApiTypeEnum>::type;
-
- template <typename ThriftType>
- inline static ApiTypeRawEnum LoadRaw(const ThriftType* in) {
- static_assert(sizeof(ApiTypeEnum) == sizeof(ThriftType),
- "parquet type should always be the same size as thrift type");
- return static_cast<ApiTypeRawEnum>(LoadEnumRaw(in));
- }
-
- template <typename ThriftType, bool IsUnsigned = true>
- inline static ApiTypeEnum LoadChecked(
- const typename std::enable_if<IsUnsigned, ThriftType>::type* in) {
- auto raw_value = LoadRaw(in);
- if (ARROW_PREDICT_FALSE(raw_value >=
- static_cast<ApiTypeRawEnum>(ApiType::UNDEFINED))) {
- return ApiType::UNDEFINED;
- }
- return FromThriftUnsafe(static_cast<ThriftType>(raw_value));
- }
-
- template <typename ThriftType, bool IsUnsigned = false>
- inline static ApiTypeEnum LoadChecked(
- const typename std::enable_if<!IsUnsigned, ThriftType>::type* in) {
- auto raw_value = LoadRaw(in);
- if (ARROW_PREDICT_FALSE(raw_value >=
- static_cast<ApiTypeRawEnum>(ApiType::UNDEFINED) ||
- raw_value < 0)) {
- return ApiType::UNDEFINED;
- }
- return FromThriftUnsafe(static_cast<ThriftType>(raw_value));
- }
-
- template <typename ThriftType>
- inline static ApiTypeEnum Load(const ThriftType* in) {
- return LoadChecked<ThriftType, std::is_unsigned<ApiTypeRawEnum>::value>(in);
- }
-};
-
-} // namespace internal
-
-// Safe enum loader: will check for invalid enum value before converting
-
-template <typename ThriftType,
- typename ParquetEnum =
- typename internal::ThriftEnumTypeTraits<ThriftType>::ParquetEnum>
-inline typename ParquetEnum::type LoadEnumSafe(const ThriftType* in) {
- return internal::SafeLoader<ParquetEnum>::Load(in);
-}
-
-inline typename Compression::type LoadEnumSafe(const format::CompressionCodec::type* in) {
- const auto raw_value = internal::LoadEnumRaw(in);
- // Check bounds manually, as Compression::type doesn't have the same values
- // as format::CompressionCodec.
- const auto min_value =
- static_cast<decltype(raw_value)>(format::CompressionCodec::UNCOMPRESSED);
- const auto max_value =
- static_cast<decltype(raw_value)>(format::CompressionCodec::LZ4_RAW);
- if (raw_value < min_value || raw_value > max_value) {
- return Compression::UNCOMPRESSED;
- }
- return FromThriftUnsafe(*in);
-}
-
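-// A usage sketch (the field access is hypothetical): safely load a
-// thrift enum decoded from a possibly corrupt file; out-of-range raw
-// values map to UNDEFINED instead of invoking undefined behaviour:
-//
-//   format::Encoding::type raw = thrift_chunk_meta.encodings[0];
-//   Encoding::type encoding = LoadEnumSafe(&raw);
-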
-// Safe non-enum converters
-
-static inline AadMetadata FromThrift(format::AesGcmV1 aesGcmV1) {
- return AadMetadata{aesGcmV1.aad_prefix, aesGcmV1.aad_file_unique,
- aesGcmV1.supply_aad_prefix};
-}
-
-static inline AadMetadata FromThrift(format::AesGcmCtrV1 aesGcmCtrV1) {
- return AadMetadata{aesGcmCtrV1.aad_prefix, aesGcmCtrV1.aad_file_unique,
- aesGcmCtrV1.supply_aad_prefix};
-}
-
-static inline EncryptionAlgorithm FromThrift(format::EncryptionAlgorithm encryption) {
- EncryptionAlgorithm encryption_algorithm;
-
- if (encryption.__isset.AES_GCM_V1) {
- encryption_algorithm.algorithm = ParquetCipher::AES_GCM_V1;
- encryption_algorithm.aad = FromThrift(encryption.AES_GCM_V1);
- } else if (encryption.__isset.AES_GCM_CTR_V1) {
- encryption_algorithm.algorithm = ParquetCipher::AES_GCM_CTR_V1;
- encryption_algorithm.aad = FromThrift(encryption.AES_GCM_CTR_V1);
- } else {
- throw ParquetException("Unsupported algorithm");
- }
- return encryption_algorithm;
-}
-
-// ----------------------------------------------------------------------
-// Convert Thrift enums from Parquet enums
-
-static inline format::Type::type ToThrift(Type::type type) {
- return static_cast<format::Type::type>(type);
-}
-
-static inline format::ConvertedType::type ToThrift(ConvertedType::type type) {
- // item 0 is NONE
- DCHECK_NE(type, ConvertedType::NONE);
- // it is forbidden to emit "NA" (PARQUET-1990)
- DCHECK_NE(type, ConvertedType::NA);
- DCHECK_NE(type, ConvertedType::UNDEFINED);
- return static_cast<format::ConvertedType::type>(static_cast<int>(type) - 1);
-}
-
-static inline format::FieldRepetitionType::type ToThrift(Repetition::type type) {
- return static_cast<format::FieldRepetitionType::type>(type);
-}
-
-static inline format::Encoding::type ToThrift(Encoding::type type) {
- return static_cast<format::Encoding::type>(type);
-}
-
-static inline format::CompressionCodec::type ToThrift(Compression::type type) {
- switch (type) {
- case Compression::UNCOMPRESSED:
- return format::CompressionCodec::UNCOMPRESSED;
- case Compression::SNAPPY:
- return format::CompressionCodec::SNAPPY;
- case Compression::GZIP:
- return format::CompressionCodec::GZIP;
- case Compression::LZO:
- return format::CompressionCodec::LZO;
- case Compression::BROTLI:
- return format::CompressionCodec::BROTLI;
- case Compression::LZ4:
- return format::CompressionCodec::LZ4_RAW;
- case Compression::LZ4_HADOOP:
- // Deprecated "LZ4" Parquet compression has Hadoop-specific framing
- return format::CompressionCodec::LZ4;
- case Compression::ZSTD:
- return format::CompressionCodec::ZSTD;
- default:
- DCHECK(false) << "Cannot reach here";
- return format::CompressionCodec::UNCOMPRESSED;
- }
-}
-
-static inline format::Statistics ToThrift(const EncodedStatistics& stats) {
- format::Statistics statistics;
- if (stats.has_min) {
- statistics.__set_min_value(stats.min());
- // If the order is SIGNED, then the old min value must be set too.
-    // This is for backward compatibility.
- if (stats.is_signed()) {
- statistics.__set_min(stats.min());
- }
- }
- if (stats.has_max) {
- statistics.__set_max_value(stats.max());
- // If the order is SIGNED, then the old max value must be set too.
-    // This is for backward compatibility.
- if (stats.is_signed()) {
- statistics.__set_max(stats.max());
- }
- }
- if (stats.has_null_count) {
- statistics.__set_null_count(stats.null_count);
- }
- if (stats.has_distinct_count) {
- statistics.__set_distinct_count(stats.distinct_count);
- }
-
- return statistics;
-}
-
-static inline format::AesGcmV1 ToAesGcmV1Thrift(AadMetadata aad) {
- format::AesGcmV1 aesGcmV1;
- // aad_file_unique is always set
- aesGcmV1.__set_aad_file_unique(aad.aad_file_unique);
- aesGcmV1.__set_supply_aad_prefix(aad.supply_aad_prefix);
- if (!aad.aad_prefix.empty()) {
- aesGcmV1.__set_aad_prefix(aad.aad_prefix);
- }
- return aesGcmV1;
-}
-
-static inline format::AesGcmCtrV1 ToAesGcmCtrV1Thrift(AadMetadata aad) {
- format::AesGcmCtrV1 aesGcmCtrV1;
- // aad_file_unique is always set
- aesGcmCtrV1.__set_aad_file_unique(aad.aad_file_unique);
- aesGcmCtrV1.__set_supply_aad_prefix(aad.supply_aad_prefix);
- if (!aad.aad_prefix.empty()) {
- aesGcmCtrV1.__set_aad_prefix(aad.aad_prefix);
- }
- return aesGcmCtrV1;
-}
-
-static inline format::EncryptionAlgorithm ToThrift(EncryptionAlgorithm encryption) {
- format::EncryptionAlgorithm encryption_algorithm;
- if (encryption.algorithm == ParquetCipher::AES_GCM_V1) {
- encryption_algorithm.__set_AES_GCM_V1(ToAesGcmV1Thrift(encryption.aad));
- } else {
- encryption_algorithm.__set_AES_GCM_CTR_V1(ToAesGcmCtrV1Thrift(encryption.aad));
- }
- return encryption_algorithm;
-}
-
-// ----------------------------------------------------------------------
-// Thrift struct serialization / deserialization utilities
-
-using ThriftBuffer = apache::thrift::transport::TMemoryBuffer;
-
-template <class T>
-inline void DeserializeThriftUnencryptedMsg(const uint8_t* buf, uint32_t* len,
- T* deserialized_msg) {
- // Deserialize msg bytes into c++ thrift msg using memory transport.
- shared_ptr<ThriftBuffer> tmem_transport(
- new ThriftBuffer(const_cast<uint8_t*>(buf), *len));
- apache::thrift::protocol::TCompactProtocolFactoryT<ThriftBuffer> tproto_factory;
- // Protect against CPU and memory bombs
- tproto_factory.setStringSizeLimit(100 * 1000 * 1000);
- // Structs in the thrift definition are relatively large (at least 300 bytes).
- // This limits total memory to the same order of magnitude as stringSize.
- tproto_factory.setContainerSizeLimit(1000 * 1000);
- shared_ptr<apache::thrift::protocol::TProtocol> tproto = //
- tproto_factory.getProtocol(tmem_transport);
- try {
- deserialized_msg->read(tproto.get());
- } catch (std::exception& e) {
- std::stringstream ss;
- ss << "Couldn't deserialize thrift: " << e.what() << "\n";
- throw ParquetException(ss.str());
- }
- uint32_t bytes_left = tmem_transport->available_read();
- *len = *len - bytes_left;
-}
-
-// Deserialize a thrift message from buf/len. buf/len must at least contain
-// all the bytes needed to store the thrift message. On return, len will be
-// set to the actual length of the header.
-template <class T>
-inline void DeserializeThriftMsg(const uint8_t* buf, uint32_t* len, T* deserialized_msg,
- const std::shared_ptr<Decryptor>& decryptor = NULLPTR) {
- // thrift message is not encrypted
- if (decryptor == NULLPTR) {
- DeserializeThriftUnencryptedMsg(buf, len, deserialized_msg);
- } else { // thrift message is encrypted
- uint32_t clen = *len;
- // decrypt
- std::shared_ptr<ResizableBuffer> decrypted_buffer =
- std::static_pointer_cast<ResizableBuffer>(AllocateBuffer(
- decryptor->pool(),
- static_cast<int64_t>(clen - decryptor->CiphertextSizeDelta())));
- const uint8_t* cipher_buf = buf;
- uint32_t decrypted_buffer_len =
- decryptor->Decrypt(cipher_buf, 0, decrypted_buffer->mutable_data());
- if (decrypted_buffer_len <= 0) {
- throw ParquetException("Couldn't decrypt buffer\n");
- }
- *len = decrypted_buffer_len + decryptor->CiphertextSizeDelta();
- DeserializeThriftMsg(decrypted_buffer->data(), &decrypted_buffer_len,
- deserialized_msg);
- }
-}
-
-/// Utility class to serialize thrift objects to a binary format. Reuse this
-/// object where possible so that its underlying memory is reused as well.
-/// Note: thrift may encode NUL bytes into the serialized buffer, so it is not
-/// safe to treat the result as a C string.
-class ThriftSerializer {
- public:
- explicit ThriftSerializer(int initial_buffer_size = 1024)
- : mem_buffer_(new ThriftBuffer(initial_buffer_size)) {
- apache::thrift::protocol::TCompactProtocolFactoryT<ThriftBuffer> factory;
- protocol_ = factory.getProtocol(mem_buffer_);
- }
-
- /// Serialize obj into a memory buffer. The result is returned in buffer/len. The
- /// memory returned is owned by this object and will be invalid when another object
- /// is serialized.
- template <class T>
- void SerializeToBuffer(const T* obj, uint32_t* len, uint8_t** buffer) {
- SerializeObject(obj);
- mem_buffer_->getBuffer(buffer, len);
- }
-
- template <class T>
- void SerializeToString(const T* obj, std::string* result) {
- SerializeObject(obj);
- *result = mem_buffer_->getBufferAsString();
- }
-
- template <class T>
- int64_t Serialize(const T* obj, ArrowOutputStream* out,
- const std::shared_ptr<Encryptor>& encryptor = NULLPTR) {
- uint8_t* out_buffer;
- uint32_t out_length;
- SerializeToBuffer(obj, &out_length, &out_buffer);
-
- // obj is not encrypted
- if (encryptor == NULLPTR) {
- PARQUET_THROW_NOT_OK(out->Write(out_buffer, out_length));
- return static_cast<int64_t>(out_length);
- } else { // obj is encrypted
- return SerializeEncryptedObj(out, out_buffer, out_length, encryptor);
- }
- }
-
- private:
- template <class T>
- void SerializeObject(const T* obj) {
- try {
- mem_buffer_->resetBuffer();
- obj->write(protocol_.get());
- } catch (std::exception& e) {
- std::stringstream ss;
- ss << "Couldn't serialize thrift: " << e.what() << "\n";
- throw ParquetException(ss.str());
- }
- }
-
- int64_t SerializeEncryptedObj(ArrowOutputStream* out, uint8_t* out_buffer,
- uint32_t out_length,
- const std::shared_ptr<Encryptor>& encryptor) {
- std::shared_ptr<ResizableBuffer> cipher_buffer =
- std::static_pointer_cast<ResizableBuffer>(AllocateBuffer(
- encryptor->pool(),
- static_cast<int64_t>(encryptor->CiphertextSizeDelta() + out_length)));
- int cipher_buffer_len =
- encryptor->Encrypt(out_buffer, out_length, cipher_buffer->mutable_data());
-
- PARQUET_THROW_NOT_OK(out->Write(cipher_buffer->data(), cipher_buffer_len));
- return static_cast<int64_t>(cipher_buffer_len);
- }
-
- shared_ptr<ThriftBuffer> mem_buffer_;
- shared_ptr<apache::thrift::protocol::TProtocol> protocol_;
-};
-
-} // namespace parquet
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+
+#include "arrow/util/windows_compatibility.h"
+
+#include <cstdint>
+// Check if thrift version < 0.11.0
+// or if FORCE_BOOST_SMART_PTR is defined. Ref: https://thrift.apache.org/lib/cpp
+#if defined(PARQUET_THRIFT_USE_BOOST) || defined(FORCE_BOOST_SMART_PTR)
+#include <boost/shared_ptr.hpp>
+#else
+#include <memory>
+#endif
+#include <string>
+#include <vector>
+
+// TCompactProtocol requires some #defines to work right.
+#define SIGNED_RIGHT_SHIFT_IS 1
+#define ARITHMETIC_RIGHT_SHIFT 1
+#include <thrift/TApplicationException.h>
+#include <thrift/protocol/TCompactProtocol.h>
+#include <thrift/protocol/TDebugProtocol.h>
+
+#include <thrift/protocol/TBinaryProtocol.h>
+#include <thrift/transport/TBufferTransports.h>
+#include <sstream>
+
+#include "arrow/util/logging.h"
+
+#include "parquet/encryption/internal_file_decryptor.h"
+#include "parquet/encryption/internal_file_encryptor.h"
+#include "parquet/exception.h"
+#include "parquet/platform.h"
+#include "parquet/statistics.h"
+#include "parquet/types.h"
+
+#include "generated/parquet_types.h" // IYWU pragma: export
+
+namespace parquet {
+
+// Check if thrift version < 0.11.0
+// or if FORCE_BOOST_SMART_PTR is defined. Ref: https://thrift.apache.org/lib/cpp
+#if defined(PARQUET_THRIFT_USE_BOOST) || defined(FORCE_BOOST_SMART_PTR)
+using ::boost::shared_ptr;
+#else
+using ::std::shared_ptr;
+#endif
+
+// ----------------------------------------------------------------------
+// Convert Thrift enums to Parquet enums
+
+// Unsafe enum converters (input is not checked for validity)
+
+static inline Type::type FromThriftUnsafe(format::Type::type type) {
+ return static_cast<Type::type>(type);
+}
+
+static inline ConvertedType::type FromThriftUnsafe(format::ConvertedType::type type) {
+ // item 0 is NONE
+ return static_cast<ConvertedType::type>(static_cast<int>(type) + 1);
+}
+
+static inline Repetition::type FromThriftUnsafe(format::FieldRepetitionType::type type) {
+ return static_cast<Repetition::type>(type);
+}
+
+static inline Encoding::type FromThriftUnsafe(format::Encoding::type type) {
+ return static_cast<Encoding::type>(type);
+}
+
+static inline PageType::type FromThriftUnsafe(format::PageType::type type) {
+ return static_cast<PageType::type>(type);
+}
+
+static inline Compression::type FromThriftUnsafe(format::CompressionCodec::type type) {
+ switch (type) {
+ case format::CompressionCodec::UNCOMPRESSED:
+ return Compression::UNCOMPRESSED;
+ case format::CompressionCodec::SNAPPY:
+ return Compression::SNAPPY;
+ case format::CompressionCodec::GZIP:
+ return Compression::GZIP;
+ case format::CompressionCodec::LZO:
+ return Compression::LZO;
+ case format::CompressionCodec::BROTLI:
+ return Compression::BROTLI;
+ case format::CompressionCodec::LZ4:
+ return Compression::LZ4_HADOOP;
+ case format::CompressionCodec::LZ4_RAW:
+ return Compression::LZ4;
+ case format::CompressionCodec::ZSTD:
+ return Compression::ZSTD;
+ default:
+ DCHECK(false) << "Cannot reach here";
+ return Compression::UNCOMPRESSED;
+ }
+}
+
+namespace internal {
+
+template <typename T>
+struct ThriftEnumTypeTraits {};
+
+template <>
+struct ThriftEnumTypeTraits<::parquet::format::Type::type> {
+ using ParquetEnum = Type;
+};
+
+template <>
+struct ThriftEnumTypeTraits<::parquet::format::ConvertedType::type> {
+ using ParquetEnum = ConvertedType;
+};
+
+template <>
+struct ThriftEnumTypeTraits<::parquet::format::FieldRepetitionType::type> {
+ using ParquetEnum = Repetition;
+};
+
+template <>
+struct ThriftEnumTypeTraits<::parquet::format::Encoding::type> {
+ using ParquetEnum = Encoding;
+};
+
+template <>
+struct ThriftEnumTypeTraits<::parquet::format::PageType::type> {
+ using ParquetEnum = PageType;
+};
+
+// If the parquet file is corrupted, the decoded enum value may fall outside
+// the range of defined values, and interpreting it as the enum type would be
+// undefined behaviour. This facility avoids that by loading the value as the
+// underlying integer type and checking that it is in range.
+
+template <typename EnumType,
+ typename EnumTypeRaw = typename std::underlying_type<EnumType>::type>
+inline static EnumTypeRaw LoadEnumRaw(const EnumType* in) {
+ EnumTypeRaw raw_value;
+ // Use memcpy(), as a regular cast would be undefined behaviour on invalid values
+ memcpy(&raw_value, in, sizeof(EnumType));
+ return raw_value;
+}
+
+template <typename ApiType>
+struct SafeLoader {
+ using ApiTypeEnum = typename ApiType::type;
+ using ApiTypeRawEnum = typename std::underlying_type<ApiTypeEnum>::type;
+
+ template <typename ThriftType>
+ inline static ApiTypeRawEnum LoadRaw(const ThriftType* in) {
+ static_assert(sizeof(ApiTypeEnum) == sizeof(ThriftType),
+ "parquet type should always be the same size as thrift type");
+ return static_cast<ApiTypeRawEnum>(LoadEnumRaw(in));
+ }
+
+ template <typename ThriftType, bool IsUnsigned = true>
+ inline static ApiTypeEnum LoadChecked(
+ const typename std::enable_if<IsUnsigned, ThriftType>::type* in) {
+ auto raw_value = LoadRaw(in);
+ if (ARROW_PREDICT_FALSE(raw_value >=
+ static_cast<ApiTypeRawEnum>(ApiType::UNDEFINED))) {
+ return ApiType::UNDEFINED;
+ }
+ return FromThriftUnsafe(static_cast<ThriftType>(raw_value));
+ }
+
+ template <typename ThriftType, bool IsUnsigned = false>
+ inline static ApiTypeEnum LoadChecked(
+ const typename std::enable_if<!IsUnsigned, ThriftType>::type* in) {
+ auto raw_value = LoadRaw(in);
+ if (ARROW_PREDICT_FALSE(raw_value >=
+ static_cast<ApiTypeRawEnum>(ApiType::UNDEFINED) ||
+ raw_value < 0)) {
+ return ApiType::UNDEFINED;
+ }
+ return FromThriftUnsafe(static_cast<ThriftType>(raw_value));
+ }
+
+ template <typename ThriftType>
+ inline static ApiTypeEnum Load(const ThriftType* in) {
+ return LoadChecked<ThriftType, std::is_unsigned<ApiTypeRawEnum>::value>(in);
+ }
+};
+
+} // namespace internal
+
+// Safe enum loader: will check for invalid enum value before converting
+
+template <typename ThriftType,
+ typename ParquetEnum =
+ typename internal::ThriftEnumTypeTraits<ThriftType>::ParquetEnum>
+inline typename ParquetEnum::type LoadEnumSafe(const ThriftType* in) {
+ return internal::SafeLoader<ParquetEnum>::Load(in);
+}
+
+inline typename Compression::type LoadEnumSafe(const format::CompressionCodec::type* in) {
+ const auto raw_value = internal::LoadEnumRaw(in);
+ // Check bounds manually, as Compression::type doesn't have the same values
+ // as format::CompressionCodec.
+ const auto min_value =
+ static_cast<decltype(raw_value)>(format::CompressionCodec::UNCOMPRESSED);
+ const auto max_value =
+ static_cast<decltype(raw_value)>(format::CompressionCodec::LZ4_RAW);
+ if (raw_value < min_value || raw_value > max_value) {
+ return Compression::UNCOMPRESSED;
+ }
+ return FromThriftUnsafe(*in);
+}
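
To make the intent concrete, here is a minimal sketch of these safe loaders at a metadata-reading call site. The format::ColumnMetaData struct comes from the generated parquet.thrift bindings; the surrounding function and the iostream output are purely illustrative:

#include <iostream>

// Sketch (inside namespace parquet): "column" was deserialized from an
// untrusted file, so its enum slots may hold arbitrary integers.
void InspectColumn(const format::ColumnMetaData& column) {
  // LoadEnumSafe reads the raw underlying integer and range-checks it,
  // so a corrupt value degrades to UNDEFINED instead of invoking UB.
  Type::type physical_type = LoadEnumSafe(&column.type);
  // Out-of-range codecs degrade to UNCOMPRESSED per the overload above.
  Compression::type codec = LoadEnumSafe(&column.codec);
  (void)codec;
  if (physical_type == Type::UNDEFINED) {
    throw ParquetException("Corrupt column metadata: unrecognized physical type");
  }
  std::cout << TypeToString(physical_type) << std::endl;
}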
+
+// Safe non-enum converters
+
+static inline AadMetadata FromThrift(format::AesGcmV1 aesGcmV1) {
+ return AadMetadata{aesGcmV1.aad_prefix, aesGcmV1.aad_file_unique,
+ aesGcmV1.supply_aad_prefix};
+}
+
+static inline AadMetadata FromThrift(format::AesGcmCtrV1 aesGcmCtrV1) {
+ return AadMetadata{aesGcmCtrV1.aad_prefix, aesGcmCtrV1.aad_file_unique,
+ aesGcmCtrV1.supply_aad_prefix};
+}
+
+static inline EncryptionAlgorithm FromThrift(format::EncryptionAlgorithm encryption) {
+ EncryptionAlgorithm encryption_algorithm;
+
+ if (encryption.__isset.AES_GCM_V1) {
+ encryption_algorithm.algorithm = ParquetCipher::AES_GCM_V1;
+ encryption_algorithm.aad = FromThrift(encryption.AES_GCM_V1);
+ } else if (encryption.__isset.AES_GCM_CTR_V1) {
+ encryption_algorithm.algorithm = ParquetCipher::AES_GCM_CTR_V1;
+ encryption_algorithm.aad = FromThrift(encryption.AES_GCM_CTR_V1);
+ } else {
+ throw ParquetException("Unsupported algorithm");
+ }
+ return encryption_algorithm;
+}
+
+// ----------------------------------------------------------------------
+// Convert Parquet enums to Thrift enums
+
+static inline format::Type::type ToThrift(Type::type type) {
+ return static_cast<format::Type::type>(type);
+}
+
+static inline format::ConvertedType::type ToThrift(ConvertedType::type type) {
+ // item 0 is NONE
+ DCHECK_NE(type, ConvertedType::NONE);
+ // it is forbidden to emit "NA" (PARQUET-1990)
+ DCHECK_NE(type, ConvertedType::NA);
+ DCHECK_NE(type, ConvertedType::UNDEFINED);
+ return static_cast<format::ConvertedType::type>(static_cast<int>(type) - 1);
+}
+
+static inline format::FieldRepetitionType::type ToThrift(Repetition::type type) {
+ return static_cast<format::FieldRepetitionType::type>(type);
+}
+
+static inline format::Encoding::type ToThrift(Encoding::type type) {
+ return static_cast<format::Encoding::type>(type);
+}
+
+static inline format::CompressionCodec::type ToThrift(Compression::type type) {
+ switch (type) {
+ case Compression::UNCOMPRESSED:
+ return format::CompressionCodec::UNCOMPRESSED;
+ case Compression::SNAPPY:
+ return format::CompressionCodec::SNAPPY;
+ case Compression::GZIP:
+ return format::CompressionCodec::GZIP;
+ case Compression::LZO:
+ return format::CompressionCodec::LZO;
+ case Compression::BROTLI:
+ return format::CompressionCodec::BROTLI;
+ case Compression::LZ4:
+ return format::CompressionCodec::LZ4_RAW;
+ case Compression::LZ4_HADOOP:
+ // Deprecated "LZ4" Parquet compression has Hadoop-specific framing
+ return format::CompressionCodec::LZ4;
+ case Compression::ZSTD:
+ return format::CompressionCodec::ZSTD;
+ default:
+ DCHECK(false) << "Cannot reach here";
+ return format::CompressionCodec::UNCOMPRESSED;
+ }
+}
+
+static inline format::Statistics ToThrift(const EncodedStatistics& stats) {
+ format::Statistics statistics;
+ if (stats.has_min) {
+ statistics.__set_min_value(stats.min());
+ // If the order is SIGNED, then the old min value must be set too.
+ // This is for backward compatibility
+ if (stats.is_signed()) {
+ statistics.__set_min(stats.min());
+ }
+ }
+ if (stats.has_max) {
+ statistics.__set_max_value(stats.max());
+ // If the order is SIGNED, then the old max value must be set too.
+ // This is for backward compatibility
+ if (stats.is_signed()) {
+ statistics.__set_max(stats.max());
+ }
+ }
+ if (stats.has_null_count) {
+ statistics.__set_null_count(stats.null_count);
+ }
+ if (stats.has_distinct_count) {
+ statistics.__set_distinct_count(stats.distinct_count);
+ }
+
+ return statistics;
+}
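
A short sketch of the signed/unsigned behaviour above, assuming the chaining setters (set_min/set_max/set_null_count/set_is_signed) that EncodedStatistics exposes in parquet/statistics.h:

// Sketch: with a SIGNED sort order, ToThrift fills both the current
// min_value/max_value fields and the deprecated min/max fields.
EncodedStatistics encoded;
encoded.set_min("apple").set_max("zebra");  // assumption: setters chain
encoded.set_null_count(0);
encoded.set_is_signed(true);
format::Statistics thrift_stats = ToThrift(encoded);
// Here thrift_stats.__isset.min_value and thrift_stats.__isset.min are both
// true; with set_is_signed(false), only the *_value fields would be set.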
+
+static inline format::AesGcmV1 ToAesGcmV1Thrift(AadMetadata aad) {
+ format::AesGcmV1 aesGcmV1;
+ // aad_file_unique is always set
+ aesGcmV1.__set_aad_file_unique(aad.aad_file_unique);
+ aesGcmV1.__set_supply_aad_prefix(aad.supply_aad_prefix);
+ if (!aad.aad_prefix.empty()) {
+ aesGcmV1.__set_aad_prefix(aad.aad_prefix);
+ }
+ return aesGcmV1;
+}
+
+static inline format::AesGcmCtrV1 ToAesGcmCtrV1Thrift(AadMetadata aad) {
+ format::AesGcmCtrV1 aesGcmCtrV1;
+ // aad_file_unique is always set
+ aesGcmCtrV1.__set_aad_file_unique(aad.aad_file_unique);
+ aesGcmCtrV1.__set_supply_aad_prefix(aad.supply_aad_prefix);
+ if (!aad.aad_prefix.empty()) {
+ aesGcmCtrV1.__set_aad_prefix(aad.aad_prefix);
+ }
+ return aesGcmCtrV1;
+}
+
+static inline format::EncryptionAlgorithm ToThrift(EncryptionAlgorithm encryption) {
+ format::EncryptionAlgorithm encryption_algorithm;
+ if (encryption.algorithm == ParquetCipher::AES_GCM_V1) {
+ encryption_algorithm.__set_AES_GCM_V1(ToAesGcmV1Thrift(encryption.aad));
+ } else {
+ encryption_algorithm.__set_AES_GCM_CTR_V1(ToAesGcmCtrV1Thrift(encryption.aad));
+ }
+ return encryption_algorithm;
+}
+
+// ----------------------------------------------------------------------
+// Thrift struct serialization / deserialization utilities
+
+using ThriftBuffer = apache::thrift::transport::TMemoryBuffer;
+
+template <class T>
+inline void DeserializeThriftUnencryptedMsg(const uint8_t* buf, uint32_t* len,
+ T* deserialized_msg) {
+ // Deserialize msg bytes into c++ thrift msg using memory transport.
+ shared_ptr<ThriftBuffer> tmem_transport(
+ new ThriftBuffer(const_cast<uint8_t*>(buf), *len));
+ apache::thrift::protocol::TCompactProtocolFactoryT<ThriftBuffer> tproto_factory;
+ // Protect against CPU and memory bombs
+ tproto_factory.setStringSizeLimit(100 * 1000 * 1000);
+ // Structs in the thrift definition are relatively large (at least 300 bytes).
+ // This limits total memory to the same order of magnitude as stringSize.
+ tproto_factory.setContainerSizeLimit(1000 * 1000);
+ shared_ptr<apache::thrift::protocol::TProtocol> tproto = //
+ tproto_factory.getProtocol(tmem_transport);
+ try {
+ deserialized_msg->read(tproto.get());
+ } catch (std::exception& e) {
+ std::stringstream ss;
+ ss << "Couldn't deserialize thrift: " << e.what() << "\n";
+ throw ParquetException(ss.str());
+ }
+ uint32_t bytes_left = tmem_transport->available_read();
+ *len = *len - bytes_left;
+}
+
+// Deserialize a thrift message from buf/len. buf/len must at least contain
+// all the bytes needed to store the thrift message. On return, len will be
+// set to the actual length of the header.
+template <class T>
+inline void DeserializeThriftMsg(const uint8_t* buf, uint32_t* len, T* deserialized_msg,
+ const std::shared_ptr<Decryptor>& decryptor = NULLPTR) {
+ // thrift message is not encrypted
+ if (decryptor == NULLPTR) {
+ DeserializeThriftUnencryptedMsg(buf, len, deserialized_msg);
+ } else { // thrift message is encrypted
+ uint32_t clen = *len;
+ // decrypt
+ std::shared_ptr<ResizableBuffer> decrypted_buffer =
+ std::static_pointer_cast<ResizableBuffer>(AllocateBuffer(
+ decryptor->pool(),
+ static_cast<int64_t>(clen - decryptor->CiphertextSizeDelta())));
+ const uint8_t* cipher_buf = buf;
+ uint32_t decrypted_buffer_len =
+ decryptor->Decrypt(cipher_buf, 0, decrypted_buffer->mutable_data());
+ if (decrypted_buffer_len <= 0) {
+ throw ParquetException("Couldn't decrypt buffer\n");
+ }
+ *len = decrypted_buffer_len + decryptor->CiphertextSizeDelta();
+ DeserializeThriftMsg(decrypted_buffer->data(), &decrypted_buffer_len,
+ deserialized_msg);
+ }
+}
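
As a usage sketch, parsing a data page header from a raw byte range follows directly from the contract above (format::PageHeader is the generated Thrift struct; data/size are placeholders for bytes already read from the file):

// Sketch: on entry, header_len says how many bytes are available; on
// return it says how many the Thrift header actually consumed.
format::PageHeader ParsePageHeader(const uint8_t* data, uint32_t size) {
  format::PageHeader page_header;
  uint32_t header_len = size;
  DeserializeThriftMsg(data, &header_len, &page_header);
  // The (possibly compressed) page payload begins at data + header_len.
  return page_header;
}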
+
+/// Utility class to serialize thrift objects to a binary format. Reuse this
+/// object where possible so that its underlying memory is reused as well.
+/// Note: thrift may encode NUL bytes into the serialized buffer, so it is not
+/// safe to treat the result as a C string.
+class ThriftSerializer {
+ public:
+ explicit ThriftSerializer(int initial_buffer_size = 1024)
+ : mem_buffer_(new ThriftBuffer(initial_buffer_size)) {
+ apache::thrift::protocol::TCompactProtocolFactoryT<ThriftBuffer> factory;
+ protocol_ = factory.getProtocol(mem_buffer_);
+ }
+
+ /// Serialize obj into a memory buffer. The result is returned in buffer/len. The
+ /// memory returned is owned by this object and will be invalid when another object
+ /// is serialized.
+ template <class T>
+ void SerializeToBuffer(const T* obj, uint32_t* len, uint8_t** buffer) {
+ SerializeObject(obj);
+ mem_buffer_->getBuffer(buffer, len);
+ }
+
+ template <class T>
+ void SerializeToString(const T* obj, std::string* result) {
+ SerializeObject(obj);
+ *result = mem_buffer_->getBufferAsString();
+ }
+
+ template <class T>
+ int64_t Serialize(const T* obj, ArrowOutputStream* out,
+ const std::shared_ptr<Encryptor>& encryptor = NULLPTR) {
+ uint8_t* out_buffer;
+ uint32_t out_length;
+ SerializeToBuffer(obj, &out_length, &out_buffer);
+
+ // obj is not encrypted
+ if (encryptor == NULLPTR) {
+ PARQUET_THROW_NOT_OK(out->Write(out_buffer, out_length));
+ return static_cast<int64_t>(out_length);
+ } else { // obj is encrypted
+ return SerializeEncryptedObj(out, out_buffer, out_length, encryptor);
+ }
+ }
+
+ private:
+ template <class T>
+ void SerializeObject(const T* obj) {
+ try {
+ mem_buffer_->resetBuffer();
+ obj->write(protocol_.get());
+ } catch (std::exception& e) {
+ std::stringstream ss;
+ ss << "Couldn't serialize thrift: " << e.what() << "\n";
+ throw ParquetException(ss.str());
+ }
+ }
+
+ int64_t SerializeEncryptedObj(ArrowOutputStream* out, uint8_t* out_buffer,
+ uint32_t out_length,
+ const std::shared_ptr<Encryptor>& encryptor) {
+ std::shared_ptr<ResizableBuffer> cipher_buffer =
+ std::static_pointer_cast<ResizableBuffer>(AllocateBuffer(
+ encryptor->pool(),
+ static_cast<int64_t>(encryptor->CiphertextSizeDelta() + out_length)));
+ int cipher_buffer_len =
+ encryptor->Encrypt(out_buffer, out_length, cipher_buffer->mutable_data());
+
+ PARQUET_THROW_NOT_OK(out->Write(cipher_buffer->data(), cipher_buffer_len));
+ return static_cast<int64_t>(cipher_buffer_len);
+ }
+
+ shared_ptr<ThriftBuffer> mem_buffer_;
+ shared_ptr<apache::thrift::protocol::TProtocol> protocol_;
+};
+
+} // namespace parquet
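
And the matching write-path sketch with ThriftSerializer; the sink is assumed to be any parquet::ArrowOutputStream, and keeping one serializer around amortizes its buffer allocation as the class comment suggests:

// Sketch: serialize a Thrift FileMetaData struct to an output stream.
int64_t WriteThriftMetadata(const parquet::format::FileMetaData& metadata,
                            parquet::ArrowOutputStream* sink) {
  parquet::ThriftSerializer serializer;  // reuse across calls where possible
  // Returns the number of bytes written; passing an Encryptor instead of
  // the default NULLPTR would write ciphertext.
  return serializer.Serialize(&metadata, sink);
}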
diff --git a/contrib/libs/apache/arrow/cpp/src/parquet/type_fwd.h b/contrib/libs/apache/arrow/cpp/src/parquet/type_fwd.h
index 2153ea63efb..a427f5a9591 100644
--- a/contrib/libs/apache/arrow/cpp/src/parquet/type_fwd.h
+++ b/contrib/libs/apache/arrow/cpp/src/parquet/type_fwd.h
@@ -1,43 +1,43 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-#pragma once
-
-namespace parquet {
-
-struct ParquetVersion {
- enum type { PARQUET_1_0, PARQUET_2_0 };
-};
-
-class FileMetaData;
-class SchemaDescriptor;
-
-class ReaderProperties;
-class ArrowReaderProperties;
-
-class WriterProperties;
-class WriterPropertiesBuilder;
-class ArrowWriterProperties;
-class ArrowWriterPropertiesBuilder;
-
-namespace arrow {
-
-class FileWriter;
-class FileReader;
-
-} // namespace arrow
-} // namespace parquet
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+
+namespace parquet {
+
+struct ParquetVersion {
+ enum type { PARQUET_1_0, PARQUET_2_0 };
+};
+
+class FileMetaData;
+class SchemaDescriptor;
+
+class ReaderProperties;
+class ArrowReaderProperties;
+
+class WriterProperties;
+class WriterPropertiesBuilder;
+class ArrowWriterProperties;
+class ArrowWriterPropertiesBuilder;
+
+namespace arrow {
+
+class FileWriter;
+class FileReader;
+
+} // namespace arrow
+} // namespace parquet
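
A small sketch of what this forward-declaration header buys consumers: interfaces can mention these types without including the heavier metadata and properties headers (the myapp namespace and functions are hypothetical):

#include "parquet/type_fwd.h"

namespace myapp {

// Only the implementing translation unit needs the full definitions;
// these declarations compile against the forward declarations alone.
void LogRowGroupCount(const parquet::FileMetaData& metadata);
parquet::ParquetVersion::type DefaultFormatVersion();

}  // namespace myapp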
diff --git a/contrib/libs/apache/arrow/cpp/src/parquet/types.cc b/contrib/libs/apache/arrow/cpp/src/parquet/types.cc
index 35cc43639b8..ef23c40662b 100644
--- a/contrib/libs/apache/arrow/cpp/src/parquet/types.cc
+++ b/contrib/libs/apache/arrow/cpp/src/parquet/types.cc
@@ -1,1567 +1,1567 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-#include <cmath>
-#include <cstdint>
-#include <memory>
-#include <sstream>
-#include <string>
-
-#include "arrow/util/checked_cast.h"
-#include "arrow/util/compression.h"
-#include "arrow/util/logging.h"
-
-#include "parquet/exception.h"
-#include "parquet/types.h"
-
-#include "generated/parquet_types.h"
-
-using arrow::internal::checked_cast;
-using arrow::util::Codec;
-
-namespace parquet {
-
-bool IsCodecSupported(Compression::type codec) {
- switch (codec) {
- case Compression::UNCOMPRESSED:
- case Compression::SNAPPY:
- case Compression::GZIP:
- case Compression::BROTLI:
- case Compression::ZSTD:
- case Compression::LZ4:
- case Compression::LZ4_HADOOP:
- return true;
- default:
- return false;
- }
-}
-
-std::unique_ptr<Codec> GetCodec(Compression::type codec) {
- return GetCodec(codec, Codec::UseDefaultCompressionLevel());
-}
-
-std::unique_ptr<Codec> GetCodec(Compression::type codec, int compression_level) {
- std::unique_ptr<Codec> result;
- if (codec == Compression::LZO) {
- throw ParquetException(
- "While LZO compression is supported by the Parquet format in "
- "general, it is currently not supported by the C++ implementation.");
- }
-
- if (!IsCodecSupported(codec)) {
- std::stringstream ss;
- ss << "Codec type " << Codec::GetCodecAsString(codec)
- << " not supported in Parquet format";
- throw ParquetException(ss.str());
- }
-
- PARQUET_ASSIGN_OR_THROW(result, Codec::Create(codec, compression_level));
- return result;
-}
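
A round-trip sketch using a codec obtained from GetCodec; MaxCompressedLen, Compress, and Decompress follow the arrow::util::Codec interface this file already uses, and PARQUET_ASSIGN_OR_THROW converts arrow::Result errors into ParquetException:

#include <vector>

// Sketch: compress a buffer with ZSTD and decompress it again.
void RoundTripZstd(const uint8_t* input, int64_t input_len) {
  std::unique_ptr<Codec> codec = GetCodec(Compression::ZSTD);
  std::vector<uint8_t> compressed(codec->MaxCompressedLen(input_len, input));
  int64_t compressed_len;
  PARQUET_ASSIGN_OR_THROW(
      compressed_len,
      codec->Compress(input_len, input, compressed.size(), compressed.data()));
  std::vector<uint8_t> decompressed(input_len);
  int64_t decompressed_len;
  PARQUET_ASSIGN_OR_THROW(
      decompressed_len,
      codec->Decompress(compressed_len, compressed.data(),
                        decompressed.size(), decompressed.data()));
  DCHECK_EQ(decompressed_len, input_len);
}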
-
-std::string FormatStatValue(Type::type parquet_type, ::arrow::util::string_view val) {
- std::stringstream result;
-
- const char* bytes = val.data();
- switch (parquet_type) {
- case Type::BOOLEAN:
- result << reinterpret_cast<const bool*>(bytes)[0];
- break;
- case Type::INT32:
- result << reinterpret_cast<const int32_t*>(bytes)[0];
- break;
- case Type::INT64:
- result << reinterpret_cast<const int64_t*>(bytes)[0];
- break;
- case Type::DOUBLE:
- result << reinterpret_cast<const double*>(bytes)[0];
- break;
- case Type::FLOAT:
- result << reinterpret_cast<const float*>(bytes)[0];
- break;
- case Type::INT96: {
- auto const i32_val = reinterpret_cast<const int32_t*>(bytes);
- result << i32_val[0] << " " << i32_val[1] << " " << i32_val[2];
- break;
- }
- case Type::BYTE_ARRAY: {
- return std::string(val);
- }
- case Type::FIXED_LEN_BYTE_ARRAY: {
- return std::string(val);
- }
- case Type::UNDEFINED:
- default:
- break;
- }
- return result.str();
-}
-
-std::string EncodingToString(Encoding::type t) {
- switch (t) {
- case Encoding::PLAIN:
- return "PLAIN";
- case Encoding::PLAIN_DICTIONARY:
- return "PLAIN_DICTIONARY";
- case Encoding::RLE:
- return "RLE";
- case Encoding::BIT_PACKED:
- return "BIT_PACKED";
- case Encoding::DELTA_BINARY_PACKED:
- return "DELTA_BINARY_PACKED";
- case Encoding::DELTA_LENGTH_BYTE_ARRAY:
- return "DELTA_LENGTH_BYTE_ARRAY";
- case Encoding::DELTA_BYTE_ARRAY:
- return "DELTA_BYTE_ARRAY";
- case Encoding::RLE_DICTIONARY:
- return "RLE_DICTIONARY";
- case Encoding::BYTE_STREAM_SPLIT:
- return "BYTE_STREAM_SPLIT";
- default:
- return "UNKNOWN";
- }
-}
-
-std::string TypeToString(Type::type t) {
- switch (t) {
- case Type::BOOLEAN:
- return "BOOLEAN";
- case Type::INT32:
- return "INT32";
- case Type::INT64:
- return "INT64";
- case Type::INT96:
- return "INT96";
- case Type::FLOAT:
- return "FLOAT";
- case Type::DOUBLE:
- return "DOUBLE";
- case Type::BYTE_ARRAY:
- return "BYTE_ARRAY";
- case Type::FIXED_LEN_BYTE_ARRAY:
- return "FIXED_LEN_BYTE_ARRAY";
- case Type::UNDEFINED:
- default:
- return "UNKNOWN";
- }
-}
-
-std::string ConvertedTypeToString(ConvertedType::type t) {
- switch (t) {
- case ConvertedType::NONE:
- return "NONE";
- case ConvertedType::UTF8:
- return "UTF8";
- case ConvertedType::MAP:
- return "MAP";
- case ConvertedType::MAP_KEY_VALUE:
- return "MAP_KEY_VALUE";
- case ConvertedType::LIST:
- return "LIST";
- case ConvertedType::ENUM:
- return "ENUM";
- case ConvertedType::DECIMAL:
- return "DECIMAL";
- case ConvertedType::DATE:
- return "DATE";
- case ConvertedType::TIME_MILLIS:
- return "TIME_MILLIS";
- case ConvertedType::TIME_MICROS:
- return "TIME_MICROS";
- case ConvertedType::TIMESTAMP_MILLIS:
- return "TIMESTAMP_MILLIS";
- case ConvertedType::TIMESTAMP_MICROS:
- return "TIMESTAMP_MICROS";
- case ConvertedType::UINT_8:
- return "UINT_8";
- case ConvertedType::UINT_16:
- return "UINT_16";
- case ConvertedType::UINT_32:
- return "UINT_32";
- case ConvertedType::UINT_64:
- return "UINT_64";
- case ConvertedType::INT_8:
- return "INT_8";
- case ConvertedType::INT_16:
- return "INT_16";
- case ConvertedType::INT_32:
- return "INT_32";
- case ConvertedType::INT_64:
- return "INT_64";
- case ConvertedType::JSON:
- return "JSON";
- case ConvertedType::BSON:
- return "BSON";
- case ConvertedType::INTERVAL:
- return "INTERVAL";
- case ConvertedType::UNDEFINED:
- default:
- return "UNKNOWN";
- }
-}
-
-int GetTypeByteSize(Type::type parquet_type) {
- switch (parquet_type) {
- case Type::BOOLEAN:
- return type_traits<BooleanType::type_num>::value_byte_size;
- case Type::INT32:
- return type_traits<Int32Type::type_num>::value_byte_size;
- case Type::INT64:
- return type_traits<Int64Type::type_num>::value_byte_size;
- case Type::INT96:
- return type_traits<Int96Type::type_num>::value_byte_size;
- case Type::DOUBLE:
- return type_traits<DoubleType::type_num>::value_byte_size;
- case Type::FLOAT:
- return type_traits<FloatType::type_num>::value_byte_size;
- case Type::BYTE_ARRAY:
- return type_traits<ByteArrayType::type_num>::value_byte_size;
- case Type::FIXED_LEN_BYTE_ARRAY:
- return type_traits<FLBAType::type_num>::value_byte_size;
- case Type::UNDEFINED:
- default:
- return 0;
- }
- return 0;
-}
-
-// Return the Sort Order of the Parquet Physical Types
-SortOrder::type DefaultSortOrder(Type::type primitive) {
- switch (primitive) {
- case Type::BOOLEAN:
- case Type::INT32:
- case Type::INT64:
- case Type::FLOAT:
- case Type::DOUBLE:
- return SortOrder::SIGNED;
- case Type::BYTE_ARRAY:
- case Type::FIXED_LEN_BYTE_ARRAY:
- return SortOrder::UNSIGNED;
- case Type::INT96:
- case Type::UNDEFINED:
- return SortOrder::UNKNOWN;
- }
- return SortOrder::UNKNOWN;
-}
-
-// Return the SortOrder of the Parquet Types using Logical or Physical Types
-SortOrder::type GetSortOrder(ConvertedType::type converted, Type::type primitive) {
- if (converted == ConvertedType::NONE) return DefaultSortOrder(primitive);
- switch (converted) {
- case ConvertedType::INT_8:
- case ConvertedType::INT_16:
- case ConvertedType::INT_32:
- case ConvertedType::INT_64:
- case ConvertedType::DATE:
- case ConvertedType::TIME_MICROS:
- case ConvertedType::TIME_MILLIS:
- case ConvertedType::TIMESTAMP_MICROS:
- case ConvertedType::TIMESTAMP_MILLIS:
- return SortOrder::SIGNED;
- case ConvertedType::UINT_8:
- case ConvertedType::UINT_16:
- case ConvertedType::UINT_32:
- case ConvertedType::UINT_64:
- case ConvertedType::ENUM:
- case ConvertedType::UTF8:
- case ConvertedType::BSON:
- case ConvertedType::JSON:
- return SortOrder::UNSIGNED;
- case ConvertedType::DECIMAL:
- case ConvertedType::LIST:
- case ConvertedType::MAP:
- case ConvertedType::MAP_KEY_VALUE:
- case ConvertedType::INTERVAL:
- case ConvertedType::NONE: // required instead of default
- case ConvertedType::NA: // required instead of default
- case ConvertedType::UNDEFINED:
- return SortOrder::UNKNOWN;
- }
- return SortOrder::UNKNOWN;
-}
-
-SortOrder::type GetSortOrder(const std::shared_ptr<const LogicalType>& logical_type,
- Type::type primitive) {
- SortOrder::type o = SortOrder::UNKNOWN;
- if (logical_type && logical_type->is_valid()) {
- o = (logical_type->is_none() ? DefaultSortOrder(primitive)
- : logical_type->sort_order());
- }
- return o;
-}
-
-ColumnOrder ColumnOrder::undefined_ = ColumnOrder(ColumnOrder::UNDEFINED);
-ColumnOrder ColumnOrder::type_defined_ = ColumnOrder(ColumnOrder::TYPE_DEFINED_ORDER);
-
-// Static methods for LogicalType class
-
-std::shared_ptr<const LogicalType> LogicalType::FromConvertedType(
- const ConvertedType::type converted_type,
- const schema::DecimalMetadata converted_decimal_metadata) {
- switch (converted_type) {
- case ConvertedType::UTF8:
- return StringLogicalType::Make();
- case ConvertedType::MAP_KEY_VALUE:
- case ConvertedType::MAP:
- return MapLogicalType::Make();
- case ConvertedType::LIST:
- return ListLogicalType::Make();
- case ConvertedType::ENUM:
- return EnumLogicalType::Make();
- case ConvertedType::DECIMAL:
- return DecimalLogicalType::Make(converted_decimal_metadata.precision,
- converted_decimal_metadata.scale);
- case ConvertedType::DATE:
- return DateLogicalType::Make();
- case ConvertedType::TIME_MILLIS:
- return TimeLogicalType::Make(true, LogicalType::TimeUnit::MILLIS);
- case ConvertedType::TIME_MICROS:
- return TimeLogicalType::Make(true, LogicalType::TimeUnit::MICROS);
- case ConvertedType::TIMESTAMP_MILLIS:
- return TimestampLogicalType::Make(true, LogicalType::TimeUnit::MILLIS,
- /*is_from_converted_type=*/true,
- /*force_set_converted_type=*/false);
- case ConvertedType::TIMESTAMP_MICROS:
- return TimestampLogicalType::Make(true, LogicalType::TimeUnit::MICROS,
- /*is_from_converted_type=*/true,
- /*force_set_converted_type=*/false);
- case ConvertedType::INTERVAL:
- return IntervalLogicalType::Make();
- case ConvertedType::INT_8:
- return IntLogicalType::Make(8, true);
- case ConvertedType::INT_16:
- return IntLogicalType::Make(16, true);
- case ConvertedType::INT_32:
- return IntLogicalType::Make(32, true);
- case ConvertedType::INT_64:
- return IntLogicalType::Make(64, true);
- case ConvertedType::UINT_8:
- return IntLogicalType::Make(8, false);
- case ConvertedType::UINT_16:
- return IntLogicalType::Make(16, false);
- case ConvertedType::UINT_32:
- return IntLogicalType::Make(32, false);
- case ConvertedType::UINT_64:
- return IntLogicalType::Make(64, false);
- case ConvertedType::JSON:
- return JSONLogicalType::Make();
- case ConvertedType::BSON:
- return BSONLogicalType::Make();
- case ConvertedType::NA:
- return NullLogicalType::Make();
- case ConvertedType::NONE:
- return NoLogicalType::Make();
- case ConvertedType::UNDEFINED:
- return UndefinedLogicalType::Make();
- }
- return UndefinedLogicalType::Make();
-}
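
Two quick illustrations of this legacy mapping (a sketch; the decimal metadata argument only matters for ConvertedType::DECIMAL):

// Sketch: pre-2.4 converted types reconstructed as logical types.
schema::DecimalMetadata no_decimal{};  // zero-initialized; ignored here
auto ts = LogicalType::FromConvertedType(ConvertedType::TIMESTAMP_MICROS, no_decimal);
// ts->is_timestamp() is true: adjusted to UTC, microsecond unit,
// flagged as originating from a converted type.

schema::DecimalMetadata dec;
dec.isset = true;
dec.precision = 10;
dec.scale = 2;
auto decimal = LogicalType::FromConvertedType(ConvertedType::DECIMAL, dec);
// decimal->ToString() yields "Decimal(precision=10, scale=2)".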
-
-std::shared_ptr<const LogicalType> LogicalType::FromThrift(
- const format::LogicalType& type) {
- if (type.__isset.STRING) {
- return StringLogicalType::Make();
- } else if (type.__isset.MAP) {
- return MapLogicalType::Make();
- } else if (type.__isset.LIST) {
- return ListLogicalType::Make();
- } else if (type.__isset.ENUM) {
- return EnumLogicalType::Make();
- } else if (type.__isset.DECIMAL) {
- return DecimalLogicalType::Make(type.DECIMAL.precision, type.DECIMAL.scale);
- } else if (type.__isset.DATE) {
- return DateLogicalType::Make();
- } else if (type.__isset.TIME) {
- LogicalType::TimeUnit::unit unit;
- if (type.TIME.unit.__isset.MILLIS) {
- unit = LogicalType::TimeUnit::MILLIS;
- } else if (type.TIME.unit.__isset.MICROS) {
- unit = LogicalType::TimeUnit::MICROS;
- } else if (type.TIME.unit.__isset.NANOS) {
- unit = LogicalType::TimeUnit::NANOS;
- } else {
- unit = LogicalType::TimeUnit::UNKNOWN;
- }
- return TimeLogicalType::Make(type.TIME.isAdjustedToUTC, unit);
- } else if (type.__isset.TIMESTAMP) {
- LogicalType::TimeUnit::unit unit;
- if (type.TIMESTAMP.unit.__isset.MILLIS) {
- unit = LogicalType::TimeUnit::MILLIS;
- } else if (type.TIMESTAMP.unit.__isset.MICROS) {
- unit = LogicalType::TimeUnit::MICROS;
- } else if (type.TIMESTAMP.unit.__isset.NANOS) {
- unit = LogicalType::TimeUnit::NANOS;
- } else {
- unit = LogicalType::TimeUnit::UNKNOWN;
- }
- return TimestampLogicalType::Make(type.TIMESTAMP.isAdjustedToUTC, unit);
- // TODO(tpboudreau): activate the commented code after parquet.thrift
- // recognizes IntervalType as a LogicalType
- //} else if (type.__isset.INTERVAL) {
- // return IntervalLogicalType::Make();
- } else if (type.__isset.INTEGER) {
- return IntLogicalType::Make(static_cast<int>(type.INTEGER.bitWidth),
- type.INTEGER.isSigned);
- } else if (type.__isset.UNKNOWN) {
- return NullLogicalType::Make();
- } else if (type.__isset.JSON) {
- return JSONLogicalType::Make();
- } else if (type.__isset.BSON) {
- return BSONLogicalType::Make();
- } else if (type.__isset.UUID) {
- return UUIDLogicalType::Make();
- } else {
- throw ParquetException("Metadata contains Thrift LogicalType that is not recognized");
- }
-}
-
-std::shared_ptr<const LogicalType> LogicalType::String() {
- return StringLogicalType::Make();
-}
-
-std::shared_ptr<const LogicalType> LogicalType::Map() { return MapLogicalType::Make(); }
-
-std::shared_ptr<const LogicalType> LogicalType::List() { return ListLogicalType::Make(); }
-
-std::shared_ptr<const LogicalType> LogicalType::Enum() { return EnumLogicalType::Make(); }
-
-std::shared_ptr<const LogicalType> LogicalType::Decimal(int32_t precision,
- int32_t scale) {
- return DecimalLogicalType::Make(precision, scale);
-}
-
-std::shared_ptr<const LogicalType> LogicalType::Date() { return DateLogicalType::Make(); }
-
-std::shared_ptr<const LogicalType> LogicalType::Time(
- bool is_adjusted_to_utc, LogicalType::TimeUnit::unit time_unit) {
- DCHECK(time_unit != LogicalType::TimeUnit::UNKNOWN);
- return TimeLogicalType::Make(is_adjusted_to_utc, time_unit);
-}
-
-std::shared_ptr<const LogicalType> LogicalType::Timestamp(
- bool is_adjusted_to_utc, LogicalType::TimeUnit::unit time_unit,
- bool is_from_converted_type, bool force_set_converted_type) {
- DCHECK(time_unit != LogicalType::TimeUnit::UNKNOWN);
- return TimestampLogicalType::Make(is_adjusted_to_utc, time_unit, is_from_converted_type,
- force_set_converted_type);
-}
-
-std::shared_ptr<const LogicalType> LogicalType::Interval() {
- return IntervalLogicalType::Make();
-}
-
-std::shared_ptr<const LogicalType> LogicalType::Int(int bit_width, bool is_signed) {
- DCHECK(bit_width == 64 || bit_width == 32 || bit_width == 16 || bit_width == 8);
- return IntLogicalType::Make(bit_width, is_signed);
-}
-
-std::shared_ptr<const LogicalType> LogicalType::Null() { return NullLogicalType::Make(); }
-
-std::shared_ptr<const LogicalType> LogicalType::JSON() { return JSONLogicalType::Make(); }
-
-std::shared_ptr<const LogicalType> LogicalType::BSON() { return BSONLogicalType::Make(); }
-
-std::shared_ptr<const LogicalType> LogicalType::UUID() { return UUIDLogicalType::Make(); }
-
-std::shared_ptr<const LogicalType> LogicalType::None() { return NoLogicalType::Make(); }
-
-/*
- * The logical type implementation classes are built in four layers: (1) the base
- * layer, which establishes the interface and provides generally reusable implementations
- * for the ToJSON() and Equals() methods; (2) an intermediate derived layer for the
- * "compatibility" methods, which provides implementations for is_compatible() and
- * ToConvertedType(); (3) another intermediate layer for the "applicability" methods
- * that provides several implementations for the is_applicable() method; and (4) the
- * final derived classes, one for each logical type, which supply implementations
- * for those methods that remain virtual (usually just ToString() and ToThrift()) or
- * otherwise need to be overridden.
- */
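
A brief sketch of this layered interface in action, using the Date type defined later in this file (DCHECK/DCHECK_EQ are the Arrow logging macros already included here):

// Sketch: each layer of LogicalType::Impl surfaces through LogicalType.
auto date = LogicalType::Date();
// Applicability layer: DATE annotates only INT32 storage.
DCHECK(date->is_applicable(parquet::Type::INT32));
DCHECK(!date->is_applicable(parquet::Type::INT64));
// Compatibility layer: it converts to ConvertedType::DATE.
schema::DecimalMetadata decimal_metadata;
DCHECK_EQ(date->ToConvertedType(&decimal_metadata), ConvertedType::DATE);
// Base layer: generic renderings, e.g. ToJSON() gives {"Type": "Date"}.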
-
-// LogicalTypeImpl base class
-
-class LogicalType::Impl {
- public:
- virtual bool is_applicable(parquet::Type::type primitive_type,
- int32_t primitive_length = -1) const = 0;
-
- virtual bool is_compatible(ConvertedType::type converted_type,
- schema::DecimalMetadata converted_decimal_metadata = {
- false, -1, -1}) const = 0;
-
- virtual ConvertedType::type ToConvertedType(
- schema::DecimalMetadata* out_decimal_metadata) const = 0;
-
- virtual std::string ToString() const = 0;
-
- virtual bool is_serialized() const {
- return !(type_ == LogicalType::Type::NONE || type_ == LogicalType::Type::UNDEFINED);
- }
-
- virtual std::string ToJSON() const {
- std::stringstream json;
- json << R"({"Type": ")" << ToString() << R"("})";
- return json.str();
- }
-
- virtual format::LogicalType ToThrift() const {
- // logical types inheriting this method should never be serialized
- std::stringstream ss;
- ss << "Logical type " << ToString() << " should not be serialized";
- throw ParquetException(ss.str());
- }
-
- virtual bool Equals(const LogicalType& other) const { return other.type() == type_; }
-
- LogicalType::Type::type type() const { return type_; }
-
- SortOrder::type sort_order() const { return order_; }
-
- Impl(const Impl&) = delete;
- Impl& operator=(const Impl&) = delete;
- virtual ~Impl() noexcept {}
-
- class Compatible;
- class SimpleCompatible;
- class Incompatible;
-
- class Applicable;
- class SimpleApplicable;
- class TypeLengthApplicable;
- class UniversalApplicable;
- class Inapplicable;
-
- class String;
- class Map;
- class List;
- class Enum;
- class Decimal;
- class Date;
- class Time;
- class Timestamp;
- class Interval;
- class Int;
- class Null;
- class JSON;
- class BSON;
- class UUID;
- class No;
- class Undefined;
-
- protected:
- Impl(LogicalType::Type::type t, SortOrder::type o) : type_(t), order_(o) {}
- Impl() = default;
-
- private:
- LogicalType::Type::type type_ = LogicalType::Type::UNDEFINED;
- SortOrder::type order_ = SortOrder::UNKNOWN;
-};
-
-// Special methods for public LogicalType class
-
-LogicalType::LogicalType() = default;
-LogicalType::~LogicalType() noexcept = default;
-
-// Delegating methods for public LogicalType class
-
-bool LogicalType::is_applicable(parquet::Type::type primitive_type,
- int32_t primitive_length) const {
- return impl_->is_applicable(primitive_type, primitive_length);
-}
-
-bool LogicalType::is_compatible(
- ConvertedType::type converted_type,
- schema::DecimalMetadata converted_decimal_metadata) const {
- return impl_->is_compatible(converted_type, converted_decimal_metadata);
-}
-
-ConvertedType::type LogicalType::ToConvertedType(
- schema::DecimalMetadata* out_decimal_metadata) const {
- return impl_->ToConvertedType(out_decimal_metadata);
-}
-
-std::string LogicalType::ToString() const { return impl_->ToString(); }
-
-std::string LogicalType::ToJSON() const { return impl_->ToJSON(); }
-
-format::LogicalType LogicalType::ToThrift() const { return impl_->ToThrift(); }
-
-bool LogicalType::Equals(const LogicalType& other) const { return impl_->Equals(other); }
-
-LogicalType::Type::type LogicalType::type() const { return impl_->type(); }
-
-SortOrder::type LogicalType::sort_order() const { return impl_->sort_order(); }
-
-// Type checks for public LogicalType class
-
-bool LogicalType::is_string() const { return impl_->type() == LogicalType::Type::STRING; }
-bool LogicalType::is_map() const { return impl_->type() == LogicalType::Type::MAP; }
-bool LogicalType::is_list() const { return impl_->type() == LogicalType::Type::LIST; }
-bool LogicalType::is_enum() const { return impl_->type() == LogicalType::Type::ENUM; }
-bool LogicalType::is_decimal() const {
- return impl_->type() == LogicalType::Type::DECIMAL;
-}
-bool LogicalType::is_date() const { return impl_->type() == LogicalType::Type::DATE; }
-bool LogicalType::is_time() const { return impl_->type() == LogicalType::Type::TIME; }
-bool LogicalType::is_timestamp() const {
- return impl_->type() == LogicalType::Type::TIMESTAMP;
-}
-bool LogicalType::is_interval() const {
- return impl_->type() == LogicalType::Type::INTERVAL;
-}
-bool LogicalType::is_int() const { return impl_->type() == LogicalType::Type::INT; }
-bool LogicalType::is_null() const { return impl_->type() == LogicalType::Type::NIL; }
-bool LogicalType::is_JSON() const { return impl_->type() == LogicalType::Type::JSON; }
-bool LogicalType::is_BSON() const { return impl_->type() == LogicalType::Type::BSON; }
-bool LogicalType::is_UUID() const { return impl_->type() == LogicalType::Type::UUID; }
-bool LogicalType::is_none() const { return impl_->type() == LogicalType::Type::NONE; }
-bool LogicalType::is_valid() const {
- return impl_->type() != LogicalType::Type::UNDEFINED;
-}
-bool LogicalType::is_invalid() const { return !is_valid(); }
-bool LogicalType::is_nested() const {
- return (impl_->type() == LogicalType::Type::LIST) ||
- (impl_->type() == LogicalType::Type::MAP);
-}
-bool LogicalType::is_nonnested() const { return !is_nested(); }
-bool LogicalType::is_serialized() const { return impl_->is_serialized(); }
-
-// LogicalTypeImpl intermediate "compatibility" classes
-
-class LogicalType::Impl::Compatible : public virtual LogicalType::Impl {
- protected:
- Compatible() = default;
-};
-
-#define set_decimal_metadata(m___, i___, p___, s___) \
- { \
- if (m___) { \
- (m___)->isset = (i___); \
- (m___)->scale = (s___); \
- (m___)->precision = (p___); \
- } \
- }
-
-#define reset_decimal_metadata(m___) \
- { set_decimal_metadata(m___, false, -1, -1); }
-
-// For logical types that always translate to the same converted type
-class LogicalType::Impl::SimpleCompatible : public virtual LogicalType::Impl::Compatible {
- public:
- bool is_compatible(ConvertedType::type converted_type,
- schema::DecimalMetadata converted_decimal_metadata) const override {
- return (converted_type == converted_type_) && !converted_decimal_metadata.isset;
- }
-
- ConvertedType::type ToConvertedType(
- schema::DecimalMetadata* out_decimal_metadata) const override {
- reset_decimal_metadata(out_decimal_metadata);
- return converted_type_;
- }
-
- protected:
- explicit SimpleCompatible(ConvertedType::type c) : converted_type_(c) {}
-
- private:
- ConvertedType::type converted_type_ = ConvertedType::NA;
-};
-
-// For logical types that have no corresponding converted type
-class LogicalType::Impl::Incompatible : public virtual LogicalType::Impl {
- public:
- bool is_compatible(ConvertedType::type converted_type,
- schema::DecimalMetadata converted_decimal_metadata) const override {
- return (converted_type == ConvertedType::NONE ||
- converted_type == ConvertedType::NA) &&
- !converted_decimal_metadata.isset;
- }
-
- ConvertedType::type ToConvertedType(
- schema::DecimalMetadata* out_decimal_metadata) const override {
- reset_decimal_metadata(out_decimal_metadata);
- return ConvertedType::NONE;
- }
-
- protected:
- Incompatible() = default;
-};
-
-// LogicalTypeImpl intermediate "applicability" classes
-
-class LogicalType::Impl::Applicable : public virtual LogicalType::Impl {
- protected:
- Applicable() = default;
-};
-
-// For logical types that can apply only to a single
-// physical type
-class LogicalType::Impl::SimpleApplicable : public virtual LogicalType::Impl::Applicable {
- public:
- bool is_applicable(parquet::Type::type primitive_type,
- int32_t primitive_length = -1) const override {
- return primitive_type == type_;
- }
-
- protected:
- explicit SimpleApplicable(parquet::Type::type t) : type_(t) {}
-
- private:
- parquet::Type::type type_;
-};
-
-// For logical types that can apply only to a particular
-// physical type and physical length combination
-class LogicalType::Impl::TypeLengthApplicable
- : public virtual LogicalType::Impl::Applicable {
- public:
- bool is_applicable(parquet::Type::type primitive_type,
- int32_t primitive_length = -1) const override {
- return primitive_type == type_ && primitive_length == length_;
- }
-
- protected:
- TypeLengthApplicable(parquet::Type::type t, int32_t l) : type_(t), length_(l) {}
-
- private:
- parquet::Type::type type_;
- int32_t length_;
-};
-
-// For logical types that can apply to any physical type
-class LogicalType::Impl::UniversalApplicable
- : public virtual LogicalType::Impl::Applicable {
- public:
- bool is_applicable(parquet::Type::type primitive_type,
- int32_t primitive_length = -1) const override {
- return true;
- }
-
- protected:
- UniversalApplicable() = default;
-};
-
-// For logical types that can never apply to any primitive
-// physical type
-class LogicalType::Impl::Inapplicable : public virtual LogicalType::Impl {
- public:
- bool is_applicable(parquet::Type::type primitive_type,
- int32_t primitive_length = -1) const override {
- return false;
- }
-
- protected:
- Inapplicable() = default;
-};
-
-// LogicalType implementation final classes
-
-#define OVERRIDE_TOSTRING(n___) \
- std::string ToString() const override { return #n___; }
-
-#define OVERRIDE_TOTHRIFT(t___, s___) \
- format::LogicalType ToThrift() const override { \
- format::LogicalType type; \
- format::t___ subtype; \
- type.__set_##s___(subtype); \
- return type; \
- }
-
-class LogicalType::Impl::String final : public LogicalType::Impl::SimpleCompatible,
- public LogicalType::Impl::SimpleApplicable {
- public:
- friend class StringLogicalType;
-
- OVERRIDE_TOSTRING(String)
- OVERRIDE_TOTHRIFT(StringType, STRING)
-
- private:
- String()
- : LogicalType::Impl(LogicalType::Type::STRING, SortOrder::UNSIGNED),
- LogicalType::Impl::SimpleCompatible(ConvertedType::UTF8),
- LogicalType::Impl::SimpleApplicable(parquet::Type::BYTE_ARRAY) {}
-};
-
-// Each public logical type class's Make() creation method instantiates a corresponding
-// LogicalType::Impl::* object and installs that implementation in the logical type
-// it returns.
-
-#define GENERATE_MAKE(a___) \
- std::shared_ptr<const LogicalType> a___##LogicalType::Make() { \
- auto* logical_type = new a___##LogicalType(); \
- logical_type->impl_.reset(new LogicalType::Impl::a___()); \
- return std::shared_ptr<const LogicalType>(logical_type); \
- }
-
-GENERATE_MAKE(String)
-
-class LogicalType::Impl::Map final : public LogicalType::Impl::SimpleCompatible,
- public LogicalType::Impl::Inapplicable {
- public:
- friend class MapLogicalType;
-
- bool is_compatible(ConvertedType::type converted_type,
- schema::DecimalMetadata converted_decimal_metadata) const override {
- return (converted_type == ConvertedType::MAP ||
- converted_type == ConvertedType::MAP_KEY_VALUE) &&
- !converted_decimal_metadata.isset;
- }
-
- OVERRIDE_TOSTRING(Map)
- OVERRIDE_TOTHRIFT(MapType, MAP)
-
- private:
- Map()
- : LogicalType::Impl(LogicalType::Type::MAP, SortOrder::UNKNOWN),
- LogicalType::Impl::SimpleCompatible(ConvertedType::MAP) {}
-};
-
-GENERATE_MAKE(Map)
-
-class LogicalType::Impl::List final : public LogicalType::Impl::SimpleCompatible,
- public LogicalType::Impl::Inapplicable {
- public:
- friend class ListLogicalType;
-
- OVERRIDE_TOSTRING(List)
- OVERRIDE_TOTHRIFT(ListType, LIST)
-
- private:
- List()
- : LogicalType::Impl(LogicalType::Type::LIST, SortOrder::UNKNOWN),
- LogicalType::Impl::SimpleCompatible(ConvertedType::LIST) {}
-};
-
-GENERATE_MAKE(List)
-
-class LogicalType::Impl::Enum final : public LogicalType::Impl::SimpleCompatible,
- public LogicalType::Impl::SimpleApplicable {
- public:
- friend class EnumLogicalType;
-
- OVERRIDE_TOSTRING(Enum)
- OVERRIDE_TOTHRIFT(EnumType, ENUM)
-
- private:
- Enum()
- : LogicalType::Impl(LogicalType::Type::ENUM, SortOrder::UNSIGNED),
- LogicalType::Impl::SimpleCompatible(ConvertedType::ENUM),
- LogicalType::Impl::SimpleApplicable(parquet::Type::BYTE_ARRAY) {}
-};
-
-GENERATE_MAKE(Enum)
-
-// The parameterized logical types (currently Decimal, Time, Timestamp, and Int)
-// generally can't reuse the simple method implementations available in the base and
-// intermediate classes and must (re)implement them all
-
-class LogicalType::Impl::Decimal final : public LogicalType::Impl::Compatible,
- public LogicalType::Impl::Applicable {
- public:
- friend class DecimalLogicalType;
-
- bool is_applicable(parquet::Type::type primitive_type,
- int32_t primitive_length = -1) const override;
- bool is_compatible(ConvertedType::type converted_type,
- schema::DecimalMetadata converted_decimal_metadata) const override;
- ConvertedType::type ToConvertedType(
- schema::DecimalMetadata* out_decimal_metadata) const override;
- std::string ToString() const override;
- std::string ToJSON() const override;
- format::LogicalType ToThrift() const override;
- bool Equals(const LogicalType& other) const override;
-
- int32_t precision() const { return precision_; }
- int32_t scale() const { return scale_; }
-
- private:
- Decimal(int32_t p, int32_t s)
- : LogicalType::Impl(LogicalType::Type::DECIMAL, SortOrder::SIGNED),
- precision_(p),
- scale_(s) {}
- int32_t precision_ = -1;
- int32_t scale_ = -1;
-};
-
-bool LogicalType::Impl::Decimal::is_applicable(parquet::Type::type primitive_type,
- int32_t primitive_length) const {
- bool ok = false;
- switch (primitive_type) {
- case parquet::Type::INT32: {
- ok = (1 <= precision_) && (precision_ <= 9);
- } break;
- case parquet::Type::INT64: {
- ok = (1 <= precision_) && (precision_ <= 18);
- if (precision_ < 10) {
- // FIXME(tpb): warn that INT32 could be used
- }
- } break;
- case parquet::Type::FIXED_LEN_BYTE_ARRAY: {
- ok = precision_ <= static_cast<int32_t>(std::floor(
- std::log10(std::pow(2.0, (8.0 * primitive_length) - 1.0))));
- } break;
- case parquet::Type::BYTE_ARRAY: {
- ok = true;
- } break;
- default: {
- } break;
- }
- return ok;
-}
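
A quick check of the FIXED_LEN_BYTE_ARRAY bound above: an N-byte two's-complement value carries 8N - 1 magnitude bits, so the largest representable decimal precision is floor(log10(2^(8N-1))). For N = 4 this gives floor(31 * log10 2) = 9 and for N = 8 it gives floor(63 * log10 2) = 18, matching the INT32 and INT64 branches; N = 16 yields 38, the familiar ceiling for 16-byte decimals.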
-
-bool LogicalType::Impl::Decimal::is_compatible(
- ConvertedType::type converted_type,
- schema::DecimalMetadata converted_decimal_metadata) const {
- return converted_type == ConvertedType::DECIMAL &&
- (converted_decimal_metadata.isset &&
- converted_decimal_metadata.scale == scale_ &&
- converted_decimal_metadata.precision == precision_);
-}
-
-ConvertedType::type LogicalType::Impl::Decimal::ToConvertedType(
- schema::DecimalMetadata* out_decimal_metadata) const {
- set_decimal_metadata(out_decimal_metadata, true, precision_, scale_);
- return ConvertedType::DECIMAL;
-}
-
-std::string LogicalType::Impl::Decimal::ToString() const {
- std::stringstream type;
- type << "Decimal(precision=" << precision_ << ", scale=" << scale_ << ")";
- return type.str();
-}
-
-std::string LogicalType::Impl::Decimal::ToJSON() const {
- std::stringstream json;
- json << R"({"Type": "Decimal", "precision": )" << precision_ << R"(, "scale": )"
- << scale_ << "}";
- return json.str();
-}
-
-format::LogicalType LogicalType::Impl::Decimal::ToThrift() const {
- format::LogicalType type;
- format::DecimalType decimal_type;
- decimal_type.__set_precision(precision_);
- decimal_type.__set_scale(scale_);
- type.__set_DECIMAL(decimal_type);
- return type;
-}
-
-bool LogicalType::Impl::Decimal::Equals(const LogicalType& other) const {
- bool eq = false;
- if (other.is_decimal()) {
- const auto& other_decimal = checked_cast<const DecimalLogicalType&>(other);
- eq = (precision_ == other_decimal.precision() && scale_ == other_decimal.scale());
- }
- return eq;
-}
-
-std::shared_ptr<const LogicalType> DecimalLogicalType::Make(int32_t precision,
- int32_t scale) {
- if (precision < 1) {
- throw ParquetException(
- "Precision must be greater than or equal to 1 for Decimal logical type");
- }
- if (scale < 0 || scale > precision) {
- throw ParquetException(
- "Scale must be a non-negative integer that does not exceed precision for "
- "Decimal logical type");
- }
- auto* logical_type = new DecimalLogicalType();
- logical_type->impl_.reset(new LogicalType::Impl::Decimal(precision, scale));
- return std::shared_ptr<const LogicalType>(logical_type);
-}
-
-int32_t DecimalLogicalType::precision() const {
- return (dynamic_cast<const LogicalType::Impl::Decimal&>(*impl_)).precision();
-}
-
-int32_t DecimalLogicalType::scale() const {
- return (dynamic_cast<const LogicalType::Impl::Decimal&>(*impl_)).scale();
-}
-
-class LogicalType::Impl::Date final : public LogicalType::Impl::SimpleCompatible,
- public LogicalType::Impl::SimpleApplicable {
- public:
- friend class DateLogicalType;
-
- OVERRIDE_TOSTRING(Date)
- OVERRIDE_TOTHRIFT(DateType, DATE)
-
- private:
- Date()
- : LogicalType::Impl(LogicalType::Type::DATE, SortOrder::SIGNED),
- LogicalType::Impl::SimpleCompatible(ConvertedType::DATE),
- LogicalType::Impl::SimpleApplicable(parquet::Type::INT32) {}
-};
-
-GENERATE_MAKE(Date)
-
-#define time_unit_string(u___) \
- ((u___) == LogicalType::TimeUnit::MILLIS \
- ? "milliseconds" \
- : ((u___) == LogicalType::TimeUnit::MICROS \
- ? "microseconds" \
- : ((u___) == LogicalType::TimeUnit::NANOS ? "nanoseconds" : "unknown")))
-
-class LogicalType::Impl::Time final : public LogicalType::Impl::Compatible,
- public LogicalType::Impl::Applicable {
- public:
- friend class TimeLogicalType;
-
- bool is_applicable(parquet::Type::type primitive_type,
- int32_t primitive_length = -1) const override;
- bool is_compatible(ConvertedType::type converted_type,
- schema::DecimalMetadata converted_decimal_metadata) const override;
- ConvertedType::type ToConvertedType(
- schema::DecimalMetadata* out_decimal_metadata) const override;
- std::string ToString() const override;
- std::string ToJSON() const override;
- format::LogicalType ToThrift() const override;
- bool Equals(const LogicalType& other) const override;
-
- bool is_adjusted_to_utc() const { return adjusted_; }
- LogicalType::TimeUnit::unit time_unit() const { return unit_; }
-
- private:
- Time(bool a, LogicalType::TimeUnit::unit u)
- : LogicalType::Impl(LogicalType::Type::TIME, SortOrder::SIGNED),
- adjusted_(a),
- unit_(u) {}
- bool adjusted_ = false;
- LogicalType::TimeUnit::unit unit_;
-};
-
-bool LogicalType::Impl::Time::is_applicable(parquet::Type::type primitive_type,
- int32_t primitive_length) const {
- return (primitive_type == parquet::Type::INT32 &&
- unit_ == LogicalType::TimeUnit::MILLIS) ||
- (primitive_type == parquet::Type::INT64 &&
- (unit_ == LogicalType::TimeUnit::MICROS ||
- unit_ == LogicalType::TimeUnit::NANOS));
-}
-
-bool LogicalType::Impl::Time::is_compatible(
- ConvertedType::type converted_type,
- schema::DecimalMetadata converted_decimal_metadata) const {
- if (converted_decimal_metadata.isset) {
- return false;
- } else if (adjusted_ && unit_ == LogicalType::TimeUnit::MILLIS) {
- return converted_type == ConvertedType::TIME_MILLIS;
- } else if (adjusted_ && unit_ == LogicalType::TimeUnit::MICROS) {
- return converted_type == ConvertedType::TIME_MICROS;
- } else {
- return (converted_type == ConvertedType::NONE) ||
- (converted_type == ConvertedType::NA);
- }
-}
-
-ConvertedType::type LogicalType::Impl::Time::ToConvertedType(
- schema::DecimalMetadata* out_decimal_metadata) const {
- reset_decimal_metadata(out_decimal_metadata);
- if (adjusted_) {
- if (unit_ == LogicalType::TimeUnit::MILLIS) {
- return ConvertedType::TIME_MILLIS;
- } else if (unit_ == LogicalType::TimeUnit::MICROS) {
- return ConvertedType::TIME_MICROS;
- }
- }
- return ConvertedType::NONE;
-}
-
-std::string LogicalType::Impl::Time::ToString() const {
- std::stringstream type;
- type << "Time(isAdjustedToUTC=" << std::boolalpha << adjusted_
- << ", timeUnit=" << time_unit_string(unit_) << ")";
- return type.str();
-}
-
-std::string LogicalType::Impl::Time::ToJSON() const {
- std::stringstream json;
- json << R"({"Type": "Time", "isAdjustedToUTC": )" << std::boolalpha << adjusted_
- << R"(, "timeUnit": ")" << time_unit_string(unit_) << R"("})";
- return json.str();
-}
-
-format::LogicalType LogicalType::Impl::Time::ToThrift() const {
- format::LogicalType type;
- format::TimeType time_type;
- format::TimeUnit time_unit;
- DCHECK(unit_ != LogicalType::TimeUnit::UNKNOWN);
- if (unit_ == LogicalType::TimeUnit::MILLIS) {
- format::MilliSeconds millis;
- time_unit.__set_MILLIS(millis);
- } else if (unit_ == LogicalType::TimeUnit::MICROS) {
- format::MicroSeconds micros;
- time_unit.__set_MICROS(micros);
- } else if (unit_ == LogicalType::TimeUnit::NANOS) {
- format::NanoSeconds nanos;
- time_unit.__set_NANOS(nanos);
- }
- time_type.__set_isAdjustedToUTC(adjusted_);
- time_type.__set_unit(time_unit);
- type.__set_TIME(time_type);
- return type;
-}
-
-bool LogicalType::Impl::Time::Equals(const LogicalType& other) const {
- bool eq = false;
- if (other.is_time()) {
- const auto& other_time = checked_cast<const TimeLogicalType&>(other);
- eq =
- (adjusted_ == other_time.is_adjusted_to_utc() && unit_ == other_time.time_unit());
- }
- return eq;
-}
-
-std::shared_ptr<const LogicalType> TimeLogicalType::Make(
- bool is_adjusted_to_utc, LogicalType::TimeUnit::unit time_unit) {
- if (time_unit == LogicalType::TimeUnit::MILLIS ||
- time_unit == LogicalType::TimeUnit::MICROS ||
- time_unit == LogicalType::TimeUnit::NANOS) {
- auto* logical_type = new TimeLogicalType();
- logical_type->impl_.reset(new LogicalType::Impl::Time(is_adjusted_to_utc, time_unit));
- return std::shared_ptr<const LogicalType>(logical_type);
- } else {
- throw ParquetException(
- "TimeUnit must be one of MILLIS, MICROS, or NANOS for Time logical type");
- }
-}
-
-bool TimeLogicalType::is_adjusted_to_utc() const {
- return (dynamic_cast<const LogicalType::Impl::Time&>(*impl_)).is_adjusted_to_utc();
-}
-
-LogicalType::TimeUnit::unit TimeLogicalType::time_unit() const {
- return (dynamic_cast<const LogicalType::Impl::Time&>(*impl_)).time_unit();
-}
-
-class LogicalType::Impl::Timestamp final : public LogicalType::Impl::Compatible,
- public LogicalType::Impl::SimpleApplicable {
- public:
- friend class TimestampLogicalType;
-
- bool is_serialized() const override;
- bool is_compatible(ConvertedType::type converted_type,
- schema::DecimalMetadata converted_decimal_metadata) const override;
- ConvertedType::type ToConvertedType(
- schema::DecimalMetadata* out_decimal_metadata) const override;
- std::string ToString() const override;
- std::string ToJSON() const override;
- format::LogicalType ToThrift() const override;
- bool Equals(const LogicalType& other) const override;
-
- bool is_adjusted_to_utc() const { return adjusted_; }
- LogicalType::TimeUnit::unit time_unit() const { return unit_; }
-
- bool is_from_converted_type() const { return is_from_converted_type_; }
- bool force_set_converted_type() const { return force_set_converted_type_; }
-
- private:
- Timestamp(bool adjusted, LogicalType::TimeUnit::unit unit, bool is_from_converted_type,
- bool force_set_converted_type)
- : LogicalType::Impl(LogicalType::Type::TIMESTAMP, SortOrder::SIGNED),
- LogicalType::Impl::SimpleApplicable(parquet::Type::INT64),
- adjusted_(adjusted),
- unit_(unit),
- is_from_converted_type_(is_from_converted_type),
- force_set_converted_type_(force_set_converted_type) {}
- bool adjusted_ = false;
- LogicalType::TimeUnit::unit unit_;
- bool is_from_converted_type_ = false;
- bool force_set_converted_type_ = false;
-};
-
-bool LogicalType::Impl::Timestamp::is_serialized() const {
- return !is_from_converted_type_;
-}
-
-bool LogicalType::Impl::Timestamp::is_compatible(
- ConvertedType::type converted_type,
- schema::DecimalMetadata converted_decimal_metadata) const {
- if (converted_decimal_metadata.isset) {
- return false;
- } else if (unit_ == LogicalType::TimeUnit::MILLIS) {
- if (adjusted_ || force_set_converted_type_) {
- return converted_type == ConvertedType::TIMESTAMP_MILLIS;
- } else {
- return (converted_type == ConvertedType::NONE) ||
- (converted_type == ConvertedType::NA);
- }
- } else if (unit_ == LogicalType::TimeUnit::MICROS) {
- if (adjusted_ || force_set_converted_type_) {
- return converted_type == ConvertedType::TIMESTAMP_MICROS;
- } else {
- return (converted_type == ConvertedType::NONE) ||
- (converted_type == ConvertedType::NA);
- }
- } else {
- return (converted_type == ConvertedType::NONE) ||
- (converted_type == ConvertedType::NA);
- }
-}
-
-ConvertedType::type LogicalType::Impl::Timestamp::ToConvertedType(
- schema::DecimalMetadata* out_decimal_metadata) const {
- reset_decimal_metadata(out_decimal_metadata);
- if (adjusted_ || force_set_converted_type_) {
- if (unit_ == LogicalType::TimeUnit::MILLIS) {
- return ConvertedType::TIMESTAMP_MILLIS;
- } else if (unit_ == LogicalType::TimeUnit::MICROS) {
- return ConvertedType::TIMESTAMP_MICROS;
- }
- }
- return ConvertedType::NONE;
-}
-
-std::string LogicalType::Impl::Timestamp::ToString() const {
- std::stringstream type;
- type << "Timestamp(isAdjustedToUTC=" << std::boolalpha << adjusted_
- << ", timeUnit=" << time_unit_string(unit_)
- << ", is_from_converted_type=" << is_from_converted_type_
- << ", force_set_converted_type=" << force_set_converted_type_ << ")";
- return type.str();
-}
-
-std::string LogicalType::Impl::Timestamp::ToJSON() const {
- std::stringstream json;
- json << R"({"Type": "Timestamp", "isAdjustedToUTC": )" << std::boolalpha << adjusted_
- << R"(, "timeUnit": ")" << time_unit_string(unit_) << R"(")"
- << R"(, "is_from_converted_type": )" << is_from_converted_type_
- << R"(, "force_set_converted_type": )" << force_set_converted_type_ << R"(})";
- return json.str();
-}
-
-format::LogicalType LogicalType::Impl::Timestamp::ToThrift() const {
- format::LogicalType type;
- format::TimestampType timestamp_type;
- format::TimeUnit time_unit;
- DCHECK(unit_ != LogicalType::TimeUnit::UNKNOWN);
- if (unit_ == LogicalType::TimeUnit::MILLIS) {
- format::MilliSeconds millis;
- time_unit.__set_MILLIS(millis);
- } else if (unit_ == LogicalType::TimeUnit::MICROS) {
- format::MicroSeconds micros;
- time_unit.__set_MICROS(micros);
- } else if (unit_ == LogicalType::TimeUnit::NANOS) {
- format::NanoSeconds nanos;
- time_unit.__set_NANOS(nanos);
- }
- timestamp_type.__set_isAdjustedToUTC(adjusted_);
- timestamp_type.__set_unit(time_unit);
- type.__set_TIMESTAMP(timestamp_type);
- return type;
-}
-
-bool LogicalType::Impl::Timestamp::Equals(const LogicalType& other) const {
- bool eq = false;
- if (other.is_timestamp()) {
- const auto& other_timestamp = checked_cast<const TimestampLogicalType&>(other);
- eq = (adjusted_ == other_timestamp.is_adjusted_to_utc() &&
- unit_ == other_timestamp.time_unit());
- }
- return eq;
-}
-
-std::shared_ptr<const LogicalType> TimestampLogicalType::Make(
- bool is_adjusted_to_utc, LogicalType::TimeUnit::unit time_unit,
- bool is_from_converted_type, bool force_set_converted_type) {
- if (time_unit == LogicalType::TimeUnit::MILLIS ||
- time_unit == LogicalType::TimeUnit::MICROS ||
- time_unit == LogicalType::TimeUnit::NANOS) {
- auto* logical_type = new TimestampLogicalType();
- logical_type->impl_.reset(new LogicalType::Impl::Timestamp(
- is_adjusted_to_utc, time_unit, is_from_converted_type, force_set_converted_type));
- return std::shared_ptr<const LogicalType>(logical_type);
- } else {
- throw ParquetException(
- "TimeUnit must be one of MILLIS, MICROS, or NANOS for Timestamp logical type");
- }
-}
-
-bool TimestampLogicalType::is_adjusted_to_utc() const {
- return (dynamic_cast<const LogicalType::Impl::Timestamp&>(*impl_)).is_adjusted_to_utc();
-}
-
-LogicalType::TimeUnit::unit TimestampLogicalType::time_unit() const {
- return (dynamic_cast<const LogicalType::Impl::Timestamp&>(*impl_)).time_unit();
-}
-
-bool TimestampLogicalType::is_from_converted_type() const {
- return (dynamic_cast<const LogicalType::Impl::Timestamp&>(*impl_))
- .is_from_converted_type();
-}
-
-bool TimestampLogicalType::force_set_converted_type() const {
- return (dynamic_cast<const LogicalType::Impl::Timestamp&>(*impl_))
- .force_set_converted_type();
-}
-
-class LogicalType::Impl::Interval final : public LogicalType::Impl::SimpleCompatible,
- public LogicalType::Impl::TypeLengthApplicable {
- public:
- friend class IntervalLogicalType;
-
- OVERRIDE_TOSTRING(Interval)
- // TODO(tpboudreau): uncomment the following line to enable serialization after
- // parquet.thrift recognizes IntervalType as a LogicalType
- // OVERRIDE_TOTHRIFT(IntervalType, INTERVAL)
-
- private:
- Interval()
- : LogicalType::Impl(LogicalType::Type::INTERVAL, SortOrder::UNKNOWN),
- LogicalType::Impl::SimpleCompatible(ConvertedType::INTERVAL),
- LogicalType::Impl::TypeLengthApplicable(parquet::Type::FIXED_LEN_BYTE_ARRAY, 12) {
- }
-};
-
-GENERATE_MAKE(Interval)
-
-class LogicalType::Impl::Int final : public LogicalType::Impl::Compatible,
- public LogicalType::Impl::Applicable {
- public:
- friend class IntLogicalType;
-
- bool is_applicable(parquet::Type::type primitive_type,
- int32_t primitive_length = -1) const override;
- bool is_compatible(ConvertedType::type converted_type,
- schema::DecimalMetadata converted_decimal_metadata) const override;
- ConvertedType::type ToConvertedType(
- schema::DecimalMetadata* out_decimal_metadata) const override;
- std::string ToString() const override;
- std::string ToJSON() const override;
- format::LogicalType ToThrift() const override;
- bool Equals(const LogicalType& other) const override;
-
- int bit_width() const { return width_; }
- bool is_signed() const { return signed_; }
-
- private:
- Int(int w, bool s)
- : LogicalType::Impl(LogicalType::Type::INT,
- (s ? SortOrder::SIGNED : SortOrder::UNSIGNED)),
- width_(w),
- signed_(s) {}
- int width_ = 0;
- bool signed_ = false;
-};
-
-bool LogicalType::Impl::Int::is_applicable(parquet::Type::type primitive_type,
- int32_t primitive_length) const {
- return (primitive_type == parquet::Type::INT32 && width_ <= 32) ||
- (primitive_type == parquet::Type::INT64 && width_ == 64);
-}
-
-bool LogicalType::Impl::Int::is_compatible(
- ConvertedType::type converted_type,
- schema::DecimalMetadata converted_decimal_metadata) const {
- if (converted_decimal_metadata.isset) {
- return false;
- } else if (signed_ && width_ == 8) {
- return converted_type == ConvertedType::INT_8;
- } else if (signed_ && width_ == 16) {
- return converted_type == ConvertedType::INT_16;
- } else if (signed_ && width_ == 32) {
- return converted_type == ConvertedType::INT_32;
- } else if (signed_ && width_ == 64) {
- return converted_type == ConvertedType::INT_64;
- } else if (!signed_ && width_ == 8) {
- return converted_type == ConvertedType::UINT_8;
- } else if (!signed_ && width_ == 16) {
- return converted_type == ConvertedType::UINT_16;
- } else if (!signed_ && width_ == 32) {
- return converted_type == ConvertedType::UINT_32;
- } else if (!signed_ && width_ == 64) {
- return converted_type == ConvertedType::UINT_64;
- } else {
- return false;
- }
-}
-
-ConvertedType::type LogicalType::Impl::Int::ToConvertedType(
- schema::DecimalMetadata* out_decimal_metadata) const {
- reset_decimal_metadata(out_decimal_metadata);
- if (signed_) {
- switch (width_) {
- case 8:
- return ConvertedType::INT_8;
- case 16:
- return ConvertedType::INT_16;
- case 32:
- return ConvertedType::INT_32;
- case 64:
- return ConvertedType::INT_64;
- }
- } else { // unsigned
- switch (width_) {
- case 8:
- return ConvertedType::UINT_8;
- case 16:
- return ConvertedType::UINT_16;
- case 32:
- return ConvertedType::UINT_32;
- case 64:
- return ConvertedType::UINT_64;
- }
- }
- return ConvertedType::NONE;
-}
-
-std::string LogicalType::Impl::Int::ToString() const {
- std::stringstream type;
- type << "Int(bitWidth=" << width_ << ", isSigned=" << std::boolalpha << signed_ << ")";
- return type.str();
-}
-
-std::string LogicalType::Impl::Int::ToJSON() const {
- std::stringstream json;
- json << R"({"Type": "Int", "bitWidth": )" << width_ << R"(, "isSigned": )"
- << std::boolalpha << signed_ << "}";
- return json.str();
-}
-
-format::LogicalType LogicalType::Impl::Int::ToThrift() const {
- format::LogicalType type;
- format::IntType int_type;
- DCHECK(width_ == 64 || width_ == 32 || width_ == 16 || width_ == 8);
- int_type.__set_bitWidth(static_cast<int8_t>(width_));
- int_type.__set_isSigned(signed_);
- type.__set_INTEGER(int_type);
- return type;
-}
-
-bool LogicalType::Impl::Int::Equals(const LogicalType& other) const {
- bool eq = false;
- if (other.is_int()) {
- const auto& other_int = checked_cast<const IntLogicalType&>(other);
- eq = (width_ == other_int.bit_width() && signed_ == other_int.is_signed());
- }
- return eq;
-}
-
-std::shared_ptr<const LogicalType> IntLogicalType::Make(int bit_width, bool is_signed) {
- if (bit_width == 8 || bit_width == 16 || bit_width == 32 || bit_width == 64) {
- auto* logical_type = new IntLogicalType();
- logical_type->impl_.reset(new LogicalType::Impl::Int(bit_width, is_signed));
- return std::shared_ptr<const LogicalType>(logical_type);
- } else {
- throw ParquetException(
- "Bit width must be exactly 8, 16, 32, or 64 for Int logical type");
- }
-}
-
-int IntLogicalType::bit_width() const {
- return (dynamic_cast<const LogicalType::Impl::Int&>(*impl_)).bit_width();
-}
-
-bool IntLogicalType::is_signed() const {
- return (dynamic_cast<const LogicalType::Impl::Int&>(*impl_)).is_signed();
-}
-
-class LogicalType::Impl::Null final : public LogicalType::Impl::Incompatible,
- public LogicalType::Impl::UniversalApplicable {
- public:
- friend class NullLogicalType;
-
- OVERRIDE_TOSTRING(Null)
- OVERRIDE_TOTHRIFT(NullType, UNKNOWN)
-
- private:
- Null() : LogicalType::Impl(LogicalType::Type::NIL, SortOrder::UNKNOWN) {}
-};
-
-GENERATE_MAKE(Null)
-
-class LogicalType::Impl::JSON final : public LogicalType::Impl::SimpleCompatible,
- public LogicalType::Impl::SimpleApplicable {
- public:
- friend class JSONLogicalType;
-
- OVERRIDE_TOSTRING(JSON)
- OVERRIDE_TOTHRIFT(JsonType, JSON)
-
- private:
- JSON()
- : LogicalType::Impl(LogicalType::Type::JSON, SortOrder::UNSIGNED),
- LogicalType::Impl::SimpleCompatible(ConvertedType::JSON),
- LogicalType::Impl::SimpleApplicable(parquet::Type::BYTE_ARRAY) {}
-};
-
-GENERATE_MAKE(JSON)
-
-class LogicalType::Impl::BSON final : public LogicalType::Impl::SimpleCompatible,
- public LogicalType::Impl::SimpleApplicable {
- public:
- friend class BSONLogicalType;
-
- OVERRIDE_TOSTRING(BSON)
- OVERRIDE_TOTHRIFT(BsonType, BSON)
-
- private:
- BSON()
- : LogicalType::Impl(LogicalType::Type::BSON, SortOrder::UNSIGNED),
- LogicalType::Impl::SimpleCompatible(ConvertedType::BSON),
- LogicalType::Impl::SimpleApplicable(parquet::Type::BYTE_ARRAY) {}
-};
-
-GENERATE_MAKE(BSON)
-
-class LogicalType::Impl::UUID final : public LogicalType::Impl::Incompatible,
- public LogicalType::Impl::TypeLengthApplicable {
- public:
- friend class UUIDLogicalType;
-
- OVERRIDE_TOSTRING(UUID)
- OVERRIDE_TOTHRIFT(UUIDType, UUID)
-
- private:
- UUID()
- : LogicalType::Impl(LogicalType::Type::UUID, SortOrder::UNSIGNED),
- LogicalType::Impl::TypeLengthApplicable(parquet::Type::FIXED_LEN_BYTE_ARRAY, 16) {
- }
-};
-
-GENERATE_MAKE(UUID)
-
-class LogicalType::Impl::No final : public LogicalType::Impl::SimpleCompatible,
- public LogicalType::Impl::UniversalApplicable {
- public:
- friend class NoLogicalType;
-
- OVERRIDE_TOSTRING(None)
-
- private:
- No()
- : LogicalType::Impl(LogicalType::Type::NONE, SortOrder::UNKNOWN),
- LogicalType::Impl::SimpleCompatible(ConvertedType::NONE) {}
-};
-
-GENERATE_MAKE(No)
-
-class LogicalType::Impl::Undefined final : public LogicalType::Impl::SimpleCompatible,
- public LogicalType::Impl::UniversalApplicable {
- public:
- friend class UndefinedLogicalType;
-
- OVERRIDE_TOSTRING(Undefined)
-
- private:
- Undefined()
- : LogicalType::Impl(LogicalType::Type::UNDEFINED, SortOrder::UNKNOWN),
- LogicalType::Impl::SimpleCompatible(ConvertedType::UNDEFINED) {}
-};
-
-GENERATE_MAKE(Undefined)
-
-} // namespace parquet
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include <cmath>
+#include <cstdint>
+#include <memory>
+#include <sstream>
+#include <string>
+
+#include "arrow/util/checked_cast.h"
+#include "arrow/util/compression.h"
+#include "arrow/util/logging.h"
+
+#include "parquet/exception.h"
+#include "parquet/types.h"
+
+#include "generated/parquet_types.h"
+
+using arrow::internal::checked_cast;
+using arrow::util::Codec;
+
+namespace parquet {
+
+bool IsCodecSupported(Compression::type codec) {
+ switch (codec) {
+ case Compression::UNCOMPRESSED:
+ case Compression::SNAPPY:
+ case Compression::GZIP:
+ case Compression::BROTLI:
+ case Compression::ZSTD:
+ case Compression::LZ4:
+ case Compression::LZ4_HADOOP:
+ return true;
+ default:
+ return false;
+ }
+}
+
+std::unique_ptr<Codec> GetCodec(Compression::type codec) {
+ return GetCodec(codec, Codec::UseDefaultCompressionLevel());
+}
+
+std::unique_ptr<Codec> GetCodec(Compression::type codec, int compression_level) {
+ std::unique_ptr<Codec> result;
+ if (codec == Compression::LZO) {
+ throw ParquetException(
+ "While LZO compression is supported by the Parquet format in "
+ "general, it is currently not supported by the C++ implementation.");
+ }
+
+ if (!IsCodecSupported(codec)) {
+ std::stringstream ss;
+ ss << "Codec type " << Codec::GetCodecAsString(codec)
+ << " not supported in Parquet format";
+ throw ParquetException(ss.str());
+ }
+
+ PARQUET_ASSIGN_OR_THROW(result, Codec::Create(codec, compression_level));
+ return result;
+}
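+
+// Example (illustrative sketch, not part of the library source): a typical call
+// site. Unsupported codecs (including LZO) make GetCodec() throw
+// ParquetException, so callers handling arbitrary codec values may want to
+// check IsCodecSupported() first:
+//
+//   if (parquet::IsCodecSupported(Compression::ZSTD)) {
+//     std::unique_ptr<Codec> codec =
+//         parquet::GetCodec(Compression::ZSTD, /*compression_level=*/3);
+//   }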
+
+std::string FormatStatValue(Type::type parquet_type, ::arrow::util::string_view val) {
+ std::stringstream result;
+
+ const char* bytes = val.data();
+ switch (parquet_type) {
+ case Type::BOOLEAN:
+ result << reinterpret_cast<const bool*>(bytes)[0];
+ break;
+ case Type::INT32:
+ result << reinterpret_cast<const int32_t*>(bytes)[0];
+ break;
+ case Type::INT64:
+ result << reinterpret_cast<const int64_t*>(bytes)[0];
+ break;
+ case Type::DOUBLE:
+ result << reinterpret_cast<const double*>(bytes)[0];
+ break;
+ case Type::FLOAT:
+ result << reinterpret_cast<const float*>(bytes)[0];
+ break;
+ case Type::INT96: {
+ auto const i32_val = reinterpret_cast<const int32_t*>(bytes);
+ result << i32_val[0] << " " << i32_val[1] << " " << i32_val[2];
+ break;
+ }
+ case Type::BYTE_ARRAY: {
+ return std::string(val);
+ }
+ case Type::FIXED_LEN_BYTE_ARRAY: {
+ return std::string(val);
+ }
+ case Type::UNDEFINED:
+ default:
+ break;
+ }
+ return result.str();
+}
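+
+// Example (illustrative sketch): FormatStatValue() expects the raw bytes of the
+// value viewed as a string_view, not a textual representation:
+//
+//   int32_t v = 42;
+//   std::string s = FormatStatValue(
+//       Type::INT32,
+//       ::arrow::util::string_view(reinterpret_cast<const char*>(&v), sizeof(v)));
+//   // s == "42"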
+
+std::string EncodingToString(Encoding::type t) {
+ switch (t) {
+ case Encoding::PLAIN:
+ return "PLAIN";
+ case Encoding::PLAIN_DICTIONARY:
+ return "PLAIN_DICTIONARY";
+ case Encoding::RLE:
+ return "RLE";
+ case Encoding::BIT_PACKED:
+ return "BIT_PACKED";
+ case Encoding::DELTA_BINARY_PACKED:
+ return "DELTA_BINARY_PACKED";
+ case Encoding::DELTA_LENGTH_BYTE_ARRAY:
+ return "DELTA_LENGTH_BYTE_ARRAY";
+ case Encoding::DELTA_BYTE_ARRAY:
+ return "DELTA_BYTE_ARRAY";
+ case Encoding::RLE_DICTIONARY:
+ return "RLE_DICTIONARY";
+ case Encoding::BYTE_STREAM_SPLIT:
+ return "BYTE_STREAM_SPLIT";
+ default:
+ return "UNKNOWN";
+ }
+}
+
+std::string TypeToString(Type::type t) {
+ switch (t) {
+ case Type::BOOLEAN:
+ return "BOOLEAN";
+ case Type::INT32:
+ return "INT32";
+ case Type::INT64:
+ return "INT64";
+ case Type::INT96:
+ return "INT96";
+ case Type::FLOAT:
+ return "FLOAT";
+ case Type::DOUBLE:
+ return "DOUBLE";
+ case Type::BYTE_ARRAY:
+ return "BYTE_ARRAY";
+ case Type::FIXED_LEN_BYTE_ARRAY:
+ return "FIXED_LEN_BYTE_ARRAY";
+ case Type::UNDEFINED:
+ default:
+ return "UNKNOWN";
+ }
+}
+
+std::string ConvertedTypeToString(ConvertedType::type t) {
+ switch (t) {
+ case ConvertedType::NONE:
+ return "NONE";
+ case ConvertedType::UTF8:
+ return "UTF8";
+ case ConvertedType::MAP:
+ return "MAP";
+ case ConvertedType::MAP_KEY_VALUE:
+ return "MAP_KEY_VALUE";
+ case ConvertedType::LIST:
+ return "LIST";
+ case ConvertedType::ENUM:
+ return "ENUM";
+ case ConvertedType::DECIMAL:
+ return "DECIMAL";
+ case ConvertedType::DATE:
+ return "DATE";
+ case ConvertedType::TIME_MILLIS:
+ return "TIME_MILLIS";
+ case ConvertedType::TIME_MICROS:
+ return "TIME_MICROS";
+ case ConvertedType::TIMESTAMP_MILLIS:
+ return "TIMESTAMP_MILLIS";
+ case ConvertedType::TIMESTAMP_MICROS:
+ return "TIMESTAMP_MICROS";
+ case ConvertedType::UINT_8:
+ return "UINT_8";
+ case ConvertedType::UINT_16:
+ return "UINT_16";
+ case ConvertedType::UINT_32:
+ return "UINT_32";
+ case ConvertedType::UINT_64:
+ return "UINT_64";
+ case ConvertedType::INT_8:
+ return "INT_8";
+ case ConvertedType::INT_16:
+ return "INT_16";
+ case ConvertedType::INT_32:
+ return "INT_32";
+ case ConvertedType::INT_64:
+ return "INT_64";
+ case ConvertedType::JSON:
+ return "JSON";
+ case ConvertedType::BSON:
+ return "BSON";
+ case ConvertedType::INTERVAL:
+ return "INTERVAL";
+ case ConvertedType::UNDEFINED:
+ default:
+ return "UNKNOWN";
+ }
+}
+
+int GetTypeByteSize(Type::type parquet_type) {
+ switch (parquet_type) {
+ case Type::BOOLEAN:
+ return type_traits<BooleanType::type_num>::value_byte_size;
+ case Type::INT32:
+ return type_traits<Int32Type::type_num>::value_byte_size;
+ case Type::INT64:
+ return type_traits<Int64Type::type_num>::value_byte_size;
+ case Type::INT96:
+ return type_traits<Int96Type::type_num>::value_byte_size;
+ case Type::DOUBLE:
+ return type_traits<DoubleType::type_num>::value_byte_size;
+ case Type::FLOAT:
+ return type_traits<FloatType::type_num>::value_byte_size;
+ case Type::BYTE_ARRAY:
+ return type_traits<ByteArrayType::type_num>::value_byte_size;
+ case Type::FIXED_LEN_BYTE_ARRAY:
+ return type_traits<FLBAType::type_num>::value_byte_size;
+ case Type::UNDEFINED:
+ default:
+ return 0;
+ }
+ return 0;
+}
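+
+// Example (illustrative sketch): fixed-width physical types report their
+// plain-encoded width, e.g. GetTypeByteSize(Type::INT32) == 4 and
+// GetTypeByteSize(Type::INT96) == 12, while Type::UNDEFINED reports 0.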
+
+// Returns the default sort order for a Parquet physical type
+SortOrder::type DefaultSortOrder(Type::type primitive) {
+ switch (primitive) {
+ case Type::BOOLEAN:
+ case Type::INT32:
+ case Type::INT64:
+ case Type::FLOAT:
+ case Type::DOUBLE:
+ return SortOrder::SIGNED;
+ case Type::BYTE_ARRAY:
+ case Type::FIXED_LEN_BYTE_ARRAY:
+ return SortOrder::UNSIGNED;
+ case Type::INT96:
+ case Type::UNDEFINED:
+ return SortOrder::UNKNOWN;
+ }
+ return SortOrder::UNKNOWN;
+}
+
+// Returns the sort order for a Parquet type, using the converted (logical)
+// type when one is set and falling back to the physical type's default otherwise
+SortOrder::type GetSortOrder(ConvertedType::type converted, Type::type primitive) {
+ if (converted == ConvertedType::NONE) return DefaultSortOrder(primitive);
+ switch (converted) {
+ case ConvertedType::INT_8:
+ case ConvertedType::INT_16:
+ case ConvertedType::INT_32:
+ case ConvertedType::INT_64:
+ case ConvertedType::DATE:
+ case ConvertedType::TIME_MICROS:
+ case ConvertedType::TIME_MILLIS:
+ case ConvertedType::TIMESTAMP_MICROS:
+ case ConvertedType::TIMESTAMP_MILLIS:
+ return SortOrder::SIGNED;
+ case ConvertedType::UINT_8:
+ case ConvertedType::UINT_16:
+ case ConvertedType::UINT_32:
+ case ConvertedType::UINT_64:
+ case ConvertedType::ENUM:
+ case ConvertedType::UTF8:
+ case ConvertedType::BSON:
+ case ConvertedType::JSON:
+ return SortOrder::UNSIGNED;
+ case ConvertedType::DECIMAL:
+ case ConvertedType::LIST:
+ case ConvertedType::MAP:
+ case ConvertedType::MAP_KEY_VALUE:
+ case ConvertedType::INTERVAL:
+ case ConvertedType::NONE: // required instead of default
+ case ConvertedType::NA: // required instead of default
+ case ConvertedType::UNDEFINED:
+ return SortOrder::UNKNOWN;
+ }
+ return SortOrder::UNKNOWN;
+}
+
+SortOrder::type GetSortOrder(const std::shared_ptr<const LogicalType>& logical_type,
+ Type::type primitive) {
+ SortOrder::type o = SortOrder::UNKNOWN;
+ if (logical_type && logical_type->is_valid()) {
+ o = (logical_type->is_none() ? DefaultSortOrder(primitive)
+ : logical_type->sort_order());
+ }
+ return o;
+}
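+
+// Example (illustrative sketch): how the overloads above resolve.
+//
+//   GetSortOrder(ConvertedType::UINT_32, Type::INT32);      // SortOrder::UNSIGNED
+//   GetSortOrder(ConvertedType::NONE, Type::INT32);         // DefaultSortOrder
+//                                                           // -> SortOrder::SIGNED
+//   GetSortOrder(LogicalType::String(), Type::BYTE_ARRAY);  // SortOrder::UNSIGNED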
+
+ColumnOrder ColumnOrder::undefined_ = ColumnOrder(ColumnOrder::UNDEFINED);
+ColumnOrder ColumnOrder::type_defined_ = ColumnOrder(ColumnOrder::TYPE_DEFINED_ORDER);
+
+// Static methods for LogicalType class
+
+std::shared_ptr<const LogicalType> LogicalType::FromConvertedType(
+ const ConvertedType::type converted_type,
+ const schema::DecimalMetadata converted_decimal_metadata) {
+ switch (converted_type) {
+ case ConvertedType::UTF8:
+ return StringLogicalType::Make();
+ case ConvertedType::MAP_KEY_VALUE:
+ case ConvertedType::MAP:
+ return MapLogicalType::Make();
+ case ConvertedType::LIST:
+ return ListLogicalType::Make();
+ case ConvertedType::ENUM:
+ return EnumLogicalType::Make();
+ case ConvertedType::DECIMAL:
+ return DecimalLogicalType::Make(converted_decimal_metadata.precision,
+ converted_decimal_metadata.scale);
+ case ConvertedType::DATE:
+ return DateLogicalType::Make();
+ case ConvertedType::TIME_MILLIS:
+ return TimeLogicalType::Make(true, LogicalType::TimeUnit::MILLIS);
+ case ConvertedType::TIME_MICROS:
+ return TimeLogicalType::Make(true, LogicalType::TimeUnit::MICROS);
+ case ConvertedType::TIMESTAMP_MILLIS:
+ return TimestampLogicalType::Make(true, LogicalType::TimeUnit::MILLIS,
+ /*is_from_converted_type=*/true,
+ /*force_set_converted_type=*/false);
+ case ConvertedType::TIMESTAMP_MICROS:
+ return TimestampLogicalType::Make(true, LogicalType::TimeUnit::MICROS,
+ /*is_from_converted_type=*/true,
+ /*force_set_converted_type=*/false);
+ case ConvertedType::INTERVAL:
+ return IntervalLogicalType::Make();
+ case ConvertedType::INT_8:
+ return IntLogicalType::Make(8, true);
+ case ConvertedType::INT_16:
+ return IntLogicalType::Make(16, true);
+ case ConvertedType::INT_32:
+ return IntLogicalType::Make(32, true);
+ case ConvertedType::INT_64:
+ return IntLogicalType::Make(64, true);
+ case ConvertedType::UINT_8:
+ return IntLogicalType::Make(8, false);
+ case ConvertedType::UINT_16:
+ return IntLogicalType::Make(16, false);
+ case ConvertedType::UINT_32:
+ return IntLogicalType::Make(32, false);
+ case ConvertedType::UINT_64:
+ return IntLogicalType::Make(64, false);
+ case ConvertedType::JSON:
+ return JSONLogicalType::Make();
+ case ConvertedType::BSON:
+ return BSONLogicalType::Make();
+ case ConvertedType::NA:
+ return NullLogicalType::Make();
+ case ConvertedType::NONE:
+ return NoLogicalType::Make();
+ case ConvertedType::UNDEFINED:
+ return UndefinedLogicalType::Make();
+ }
+ return UndefinedLogicalType::Make();
+}
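+
+// Example (illustrative sketch): recovering a logical type from legacy
+// converted-type metadata; decimal parameters travel in DecimalMetadata:
+//
+//   schema::DecimalMetadata md;
+//   md.isset = true;
+//   md.precision = 10;
+//   md.scale = 2;
+//   auto lt = LogicalType::FromConvertedType(ConvertedType::DECIMAL, md);
+//   // lt->ToString() == "Decimal(precision=10, scale=2)"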
+
+std::shared_ptr<const LogicalType> LogicalType::FromThrift(
+ const format::LogicalType& type) {
+ if (type.__isset.STRING) {
+ return StringLogicalType::Make();
+ } else if (type.__isset.MAP) {
+ return MapLogicalType::Make();
+ } else if (type.__isset.LIST) {
+ return ListLogicalType::Make();
+ } else if (type.__isset.ENUM) {
+ return EnumLogicalType::Make();
+ } else if (type.__isset.DECIMAL) {
+ return DecimalLogicalType::Make(type.DECIMAL.precision, type.DECIMAL.scale);
+ } else if (type.__isset.DATE) {
+ return DateLogicalType::Make();
+ } else if (type.__isset.TIME) {
+ LogicalType::TimeUnit::unit unit;
+ if (type.TIME.unit.__isset.MILLIS) {
+ unit = LogicalType::TimeUnit::MILLIS;
+ } else if (type.TIME.unit.__isset.MICROS) {
+ unit = LogicalType::TimeUnit::MICROS;
+ } else if (type.TIME.unit.__isset.NANOS) {
+ unit = LogicalType::TimeUnit::NANOS;
+ } else {
+ unit = LogicalType::TimeUnit::UNKNOWN;
+ }
+ return TimeLogicalType::Make(type.TIME.isAdjustedToUTC, unit);
+ } else if (type.__isset.TIMESTAMP) {
+ LogicalType::TimeUnit::unit unit;
+ if (type.TIMESTAMP.unit.__isset.MILLIS) {
+ unit = LogicalType::TimeUnit::MILLIS;
+ } else if (type.TIMESTAMP.unit.__isset.MICROS) {
+ unit = LogicalType::TimeUnit::MICROS;
+ } else if (type.TIMESTAMP.unit.__isset.NANOS) {
+ unit = LogicalType::TimeUnit::NANOS;
+ } else {
+ unit = LogicalType::TimeUnit::UNKNOWN;
+ }
+ return TimestampLogicalType::Make(type.TIMESTAMP.isAdjustedToUTC, unit);
+ // TODO(tpboudreau): activate the commented code after parquet.thrift
+ // recognizes IntervalType as a LogicalType
+ //} else if (type.__isset.INTERVAL) {
+ // return IntervalLogicalType::Make();
+ } else if (type.__isset.INTEGER) {
+ return IntLogicalType::Make(static_cast<int>(type.INTEGER.bitWidth),
+ type.INTEGER.isSigned);
+ } else if (type.__isset.UNKNOWN) {
+ return NullLogicalType::Make();
+ } else if (type.__isset.JSON) {
+ return JSONLogicalType::Make();
+ } else if (type.__isset.BSON) {
+ return BSONLogicalType::Make();
+ } else if (type.__isset.UUID) {
+ return UUIDLogicalType::Make();
+ } else {
+ throw ParquetException("Metadata contains Thrift LogicalType that is not recognized");
+ }
+}
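+
+// Example (illustrative sketch): FromThrift() inverts ToThrift() for every
+// serializable logical type:
+//
+//   format::LogicalType t = LogicalType::Date()->ToThrift();
+//   auto lt = LogicalType::FromThrift(t);
+//   // lt->is_date() == true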
+
+std::shared_ptr<const LogicalType> LogicalType::String() {
+ return StringLogicalType::Make();
+}
+
+std::shared_ptr<const LogicalType> LogicalType::Map() { return MapLogicalType::Make(); }
+
+std::shared_ptr<const LogicalType> LogicalType::List() { return ListLogicalType::Make(); }
+
+std::shared_ptr<const LogicalType> LogicalType::Enum() { return EnumLogicalType::Make(); }
+
+std::shared_ptr<const LogicalType> LogicalType::Decimal(int32_t precision,
+ int32_t scale) {
+ return DecimalLogicalType::Make(precision, scale);
+}
+
+std::shared_ptr<const LogicalType> LogicalType::Date() { return DateLogicalType::Make(); }
+
+std::shared_ptr<const LogicalType> LogicalType::Time(
+ bool is_adjusted_to_utc, LogicalType::TimeUnit::unit time_unit) {
+ DCHECK(time_unit != LogicalType::TimeUnit::UNKNOWN);
+ return TimeLogicalType::Make(is_adjusted_to_utc, time_unit);
+}
+
+std::shared_ptr<const LogicalType> LogicalType::Timestamp(
+ bool is_adjusted_to_utc, LogicalType::TimeUnit::unit time_unit,
+ bool is_from_converted_type, bool force_set_converted_type) {
+ DCHECK(time_unit != LogicalType::TimeUnit::UNKNOWN);
+ return TimestampLogicalType::Make(is_adjusted_to_utc, time_unit, is_from_converted_type,
+ force_set_converted_type);
+}
+
+std::shared_ptr<const LogicalType> LogicalType::Interval() {
+ return IntervalLogicalType::Make();
+}
+
+std::shared_ptr<const LogicalType> LogicalType::Int(int bit_width, bool is_signed) {
+ DCHECK(bit_width == 64 || bit_width == 32 || bit_width == 16 || bit_width == 8);
+ return IntLogicalType::Make(bit_width, is_signed);
+}
+
+std::shared_ptr<const LogicalType> LogicalType::Null() { return NullLogicalType::Make(); }
+
+std::shared_ptr<const LogicalType> LogicalType::JSON() { return JSONLogicalType::Make(); }
+
+std::shared_ptr<const LogicalType> LogicalType::BSON() { return BSONLogicalType::Make(); }
+
+std::shared_ptr<const LogicalType> LogicalType::UUID() { return UUIDLogicalType::Make(); }
+
+std::shared_ptr<const LogicalType> LogicalType::None() { return NoLogicalType::Make(); }
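+
+// Example (illustrative sketch): the factory shortcuts above are the usual way
+// to build logical types when declaring a schema:
+//
+//   auto decimal_col = LogicalType::Decimal(/*precision=*/10, /*scale=*/2);
+//   auto int_col = LogicalType::Int(/*bit_width=*/32, /*is_signed=*/true);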
+
+/*
+ * The logical type implementation classes are built in four layers:
+ *   (1) the base layer, which establishes the interface and provides generally
+ *       reusable implementations for the ToJSON() and Equals() methods;
+ *   (2) an intermediate derived layer for the "compatibility" methods, which
+ *       provides implementations for is_compatible() and ToConvertedType();
+ *   (3) another intermediate layer for the "applicability" methods, which
+ *       provides several implementations for the is_applicable() method; and
+ *   (4) the final derived classes, one for each logical type, which supply
+ *       implementations for those methods that remain virtual (usually just
+ *       ToString() and ToThrift()) or otherwise need to be overridden.
+ */
+
+// LogicalTypeImpl base class
+
+class LogicalType::Impl {
+ public:
+ virtual bool is_applicable(parquet::Type::type primitive_type,
+ int32_t primitive_length = -1) const = 0;
+
+ virtual bool is_compatible(ConvertedType::type converted_type,
+ schema::DecimalMetadata converted_decimal_metadata = {
+ false, -1, -1}) const = 0;
+
+ virtual ConvertedType::type ToConvertedType(
+ schema::DecimalMetadata* out_decimal_metadata) const = 0;
+
+ virtual std::string ToString() const = 0;
+
+ virtual bool is_serialized() const {
+ return !(type_ == LogicalType::Type::NONE || type_ == LogicalType::Type::UNDEFINED);
+ }
+
+ virtual std::string ToJSON() const {
+ std::stringstream json;
+ json << R"({"Type": ")" << ToString() << R"("})";
+ return json.str();
+ }
+
+ virtual format::LogicalType ToThrift() const {
+ // logical types inheriting this method should never be serialized
+ std::stringstream ss;
+ ss << "Logical type " << ToString() << " should not be serialized";
+ throw ParquetException(ss.str());
+ }
+
+ virtual bool Equals(const LogicalType& other) const { return other.type() == type_; }
+
+ LogicalType::Type::type type() const { return type_; }
+
+ SortOrder::type sort_order() const { return order_; }
+
+ Impl(const Impl&) = delete;
+ Impl& operator=(const Impl&) = delete;
+ virtual ~Impl() noexcept {}
+
+ class Compatible;
+ class SimpleCompatible;
+ class Incompatible;
+
+ class Applicable;
+ class SimpleApplicable;
+ class TypeLengthApplicable;
+ class UniversalApplicable;
+ class Inapplicable;
+
+ class String;
+ class Map;
+ class List;
+ class Enum;
+ class Decimal;
+ class Date;
+ class Time;
+ class Timestamp;
+ class Interval;
+ class Int;
+ class Null;
+ class JSON;
+ class BSON;
+ class UUID;
+ class No;
+ class Undefined;
+
+ protected:
+ Impl(LogicalType::Type::type t, SortOrder::type o) : type_(t), order_(o) {}
+ Impl() = default;
+
+ private:
+ LogicalType::Type::type type_ = LogicalType::Type::UNDEFINED;
+ SortOrder::type order_ = SortOrder::UNKNOWN;
+};
+
+// Special methods for public LogicalType class
+
+LogicalType::LogicalType() = default;
+LogicalType::~LogicalType() noexcept = default;
+
+// Delegating methods for public LogicalType class
+
+bool LogicalType::is_applicable(parquet::Type::type primitive_type,
+ int32_t primitive_length) const {
+ return impl_->is_applicable(primitive_type, primitive_length);
+}
+
+bool LogicalType::is_compatible(
+ ConvertedType::type converted_type,
+ schema::DecimalMetadata converted_decimal_metadata) const {
+ return impl_->is_compatible(converted_type, converted_decimal_metadata);
+}
+
+ConvertedType::type LogicalType::ToConvertedType(
+ schema::DecimalMetadata* out_decimal_metadata) const {
+ return impl_->ToConvertedType(out_decimal_metadata);
+}
+
+std::string LogicalType::ToString() const { return impl_->ToString(); }
+
+std::string LogicalType::ToJSON() const { return impl_->ToJSON(); }
+
+format::LogicalType LogicalType::ToThrift() const { return impl_->ToThrift(); }
+
+bool LogicalType::Equals(const LogicalType& other) const { return impl_->Equals(other); }
+
+LogicalType::Type::type LogicalType::type() const { return impl_->type(); }
+
+SortOrder::type LogicalType::sort_order() const { return impl_->sort_order(); }
+
+// Type checks for public LogicalType class
+
+bool LogicalType::is_string() const { return impl_->type() == LogicalType::Type::STRING; }
+bool LogicalType::is_map() const { return impl_->type() == LogicalType::Type::MAP; }
+bool LogicalType::is_list() const { return impl_->type() == LogicalType::Type::LIST; }
+bool LogicalType::is_enum() const { return impl_->type() == LogicalType::Type::ENUM; }
+bool LogicalType::is_decimal() const {
+ return impl_->type() == LogicalType::Type::DECIMAL;
+}
+bool LogicalType::is_date() const { return impl_->type() == LogicalType::Type::DATE; }
+bool LogicalType::is_time() const { return impl_->type() == LogicalType::Type::TIME; }
+bool LogicalType::is_timestamp() const {
+ return impl_->type() == LogicalType::Type::TIMESTAMP;
+}
+bool LogicalType::is_interval() const {
+ return impl_->type() == LogicalType::Type::INTERVAL;
+}
+bool LogicalType::is_int() const { return impl_->type() == LogicalType::Type::INT; }
+bool LogicalType::is_null() const { return impl_->type() == LogicalType::Type::NIL; }
+bool LogicalType::is_JSON() const { return impl_->type() == LogicalType::Type::JSON; }
+bool LogicalType::is_BSON() const { return impl_->type() == LogicalType::Type::BSON; }
+bool LogicalType::is_UUID() const { return impl_->type() == LogicalType::Type::UUID; }
+bool LogicalType::is_none() const { return impl_->type() == LogicalType::Type::NONE; }
+bool LogicalType::is_valid() const {
+ return impl_->type() != LogicalType::Type::UNDEFINED;
+}
+bool LogicalType::is_invalid() const { return !is_valid(); }
+bool LogicalType::is_nested() const {
+ return (impl_->type() == LogicalType::Type::LIST) ||
+ (impl_->type() == LogicalType::Type::MAP);
+}
+bool LogicalType::is_nonnested() const { return !is_nested(); }
+bool LogicalType::is_serialized() const { return impl_->is_serialized(); }
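+
+// Example (illustrative sketch): the predicates above let callers branch on a
+// type-erased LogicalType without downcasting:
+//
+//   auto lt = LogicalType::Timestamp(/*is_adjusted_to_utc=*/true,
+//                                    LogicalType::TimeUnit::MICROS,
+//                                    /*is_from_converted_type=*/false,
+//                                    /*force_set_converted_type=*/false);
+//   // lt->is_timestamp() == true, lt->is_nested() == false,
+//   // lt->sort_order() == SortOrder::SIGNED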
+
+// LogicalTypeImpl intermediate "compatibility" classes
+
+class LogicalType::Impl::Compatible : public virtual LogicalType::Impl {
+ protected:
+ Compatible() = default;
+};
+
+#define set_decimal_metadata(m___, i___, p___, s___) \
+ { \
+ if (m___) { \
+ (m___)->isset = (i___); \
+ (m___)->scale = (s___); \
+ (m___)->precision = (p___); \
+ } \
+ }
+
+#define reset_decimal_metadata(m___) \
+ { set_decimal_metadata(m___, false, -1, -1); }
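+
+// Example (illustrative sketch): these macros write through an optional output
+// pointer, so passing nullptr is a safe no-op:
+//
+//   schema::DecimalMetadata md;
+//   set_decimal_metadata(&md, true, /*precision=*/10, /*scale=*/2);
+//   reset_decimal_metadata(&md);  // md back to {isset=false, -1, -1}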
+
+// For logical types that always translate to the same converted type
+class LogicalType::Impl::SimpleCompatible : public virtual LogicalType::Impl::Compatible {
+ public:
+ bool is_compatible(ConvertedType::type converted_type,
+ schema::DecimalMetadata converted_decimal_metadata) const override {
+ return (converted_type == converted_type_) && !converted_decimal_metadata.isset;
+ }
+
+ ConvertedType::type ToConvertedType(
+ schema::DecimalMetadata* out_decimal_metadata) const override {
+ reset_decimal_metadata(out_decimal_metadata);
+ return converted_type_;
+ }
+
+ protected:
+ explicit SimpleCompatible(ConvertedType::type c) : converted_type_(c) {}
+
+ private:
+ ConvertedType::type converted_type_ = ConvertedType::NA;
+};
+
+// For logical types that have no corresponding converted type
+class LogicalType::Impl::Incompatible : public virtual LogicalType::Impl {
+ public:
+ bool is_compatible(ConvertedType::type converted_type,
+ schema::DecimalMetadata converted_decimal_metadata) const override {
+ return (converted_type == ConvertedType::NONE ||
+ converted_type == ConvertedType::NA) &&
+ !converted_decimal_metadata.isset;
+ }
+
+ ConvertedType::type ToConvertedType(
+ schema::DecimalMetadata* out_decimal_metadata) const override {
+ reset_decimal_metadata(out_decimal_metadata);
+ return ConvertedType::NONE;
+ }
+
+ protected:
+ Incompatible() = default;
+};
+
+// LogicalTypeImpl intermediate "applicability" classes
+
+class LogicalType::Impl::Applicable : public virtual LogicalType::Impl {
+ protected:
+ Applicable() = default;
+};
+
+// For logical types that can apply only to a single
+// physical type
+class LogicalType::Impl::SimpleApplicable : public virtual LogicalType::Impl::Applicable {
+ public:
+ bool is_applicable(parquet::Type::type primitive_type,
+ int32_t primitive_length = -1) const override {
+ return primitive_type == type_;
+ }
+
+ protected:
+ explicit SimpleApplicable(parquet::Type::type t) : type_(t) {}
+
+ private:
+ parquet::Type::type type_;
+};
+
+// For logical types that can apply only to a particular
+// physical type and physical length combination
+class LogicalType::Impl::TypeLengthApplicable
+ : public virtual LogicalType::Impl::Applicable {
+ public:
+ bool is_applicable(parquet::Type::type primitive_type,
+ int32_t primitive_length = -1) const override {
+ return primitive_type == type_ && primitive_length == length_;
+ }
+
+ protected:
+ TypeLengthApplicable(parquet::Type::type t, int32_t l) : type_(t), length_(l) {}
+
+ private:
+ parquet::Type::type type_;
+ int32_t length_;
+};
+
+// For logical types that can apply to any physical type
+class LogicalType::Impl::UniversalApplicable
+ : public virtual LogicalType::Impl::Applicable {
+ public:
+ bool is_applicable(parquet::Type::type primitive_type,
+ int32_t primitive_length = -1) const override {
+ return true;
+ }
+
+ protected:
+ UniversalApplicable() = default;
+};
+
+// For logical types that can never apply to any primitive
+// physical type
+class LogicalType::Impl::Inapplicable : public virtual LogicalType::Impl {
+ public:
+ bool is_applicable(parquet::Type::type primitive_type,
+ int32_t primitive_length = -1) const override {
+ return false;
+ }
+
+ protected:
+ Inapplicable() = default;
+};
+
+// LogicalType implementation final classes
+
+#define OVERRIDE_TOSTRING(n___) \
+ std::string ToString() const override { return #n___; }
+
+#define OVERRIDE_TOTHRIFT(t___, s___) \
+ format::LogicalType ToThrift() const override { \
+ format::LogicalType type; \
+ format::t___ subtype; \
+ type.__set_##s___(subtype); \
+ return type; \
+ }
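+
+// Example (illustrative sketch): OVERRIDE_TOTHRIFT(StringType, STRING) expands to
+//
+//   format::LogicalType ToThrift() const override {
+//     format::LogicalType type;
+//     format::StringType subtype;
+//     type.__set_STRING(subtype);
+//     return type;
+//   }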
+
+class LogicalType::Impl::String final : public LogicalType::Impl::SimpleCompatible,
+ public LogicalType::Impl::SimpleApplicable {
+ public:
+ friend class StringLogicalType;
+
+ OVERRIDE_TOSTRING(String)
+ OVERRIDE_TOTHRIFT(StringType, STRING)
+
+ private:
+ String()
+ : LogicalType::Impl(LogicalType::Type::STRING, SortOrder::UNSIGNED),
+ LogicalType::Impl::SimpleCompatible(ConvertedType::UTF8),
+ LogicalType::Impl::SimpleApplicable(parquet::Type::BYTE_ARRAY) {}
+};
+
+// Each public logical type class's Make() creation method instantiates a corresponding
+// LogicalType::Impl::* object and installs that implementation in the logical type
+// it returns.
+
+#define GENERATE_MAKE(a___) \
+ std::shared_ptr<const LogicalType> a___##LogicalType::Make() { \
+ auto* logical_type = new a___##LogicalType(); \
+ logical_type->impl_.reset(new LogicalType::Impl::a___()); \
+ return std::shared_ptr<const LogicalType>(logical_type); \
+ }
+
+GENERATE_MAKE(String)
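+
+// Example (illustrative sketch): GENERATE_MAKE(String) expands to
+//
+//   std::shared_ptr<const LogicalType> StringLogicalType::Make() {
+//     auto* logical_type = new StringLogicalType();
+//     logical_type->impl_.reset(new LogicalType::Impl::String());
+//     return std::shared_ptr<const LogicalType>(logical_type);
+//   }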
+
+class LogicalType::Impl::Map final : public LogicalType::Impl::SimpleCompatible,
+ public LogicalType::Impl::Inapplicable {
+ public:
+ friend class MapLogicalType;
+
+ bool is_compatible(ConvertedType::type converted_type,
+ schema::DecimalMetadata converted_decimal_metadata) const override {
+ return (converted_type == ConvertedType::MAP ||
+ converted_type == ConvertedType::MAP_KEY_VALUE) &&
+ !converted_decimal_metadata.isset;
+ }
+
+ OVERRIDE_TOSTRING(Map)
+ OVERRIDE_TOTHRIFT(MapType, MAP)
+
+ private:
+ Map()
+ : LogicalType::Impl(LogicalType::Type::MAP, SortOrder::UNKNOWN),
+ LogicalType::Impl::SimpleCompatible(ConvertedType::MAP) {}
+};
+
+GENERATE_MAKE(Map)
+
+class LogicalType::Impl::List final : public LogicalType::Impl::SimpleCompatible,
+ public LogicalType::Impl::Inapplicable {
+ public:
+ friend class ListLogicalType;
+
+ OVERRIDE_TOSTRING(List)
+ OVERRIDE_TOTHRIFT(ListType, LIST)
+
+ private:
+ List()
+ : LogicalType::Impl(LogicalType::Type::LIST, SortOrder::UNKNOWN),
+ LogicalType::Impl::SimpleCompatible(ConvertedType::LIST) {}
+};
+
+GENERATE_MAKE(List)
+
+class LogicalType::Impl::Enum final : public LogicalType::Impl::SimpleCompatible,
+ public LogicalType::Impl::SimpleApplicable {
+ public:
+ friend class EnumLogicalType;
+
+ OVERRIDE_TOSTRING(Enum)
+ OVERRIDE_TOTHRIFT(EnumType, ENUM)
+
+ private:
+ Enum()
+ : LogicalType::Impl(LogicalType::Type::ENUM, SortOrder::UNSIGNED),
+ LogicalType::Impl::SimpleCompatible(ConvertedType::ENUM),
+ LogicalType::Impl::SimpleApplicable(parquet::Type::BYTE_ARRAY) {}
+};
+
+GENERATE_MAKE(Enum)
+
+// The parameterized logical types (currently Decimal, Time, Timestamp, and Int)
+// generally can't reuse the simple method implementations available in the base and
+// intermediate classes and must (re)implement them all
+
+class LogicalType::Impl::Decimal final : public LogicalType::Impl::Compatible,
+ public LogicalType::Impl::Applicable {
+ public:
+ friend class DecimalLogicalType;
+
+ bool is_applicable(parquet::Type::type primitive_type,
+ int32_t primitive_length = -1) const override;
+ bool is_compatible(ConvertedType::type converted_type,
+ schema::DecimalMetadata converted_decimal_metadata) const override;
+ ConvertedType::type ToConvertedType(
+ schema::DecimalMetadata* out_decimal_metadata) const override;
+ std::string ToString() const override;
+ std::string ToJSON() const override;
+ format::LogicalType ToThrift() const override;
+ bool Equals(const LogicalType& other) const override;
+
+ int32_t precision() const { return precision_; }
+ int32_t scale() const { return scale_; }
+
+ private:
+ Decimal(int32_t p, int32_t s)
+ : LogicalType::Impl(LogicalType::Type::DECIMAL, SortOrder::SIGNED),
+ precision_(p),
+ scale_(s) {}
+ int32_t precision_ = -1;
+ int32_t scale_ = -1;
+};
+
+bool LogicalType::Impl::Decimal::is_applicable(parquet::Type::type primitive_type,
+ int32_t primitive_length) const {
+ bool ok = false;
+ switch (primitive_type) {
+ case parquet::Type::INT32: {
+ ok = (1 <= precision_) && (precision_ <= 9);
+ } break;
+ case parquet::Type::INT64: {
+ ok = (1 <= precision_) && (precision_ <= 18);
+ if (precision_ < 10) {
+ // FIXME(tpb): warn that INT32 could be used
+ }
+ } break;
+ case parquet::Type::FIXED_LEN_BYTE_ARRAY: {
+ ok = precision_ <= static_cast<int32_t>(std::floor(
+ std::log10(std::pow(2.0, (8.0 * primitive_length) - 1.0))));
+ } break;
+ case parquet::Type::BYTE_ARRAY: {
+ ok = true;
+ } break;
+ default: {
+ } break;
+ }
+ return ok;
+}
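+
+// Example (illustrative sketch): for FIXED_LEN_BYTE_ARRAY the bound above is
+// the largest decimal precision a two's-complement value of that width can
+// hold, floor(log10(2^(8 * length - 1))):
+//
+//   length ==  4  ->  max precision  9
+//   length == 16  ->  max precision 38  (the usual decimal128 limit)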
+
+bool LogicalType::Impl::Decimal::is_compatible(
+ ConvertedType::type converted_type,
+ schema::DecimalMetadata converted_decimal_metadata) const {
+ return converted_type == ConvertedType::DECIMAL &&
+ (converted_decimal_metadata.isset &&
+ converted_decimal_metadata.scale == scale_ &&
+ converted_decimal_metadata.precision == precision_);
+}
+
+ConvertedType::type LogicalType::Impl::Decimal::ToConvertedType(
+ schema::DecimalMetadata* out_decimal_metadata) const {
+ set_decimal_metadata(out_decimal_metadata, true, precision_, scale_);
+ return ConvertedType::DECIMAL;
+}
+
+std::string LogicalType::Impl::Decimal::ToString() const {
+ std::stringstream type;
+ type << "Decimal(precision=" << precision_ << ", scale=" << scale_ << ")";
+ return type.str();
+}
+
+std::string LogicalType::Impl::Decimal::ToJSON() const {
+ std::stringstream json;
+ json << R"({"Type": "Decimal", "precision": )" << precision_ << R"(, "scale": )"
+ << scale_ << "}";
+ return json.str();
+}
+
+format::LogicalType LogicalType::Impl::Decimal::ToThrift() const {
+ format::LogicalType type;
+ format::DecimalType decimal_type;
+ decimal_type.__set_precision(precision_);
+ decimal_type.__set_scale(scale_);
+ type.__set_DECIMAL(decimal_type);
+ return type;
+}
+
+bool LogicalType::Impl::Decimal::Equals(const LogicalType& other) const {
+ bool eq = false;
+ if (other.is_decimal()) {
+ const auto& other_decimal = checked_cast<const DecimalLogicalType&>(other);
+ eq = (precision_ == other_decimal.precision() && scale_ == other_decimal.scale());
+ }
+ return eq;
+}
+
+std::shared_ptr<const LogicalType> DecimalLogicalType::Make(int32_t precision,
+ int32_t scale) {
+ if (precision < 1) {
+ throw ParquetException(
+ "Precision must be greater than or equal to 1 for Decimal logical type");
+ }
+ if (scale < 0 || scale > precision) {
+ throw ParquetException(
+ "Scale must be a non-negative integer that does not exceed precision for "
+ "Decimal logical type");
+ }
+ auto* logical_type = new DecimalLogicalType();
+ logical_type->impl_.reset(new LogicalType::Impl::Decimal(precision, scale));
+ return std::shared_ptr<const LogicalType>(logical_type);
+}
+
+int32_t DecimalLogicalType::precision() const {
+ return (dynamic_cast<const LogicalType::Impl::Decimal&>(*impl_)).precision();
+}
+
+int32_t DecimalLogicalType::scale() const {
+ return (dynamic_cast<const LogicalType::Impl::Decimal&>(*impl_)).scale();
+}
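+
+// Example (illustrative sketch): Make() validates its arguments, so invalid
+// combinations fail eagerly instead of producing a bad schema:
+//
+//   auto ok = DecimalLogicalType::Make(/*precision=*/10, /*scale=*/2);
+//   // DecimalLogicalType::Make(0, 0) throws ParquetException
+//   // DecimalLogicalType::Make(5, 6) throws ParquetException (scale > precision)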
+
+class LogicalType::Impl::Date final : public LogicalType::Impl::SimpleCompatible,
+ public LogicalType::Impl::SimpleApplicable {
+ public:
+ friend class DateLogicalType;
+
+ OVERRIDE_TOSTRING(Date)
+ OVERRIDE_TOTHRIFT(DateType, DATE)
+
+ private:
+ Date()
+ : LogicalType::Impl(LogicalType::Type::DATE, SortOrder::SIGNED),
+ LogicalType::Impl::SimpleCompatible(ConvertedType::DATE),
+ LogicalType::Impl::SimpleApplicable(parquet::Type::INT32) {}
+};
+
+GENERATE_MAKE(Date)
+
+#define time_unit_string(u___) \
+ ((u___) == LogicalType::TimeUnit::MILLIS \
+ ? "milliseconds" \
+ : ((u___) == LogicalType::TimeUnit::MICROS \
+ ? "microseconds" \
+ : ((u___) == LogicalType::TimeUnit::NANOS ? "nanoseconds" : "unknown")))
+
+class LogicalType::Impl::Time final : public LogicalType::Impl::Compatible,
+ public LogicalType::Impl::Applicable {
+ public:
+ friend class TimeLogicalType;
+
+ bool is_applicable(parquet::Type::type primitive_type,
+ int32_t primitive_length = -1) const override;
+ bool is_compatible(ConvertedType::type converted_type,
+ schema::DecimalMetadata converted_decimal_metadata) const override;
+ ConvertedType::type ToConvertedType(
+ schema::DecimalMetadata* out_decimal_metadata) const override;
+ std::string ToString() const override;
+ std::string ToJSON() const override;
+ format::LogicalType ToThrift() const override;
+ bool Equals(const LogicalType& other) const override;
+
+ bool is_adjusted_to_utc() const { return adjusted_; }
+ LogicalType::TimeUnit::unit time_unit() const { return unit_; }
+
+ private:
+ Time(bool a, LogicalType::TimeUnit::unit u)
+ : LogicalType::Impl(LogicalType::Type::TIME, SortOrder::SIGNED),
+ adjusted_(a),
+ unit_(u) {}
+ bool adjusted_ = false;
+ LogicalType::TimeUnit::unit unit_;
+};
+
+bool LogicalType::Impl::Time::is_applicable(parquet::Type::type primitive_type,
+ int32_t primitive_length) const {
+ return (primitive_type == parquet::Type::INT32 &&
+ unit_ == LogicalType::TimeUnit::MILLIS) ||
+ (primitive_type == parquet::Type::INT64 &&
+ (unit_ == LogicalType::TimeUnit::MICROS ||
+ unit_ == LogicalType::TimeUnit::NANOS));
+}
+
+bool LogicalType::Impl::Time::is_compatible(
+ ConvertedType::type converted_type,
+ schema::DecimalMetadata converted_decimal_metadata) const {
+ if (converted_decimal_metadata.isset) {
+ return false;
+ } else if (adjusted_ && unit_ == LogicalType::TimeUnit::MILLIS) {
+ return converted_type == ConvertedType::TIME_MILLIS;
+ } else if (adjusted_ && unit_ == LogicalType::TimeUnit::MICROS) {
+ return converted_type == ConvertedType::TIME_MICROS;
+ } else {
+ return (converted_type == ConvertedType::NONE) ||
+ (converted_type == ConvertedType::NA);
+ }
+}
+
+ConvertedType::type LogicalType::Impl::Time::ToConvertedType(
+ schema::DecimalMetadata* out_decimal_metadata) const {
+ reset_decimal_metadata(out_decimal_metadata);
+ if (adjusted_) {
+ if (unit_ == LogicalType::TimeUnit::MILLIS) {
+ return ConvertedType::TIME_MILLIS;
+ } else if (unit_ == LogicalType::TimeUnit::MICROS) {
+ return ConvertedType::TIME_MICROS;
+ }
+ }
+ return ConvertedType::NONE;
+}
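+
+// Example (illustrative sketch): only UTC-adjusted millisecond and microsecond
+// times have converted-type equivalents; everything else maps to NONE:
+//
+//   Time(true, MILLIS)  -> ConvertedType::TIME_MILLIS
+//   Time(true, NANOS)   -> ConvertedType::NONE
+//   Time(false, MICROS) -> ConvertedType::NONE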
+
+std::string LogicalType::Impl::Time::ToString() const {
+ std::stringstream type;
+ type << "Time(isAdjustedToUTC=" << std::boolalpha << adjusted_
+ << ", timeUnit=" << time_unit_string(unit_) << ")";
+ return type.str();
+}
+
+std::string LogicalType::Impl::Time::ToJSON() const {
+ std::stringstream json;
+ json << R"({"Type": "Time", "isAdjustedToUTC": )" << std::boolalpha << adjusted_
+ << R"(, "timeUnit": ")" << time_unit_string(unit_) << R"("})";
+ return json.str();
+}
+
+format::LogicalType LogicalType::Impl::Time::ToThrift() const {
+ format::LogicalType type;
+ format::TimeType time_type;
+ format::TimeUnit time_unit;
+ DCHECK(unit_ != LogicalType::TimeUnit::UNKNOWN);
+ if (unit_ == LogicalType::TimeUnit::MILLIS) {
+ format::MilliSeconds millis;
+ time_unit.__set_MILLIS(millis);
+ } else if (unit_ == LogicalType::TimeUnit::MICROS) {
+ format::MicroSeconds micros;
+ time_unit.__set_MICROS(micros);
+ } else if (unit_ == LogicalType::TimeUnit::NANOS) {
+ format::NanoSeconds nanos;
+ time_unit.__set_NANOS(nanos);
+ }
+ time_type.__set_isAdjustedToUTC(adjusted_);
+ time_type.__set_unit(time_unit);
+ type.__set_TIME(time_type);
+ return type;
+}
+
+bool LogicalType::Impl::Time::Equals(const LogicalType& other) const {
+ bool eq = false;
+ if (other.is_time()) {
+ const auto& other_time = checked_cast<const TimeLogicalType&>(other);
+ eq =
+ (adjusted_ == other_time.is_adjusted_to_utc() && unit_ == other_time.time_unit());
+ }
+ return eq;
+}
+
+std::shared_ptr<const LogicalType> TimeLogicalType::Make(
+ bool is_adjusted_to_utc, LogicalType::TimeUnit::unit time_unit) {
+ if (time_unit == LogicalType::TimeUnit::MILLIS ||
+ time_unit == LogicalType::TimeUnit::MICROS ||
+ time_unit == LogicalType::TimeUnit::NANOS) {
+ auto* logical_type = new TimeLogicalType();
+ logical_type->impl_.reset(new LogicalType::Impl::Time(is_adjusted_to_utc, time_unit));
+ return std::shared_ptr<const LogicalType>(logical_type);
+ } else {
+ throw ParquetException(
+ "TimeUnit must be one of MILLIS, MICROS, or NANOS for Time logical type");
+ }
+}
+
+bool TimeLogicalType::is_adjusted_to_utc() const {
+ return (dynamic_cast<const LogicalType::Impl::Time&>(*impl_)).is_adjusted_to_utc();
+}
+
+LogicalType::TimeUnit::unit TimeLogicalType::time_unit() const {
+ return (dynamic_cast<const LogicalType::Impl::Time&>(*impl_)).time_unit();
+}
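+
+// Example (illustrative sketch): the physical type a Time column requires
+// depends on its unit, as encoded in is_applicable() above:
+//
+//   auto t = TimeLogicalType::Make(/*is_adjusted_to_utc=*/true,
+//                                  LogicalType::TimeUnit::NANOS);
+//   // t->is_applicable(parquet::Type::INT64) == true
+//   // t->is_applicable(parquet::Type::INT32) == false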
+
+class LogicalType::Impl::Timestamp final : public LogicalType::Impl::Compatible,
+ public LogicalType::Impl::SimpleApplicable {
+ public:
+ friend class TimestampLogicalType;
+
+ bool is_serialized() const override;
+ bool is_compatible(ConvertedType::type converted_type,
+ schema::DecimalMetadata converted_decimal_metadata) const override;
+ ConvertedType::type ToConvertedType(
+ schema::DecimalMetadata* out_decimal_metadata) const override;
+ std::string ToString() const override;
+ std::string ToJSON() const override;
+ format::LogicalType ToThrift() const override;
+ bool Equals(const LogicalType& other) const override;
+
+ bool is_adjusted_to_utc() const { return adjusted_; }
+ LogicalType::TimeUnit::unit time_unit() const { return unit_; }
+
+ bool is_from_converted_type() const { return is_from_converted_type_; }
+ bool force_set_converted_type() const { return force_set_converted_type_; }
+
+ private:
+ Timestamp(bool adjusted, LogicalType::TimeUnit::unit unit, bool is_from_converted_type,
+ bool force_set_converted_type)
+ : LogicalType::Impl(LogicalType::Type::TIMESTAMP, SortOrder::SIGNED),
+ LogicalType::Impl::SimpleApplicable(parquet::Type::INT64),
+ adjusted_(adjusted),
+ unit_(unit),
+ is_from_converted_type_(is_from_converted_type),
+ force_set_converted_type_(force_set_converted_type) {}
+ bool adjusted_ = false;
+ LogicalType::TimeUnit::unit unit_;
+ bool is_from_converted_type_ = false;
+ bool force_set_converted_type_ = false;
+};
+
+bool LogicalType::Impl::Timestamp::is_serialized() const {
+ return !is_from_converted_type_;
+}
+
+bool LogicalType::Impl::Timestamp::is_compatible(
+ ConvertedType::type converted_type,
+ schema::DecimalMetadata converted_decimal_metadata) const {
+ if (converted_decimal_metadata.isset) {
+ return false;
+ } else if (unit_ == LogicalType::TimeUnit::MILLIS) {
+ if (adjusted_ || force_set_converted_type_) {
+ return converted_type == ConvertedType::TIMESTAMP_MILLIS;
+ } else {
+ return (converted_type == ConvertedType::NONE) ||
+ (converted_type == ConvertedType::NA);
+ }
+ } else if (unit_ == LogicalType::TimeUnit::MICROS) {
+ if (adjusted_ || force_set_converted_type_) {
+ return converted_type == ConvertedType::TIMESTAMP_MICROS;
+ } else {
+ return (converted_type == ConvertedType::NONE) ||
+ (converted_type == ConvertedType::NA);
+ }
+ } else {
+ return (converted_type == ConvertedType::NONE) ||
+ (converted_type == ConvertedType::NA);
+ }
+}
+
+ConvertedType::type LogicalType::Impl::Timestamp::ToConvertedType(
+ schema::DecimalMetadata* out_decimal_metadata) const {
+ reset_decimal_metadata(out_decimal_metadata);
+ if (adjusted_ || force_set_converted_type_) {
+ if (unit_ == LogicalType::TimeUnit::MILLIS) {
+ return ConvertedType::TIMESTAMP_MILLIS;
+ } else if (unit_ == LogicalType::TimeUnit::MICROS) {
+ return ConvertedType::TIMESTAMP_MICROS;
+ }
+ }
+ return ConvertedType::NONE;
+}
+
+std::string LogicalType::Impl::Timestamp::ToString() const {
+ std::stringstream type;
+ type << "Timestamp(isAdjustedToUTC=" << std::boolalpha << adjusted_
+ << ", timeUnit=" << time_unit_string(unit_)
+ << ", is_from_converted_type=" << is_from_converted_type_
+ << ", force_set_converted_type=" << force_set_converted_type_ << ")";
+ return type.str();
+}
+
+std::string LogicalType::Impl::Timestamp::ToJSON() const {
+ std::stringstream json;
+ json << R"({"Type": "Timestamp", "isAdjustedToUTC": )" << std::boolalpha << adjusted_
+ << R"(, "timeUnit": ")" << time_unit_string(unit_) << R"(")"
+ << R"(, "is_from_converted_type": )" << is_from_converted_type_
+ << R"(, "force_set_converted_type": )" << force_set_converted_type_ << R"(})";
+ return json.str();
+}
+
+format::LogicalType LogicalType::Impl::Timestamp::ToThrift() const {
+ format::LogicalType type;
+ format::TimestampType timestamp_type;
+ format::TimeUnit time_unit;
+ DCHECK(unit_ != LogicalType::TimeUnit::UNKNOWN);
+ if (unit_ == LogicalType::TimeUnit::MILLIS) {
+ format::MilliSeconds millis;
+ time_unit.__set_MILLIS(millis);
+ } else if (unit_ == LogicalType::TimeUnit::MICROS) {
+ format::MicroSeconds micros;
+ time_unit.__set_MICROS(micros);
+ } else if (unit_ == LogicalType::TimeUnit::NANOS) {
+ format::NanoSeconds nanos;
+ time_unit.__set_NANOS(nanos);
+ }
+ timestamp_type.__set_isAdjustedToUTC(adjusted_);
+ timestamp_type.__set_unit(time_unit);
+ type.__set_TIMESTAMP(timestamp_type);
+ return type;
+}
+
+bool LogicalType::Impl::Timestamp::Equals(const LogicalType& other) const {
+ bool eq = false;
+ if (other.is_timestamp()) {
+ const auto& other_timestamp = checked_cast<const TimestampLogicalType&>(other);
+ eq = (adjusted_ == other_timestamp.is_adjusted_to_utc() &&
+ unit_ == other_timestamp.time_unit());
+ }
+ return eq;
+}
+
+std::shared_ptr<const LogicalType> TimestampLogicalType::Make(
+ bool is_adjusted_to_utc, LogicalType::TimeUnit::unit time_unit,
+ bool is_from_converted_type, bool force_set_converted_type) {
+ if (time_unit == LogicalType::TimeUnit::MILLIS ||
+ time_unit == LogicalType::TimeUnit::MICROS ||
+ time_unit == LogicalType::TimeUnit::NANOS) {
+ auto* logical_type = new TimestampLogicalType();
+ logical_type->impl_.reset(new LogicalType::Impl::Timestamp(
+ is_adjusted_to_utc, time_unit, is_from_converted_type, force_set_converted_type));
+ return std::shared_ptr<const LogicalType>(logical_type);
+ } else {
+ throw ParquetException(
+ "TimeUnit must be one of MILLIS, MICROS, or NANOS for Timestamp logical type");
+ }
+}
+
+bool TimestampLogicalType::is_adjusted_to_utc() const {
+ return (dynamic_cast<const LogicalType::Impl::Timestamp&>(*impl_)).is_adjusted_to_utc();
+}
+
+LogicalType::TimeUnit::unit TimestampLogicalType::time_unit() const {
+ return (dynamic_cast<const LogicalType::Impl::Timestamp&>(*impl_)).time_unit();
+}
+
+bool TimestampLogicalType::is_from_converted_type() const {
+ return (dynamic_cast<const LogicalType::Impl::Timestamp&>(*impl_))
+ .is_from_converted_type();
+}
+
+bool TimestampLogicalType::force_set_converted_type() const {
+ return (dynamic_cast<const LogicalType::Impl::Timestamp&>(*impl_))
+ .force_set_converted_type();
+}
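+
+// Example (illustrative): a non-UTC microsecond timestamp maps back to a
+// legacy ConvertedType only when force_set_converted_type is set.
+//
+//   auto ts = TimestampLogicalType::Make(/*is_adjusted_to_utc=*/false,
+//                                        LogicalType::TimeUnit::MICROS,
+//                                        /*is_from_converted_type=*/false,
+//                                        /*force_set_converted_type=*/true);
+//   schema::DecimalMetadata md;
+//   // ts->ToConvertedType(&md) returns ConvertedType::TIMESTAMP_MICROS;
+//   // with the force flag left false it would return ConvertedType::NONE.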
+
+class LogicalType::Impl::Interval final : public LogicalType::Impl::SimpleCompatible,
+ public LogicalType::Impl::TypeLengthApplicable {
+ public:
+ friend class IntervalLogicalType;
+
+ OVERRIDE_TOSTRING(Interval)
+ // TODO(tpboudreau): uncomment the following line to enable serialization after
+ // parquet.thrift recognizes IntervalType as a ConvertedType
+ // OVERRIDE_TOTHRIFT(IntervalType, INTERVAL)
+
+ private:
+ Interval()
+ : LogicalType::Impl(LogicalType::Type::INTERVAL, SortOrder::UNKNOWN),
+ LogicalType::Impl::SimpleCompatible(ConvertedType::INTERVAL),
+ LogicalType::Impl::TypeLengthApplicable(parquet::Type::FIXED_LEN_BYTE_ARRAY, 12) {
+ }
+};
+
+GENERATE_MAKE(Interval)
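+
+// Note: per the Parquet format specification, the INTERVAL annotation applies
+// to a FIXED_LEN_BYTE_ARRAY of length 12 holding three little-endian unsigned
+// 32-bit integers: months, days, and milliseconds, in that order.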
+
+class LogicalType::Impl::Int final : public LogicalType::Impl::Compatible,
+ public LogicalType::Impl::Applicable {
+ public:
+ friend class IntLogicalType;
+
+ bool is_applicable(parquet::Type::type primitive_type,
+ int32_t primitive_length = -1) const override;
+ bool is_compatible(ConvertedType::type converted_type,
+ schema::DecimalMetadata converted_decimal_metadata) const override;
+ ConvertedType::type ToConvertedType(
+ schema::DecimalMetadata* out_decimal_metadata) const override;
+ std::string ToString() const override;
+ std::string ToJSON() const override;
+ format::LogicalType ToThrift() const override;
+ bool Equals(const LogicalType& other) const override;
+
+ int bit_width() const { return width_; }
+ bool is_signed() const { return signed_; }
+
+ private:
+ Int(int w, bool s)
+ : LogicalType::Impl(LogicalType::Type::INT,
+ (s ? SortOrder::SIGNED : SortOrder::UNSIGNED)),
+ width_(w),
+ signed_(s) {}
+ int width_ = 0;
+ bool signed_ = false;
+};
+
+bool LogicalType::Impl::Int::is_applicable(parquet::Type::type primitive_type,
+ int32_t primitive_length) const {
+ return (primitive_type == parquet::Type::INT32 && width_ <= 32) ||
+ (primitive_type == parquet::Type::INT64 && width_ == 64);
+}
+
+bool LogicalType::Impl::Int::is_compatible(
+ ConvertedType::type converted_type,
+ schema::DecimalMetadata converted_decimal_metadata) const {
+ if (converted_decimal_metadata.isset) {
+ return false;
+ } else if (signed_ && width_ == 8) {
+ return converted_type == ConvertedType::INT_8;
+ } else if (signed_ && width_ == 16) {
+ return converted_type == ConvertedType::INT_16;
+ } else if (signed_ && width_ == 32) {
+ return converted_type == ConvertedType::INT_32;
+ } else if (signed_ && width_ == 64) {
+ return converted_type == ConvertedType::INT_64;
+ } else if (!signed_ && width_ == 8) {
+ return converted_type == ConvertedType::UINT_8;
+ } else if (!signed_ && width_ == 16) {
+ return converted_type == ConvertedType::UINT_16;
+ } else if (!signed_ && width_ == 32) {
+ return converted_type == ConvertedType::UINT_32;
+ } else if (!signed_ && width_ == 64) {
+ return converted_type == ConvertedType::UINT_64;
+ } else {
+ return false;
+ }
+}
+
+ConvertedType::type LogicalType::Impl::Int::ToConvertedType(
+ schema::DecimalMetadata* out_decimal_metadata) const {
+ reset_decimal_metadata(out_decimal_metadata);
+ if (signed_) {
+ switch (width_) {
+ case 8:
+ return ConvertedType::INT_8;
+ case 16:
+ return ConvertedType::INT_16;
+ case 32:
+ return ConvertedType::INT_32;
+ case 64:
+ return ConvertedType::INT_64;
+ }
+ } else { // unsigned
+ switch (width_) {
+ case 8:
+ return ConvertedType::UINT_8;
+ case 16:
+ return ConvertedType::UINT_16;
+ case 32:
+ return ConvertedType::UINT_32;
+ case 64:
+ return ConvertedType::UINT_64;
+ }
+ }
+ return ConvertedType::NONE;
+}
+
+std::string LogicalType::Impl::Int::ToString() const {
+ std::stringstream type;
+ type << "Int(bitWidth=" << width_ << ", isSigned=" << std::boolalpha << signed_ << ")";
+ return type.str();
+}
+
+std::string LogicalType::Impl::Int::ToJSON() const {
+ std::stringstream json;
+ json << R"({"Type": "Int", "bitWidth": )" << width_ << R"(, "isSigned": )"
+ << std::boolalpha << signed_ << "}";
+ return json.str();
+}
+
+format::LogicalType LogicalType::Impl::Int::ToThrift() const {
+ format::LogicalType type;
+ format::IntType int_type;
+ DCHECK(width_ == 64 || width_ == 32 || width_ == 16 || width_ == 8);
+ int_type.__set_bitWidth(static_cast<int8_t>(width_));
+ int_type.__set_isSigned(signed_);
+ type.__set_INTEGER(int_type);
+ return type;
+}
+
+bool LogicalType::Impl::Int::Equals(const LogicalType& other) const {
+ bool eq = false;
+ if (other.is_int()) {
+ const auto& other_int = checked_cast<const IntLogicalType&>(other);
+ eq = (width_ == other_int.bit_width() && signed_ == other_int.is_signed());
+ }
+ return eq;
+}
+
+std::shared_ptr<const LogicalType> IntLogicalType::Make(int bit_width, bool is_signed) {
+ if (bit_width == 8 || bit_width == 16 || bit_width == 32 || bit_width == 64) {
+ auto* logical_type = new IntLogicalType();
+ logical_type->impl_.reset(new LogicalType::Impl::Int(bit_width, is_signed));
+ return std::shared_ptr<const LogicalType>(logical_type);
+ } else {
+ throw ParquetException(
+ "Bit width must be exactly 8, 16, 32, or 64 for Int logical type");
+ }
+}
+
+int IntLogicalType::bit_width() const {
+ return (dynamic_cast<const LogicalType::Impl::Int&>(*impl_)).bit_width();
+}
+
+bool IntLogicalType::is_signed() const {
+ return (dynamic_cast<const LogicalType::Impl::Int&>(*impl_)).is_signed();
+}
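+
+// Example (illustrative): Int(16, /*is_signed=*/false) corresponds to the
+// legacy ConvertedType::UINT_16 and only annotates 32-bit physical storage.
+//
+//   auto i = IntLogicalType::Make(/*bit_width=*/16, /*is_signed=*/false);
+//   schema::DecimalMetadata md;
+//   // i->ToConvertedType(&md) returns ConvertedType::UINT_16;
+//   // i->is_applicable(parquet::Type::INT32) is true, while
+//   // i->is_applicable(parquet::Type::INT64) is false (INT64 needs width 64).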
+
+class LogicalType::Impl::Null final : public LogicalType::Impl::Incompatible,
+ public LogicalType::Impl::UniversalApplicable {
+ public:
+ friend class NullLogicalType;
+
+ OVERRIDE_TOSTRING(Null)
+ OVERRIDE_TOTHRIFT(NullType, UNKNOWN)
+
+ private:
+ Null() : LogicalType::Impl(LogicalType::Type::NIL, SortOrder::UNKNOWN) {}
+};
+
+GENERATE_MAKE(Null)
+
+class LogicalType::Impl::JSON final : public LogicalType::Impl::SimpleCompatible,
+ public LogicalType::Impl::SimpleApplicable {
+ public:
+ friend class JSONLogicalType;
+
+ OVERRIDE_TOSTRING(JSON)
+ OVERRIDE_TOTHRIFT(JsonType, JSON)
+
+ private:
+ JSON()
+ : LogicalType::Impl(LogicalType::Type::JSON, SortOrder::UNSIGNED),
+ LogicalType::Impl::SimpleCompatible(ConvertedType::JSON),
+ LogicalType::Impl::SimpleApplicable(parquet::Type::BYTE_ARRAY) {}
+};
+
+GENERATE_MAKE(JSON)
+
+class LogicalType::Impl::BSON final : public LogicalType::Impl::SimpleCompatible,
+ public LogicalType::Impl::SimpleApplicable {
+ public:
+ friend class BSONLogicalType;
+
+ OVERRIDE_TOSTRING(BSON)
+ OVERRIDE_TOTHRIFT(BsonType, BSON)
+
+ private:
+ BSON()
+ : LogicalType::Impl(LogicalType::Type::BSON, SortOrder::UNSIGNED),
+ LogicalType::Impl::SimpleCompatible(ConvertedType::BSON),
+ LogicalType::Impl::SimpleApplicable(parquet::Type::BYTE_ARRAY) {}
+};
+
+GENERATE_MAKE(BSON)
+
+class LogicalType::Impl::UUID final : public LogicalType::Impl::Incompatible,
+ public LogicalType::Impl::TypeLengthApplicable {
+ public:
+ friend class UUIDLogicalType;
+
+ OVERRIDE_TOSTRING(UUID)
+ OVERRIDE_TOTHRIFT(UUIDType, UUID)
+
+ private:
+ UUID()
+ : LogicalType::Impl(LogicalType::Type::UUID, SortOrder::UNSIGNED),
+ LogicalType::Impl::TypeLengthApplicable(parquet::Type::FIXED_LEN_BYTE_ARRAY, 16) {
+ }
+};
+
+GENERATE_MAKE(UUID)
+
+class LogicalType::Impl::No final : public LogicalType::Impl::SimpleCompatible,
+ public LogicalType::Impl::UniversalApplicable {
+ public:
+ friend class NoLogicalType;
+
+ OVERRIDE_TOSTRING(None)
+
+ private:
+ No()
+ : LogicalType::Impl(LogicalType::Type::NONE, SortOrder::UNKNOWN),
+ LogicalType::Impl::SimpleCompatible(ConvertedType::NONE) {}
+};
+
+GENERATE_MAKE(No)
+
+class LogicalType::Impl::Undefined final : public LogicalType::Impl::SimpleCompatible,
+ public LogicalType::Impl::UniversalApplicable {
+ public:
+ friend class UndefinedLogicalType;
+
+ OVERRIDE_TOSTRING(Undefined)
+
+ private:
+ Undefined()
+ : LogicalType::Impl(LogicalType::Type::UNDEFINED, SortOrder::UNKNOWN),
+ LogicalType::Impl::SimpleCompatible(ConvertedType::UNDEFINED) {}
+};
+
+GENERATE_MAKE(Undefined)
+
+} // namespace parquet
diff --git a/contrib/libs/apache/arrow/cpp/src/parquet/types.h b/contrib/libs/apache/arrow/cpp/src/parquet/types.h
index 40981d9bf1a..c25719830ec 100644
--- a/contrib/libs/apache/arrow/cpp/src/parquet/types.h
+++ b/contrib/libs/apache/arrow/cpp/src/parquet/types.h
@@ -1,765 +1,765 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-#pragma once
-
-#include <algorithm>
-#include <cstdint>
-#include <cstring>
-#include <memory>
-#include <sstream>
-#include <string>
-
-#include "arrow/util/string_view.h"
-
-#include "parquet/platform.h"
-#include "parquet/type_fwd.h"
-
-#ifdef _WIN32
-
-// Repetition::OPTIONAL conflicts with a #define, so we undefine it
-#ifdef OPTIONAL
-#undef OPTIONAL
-#endif
-
-#endif // _WIN32
-
-namespace arrow {
-namespace util {
-
-class Codec;
-
-} // namespace util
-} // namespace arrow
-
-namespace parquet {
-
-// ----------------------------------------------------------------------
-// Metadata enums to match Thrift metadata
-//
-// The reason we maintain our own enums is to avoid a transitive dependency on
-// the compiled Thrift headers (and thus thrift/Thrift.h) for users of the
-// public API. After building parquet-cpp, you should not need to include
-// Thrift headers in your application. This means some boilerplate to convert
-// between our types and Parquet's Thrift types.
-//
-// We can also add special values like NONE to distinguish between metadata
-// values being set and not set. As an example, consider ConvertedType and
-// CompressionCodec.
-
-// Mirrors parquet::Type
-struct Type {
- enum type {
- BOOLEAN = 0,
- INT32 = 1,
- INT64 = 2,
- INT96 = 3,
- FLOAT = 4,
- DOUBLE = 5,
- BYTE_ARRAY = 6,
- FIXED_LEN_BYTE_ARRAY = 7,
- // Should always be last element.
- UNDEFINED = 8
- };
-};
-
-// Mirrors parquet::ConvertedType
-struct ConvertedType {
- enum type {
- NONE, // Not a real converted type, but means no converted type is specified
- UTF8,
- MAP,
- MAP_KEY_VALUE,
- LIST,
- ENUM,
- DECIMAL,
- DATE,
- TIME_MILLIS,
- TIME_MICROS,
- TIMESTAMP_MILLIS,
- TIMESTAMP_MICROS,
- UINT_8,
- UINT_16,
- UINT_32,
- UINT_64,
- INT_8,
- INT_16,
- INT_32,
- INT_64,
- JSON,
- BSON,
- INTERVAL,
- // DEPRECATED INVALID ConvertedType for all-null data.
- // Only useful for reading legacy files written out by interim Parquet C++ releases.
- // For writing, always emit LogicalType::Null instead.
- // See PARQUET-1990.
- NA = 25,
- UNDEFINED = 26 // Not a real converted type; should always be last element
- };
-};
-
-// forward declaration
-namespace format {
-
-class LogicalType;
-
-}
-
-// Mirrors parquet::FieldRepetitionType
-struct Repetition {
- enum type { REQUIRED = 0, OPTIONAL = 1, REPEATED = 2, /*Always last*/ UNDEFINED = 3 };
-};
-
-// Reference:
-// parquet-mr/parquet-hadoop/src/main/java/org/apache/parquet/
-// format/converter/ParquetMetadataConverter.java
-// Sort order for page and column statistics. Types are associated with sort
-// orders (e.g., UTF8 columns should use UNSIGNED) and column stats are
-// aggregated using a sort order. As of parquet-format version 2.3.1, the
-// order used to aggregate stats is always SIGNED and is not stored in the
-// Parquet file. These stats are discarded for types that need unsigned.
-// See PARQUET-686.
-struct SortOrder {
- enum type { SIGNED, UNSIGNED, UNKNOWN };
-};
-
-namespace schema {
-
-struct DecimalMetadata {
- bool isset;
- int32_t scale;
- int32_t precision;
-};
-
-} // namespace schema
-
-/// \brief Implementation of parquet.thrift LogicalType types.
-class PARQUET_EXPORT LogicalType {
- public:
- struct Type {
- enum type {
- UNDEFINED = 0, // Not a real logical type
- STRING = 1,
- MAP,
- LIST,
- ENUM,
- DECIMAL,
- DATE,
- TIME,
- TIMESTAMP,
- INTERVAL,
- INT,
- NIL, // Thrift NullType: annotates data that is always null
- JSON,
- BSON,
- UUID,
- NONE // Not a real logical type; should always be last element
- };
- };
-
- struct TimeUnit {
- enum unit { UNKNOWN = 0, MILLIS = 1, MICROS, NANOS };
- };
-
- /// \brief If possible, return a logical type equivalent to the given legacy
- /// converted type (and decimal metadata if applicable).
- static std::shared_ptr<const LogicalType> FromConvertedType(
- const parquet::ConvertedType::type converted_type,
- const parquet::schema::DecimalMetadata converted_decimal_metadata = {false, -1,
- -1});
-
- /// \brief Return the logical type represented by the Thrift intermediary object.
- static std::shared_ptr<const LogicalType> FromThrift(
- const parquet::format::LogicalType& thrift_logical_type);
-
- /// \brief Return the explicitly requested logical type.
- static std::shared_ptr<const LogicalType> String();
- static std::shared_ptr<const LogicalType> Map();
- static std::shared_ptr<const LogicalType> List();
- static std::shared_ptr<const LogicalType> Enum();
- static std::shared_ptr<const LogicalType> Decimal(int32_t precision, int32_t scale = 0);
- static std::shared_ptr<const LogicalType> Date();
- static std::shared_ptr<const LogicalType> Time(bool is_adjusted_to_utc,
- LogicalType::TimeUnit::unit time_unit);
-
- /// \brief Create a Timestamp logical type
- /// \param[in] is_adjusted_to_utc set true if the data is UTC-normalized
- /// \param[in] time_unit the resolution of the timestamp
- /// \param[in] is_from_converted_type if true, the timestamp was generated
- /// by translating a legacy converted type of TIMESTAMP_MILLIS or
- /// TIMESTAMP_MICROS. Default is false.
- /// \param[in] force_set_converted_type if true, always set the
- /// legacy ConvertedType TIMESTAMP_MICROS and TIMESTAMP_MILLIS
- /// metadata. Default is false
- static std::shared_ptr<const LogicalType> Timestamp(
- bool is_adjusted_to_utc, LogicalType::TimeUnit::unit time_unit,
- bool is_from_converted_type = false, bool force_set_converted_type = false);
-
- static std::shared_ptr<const LogicalType> Interval();
- static std::shared_ptr<const LogicalType> Int(int bit_width, bool is_signed);
-
- /// \brief Create a logical type for data that's always null
- ///
- /// Any physical type can be annotated with this logical type.
- static std::shared_ptr<const LogicalType> Null();
-
- static std::shared_ptr<const LogicalType> JSON();
- static std::shared_ptr<const LogicalType> BSON();
- static std::shared_ptr<const LogicalType> UUID();
-
- /// \brief Create a placeholder for when no logical type is specified
- static std::shared_ptr<const LogicalType> None();
-
- /// \brief Return true if this logical type is consistent with the given underlying
- /// physical type.
- bool is_applicable(parquet::Type::type primitive_type,
- int32_t primitive_length = -1) const;
-
- /// \brief Return true if this logical type is equivalent to the given legacy converted
- /// type (and decimal metadata if applicable).
- bool is_compatible(parquet::ConvertedType::type converted_type,
- parquet::schema::DecimalMetadata converted_decimal_metadata = {
- false, -1, -1}) const;
-
- /// \brief If possible, return the legacy converted type (and decimal metadata if
- /// applicable) equivalent to this logical type.
- parquet::ConvertedType::type ToConvertedType(
- parquet::schema::DecimalMetadata* out_decimal_metadata) const;
-
- /// \brief Return a printable representation of this logical type.
- std::string ToString() const;
-
- /// \brief Return a JSON representation of this logical type.
- std::string ToJSON() const;
-
- /// \brief Return a serializable Thrift object for this logical type.
- parquet::format::LogicalType ToThrift() const;
-
- /// \brief Return true if the given logical type is equivalent to this logical type.
- bool Equals(const LogicalType& other) const;
-
- /// \brief Return the enumerated type of this logical type.
- LogicalType::Type::type type() const;
-
- /// \brief Return the appropriate sort order for this logical type.
- SortOrder::type sort_order() const;
-
- // Type checks ...
- bool is_string() const;
- bool is_map() const;
- bool is_list() const;
- bool is_enum() const;
- bool is_decimal() const;
- bool is_date() const;
- bool is_time() const;
- bool is_timestamp() const;
- bool is_interval() const;
- bool is_int() const;
- bool is_null() const;
- bool is_JSON() const;
- bool is_BSON() const;
- bool is_UUID() const;
- bool is_none() const;
- /// \brief Return true if this logical type is of a known type.
- bool is_valid() const;
- bool is_invalid() const;
- /// \brief Return true if this logical type is suitable for a schema GroupNode.
- bool is_nested() const;
- bool is_nonnested() const;
- /// \brief Return true if this logical type is included in the Thrift output for its
- /// node.
- bool is_serialized() const;
-
- LogicalType(const LogicalType&) = delete;
- LogicalType& operator=(const LogicalType&) = delete;
- virtual ~LogicalType() noexcept;
-
- protected:
- LogicalType();
-
- class Impl;
- std::unique_ptr<const Impl> impl_;
-};
-
-/// \brief Allowed for physical type BYTE_ARRAY, must be encoded as UTF-8.
-class PARQUET_EXPORT StringLogicalType : public LogicalType {
- public:
- static std::shared_ptr<const LogicalType> Make();
-
- private:
- StringLogicalType() = default;
-};
-
-/// \brief Allowed for group nodes only.
-class PARQUET_EXPORT MapLogicalType : public LogicalType {
- public:
- static std::shared_ptr<const LogicalType> Make();
-
- private:
- MapLogicalType() = default;
-};
-
-/// \brief Allowed for group nodes only.
-class PARQUET_EXPORT ListLogicalType : public LogicalType {
- public:
- static std::shared_ptr<const LogicalType> Make();
-
- private:
- ListLogicalType() = default;
-};
-
-/// \brief Allowed for physical type BYTE_ARRAY, must be encoded as UTF-8.
-class PARQUET_EXPORT EnumLogicalType : public LogicalType {
- public:
- static std::shared_ptr<const LogicalType> Make();
-
- private:
- EnumLogicalType() = default;
-};
-
-/// \brief Allowed for physical type INT32, INT64, FIXED_LEN_BYTE_ARRAY, or BYTE_ARRAY,
-/// depending on the precision.
-class PARQUET_EXPORT DecimalLogicalType : public LogicalType {
- public:
- static std::shared_ptr<const LogicalType> Make(int32_t precision, int32_t scale = 0);
- int32_t precision() const;
- int32_t scale() const;
-
- private:
- DecimalLogicalType() = default;
-};
-
-/// \brief Allowed for physical type INT32.
-class PARQUET_EXPORT DateLogicalType : public LogicalType {
- public:
- static std::shared_ptr<const LogicalType> Make();
-
- private:
- DateLogicalType() = default;
-};
-
-/// \brief Allowed for physical type INT32 (for MILLIS) or INT64 (for MICROS and NANOS).
-class PARQUET_EXPORT TimeLogicalType : public LogicalType {
- public:
- static std::shared_ptr<const LogicalType> Make(bool is_adjusted_to_utc,
- LogicalType::TimeUnit::unit time_unit);
- bool is_adjusted_to_utc() const;
- LogicalType::TimeUnit::unit time_unit() const;
-
- private:
- TimeLogicalType() = default;
-};
-
-/// \brief Allowed for physical type INT64.
-class PARQUET_EXPORT TimestampLogicalType : public LogicalType {
- public:
- static std::shared_ptr<const LogicalType> Make(bool is_adjusted_to_utc,
- LogicalType::TimeUnit::unit time_unit,
- bool is_from_converted_type = false,
- bool force_set_converted_type = false);
- bool is_adjusted_to_utc() const;
- LogicalType::TimeUnit::unit time_unit() const;
-
- /// \brief If true, will not set LogicalType in Thrift metadata
- bool is_from_converted_type() const;
-
- /// \brief If true, will set ConvertedType for micros and millis
- /// resolution in legacy ConvertedType Thrift metadata
- bool force_set_converted_type() const;
-
- private:
- TimestampLogicalType() = default;
-};
-
-/// \brief Allowed for physical type FIXED_LEN_BYTE_ARRAY with length 12
-class PARQUET_EXPORT IntervalLogicalType : public LogicalType {
- public:
- static std::shared_ptr<const LogicalType> Make();
-
- private:
- IntervalLogicalType() = default;
-};
-
-/// \brief Allowed for physical type INT32 (for bit widths 8, 16, and 32) and INT64
-/// (for bit width 64).
-class PARQUET_EXPORT IntLogicalType : public LogicalType {
- public:
- static std::shared_ptr<const LogicalType> Make(int bit_width, bool is_signed);
- int bit_width() const;
- bool is_signed() const;
-
- private:
- IntLogicalType() = default;
-};
-
-/// \brief Allowed for any physical type.
-class PARQUET_EXPORT NullLogicalType : public LogicalType {
- public:
- static std::shared_ptr<const LogicalType> Make();
-
- private:
- NullLogicalType() = default;
-};
-
-/// \brief Allowed for physical type BYTE_ARRAY.
-class PARQUET_EXPORT JSONLogicalType : public LogicalType {
- public:
- static std::shared_ptr<const LogicalType> Make();
-
- private:
- JSONLogicalType() = default;
-};
-
-/// \brief Allowed for physical type BYTE_ARRAY.
-class PARQUET_EXPORT BSONLogicalType : public LogicalType {
- public:
- static std::shared_ptr<const LogicalType> Make();
-
- private:
- BSONLogicalType() = default;
-};
-
-/// \brief Allowed for physical type FIXED_LEN_BYTE_ARRAY with length 16,
-/// must encode raw UUID bytes.
-class PARQUET_EXPORT UUIDLogicalType : public LogicalType {
- public:
- static std::shared_ptr<const LogicalType> Make();
-
- private:
- UUIDLogicalType() = default;
-};
-
-/// \brief Allowed for any physical type.
-class PARQUET_EXPORT NoLogicalType : public LogicalType {
- public:
- static std::shared_ptr<const LogicalType> Make();
-
- private:
- NoLogicalType() = default;
-};
-
-// Internal API, for unrecognized logical types
-class PARQUET_EXPORT UndefinedLogicalType : public LogicalType {
- public:
- static std::shared_ptr<const LogicalType> Make();
-
- private:
- UndefinedLogicalType() = default;
-};
-
-// Data encodings. Mirrors parquet::Encoding
-struct Encoding {
- enum type {
- PLAIN = 0,
- PLAIN_DICTIONARY = 2,
- RLE = 3,
- BIT_PACKED = 4,
- DELTA_BINARY_PACKED = 5,
- DELTA_LENGTH_BYTE_ARRAY = 6,
- DELTA_BYTE_ARRAY = 7,
- RLE_DICTIONARY = 8,
- BYTE_STREAM_SPLIT = 9,
- // Should always be last element (except UNKNOWN)
- UNDEFINED = 10,
- UNKNOWN = 999
- };
-};
-
-// Exposed data encodings. These describe the encoding of the data as it is
-// read from the file, rather than the encoding of the data stored in the
-// file. E.g., data encoded as RLE_DICTIONARY in the file can be read as
-// dictionary indices by RLE decoding, in which case the data read from the
-// file is DICTIONARY encoded.
-enum class ExposedEncoding {
- NO_ENCODING = 0, // data is not encoded, i.e. already decoded during reading
- DICTIONARY = 1
-};
-
-/// \brief Return true if Parquet supports indicated compression type
-PARQUET_EXPORT
-bool IsCodecSupported(Compression::type codec);
-
-PARQUET_EXPORT
-std::unique_ptr<Codec> GetCodec(Compression::type codec);
-
-PARQUET_EXPORT
-std::unique_ptr<Codec> GetCodec(Compression::type codec, int compression_level);
-
-struct ParquetCipher {
- enum type { AES_GCM_V1 = 0, AES_GCM_CTR_V1 = 1 };
-};
-
-struct AadMetadata {
- std::string aad_prefix;
- std::string aad_file_unique;
- bool supply_aad_prefix;
-};
-
-struct EncryptionAlgorithm {
- ParquetCipher::type algorithm;
- AadMetadata aad;
-};
-
-// Mirrors parquet::PageType
-struct PageType {
- enum type {
- DATA_PAGE,
- INDEX_PAGE,
- DICTIONARY_PAGE,
- DATA_PAGE_V2,
- // Should always be last element
- UNDEFINED
- };
-};
-
-class ColumnOrder {
- public:
- enum type { UNDEFINED, TYPE_DEFINED_ORDER };
- explicit ColumnOrder(ColumnOrder::type column_order) : column_order_(column_order) {}
- // Default to Type Defined Order
- ColumnOrder() : column_order_(type::TYPE_DEFINED_ORDER) {}
- ColumnOrder::type get_order() { return column_order_; }
-
- static ColumnOrder undefined_;
- static ColumnOrder type_defined_;
-
- private:
- ColumnOrder::type column_order_;
-};
-
-// ----------------------------------------------------------------------
-
-struct ByteArray {
- ByteArray() : len(0), ptr(NULLPTR) {}
- ByteArray(uint32_t len, const uint8_t* ptr) : len(len), ptr(ptr) {}
-
- ByteArray(::arrow::util::string_view view) // NOLINT implicit conversion
- : ByteArray(static_cast<uint32_t>(view.size()),
- reinterpret_cast<const uint8_t*>(view.data())) {}
- uint32_t len;
- const uint8_t* ptr;
-};
-
-inline bool operator==(const ByteArray& left, const ByteArray& right) {
- return left.len == right.len &&
- (left.len == 0 || std::memcmp(left.ptr, right.ptr, left.len) == 0);
-}
-
-inline bool operator!=(const ByteArray& left, const ByteArray& right) {
- return !(left == right);
-}
-
-struct FixedLenByteArray {
- FixedLenByteArray() : ptr(NULLPTR) {}
- explicit FixedLenByteArray(const uint8_t* ptr) : ptr(ptr) {}
- const uint8_t* ptr;
-};
-
-using FLBA = FixedLenByteArray;
-
-// Julian day at unix epoch.
-//
-// The Julian Day Number (JDN) is the integer assigned to a whole solar day in
-// the Julian day count starting from noon Universal time, with Julian day
-// number 0 assigned to the day starting at noon on Monday, January 1, 4713 BC,
-// proleptic Julian calendar (November 24, 4714 BC, in the proleptic Gregorian
-// calendar).
-constexpr int64_t kJulianToUnixEpochDays = INT64_C(2440588);
-constexpr int64_t kSecondsPerDay = INT64_C(60 * 60 * 24);
-constexpr int64_t kMillisecondsPerDay = kSecondsPerDay * INT64_C(1000);
-constexpr int64_t kMicrosecondsPerDay = kMillisecondsPerDay * INT64_C(1000);
-constexpr int64_t kNanosecondsPerDay = kMicrosecondsPerDay * INT64_C(1000);
-
-MANUALLY_ALIGNED_STRUCT(1) Int96 { uint32_t value[3]; };
-STRUCT_END(Int96, 12);
-
-inline bool operator==(const Int96& left, const Int96& right) {
- return std::equal(left.value, left.value + 3, right.value);
-}
-
-inline bool operator!=(const Int96& left, const Int96& right) { return !(left == right); }
-
-static inline std::string ByteArrayToString(const ByteArray& a) {
- return std::string(reinterpret_cast<const char*>(a.ptr), a.len);
-}
-
-static inline void Int96SetNanoSeconds(parquet::Int96& i96, int64_t nanoseconds) {
- std::memcpy(&i96.value, &nanoseconds, sizeof(nanoseconds));
-}
-
-struct DecodedInt96 {
- uint64_t days_since_epoch;
- uint64_t nanoseconds;
-};
-
-static inline DecodedInt96 DecodeInt96Timestamp(const parquet::Int96& i96) {
-  // We do the computations in the unsigned domain to avoid undefined behaviour
- // on overflow.
- DecodedInt96 result;
- result.days_since_epoch = i96.value[2] - static_cast<uint64_t>(kJulianToUnixEpochDays);
- result.nanoseconds = 0;
-
- memcpy(&result.nanoseconds, &i96.value, sizeof(uint64_t));
- return result;
-}
-
-static inline int64_t Int96GetNanoSeconds(const parquet::Int96& i96) {
- const auto decoded = DecodeInt96Timestamp(i96);
- return static_cast<int64_t>(decoded.days_since_epoch * kNanosecondsPerDay +
- decoded.nanoseconds);
-}
-
-static inline int64_t Int96GetMicroSeconds(const parquet::Int96& i96) {
- const auto decoded = DecodeInt96Timestamp(i96);
- uint64_t microseconds = decoded.nanoseconds / static_cast<uint64_t>(1000);
- return static_cast<int64_t>(decoded.days_since_epoch * kMicrosecondsPerDay +
- microseconds);
-}
-
-static inline int64_t Int96GetMilliSeconds(const parquet::Int96& i96) {
- const auto decoded = DecodeInt96Timestamp(i96);
- uint64_t milliseconds = decoded.nanoseconds / static_cast<uint64_t>(1000000);
- return static_cast<int64_t>(decoded.days_since_epoch * kMillisecondsPerDay +
- milliseconds);
-}
-
-static inline int64_t Int96GetSeconds(const parquet::Int96& i96) {
- const auto decoded = DecodeInt96Timestamp(i96);
- uint64_t seconds = decoded.nanoseconds / static_cast<uint64_t>(1000000000);
- return static_cast<int64_t>(decoded.days_since_epoch * kSecondsPerDay + seconds);
-}
-
-static inline std::string Int96ToString(const Int96& a) {
- std::ostringstream result;
- std::copy(a.value, a.value + 3, std::ostream_iterator<uint32_t>(result, " "));
- return result.str();
-}
-
-static inline std::string FixedLenByteArrayToString(const FixedLenByteArray& a, int len) {
- std::ostringstream result;
- std::copy(a.ptr, a.ptr + len, std::ostream_iterator<uint32_t>(result, " "));
- return result.str();
-}
-
-template <Type::type TYPE>
-struct type_traits {};
-
-template <>
-struct type_traits<Type::BOOLEAN> {
- using value_type = bool;
-
- static constexpr int value_byte_size = 1;
- static constexpr const char* printf_code = "d";
-};
-
-template <>
-struct type_traits<Type::INT32> {
- using value_type = int32_t;
-
- static constexpr int value_byte_size = 4;
- static constexpr const char* printf_code = "d";
-};
-
-template <>
-struct type_traits<Type::INT64> {
- using value_type = int64_t;
-
- static constexpr int value_byte_size = 8;
- static constexpr const char* printf_code = "ld";
-};
-
-template <>
-struct type_traits<Type::INT96> {
- using value_type = Int96;
-
- static constexpr int value_byte_size = 12;
- static constexpr const char* printf_code = "s";
-};
-
-template <>
-struct type_traits<Type::FLOAT> {
- using value_type = float;
-
- static constexpr int value_byte_size = 4;
- static constexpr const char* printf_code = "f";
-};
-
-template <>
-struct type_traits<Type::DOUBLE> {
- using value_type = double;
-
- static constexpr int value_byte_size = 8;
- static constexpr const char* printf_code = "lf";
-};
-
-template <>
-struct type_traits<Type::BYTE_ARRAY> {
- using value_type = ByteArray;
-
- static constexpr int value_byte_size = sizeof(ByteArray);
- static constexpr const char* printf_code = "s";
-};
-
-template <>
-struct type_traits<Type::FIXED_LEN_BYTE_ARRAY> {
- using value_type = FixedLenByteArray;
-
- static constexpr int value_byte_size = sizeof(FixedLenByteArray);
- static constexpr const char* printf_code = "s";
-};
-
-template <Type::type TYPE>
-struct PhysicalType {
- using c_type = typename type_traits<TYPE>::value_type;
- static constexpr Type::type type_num = TYPE;
-};
-
-using BooleanType = PhysicalType<Type::BOOLEAN>;
-using Int32Type = PhysicalType<Type::INT32>;
-using Int64Type = PhysicalType<Type::INT64>;
-using Int96Type = PhysicalType<Type::INT96>;
-using FloatType = PhysicalType<Type::FLOAT>;
-using DoubleType = PhysicalType<Type::DOUBLE>;
-using ByteArrayType = PhysicalType<Type::BYTE_ARRAY>;
-using FLBAType = PhysicalType<Type::FIXED_LEN_BYTE_ARRAY>;
-
-template <typename Type>
-inline std::string format_fwf(int width) {
- std::stringstream ss;
- ss << "%-" << width << type_traits<Type::type_num>::printf_code;
- return ss.str();
-}
-
-PARQUET_EXPORT std::string EncodingToString(Encoding::type t);
-
-PARQUET_EXPORT std::string ConvertedTypeToString(ConvertedType::type t);
-
-PARQUET_EXPORT std::string TypeToString(Type::type t);
-
-PARQUET_EXPORT std::string FormatStatValue(Type::type parquet_type,
- ::arrow::util::string_view val);
-
-PARQUET_EXPORT int GetTypeByteSize(Type::type t);
-
-PARQUET_EXPORT SortOrder::type DefaultSortOrder(Type::type primitive);
-
-PARQUET_EXPORT SortOrder::type GetSortOrder(ConvertedType::type converted,
- Type::type primitive);
-
-PARQUET_EXPORT SortOrder::type GetSortOrder(
- const std::shared_ptr<const LogicalType>& logical_type, Type::type primitive);
-
-} // namespace parquet
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+
+#include <algorithm>
+#include <cstdint>
+#include <cstring>
+#include <memory>
+#include <sstream>
+#include <string>
+
+#include "arrow/util/string_view.h"
+
+#include "parquet/platform.h"
+#include "parquet/type_fwd.h"
+
+#ifdef _WIN32
+
+// Repetition::OPTIONAL conflicts with a #define, so we undefine it
+#ifdef OPTIONAL
+#undef OPTIONAL
+#endif
+
+#endif // _WIN32
+
+namespace arrow {
+namespace util {
+
+class Codec;
+
+} // namespace util
+} // namespace arrow
+
+namespace parquet {
+
+// ----------------------------------------------------------------------
+// Metadata enums to match Thrift metadata
+//
+// The reason we maintain our own enums is to avoid a transitive dependency on
+// the compiled Thrift headers (and thus thrift/Thrift.h) for users of the
+// public API. After building parquet-cpp, you should not need to include
+// Thrift headers in your application. This means some boilerplate to convert
+// between our types and Parquet's Thrift types.
+//
+// We can also add special values like NONE to distinguish between metadata
+// values being set and not set. As an example, consider ConvertedType and
+// CompressionCodec.
+
+// Mirrors parquet::Type
+struct Type {
+ enum type {
+ BOOLEAN = 0,
+ INT32 = 1,
+ INT64 = 2,
+ INT96 = 3,
+ FLOAT = 4,
+ DOUBLE = 5,
+ BYTE_ARRAY = 6,
+ FIXED_LEN_BYTE_ARRAY = 7,
+ // Should always be last element.
+ UNDEFINED = 8
+ };
+};
+
+// Mirrors parquet::ConvertedType
+struct ConvertedType {
+ enum type {
+ NONE, // Not a real converted type, but means no converted type is specified
+ UTF8,
+ MAP,
+ MAP_KEY_VALUE,
+ LIST,
+ ENUM,
+ DECIMAL,
+ DATE,
+ TIME_MILLIS,
+ TIME_MICROS,
+ TIMESTAMP_MILLIS,
+ TIMESTAMP_MICROS,
+ UINT_8,
+ UINT_16,
+ UINT_32,
+ UINT_64,
+ INT_8,
+ INT_16,
+ INT_32,
+ INT_64,
+ JSON,
+ BSON,
+ INTERVAL,
+ // DEPRECATED INVALID ConvertedType for all-null data.
+ // Only useful for reading legacy files written out by interim Parquet C++ releases.
+ // For writing, always emit LogicalType::Null instead.
+ // See PARQUET-1990.
+ NA = 25,
+ UNDEFINED = 26 // Not a real converted type; should always be last element
+ };
+};
+
+// forward declaration
+namespace format {
+
+class LogicalType;
+
+}
+
+// Mirrors parquet::FieldRepetitionType
+struct Repetition {
+ enum type { REQUIRED = 0, OPTIONAL = 1, REPEATED = 2, /*Always last*/ UNDEFINED = 3 };
+};
+
+// Reference:
+// parquet-mr/parquet-hadoop/src/main/java/org/apache/parquet/
+// format/converter/ParquetMetadataConverter.java
+// Sort order for page and column statistics. Types are associated with sort
+// orders (e.g., UTF8 columns should use UNSIGNED) and column stats are
+// aggregated using a sort order. As of parquet-format version 2.3.1, the
+// order used to aggregate stats is always SIGNED and is not stored in the
+// Parquet file. These stats are discarded for types that need unsigned.
+// See PARQUET-686.
+struct SortOrder {
+ enum type { SIGNED, UNSIGNED, UNKNOWN };
+};
+
+namespace schema {
+
+struct DecimalMetadata {
+ bool isset;
+ int32_t scale;
+ int32_t precision;
+};
+
+} // namespace schema
+
+/// \brief Implementation of parquet.thrift LogicalType types.
+class PARQUET_EXPORT LogicalType {
+ public:
+ struct Type {
+ enum type {
+ UNDEFINED = 0, // Not a real logical type
+ STRING = 1,
+ MAP,
+ LIST,
+ ENUM,
+ DECIMAL,
+ DATE,
+ TIME,
+ TIMESTAMP,
+ INTERVAL,
+ INT,
+ NIL, // Thrift NullType: annotates data that is always null
+ JSON,
+ BSON,
+ UUID,
+ NONE // Not a real logical type; should always be last element
+ };
+ };
+
+ struct TimeUnit {
+ enum unit { UNKNOWN = 0, MILLIS = 1, MICROS, NANOS };
+ };
+
+ /// \brief If possible, return a logical type equivalent to the given legacy
+ /// converted type (and decimal metadata if applicable).
+ static std::shared_ptr<const LogicalType> FromConvertedType(
+ const parquet::ConvertedType::type converted_type,
+ const parquet::schema::DecimalMetadata converted_decimal_metadata = {false, -1,
+ -1});
+
+ /// \brief Return the logical type represented by the Thrift intermediary object.
+ static std::shared_ptr<const LogicalType> FromThrift(
+ const parquet::format::LogicalType& thrift_logical_type);
+
+ /// \brief Return the explicitly requested logical type.
+ static std::shared_ptr<const LogicalType> String();
+ static std::shared_ptr<const LogicalType> Map();
+ static std::shared_ptr<const LogicalType> List();
+ static std::shared_ptr<const LogicalType> Enum();
+ static std::shared_ptr<const LogicalType> Decimal(int32_t precision, int32_t scale = 0);
+ static std::shared_ptr<const LogicalType> Date();
+ static std::shared_ptr<const LogicalType> Time(bool is_adjusted_to_utc,
+ LogicalType::TimeUnit::unit time_unit);
+
+ /// \brief Create a Timestamp logical type
+ /// \param[in] is_adjusted_to_utc set true if the data is UTC-normalized
+ /// \param[in] time_unit the resolution of the timestamp
+ /// \param[in] is_from_converted_type if true, the timestamp was generated
+ /// by translating a legacy converted type of TIMESTAMP_MILLIS or
+ /// TIMESTAMP_MICROS. Default is false.
+ /// \param[in] force_set_converted_type if true, always set the
+ /// legacy ConvertedType TIMESTAMP_MICROS and TIMESTAMP_MILLIS
+ /// metadata. Default is false
+ static std::shared_ptr<const LogicalType> Timestamp(
+ bool is_adjusted_to_utc, LogicalType::TimeUnit::unit time_unit,
+ bool is_from_converted_type = false, bool force_set_converted_type = false);
+
+ static std::shared_ptr<const LogicalType> Interval();
+ static std::shared_ptr<const LogicalType> Int(int bit_width, bool is_signed);
+
+ /// \brief Create a logical type for data that's always null
+ ///
+ /// Any physical type can be annotated with this logical type.
+ static std::shared_ptr<const LogicalType> Null();
+
+ static std::shared_ptr<const LogicalType> JSON();
+ static std::shared_ptr<const LogicalType> BSON();
+ static std::shared_ptr<const LogicalType> UUID();
+
+ /// \brief Create a placeholder for when no logical type is specified
+ static std::shared_ptr<const LogicalType> None();
+
+ /// \brief Return true if this logical type is consistent with the given underlying
+ /// physical type.
+ bool is_applicable(parquet::Type::type primitive_type,
+ int32_t primitive_length = -1) const;
+
+ /// \brief Return true if this logical type is equivalent to the given legacy converted
+ /// type (and decimal metadata if applicable).
+ bool is_compatible(parquet::ConvertedType::type converted_type,
+ parquet::schema::DecimalMetadata converted_decimal_metadata = {
+ false, -1, -1}) const;
+
+ /// \brief If possible, return the legacy converted type (and decimal metadata if
+ /// applicable) equivalent to this logical type.
+ parquet::ConvertedType::type ToConvertedType(
+ parquet::schema::DecimalMetadata* out_decimal_metadata) const;
+
+ /// \brief Return a printable representation of this logical type.
+ std::string ToString() const;
+
+ /// \brief Return a JSON representation of this logical type.
+ std::string ToJSON() const;
+
+ /// \brief Return a serializable Thrift object for this logical type.
+ parquet::format::LogicalType ToThrift() const;
+
+ /// \brief Return true if the given logical type is equivalent to this logical type.
+ bool Equals(const LogicalType& other) const;
+
+ /// \brief Return the enumerated type of this logical type.
+ LogicalType::Type::type type() const;
+
+ /// \brief Return the appropriate sort order for this logical type.
+ SortOrder::type sort_order() const;
+
+ // Type checks ...
+ bool is_string() const;
+ bool is_map() const;
+ bool is_list() const;
+ bool is_enum() const;
+ bool is_decimal() const;
+ bool is_date() const;
+ bool is_time() const;
+ bool is_timestamp() const;
+ bool is_interval() const;
+ bool is_int() const;
+ bool is_null() const;
+ bool is_JSON() const;
+ bool is_BSON() const;
+ bool is_UUID() const;
+ bool is_none() const;
+ /// \brief Return true if this logical type is of a known type.
+ bool is_valid() const;
+ bool is_invalid() const;
+ /// \brief Return true if this logical type is suitable for a schema GroupNode.
+ bool is_nested() const;
+ bool is_nonnested() const;
+ /// \brief Return true if this logical type is included in the Thrift output for its
+ /// node.
+ bool is_serialized() const;
+
+ LogicalType(const LogicalType&) = delete;
+ LogicalType& operator=(const LogicalType&) = delete;
+ virtual ~LogicalType() noexcept;
+
+ protected:
+ LogicalType();
+
+ class Impl;
+ std::unique_ptr<const Impl> impl_;
+};
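+
+// Example (illustrative): the logical-type API alongside the legacy converted
+// types declared above.
+//
+//   std::shared_ptr<const LogicalType> lt =
+//       LogicalType::Timestamp(/*is_adjusted_to_utc=*/true,
+//                              LogicalType::TimeUnit::MILLIS);
+//   // lt->is_timestamp() is true, and
+//   // lt->is_compatible(ConvertedType::TIMESTAMP_MILLIS) is true, so legacy
+//   // readers observe the equivalent ConvertedType annotation.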
+
+/// \brief Allowed for physical type BYTE_ARRAY, must be encoded as UTF-8.
+class PARQUET_EXPORT StringLogicalType : public LogicalType {
+ public:
+ static std::shared_ptr<const LogicalType> Make();
+
+ private:
+ StringLogicalType() = default;
+};
+
+/// \brief Allowed for group nodes only.
+class PARQUET_EXPORT MapLogicalType : public LogicalType {
+ public:
+ static std::shared_ptr<const LogicalType> Make();
+
+ private:
+ MapLogicalType() = default;
+};
+
+/// \brief Allowed for group nodes only.
+class PARQUET_EXPORT ListLogicalType : public LogicalType {
+ public:
+ static std::shared_ptr<const LogicalType> Make();
+
+ private:
+ ListLogicalType() = default;
+};
+
+/// \brief Allowed for physical type BYTE_ARRAY, must be encoded as UTF-8.
+class PARQUET_EXPORT EnumLogicalType : public LogicalType {
+ public:
+ static std::shared_ptr<const LogicalType> Make();
+
+ private:
+ EnumLogicalType() = default;
+};
+
+/// \brief Allowed for physical type INT32, INT64, FIXED_LEN_BYTE_ARRAY, or BYTE_ARRAY,
+/// depending on the precision.
+class PARQUET_EXPORT DecimalLogicalType : public LogicalType {
+ public:
+ static std::shared_ptr<const LogicalType> Make(int32_t precision, int32_t scale = 0);
+ int32_t precision() const;
+ int32_t scale() const;
+
+ private:
+ DecimalLogicalType() = default;
+};
+
+/// \brief Allowed for physical type INT32.
+class PARQUET_EXPORT DateLogicalType : public LogicalType {
+ public:
+ static std::shared_ptr<const LogicalType> Make();
+
+ private:
+ DateLogicalType() = default;
+};
+
+/// \brief Allowed for physical type INT32 (for MILLIS) or INT64 (for MICROS and NANOS).
+class PARQUET_EXPORT TimeLogicalType : public LogicalType {
+ public:
+ static std::shared_ptr<const LogicalType> Make(bool is_adjusted_to_utc,
+ LogicalType::TimeUnit::unit time_unit);
+ bool is_adjusted_to_utc() const;
+ LogicalType::TimeUnit::unit time_unit() const;
+
+ private:
+ TimeLogicalType() = default;
+};
+
+/// \brief Allowed for physical type INT64.
+class PARQUET_EXPORT TimestampLogicalType : public LogicalType {
+ public:
+ static std::shared_ptr<const LogicalType> Make(bool is_adjusted_to_utc,
+ LogicalType::TimeUnit::unit time_unit,
+ bool is_from_converted_type = false,
+ bool force_set_converted_type = false);
+ bool is_adjusted_to_utc() const;
+ LogicalType::TimeUnit::unit time_unit() const;
+
+ /// \brief If true, will not set LogicalType in Thrift metadata
+ bool is_from_converted_type() const;
+
+ /// \brief If true, will set ConvertedType for micros and millis
+ /// resolution in legacy ConvertedType Thrift metadata
+ bool force_set_converted_type() const;
+
+ private:
+ TimestampLogicalType() = default;
+};
+
+/// \brief Allowed for physical type FIXED_LEN_BYTE_ARRAY with length 12
+class PARQUET_EXPORT IntervalLogicalType : public LogicalType {
+ public:
+ static std::shared_ptr<const LogicalType> Make();
+
+ private:
+ IntervalLogicalType() = default;
+};
+
+/// \brief Allowed for physical type INT32 (for bit widths 8, 16, and 32) and INT64
+/// (for bit width 64).
+class PARQUET_EXPORT IntLogicalType : public LogicalType {
+ public:
+ static std::shared_ptr<const LogicalType> Make(int bit_width, bool is_signed);
+ int bit_width() const;
+ bool is_signed() const;
+
+ private:
+ IntLogicalType() = default;
+};
+
+/// \brief Allowed for any physical type.
+class PARQUET_EXPORT NullLogicalType : public LogicalType {
+ public:
+ static std::shared_ptr<const LogicalType> Make();
+
+ private:
+ NullLogicalType() = default;
+};
+
+/// \brief Allowed for physical type BYTE_ARRAY.
+class PARQUET_EXPORT JSONLogicalType : public LogicalType {
+ public:
+ static std::shared_ptr<const LogicalType> Make();
+
+ private:
+ JSONLogicalType() = default;
+};
+
+/// \brief Allowed for physical type BYTE_ARRAY.
+class PARQUET_EXPORT BSONLogicalType : public LogicalType {
+ public:
+ static std::shared_ptr<const LogicalType> Make();
+
+ private:
+ BSONLogicalType() = default;
+};
+
+/// \brief Allowed for physical type FIXED_LEN_BYTE_ARRAY with length 16,
+/// must encode raw UUID bytes.
+class PARQUET_EXPORT UUIDLogicalType : public LogicalType {
+ public:
+ static std::shared_ptr<const LogicalType> Make();
+
+ private:
+ UUIDLogicalType() = default;
+};
+
+/// \brief Allowed for any physical type.
+class PARQUET_EXPORT NoLogicalType : public LogicalType {
+ public:
+ static std::shared_ptr<const LogicalType> Make();
+
+ private:
+ NoLogicalType() = default;
+};
+
+// Internal API, for unrecognized logical types
+class PARQUET_EXPORT UndefinedLogicalType : public LogicalType {
+ public:
+ static std::shared_ptr<const LogicalType> Make();
+
+ private:
+ UndefinedLogicalType() = default;
+};
+
+// Data encodings. Mirrors parquet::Encoding
+struct Encoding {
+ enum type {
+ PLAIN = 0,
+ PLAIN_DICTIONARY = 2,
+ RLE = 3,
+ BIT_PACKED = 4,
+ DELTA_BINARY_PACKED = 5,
+ DELTA_LENGTH_BYTE_ARRAY = 6,
+ DELTA_BYTE_ARRAY = 7,
+ RLE_DICTIONARY = 8,
+ BYTE_STREAM_SPLIT = 9,
+ // Should always be last element (except UNKNOWN)
+ UNDEFINED = 10,
+ UNKNOWN = 999
+ };
+};
+
+// Exposed data encodings. These describe the encoding of the data as it is
+// read from the file, rather than the encoding of the data stored in the
+// file. E.g., data encoded as RLE_DICTIONARY in the file can be read as
+// dictionary indices by RLE decoding, in which case the data read from the
+// file is DICTIONARY encoded.
+enum class ExposedEncoding {
+ NO_ENCODING = 0, // data is not encoded, i.e. already decoded during reading
+ DICTIONARY = 1
+};
+
+/// \brief Return true if Parquet supports indicated compression type
+PARQUET_EXPORT
+bool IsCodecSupported(Compression::type codec);
+
+PARQUET_EXPORT
+std::unique_ptr<Codec> GetCodec(Compression::type codec);
+
+PARQUET_EXPORT
+std::unique_ptr<Codec> GetCodec(Compression::type codec, int compression_level);
+
+struct ParquetCipher {
+ enum type { AES_GCM_V1 = 0, AES_GCM_CTR_V1 = 1 };
+};
+
+struct AadMetadata {
+ std::string aad_prefix;
+ std::string aad_file_unique;
+ bool supply_aad_prefix;
+};
+
+struct EncryptionAlgorithm {
+ ParquetCipher::type algorithm;
+ AadMetadata aad;
+};
+
+// Mirrors parquet::PageType
+struct PageType {
+ enum type {
+ DATA_PAGE,
+ INDEX_PAGE,
+ DICTIONARY_PAGE,
+ DATA_PAGE_V2,
+ // Should always be last element
+ UNDEFINED
+ };
+};
+
+class ColumnOrder {
+ public:
+ enum type { UNDEFINED, TYPE_DEFINED_ORDER };
+ explicit ColumnOrder(ColumnOrder::type column_order) : column_order_(column_order) {}
+ // Default to Type Defined Order
+ ColumnOrder() : column_order_(type::TYPE_DEFINED_ORDER) {}
+ ColumnOrder::type get_order() { return column_order_; }
+
+ static ColumnOrder undefined_;
+ static ColumnOrder type_defined_;
+
+ private:
+ ColumnOrder::type column_order_;
+};
+
+// ----------------------------------------------------------------------
+
+struct ByteArray {
+ ByteArray() : len(0), ptr(NULLPTR) {}
+ ByteArray(uint32_t len, const uint8_t* ptr) : len(len), ptr(ptr) {}
+
+ ByteArray(::arrow::util::string_view view) // NOLINT implicit conversion
+ : ByteArray(static_cast<uint32_t>(view.size()),
+ reinterpret_cast<const uint8_t*>(view.data())) {}
+ uint32_t len;
+ const uint8_t* ptr;
+};
+
+inline bool operator==(const ByteArray& left, const ByteArray& right) {
+ return left.len == right.len &&
+ (left.len == 0 || std::memcmp(left.ptr, right.ptr, left.len) == 0);
+}
+
+inline bool operator!=(const ByteArray& left, const ByteArray& right) {
+ return !(left == right);
+}
+
+struct FixedLenByteArray {
+ FixedLenByteArray() : ptr(NULLPTR) {}
+ explicit FixedLenByteArray(const uint8_t* ptr) : ptr(ptr) {}
+ const uint8_t* ptr;
+};
+
+using FLBA = FixedLenByteArray;
+
+// Julian day at unix epoch.
+//
+// The Julian Day Number (JDN) is the integer assigned to a whole solar day in
+// the Julian day count starting from noon Universal time, with Julian day
+// number 0 assigned to the day starting at noon on Monday, January 1, 4713 BC,
+// proleptic Julian calendar (November 24, 4714 BC, in the proleptic Gregorian
+// calendar).
+constexpr int64_t kJulianToUnixEpochDays = INT64_C(2440588);
+constexpr int64_t kSecondsPerDay = INT64_C(60 * 60 * 24);
+constexpr int64_t kMillisecondsPerDay = kSecondsPerDay * INT64_C(1000);
+constexpr int64_t kMicrosecondsPerDay = kMillisecondsPerDay * INT64_C(1000);
+constexpr int64_t kNanosecondsPerDay = kMicrosecondsPerDay * INT64_C(1000);
+
+MANUALLY_ALIGNED_STRUCT(1) Int96 { uint32_t value[3]; };
+STRUCT_END(Int96, 12);
+
+inline bool operator==(const Int96& left, const Int96& right) {
+ return std::equal(left.value, left.value + 3, right.value);
+}
+
+inline bool operator!=(const Int96& left, const Int96& right) { return !(left == right); }
+
+static inline std::string ByteArrayToString(const ByteArray& a) {
+ return std::string(reinterpret_cast<const char*>(a.ptr), a.len);
+}
+
+static inline void Int96SetNanoSeconds(parquet::Int96& i96, int64_t nanoseconds) {
+ std::memcpy(&i96.value, &nanoseconds, sizeof(nanoseconds));
+}
+
+struct DecodedInt96 {
+ uint64_t days_since_epoch;
+ uint64_t nanoseconds;
+};
+
+static inline DecodedInt96 DecodeInt96Timestamp(const parquet::Int96& i96) {
+  // We do the computations in the unsigned domain to avoid undefined behaviour
+ // on overflow.
+ DecodedInt96 result;
+ result.days_since_epoch = i96.value[2] - static_cast<uint64_t>(kJulianToUnixEpochDays);
+ result.nanoseconds = 0;
+
+ memcpy(&result.nanoseconds, &i96.value, sizeof(uint64_t));
+ return result;
+}
+
+static inline int64_t Int96GetNanoSeconds(const parquet::Int96& i96) {
+ const auto decoded = DecodeInt96Timestamp(i96);
+ return static_cast<int64_t>(decoded.days_since_epoch * kNanosecondsPerDay +
+ decoded.nanoseconds);
+}
+
+static inline int64_t Int96GetMicroSeconds(const parquet::Int96& i96) {
+ const auto decoded = DecodeInt96Timestamp(i96);
+ uint64_t microseconds = decoded.nanoseconds / static_cast<uint64_t>(1000);
+ return static_cast<int64_t>(decoded.days_since_epoch * kMicrosecondsPerDay +
+ microseconds);
+}
+
+static inline int64_t Int96GetMilliSeconds(const parquet::Int96& i96) {
+ const auto decoded = DecodeInt96Timestamp(i96);
+ uint64_t milliseconds = decoded.nanoseconds / static_cast<uint64_t>(1000000);
+ return static_cast<int64_t>(decoded.days_since_epoch * kMillisecondsPerDay +
+ milliseconds);
+}
+
+static inline int64_t Int96GetSeconds(const parquet::Int96& i96) {
+ const auto decoded = DecodeInt96Timestamp(i96);
+ uint64_t seconds = decoded.nanoseconds / static_cast<uint64_t>(1000000000);
+ return static_cast<int64_t>(decoded.days_since_epoch * kSecondsPerDay + seconds);
+}
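+
+// Worked example (illustrative): value[2] == 2440589 is one Julian day after
+// the Unix epoch (2440589 - kJulianToUnixEpochDays == 1), and value[0..1]
+// read as a little-endian uint64_t equal to 1000 places the timestamp 1000
+// nanoseconds into that day, so:
+//   Int96GetNanoSeconds(i96)  == 1 * kNanosecondsPerDay + 1000
+//   Int96GetMicroSeconds(i96) == 1 * kMicrosecondsPerDay + 1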
+
+static inline std::string Int96ToString(const Int96& a) {
+ std::ostringstream result;
+ std::copy(a.value, a.value + 3, std::ostream_iterator<uint32_t>(result, " "));
+ return result.str();
+}
+
+static inline std::string FixedLenByteArrayToString(const FixedLenByteArray& a, int len) {
+ std::ostringstream result;
+ std::copy(a.ptr, a.ptr + len, std::ostream_iterator<uint32_t>(result, " "));
+ return result.str();
+}
+
+template <Type::type TYPE>
+struct type_traits {};
+
+template <>
+struct type_traits<Type::BOOLEAN> {
+ using value_type = bool;
+
+ static constexpr int value_byte_size = 1;
+ static constexpr const char* printf_code = "d";
+};
+
+template <>
+struct type_traits<Type::INT32> {
+ using value_type = int32_t;
+
+ static constexpr int value_byte_size = 4;
+ static constexpr const char* printf_code = "d";
+};
+
+template <>
+struct type_traits<Type::INT64> {
+ using value_type = int64_t;
+
+ static constexpr int value_byte_size = 8;
+ static constexpr const char* printf_code = "ld";
+};
+
+template <>
+struct type_traits<Type::INT96> {
+ using value_type = Int96;
+
+ static constexpr int value_byte_size = 12;
+ static constexpr const char* printf_code = "s";
+};
+
+template <>
+struct type_traits<Type::FLOAT> {
+ using value_type = float;
+
+ static constexpr int value_byte_size = 4;
+ static constexpr const char* printf_code = "f";
+};
+
+template <>
+struct type_traits<Type::DOUBLE> {
+ using value_type = double;
+
+ static constexpr int value_byte_size = 8;
+ static constexpr const char* printf_code = "lf";
+};
+
+template <>
+struct type_traits<Type::BYTE_ARRAY> {
+ using value_type = ByteArray;
+
+ static constexpr int value_byte_size = sizeof(ByteArray);
+ static constexpr const char* printf_code = "s";
+};
+
+template <>
+struct type_traits<Type::FIXED_LEN_BYTE_ARRAY> {
+ using value_type = FixedLenByteArray;
+
+ static constexpr int value_byte_size = sizeof(FixedLenByteArray);
+ static constexpr const char* printf_code = "s";
+};
+
+template <Type::type TYPE>
+struct PhysicalType {
+ using c_type = typename type_traits<TYPE>::value_type;
+ static constexpr Type::type type_num = TYPE;
+};
+
+using BooleanType = PhysicalType<Type::BOOLEAN>;
+using Int32Type = PhysicalType<Type::INT32>;
+using Int64Type = PhysicalType<Type::INT64>;
+using Int96Type = PhysicalType<Type::INT96>;
+using FloatType = PhysicalType<Type::FLOAT>;
+using DoubleType = PhysicalType<Type::DOUBLE>;
+using ByteArrayType = PhysicalType<Type::BYTE_ARRAY>;
+using FLBAType = PhysicalType<Type::FIXED_LEN_BYTE_ARRAY>;
+
+template <typename Type>
+inline std::string format_fwf(int width) {
+ std::stringstream ss;
+ ss << "%-" << width << type_traits<Type::type_num>::printf_code;
+ return ss.str();
+}
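+
+// Example (illustrative): format_fwf<Int64Type>(10) returns "%-10ld", a
+// printf-style format for a left-justified, ten-character-wide int64 field.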
+
+PARQUET_EXPORT std::string EncodingToString(Encoding::type t);
+
+PARQUET_EXPORT std::string ConvertedTypeToString(ConvertedType::type t);
+
+PARQUET_EXPORT std::string TypeToString(Type::type t);
+
+PARQUET_EXPORT std::string FormatStatValue(Type::type parquet_type,
+ ::arrow::util::string_view val);
+
+PARQUET_EXPORT int GetTypeByteSize(Type::type t);
+
+PARQUET_EXPORT SortOrder::type DefaultSortOrder(Type::type primitive);
+
+PARQUET_EXPORT SortOrder::type GetSortOrder(ConvertedType::type converted,
+ Type::type primitive);
+
+PARQUET_EXPORT SortOrder::type GetSortOrder(
+ const std::shared_ptr<const LogicalType>& logical_type, Type::type primitive);
+
+} // namespace parquet
diff --git a/contrib/libs/apache/arrow/cpp/src/parquet/windows_compatibility.h b/contrib/libs/apache/arrow/cpp/src/parquet/windows_compatibility.h
index 6e5b6b330e6..31ca04c8b66 100644
--- a/contrib/libs/apache/arrow/cpp/src/parquet/windows_compatibility.h
+++ b/contrib/libs/apache/arrow/cpp/src/parquet/windows_compatibility.h
@@ -1,30 +1,30 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-#pragma once
-
-#include "arrow/util/windows_compatibility.h"
-
-#ifdef _WIN32
-
-// parquet.thrift's OPTIONAL RepetitionType conflicts with a #define from
-// above, so we undefine it
-#ifdef OPTIONAL
-#undef OPTIONAL
-#endif
-
-#endif
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+
+#include "arrow/util/windows_compatibility.h"
+
+#ifdef _WIN32
+
+// parquet.thrift's OPTIONAL RepetitionType conflicts with a #define from
+// above, so we undefine it
+#ifdef OPTIONAL
+#undef OPTIONAL
+#endif
+
+#endif